Pandas
|
mydataset = { 'cars': ["BMW", "Volvo", "Ford"], 'passings': [3, 7, 2] } myvar = pd.DataFrame(mydataset) print(myvar) |
cars passings 0 BMW 3 1 Volvo 7 2 Ford 2
|
df = pd.read_csv('data.csv') print(df) |
df = pd.read_csv(filepath_or_buffer="./testdata.csv",sep=",",dtype=str) |
PyPDF2
from PyPDF2 import PdfFileReader
|
def text_extractor(path): with open(path, 'rb') as f: pdf = PdfFileReader(f)
# get the second page (page indices are zero-based; use getPage(0) for the first page) page = pdf.getPage(1) print("page : \n ", page) print('\n Page type: {}'.format(str(type(page)))) text = page.extractText() print("page text: ", text)
|
if __name__ == '__main__': path = 'filename.pdf'
# enter file for input text_extractor(path)
|
icecream
|
output |
def foo(i): return i + 333 ic(foo(123))
|
ic| foo(123): 456 |
d = {'key': {1: 'one'}} ic(d['key'][1])
|
ic| d['key'][1]: 'one' |
class klass(): attr = 'yep' ic(klass.attr)
|
ic| klass.attr: 'yep' |
Extract Text From HTML Pages / Websites
conda install -c conda-forge readability-lxml
|
import requests from readability import Document response = requests.get('http://example.com') doc = Document(response.text) print(doc.title()) print(doc.summary())
|
Example Domain
<html><body><div><body id="readabilityBody">
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples in documents. You may use this
domain in literature without prior coordination or asking for permission.</p>
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>
</div></body></html>
Beautiful Soup (XML extractor)
Beautiful Soup is a Python library for pulling data out of HTML and XML files. |
conda install -c conda-forge beautifulsoup4
|
from bs4 import BeautifulSoup html_doc = str(doc.summary()) soup = BeautifulSoup(html_doc, 'html.parser') print(html_doc)
# prints with tags print(soup.prettify())
# formats string from html tags soup = BeautifulSoup(html_doc, features="lxml") print(soup.get_text())
|
# BeautifulSoup ( html_string, parser_type), lxml is a python module |
Keywords: assert try except
x = "goodbye" If the condition returns False, an AssertionError is raised: assert x == "hello" Or, to attach a message to the error: assert x == "hello", "x should be 'hello'"
|
try except block with AssertionError |
x = 'hello' try: assert x == 'goodbye' except: print("An exception occurred")
(so that the program continues running after the error) |
https://www.w3schools.com/python/ref_keyword_assert.asp
The assert keyword is used when debugging code. The assert keyword lets you test if a condition in your code returns True, if not, the program will raise an AssertionError.
Try / Except / Catch differences
try contains the code that may raise exceptions or errors.
except is used to catch the exceptions and handle them.
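Python has no catch keyword (that is the name used in languages such as Java or JavaScript); except fills that role, and its block is executed only when the corresponding exception is raised.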
|
|
One Line Loops
for i in range(10): print(i)
print([i**2 for i in range(10) if i%2==0])
for i in range(10): print(i**2 if i<5 else 0)
print("Text: ", [token.text for token in doc])
Formatting Strings with Variables
quantity = 3
itemno = 567
price = 49.95
myorder = "I will pay {2} dollars for {0} pieces of item {1}."
print(myorder.format(quantity, itemno, price))
Read Write Print Files
open file and read lines with formatting |
with open("textfile.txt", encoding = 'utf-8') as f: lines = f.readlines()
# as a list docs = list(nlp.pipe(lines))
|
(to spaCy docs) for text in docs:
#iterate each list item for firsts in text:
# iterate each word in list (sentence) if firsts.is_sent_start: print(firsts.text)
|
doc = [nlp(txt.strip()) for txt in lines] # removes escape sequences |
or |
f = open("Handbooklist.txt", "r") print(f.read())
|
f = open("demofile.txt", "r") print(f.readline())
# read first line |
f = open("demo.txt", 'r', encoding='utf8') print(f.read())
# ^ another way to strip |
f = open("demo.txt", "r") for x in f: print(x) # and/or links.append(x.strip())
# declare arr beforehand; links = [ ] |
f.close()
# best to close when done unless first option used |
Write Files ('w' overwrites the file; 'a' appends to the file) |
f = open("myfile.txt", "w") f.write(doclist) f.close()
|
List to New Text File: with open(r'newfile.txt', 'w') as fp: for text in doclist:
# write each item on a new line fp.write("%s\n" % text) print('Done')
|
Type Casting
Get Type from Variable name = "freeCodeCamp" print("The variable, name is of type:", type(name))
|
output: The variable, name is of type: <class 'str'> |
Specify/Convert Variable Type |
x = int(1)
# x will be 1 y = float("2.8")
# y will be 2.8 z = str(3)
# z will be "3" |
Regular Expression (or re / regex)
|
re.sub(pattern, repl, string, count=0, flags=0) result = re.sub('abc', '', input) result = re.sub('abc', 'def', input) result = re.sub(r'\s+', ' ', input) result = re.sub('abc(def)ghi', r'\1', input)
|
# Delete pattern abc # Replace pattern abc -> def # Eliminate duplicate whitespaces using wildcards # Replace a string with a part of itself
|
re.search(pattern, string, flags=0) re.search("c", "abcdef") re.search("^c", "abcdef") re.search('^X', 'A\nB\nX', re.MULTILINE)
|
# Match # No match (beginning of str) # Match; multiline (beg of each line)
|
re.match("c", "abcdef")
|
# No match # (.match for beginning of string) |
Dictionary
Create a dictionary |
thisdict = { "brand": "Ford", "model": "Mustang", "year": 1964, "year": 2020, "colors": ["red", "white", "blue"] }
|
print(thisdict["year"]) #print dictionary key for 'year' #(notice the overwritten output; duplicates are not allowed)
Output: |
2020 (the earlier value 1964 is overwritten by the duplicate key)
|
|
{'brand': 'Ford', 'electric': False, 'year': 1964, 'colors': ['red', 'white', 'blue']} |
Alternative way to create dictionary (Constructor) |
thisdict = dict(brand = "Ford", electric = False, year = 2020, colors = ["red", "white", "blue"])
|
Alternate retrievals of key values |
x = thisdict.get("model") print(x)
Output: Mustang |
or to get dictionary key names and/or values
|
x = thisdict.keys()
Output: dict_keys(['brand', 'electric', 'year', 'colors']) x = thisdict.values()
Output: dict_values(['Ford', False, 2020, ['red', 'white', 'blue']]) x = thisdict.items() #get all keys & values
|
Make changes to dictionary |
thisdict["colors"] = "red" print(x)
Output: dict_values(['Ford', 'Mustang', 2020, 'red']) or thisdict.update({"year": 2022})
|
check if key/entry exists: |
if "model" in thisdict: print("Yes, this key is in {thisdict} dictionary")
|
Remove Items |
thisdict.pop("model") #removes specified key item thisdict.popitem() #removes last inserted* item del thisdict["model"] #to remove key item or... del thisdict #to delete the entire dictionary
|
Loop/Iterations on Dictionaries |
for x in thisdict: print(thisdict[x]) #print each value in dict per line
for x in thisdict.values(): print(x) #print values of dictionary
for x in thisdict.keys(): print(x) #print keys of dictionary
for x, y in thisdict.items(): print(x, y) #loop through both keys and values
|
Nested Dictionary |
child1 = { "name" : "Emil", "year" : 2004 } child2 = { "name" : "Tobias", "year" : 2007 } child3 = { "name" : "Linus", "year" : 2011 } myfamily = { "child1" : child1, "child2" : child2, "child3" : child3 } print(myfamily["child2"]["name"])
Output: Tobias |
Other uses with Dictionary No. of components: len(thisdict) Confirm dict data type: type(thisdict) Make copy of dictionary: mydict = thisdict.copy()
|
|
clear() Removes all the elements
fromkeys() Returns dict w/ specified keys and value
get() Returns value of specified key
items() Returns a view of (key, value) tuples, one for each key value pair
keys() Returns a view of the dict keys
pop() Removes element of specified key
popitem() Removes last inserted key-value pair
setdefault() Returns value of specified key; if the key is missing, inserts it with the given default first
update() Updates dict w/ specified key-value pairs
values() Returns a view of all values in dict
|