\documentclass[10pt,a4paper]{article}

% Preamble: package loading, PDF metadata and page geometry.
% Each statement sits on its own line so that a trailing `%` comment
% only comments out its own remark and never a following command.

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{drykka01}
\pdfinfo{
  /Title (python-data-analysis.pdf)
  /Creator (Cheatography)
  /Author (drykka01)
  /Subject (Python (Data Analysis) Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% Helper macros used by every table in the sheet.
% \SetRowColor{<colour>}: remembers the colour name globally in \RowColorName
%   and applies it to the current row via \rowcolor.
% \mymulticolumn{<n>}{<colspec>}{<text>}: a \multicolumn whose cell is painted
%   with the colour last stored by \SetRowColor.
% x{<width>}: ragged-right paragraph column of the given width.
% \tn: shorthand for \tabularnewline.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{2E0C70}
\definecolor{LightBackground}{HTML}{F8F7FA}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author/link on the right.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
\SetRowColor{DarkBackground}
\vspace{-7pt}
{\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
\hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
}
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
\vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Python (Data Analysis) Cheat Sheet}}}} \\
\normalsize{by \textcolor{DarkBackground}{drykka01} via \textcolor{DarkBackground}{\uline{cheatography.com/36646/cs/11516/}}}
\end{tabulary}
\end{multicols}}
% Footer: three boxes (author, sheet status/page count, sponsor).
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
\SetRowColor{FootBackground}
\mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\
\vspace{-2pt}drykka01 \\
\uline{cheatography.com/drykka01} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
\SetRowColor{FootBackground}
\mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\
\vspace{-2pt}Not Yet Published.\\
Updated 17th July, 2017.\\
Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
\SetRowColor{FootBackground}
\mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\
\SetRowColor{white}
\vspace{-5pt}
%\includegraphics[width=48px,height=48px]{dave.jpeg}
Measure your website readability!\\
www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

% Body of the sheet: three unbalanced columns of boxed tables.
\begin{multicols*}{3}

% ---- Pandas Data Frame -------------------------------------------------
% Single-cell table; cell lines are separated with \newline. Source line
% breaks inside the cell argument are plain spaces to TeX.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Pandas Data Frame}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df\_1 = pd.DataFrame(\{'A': {[}0, 1, 2{]}, \newline
'B': {[}3, 4, 5{]}\}) \newline
df\_2 = pd.DataFrame({[}{[}0, 1, 2{]}, {[}3, 4, 5{]}{]}, \newline
columns={[}'A', 'B', 'C'{]}) \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
df{[}:5{]} {\bf{\#First 5 lines}} \newline
df.head() \newline
df{[}df.key==10{]} {\bf{\#Where key = 10}} \newline
df{[}df.key==10{]}{[}:5{]} {\bf{\#First 5 lines \& Where key=10}} \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
df\_3.iloc{[}0{]} \newline
df\_3.loc{[}'column-A'{]} \newline
df\_3{[}'row-A'{]} \newline
df\_3.iloc{[}1, 3{]} \newline
df\_3.iloc{[}1:4{]} \newline
df\_3{[}{[}'row-A', 'row-D'{]}{]} \newline
df.values.sum() \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
df\_1 = pd.DataFrame(\{'A': {[}0, 1, 2{]}, \newline
'B': {[}3, 4, 5{]}\}) \newline
\newline
df\_1.shift(axis=0) \newline
A B \newline
0 NaN NaN \newline
1 0.0 3.0 \newline
% (Pandas Data Frame cell, continued) last row of the shift(axis=0) output
2 1.0 4.0 \newline
\newline
% shift along axis 1 and its printed result
df\_1.shift(axis=1) \newline
A B \newline
0 NaN 0.0 \newline
1 NaN 1.0 \newline
2 NaN 2.0 \newline
\newline
{\bf{\# To replace NaN with value (eg. 0)}} \newline
df\_1.shift(axis=0).fillna(0) \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
% diff
{\bf{ \# To see the difference between columns/rows }} \newline
df\_1.diff(axis=0) \newline
df\_1.diff(axis=1) \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
% applymap example with its printed result
df = pd.DataFrame(\{ \newline
'a': {[}1, 2, 3{]}, \newline
'b': {[}10, 20, 30{]}, \newline
'c': {[}5, 10, 15{]} \newline
\}) \newline
def add\_one(x): \newline
return x + 1 \newline
df.applymap(add\_one) \newline
\newline
a b c \newline
0 2 11 6 \newline
1 3 21 11 \newline
2 4 31 16 \newline
\newline
% apply example
{\bf{ \# applymap is different from apply in Pandas DF}} \newline
{\bf{ \# apply does column by column (or row by row) }} \newline
{\bf{ \# instead of element by element }} \newline
def \seqsplit{standardize\_column(column):} \newline
return (column - \seqsplit{column.mean())/column.std() } \newline
df.apply(standardize\_column) \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
% sorting a single series
{\bf{ \# only works in a single series (column or rows) }} \newline
{\bf{ \# from entire DF use apply() }} \newline
df.iloc{[}:, 0{]}.sort\_values() \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
% subtracting the per-axis mean
{\bf{ \# columns vs rows operations, respectively }} \newline
df - df.mean(axis=0) \newline
df.sub(df.mean(axis=1),axis=0) \newline
% (Pandas Data Frame cell, continued) group by / merge
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
{\bf{ \# group by }} \newline
df.groupby('column').groups \newline
df.groupby({[}'column1', 'column2'{]}).groups \newline
df.groupby('column').sum() \newline
df.groupby('column').get\_group('value1') \newline
list(df.groupby('column')) \newline
df.groupby('c4'){[}{[}'c1','c2'{]}{]}.apply(func) \newline
\newline
http://wesmckinney.com/blog/groupby-fu-improvements-in-grouping-and-aggregating-data-in-pandas/ \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
{\bf{ \# merge (join) }} \newline
dfLeft.merge(dfRight, \newline
left\_on={[}'A','B','C'{]}, \newline
right\_on={[}'A','BB','CC'{]}, \newline
how='inner')} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Numpy Functions ---------------------------------------------------
% Two-column table with alternating row colours. The "% Row ..." markers are
% on their own lines so they do not comment out the rows that follow them.
% The string aliases follow the pandas convention: 'index' is axis 0 and
% 'columns' is axis 1, so each alias is listed under its numeric equivalent.
\begin{tabularx}{5.377cm}{x{1.94103 cm} x{3.03597 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Numpy Functions}} \tn
% Row 0
\SetRowColor{LightBackground}
a.max() & Max value \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
a.argmax() & Index of max value \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
a.mean(axis=0) & For 2D arrays (Column) \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{a.mean(axis='index')} \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
a.mean(axis=1) & For 2D arrays (Row) \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{a.mean(axis='columns')} \tn
% Row Count 6 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Important - Numpy Arrays ------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Important - Numpy Arrays}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{import
% (Important - Numpy Arrays cell, continued) a slice is modified and the
% original array is printed afterwards.
numpy as np \newline
\newline
a=np.array({[}1,2,3,4,5{]}) \newline
slice=a{[}:3{]} \newline
slice{[}0{]}=100 \newline
print a \newline
\newline
\{\{fa-television\}\} {[}100,2,3,4,5{]}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- RegEx -------------------------------------------------------------
% The snippet matches against date\_str, the same name it prints when the
% match fails (previously it matched against an unrelated name, str).
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{RegEx}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{pattern = "({[}0-9{]})\{2\}" \newline
prog = re.compile(pattern) \newline
match = re.match(prog, date\_str) \newline
\newline
if not match: \newline
print date\_str} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{http://regexr.com/ \newline
https://docs.python.org/3.6/library/re.html} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- PyMongo -----------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{PyMongo}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{from}} pymongo {\bf{import}} MongoClient \newline
client = \seqsplit{MongoClient("mongodb://localhost:27017")} \newline
db = client.examples \newline
\newline
{\bf{\#what do you want to find}} \newline
query = \{ "abc" : "xyz", "ghi": "mop" \} \newline
query = \{ "tuv" : \{ "\$gt" : 1, "\$lte" : 100 \} \} \newline
query = \{ "abc" : \{ "\$ne" : "xyz" \} \} \newline
{\bf{\#what information you want to see, removing id}} \newline
projection = \{ "\_id": 0, "name": 1 \} \newline
autos = db.autos.find(query, projection) \newline
{\bf{\#find}} \newline
db.autos.find().count() {\bf{\#count all}} \newline
{\bf{\#insert}} \newline
a = \{\} {\bf{\#dic}} \newline
db.autos.insert(a) \newline
{\bf{\#check if doc with field exists \& count}} \newline
db.cities.find(\{"abc":\{"\$exists":1\}\}).count() \newline
{\bf{\#pretty print}} \newline
db.cities.find().pretty() \newline
{\bf{\#regex}} \newline
% (PyMongo cell, continued) query operators, updates, removal, import
db.cities.find(\{"abc":\{"\$regex":"{[}0-9{]}"\}\}) \newline
{\bf{\#in - return docs with at least one of the values}} \newline
db.cities.find(\{"abc":\{"\$in":{[}1,2,3{]}\}\}) \newline
{\bf{\#all - return docs with all of the values}} \newline
db.cities.find(\{"abc":\{"\$all":{[}1,2,3{]}\}\}) \newline
{\bf{\#arrays inside arrays}} \newline
"n1" : \{ "n2" : \{ "n3" : {[}4{]} \} \} \newline
db.city.find( \{"n1.n2.n3" : 4\} ) \newline
{\bf{\#update V1}} \newline
doc = db.data.find\_one(\{"a":"b"\}) \newline
doc{[}"new"{]} = "value" \newline
db.data.save(doc) \newline
{\bf{\#update V2 (adding)}} \newline
doc = db.data.update(\{"a":"b"\},\{"\$set": \{"c":"d"\}\}) \newline
{\bf{\#update V3 (removing) \& multi lines}} \newline
doc = db.data.update(\{"a":"b"\}, \newline
\{"\$unset": \{"c":""\}\},multi=True) \newline
{\bf{\#remove}} \newline
db.data.remove(\{"a":"b"\}) \newline
{\bf{\textless{}command line\textgreater{}}} \newline
mongoimport -d examples -c myautos2 -{}-file autos.json} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
% Reference link row
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://docs.mongodb.com/manual/reference/operator/query/}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Load Data ---------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Load Data}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Read from csv file}} \newline
{\bf{import}} csv \newline
{\bf{def}} parse\_file(datafile): \newline
data = {[}{]} \newline
{\bf{with}} open(datafile,'rb') {\bf{as}} f: \newline
reader = csv.reader(f) \newline
{\bf{for}} row {\bf{in}} reader: \newline
data.append(row) {\bf{\# List of Lists}} \newline
\newline
{\bf{with}} open(input\_file, "r") {\bf{as}} f: \newline
reader = csv.DictReader(f) \newline
header = reader.fieldnames \newline
rows = list(reader) {\bf{\# Lists of Dic +Header}} \newline
\newline
{\bf{import}} unicodecsv \newline
% (Load Data cell, continued) unicodecsv and pandas readers
{\bf{with}} open('file.csv', 'rb') {\bf{as}} f: \newline
reader = unicodecsv.DictReader(f) \newline
file\_dic = list(reader) {\bf{\# Dic?}} \newline
\newline
{\bf{import}} pandas {\bf{as}} pd \newline
file\_df = pd.read\_csv('file.csv', index\_col=False, header=0); {\bf{\# Data Frame}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Data Types --------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Data Types}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Dictionary}} \newline
dictionary = \{\} \newline
key = 'abc' \newline
value = '123' \newline
dictionary{[}key{]} = value \newline
for key in dictionary: \newline
print(dictionary{[}key{]}) \newline
\{\{fa-television\}\} {\emph{123}} \newline
\newline
for key, value in dictionary.items(): \newline
print(key) \newline
print(value) \newline
\{\{fa-television\}\} {\emph{abc}} \newline
\{\{fa-television\}\} {\emph{123}} \newline
\newline
{\bf{Set}} \newline
uniq\_dataset = set() \newline
uniq\_dataset.add('A') \newline
uniq\_dataset.add('A') \newline
uniq\_dataset.add('B') \newline
print(uniq\_dataset) \newline
\{\{fa-television\}\} {\emph{\{'A', 'B'\}}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Pandas Series -----------------------------------------------------
% Title spelled "Pandas" to match the "Pandas Data Frame" box.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Pandas Series}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{import pandas as pd \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
s1 = pd.Series({[}1, 2, 3, 4{]}, \newline
index={[}'a', 'b', 'c', 'd'{]}) \newline
s2 = pd.Series({[}10, 20, 30, 40{]}, \newline
index={[}'b', 'd', 'a', 'c'{]}) \newline
print s1 + s2 \newline
\newline
a 31 \newline
b 12 \newline
c 43 \newline
d 24 \newline
dtype: int64 \newline
% (Series cell, continued) handling labels missing from one operand, apply()
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
{\emph{Obs: NaN if the index does not exist for both}} \newline
res = s1 + s2 \newline
res.dropna() \newline
{\emph{or}} \newline
s1.add(s2,fill\_value=0) \newline
-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}-{}- \newline
s = pd.Series({[}1, 2, 3, 4, 5{]}) \newline
def add\_three(x): \newline
return x + 3 \newline
print s.apply(add\_three) \newline
\newline
4 5 6 7 8} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Indexes and Slices ------------------------------------------------
% Two-column table (expression, result) for a=[0,1,2,3,4,5]. The "% Row ..."
% markers are on their own lines so they do not comment out the rows.
\begin{tabularx}{5.377cm}{x{1.44333 cm} x{3.53367 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Indexes and Slices}} \tn
% Row 0
\SetRowColor{LightBackground}
len(a) & 6 \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
a{[}0{]} & 0 \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
a{[}5{]} & 5 \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
a{[}-1{]} & 5 \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
a{[}-2{]} & 4 \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
a{[}1:{]} & {[}1,2,3,4,5{]} \tn
% Row Count 6 (+ 1)
% Row 6
\SetRowColor{LightBackground}
a{[}:5{]} & {[}0,1,2,3,4{]} \tn
% Row Count 7 (+ 1)
% Row 7
\SetRowColor{white}
a{[}:-2{]} & {[}0,1,2,3{]} \tn
% Row Count 8 (+ 1)
% Row 8
\SetRowColor{LightBackground}
a{[}1:3{]} & {[}1,2{]} \tn
% Row Count 9 (+ 1)
% Row 9
\SetRowColor{white}
a{[}1:-1{]} & {[}1,2,3,4{]} \tn
% Row Count 10 (+ 1)
% Row 10
\SetRowColor{LightBackground}
b=a{[}:{]} & Shallow copy of a \tn
% Row Count 11 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Indexes and Slices of a={[}0,1,2,3,4,5{]}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
% ---- XML ---------------------------------------------------------------
% Single-cell table: ElementTree parsing, a sample <au> element shown in
% emphasis, and a loop that builds a dict from it.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{XML}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{import}} xml.etree.ElementTree {\bf{as}} ET \newline
tree = \seqsplit{ET.parse('country\_data.xml')} \newline
root = tree.getroot() {\bf{\# First tag}} \newline
\newline
{\bf{for}} child {\bf{in}} root: \newline
{\bf{print}} child.tag, child.attrib \newline
\newline
{\bf{print}} root{[}0{]}{[}1{]}.text \newline
\newline
{\bf{for}} country {\bf{in}} root.findall('country'): \newline
rank = \seqsplit{country.find('rank').text} \newline
name = country.get('name') \newline
{\bf{print}} name, rank \newline
\newline
{\emph{\textless{}au id="A1" ca="yes"\textgreater{}}} \newline
{\emph{\textless{}snm\textgreater{}Mei-Dan\textless{}/snm\textgreater{}}} \newline
{\emph{\textless{}fnm\textgreater{}Omer\textless{}/fnm\textgreater{}}} \newline
{\emph{\textless{}insr iid="I1"/\textgreater{}}} \newline
{\emph{\textless{}insr iid="I2"/\textgreater{}}} \newline
{\emph{\textless{}email\textgreater{}omer@extremegate.com\textless{}/email\textgreater{}}} \newline
{\emph{\textless{}/au\textgreater{} }} \newline
\newline
{\bf{for}} author {\bf{in}} \seqsplit{root.findall('./fm/bibl/aug/au'):} \newline
insr = {[}{]} \newline
{\bf{for}} i {\bf{in}} \seqsplit{author.findall('./insr'):} \newline
\seqsplit{insr.append(i.get('iid'))} \newline
{\bf{\#insr.append(i.attrib{[}"iid"{]})}} \newline
\newline
data = \{ \newline
"fnm": \seqsplit{author.find('./fnm').text}, \newline
"snm": \seqsplit{author.find('./snm').text}, \newline
"email": \seqsplit{author.find('./email').text}, \newline
"insr": insr \newline
\}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
% Reference link row
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://docs.python.org/2/library/xml.etree.elementtree.html}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Opening of the next boxed table; its rows follow on the next source line.
\begin{tabularx}{5.377cm}{X}
% ---- HTML Requests (rows of the table opened just above) ---------------
% The snippet opens html\_page, the variable it defines two lines earlier
% (previously it opened an undefined name, page).
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{HTML Requests}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from bs4 import BeautifulSoup \newline
import requests \newline
\newline
html\_page = "page\_source.html" \newline
\newline
with open(html\_page) as fp: \newline
soup = BeautifulSoup(fp, "lxml") \newline
\newline
r = \seqsplit{requests.post("http://www.transtats.bts.gov/} \newline
Data\_Elements.aspx?Data=2", \newline
data=\{'AirportList': "BOS", \newline
'CarrierList': "VX", \newline
'Submit': 'Submit', \newline
"\_\_EVENTTARGET": "", \newline
"\_\_EVENTARGUMENT": "", \newline
"\_\_EVENTVALIDATION": \seqsplit{soup.find(id="\_\_EVENTVALIDATION")}, \newline
"\_\_VIEWSTATE": \seqsplit{soup.find(id="\_\_VIEWSTATE")} \newline
\}) \newline
\newline
print r.text} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://www.crummy.com/software/BeautifulSoup/bs4/doc/}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Write to CSV ------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Write to CSV}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{import}} csv \newline
\newline
{\bf{with}} open("output.csv", "wb") {\bf{as}} f: \newline
writer = csv.writer(f) \newline
\seqsplit{writer.writerows(your\_list\_of\_lists)}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Dates -------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Dates}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from datetime import datetime as dt \newline
\newline
date\_str = '2017-04-19' \newline
date\_dt = dt.strptime(date\_str, '\%Y-\%m-\%d') \newline
\newline
print(date\_dt) \newline
\{\{fa-television\}\} 2017-04-19 00:00:00 \newline
\newline
dt.strptime(date\_str,
% (Dates cell, continued) format string with time and UTC offset
\seqsplit{'\%Y-\%m-\%dT\%H:\%M:\%S\%z')} \newline
\newline
datetime(2000, 1, 1)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://docs.python.org/3/library/datetime.html\#strftime-and-strptime-behavior}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- If ----------------------------------------------------------------
% "~~~" produces the visible indentation of the print() lines.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{If}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{if}} x {\bf{not in}} dictionary {\bf{and}} y != z: \newline
~~~print() \newline
\newline
{\bf{if not}} x {\bf{or}} y \textgreater{} 7: \newline
~~~print() \newline
\newline
{\bf{if}} x \textless{}= y \textless{}= z: \newline
~~~print() \newline
\newline
{\bf{if}} "a" {\bf{in}} "abc": \newline
~~~print()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Loops -------------------------------------------------------------
% Index loop uses len(list): Python lists have no .size attribute.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Loops}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{for i in range(len(list)): \newline
\newline
for e in list: \newline
\newline
{\bf{\# Iterate through two lists in parallel}} \newline
{\bf{\# zip stops when the shorter of foo or bar stops}} \newline
for a,b in list(zip(foo,bar)):} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Lists - General ---------------------------------------------------
% Two-column table (expression, meaning). The "% Row ..." markers are on
% their own lines so they do not comment out the rows that follow them.
\begin{tabularx}{5.377cm}{x{2.83689 cm} x{2.14011 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Lists - General}} \tn
% Row 0
\SetRowColor{LightBackground}
list = {[}{]} & new list \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
list.append('shemp') & append at end \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
list.insert(0, 'xxx') & insert at index 0 \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
print list.index('curly') & 2 \tn
% Row Count 5 (+ 2)
% Row 4
\SetRowColor{LightBackground}
% (Lists - General table, continued) remaining rows; the "% Row ..." markers
% are on their own lines so they do not comment out the rows.
list.extend({[}'yyy', 'zzz'{]}) & list of elems at end \tn
% Row Count 7 (+ 2)
% Row 5
\SetRowColor{white}
list.remove('curly') & search and remove \tn
% Row Count 8 (+ 1)
% Row 6
\SetRowColor{LightBackground}
list.pop(1) & remove and return \tn
% Row Count 9 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- List of Dictionaries ----------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{List of Dictionaries}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{people = {[} \newline
\{'name': "Tom", 'age': 10\}, \newline
\{'name': "Mark", 'age': 5\}, \newline
\{'name': "Pam", 'age': 7\} \newline
{]} \newline
\newline
list(filter(lambda person: person{[}'name'{]} == 'Pam', people)) \newline
\{\{fa-television\}\} {[}\{'age': 7, 'name': 'Pam'\}{]} \newline
\newline
people{[}1{]}{[}'name'{]} \newline
\{\{fa-television\}\} Mark} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Dictionary of Lists -----------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Dictionary of Lists}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{from}} collections {\bf{import}} defaultdict \newline
\newline
s = {[}('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1){]} \newline
d = defaultdict(list) \newline
{\bf{for}} k, v {\bf{in}} s: \newline
d{[}k{]}.append(v) \newline
\newline
d.items() \newline
\{\{fa-television\}\} {[}('blue', {[}2, 4{]}), ('red', {[}1{]}), ('yellow', {[}1, 3{]}){]}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{The idea is to group the values by the keys} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Matplotlib --------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Matplotlib}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{data =
% (Matplotlib cell, continued) sample data, histogram call, how to display
{[}1, 2, 1, 3, 3, 1, 4, 2{]} \newline
\newline
import matplotlib.pyplot as plt \newline
plt.hist(data) \newline
\newline
{\bf{To show it:}} \newline
\%matplotlib inline {\emph{(for notebooks)}} \newline
or \newline
plt.show()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
% Reference link row
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://matplotlib.org/users/pyplot\_tutorial.html}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Statistics --------------------------------------------------------
% Correlation computed from standardised values, followed by a bold note.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Statistics}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\#Correlation}} \newline
std\_x = (x - x.mean()) / x.std(ddof=0) \newline
std\_y = (y - y.mean()) / y.std(ddof=0) \newline
correlation = (std\_x * std\_y).mean() \newline
{\bf{\#By default, Pandas' std() function computes the standard deviation using Bessel's correction. Calling std(ddof=0) ensures that Bessel's correction will not be used.
% (Statistics cell, continued) end of the bold note
NumPy's corrcoef() function can be used to calculate Pearson's r, also known as the correlation coefficient.}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Excel Files -------------------------------------------------------
% xlrd's cell_type/cell_value take (row, column), as the comprehension
% below does with (r, col); the inline hint is worded to match.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Excel Files}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{import}} xlrd \newline
datafile = "file.xls" \newline
\newline
workbook = \seqsplit{xlrd.open\_workbook(datafile)} \newline
sheet = \seqsplit{workbook.sheet\_by\_index(0)} \newline
\newline
sheet\_data = {[}{[}sheet.cell\_value(r, col) {\bf{for}} col {\bf{in}} range(sheet.ncols){]} {\bf{for}} r {\bf{in}} range(sheet.nrows){]} \newline
\newline
{\bf{\# (line, column)}} \newline
sheet.cell\_type(3, 2) \newline
sheet.cell\_value(3, 2) \newline
{\bf{\# (column, start\_line, end\_line)}} \newline
sheet.col\_values(3, start\_rowx=1, end\_rowx=4) \newline
\newline
sheet.ncols \newline
\newline
xlrd.xldate\_as\_tuple(cell\_in\_exceltime, 0)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Pretty Print ------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Pretty Print}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{import}} pprint \newline
pprint.pprint(stuff)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://docs.python.org/3/library/pprint.html}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Map, Filter and Reduce --------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Map, Filter and Reduce}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\#Map}} \newline
map(function\_to\_apply, list\_of\_inputs) \newline
map(float, list) {\bf{\#Convert all in list to float}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
% (Map, Filter and Reduce table, continued) reference link row
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{http://book.pythontips.com/en/latest/map\_filter.html}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- JSON requests (WS) ------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{JSON requests (WS)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{import}} json \newline
{\bf{import}} requests \newline
\newline
BASE\_URL = \seqsplit{"http://musicbrainz.org/ws/2/"} \newline
ARTIST\_URL = BASE\_URL + "artist/" \newline
\newline
params{[}"fmt"{]} = "json" \newline
params{[}"query"{]} = "artist:Avril" \newline
\newline
r = requests.get(url + uid, params) \newline
{\bf{print}} r.json()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{http://docs.python-requests.org/en/master/user/quickstart/}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Pkg ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Pkg}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{!pip install \textless{}link\textgreater{}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
% The closing commands are on their own lines so the remark above cannot
% comment them out.
\end{multicols*}

\end{document}