% Data Science Cheat Sheet (pandas / matplotlib quick reference).
%
% Layout: A4 article, three columns (multicols*), one tabularx per topic.
% Every table row is introduced by \SetRowColor{<colour>} and terminated by
% \tn; full-width rows use \mymulticolumn so they keep the row colour.
% The trailing "% Row N" / "% Row Count N (+ k)" comments are kept from the
% generated source; they look like the generator's running line estimates
% and are not used by LaTeX.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Multi-column page layout (multicols environment)
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{elhamsh}
\pdfinfo{
  /Title (data-science.pdf)
  /Creator (Cheatography)
  /Author (elhamsh)
  /Subject (Data Science Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName
% (the \gdef sits inside \noalign so it is legal at the start of a row) and
% then colours the row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose column spec is
% prefixed with \columncolor{\RowColorName}, i.e. the colour last set by
% \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% NOTE(review): the logo below is referenced by an absolute path; it will
% only resolve on a machine that has this file at that location -- confirm
% or replace with a project-local path before building elsewhere.
\fancyhead[L]{
    \noindent
    \begin{multicols}{3}
    \begin{tabulary}{5.8cm}{C}
        \SetRowColor{DarkBackground}
        \vspace{-7pt}
        {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
            \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
        }
    \end{tabulary}
    \columnbreak
    \begin{tabulary}{11cm}{L}
        \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Data Science Cheat Sheet}}} \\
        \normalsize by \textcolor{DarkBackground}{elhamsh} via \textcolor{DarkBackground}{\uline{cheatography.com/31327/cs/13764/}}
    \end{tabulary}
    \end{multicols}}
\fancyfoot[L]{
    \footnotesize
    \noindent
    \begin{multicols}{3}
    \begin{tabulary}{5.8cm}{LL}
        \SetRowColor{FootBackground}
        \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
        \vspace{-2pt}elhamsh \\
        \uline{cheatography.com/elhamsh} \\
    \end{tabulary}
    \vfill
    \columnbreak
    \begin{tabulary}{5.8cm}{L}
        \SetRowColor{FootBackground}
        \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
        \vspace{-2pt}Not Yet Published.\\
        Updated 20th December, 2017.\\
        Page {\thepage} of \pageref{LastPage}.
    \end{tabulary}
    \vfill
    \columnbreak
    \begin{tabulary}{5.8cm}{L}
        \SetRowColor{FootBackground}
        \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
        \SetRowColor{white}
        \vspace{-5pt}
        %\includegraphics[width=48px,height=48px]{dave.jpeg}
        Measure your website readability!\\
        www.readability-score.com
    \end{tabulary}
    \end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}
% ---------------------------------------------------------------- Pandas
\begin{tabularx}{5.377cm}{x{1.94103 cm} x{3.03597 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Pandas}} \tn
    \SetRowColor{LightBackground} % Row 0
    \mymulticolumn{2}{x{5.377cm}}{import pandas as pd} \tn % Row Count 1 (+ 1)
    \SetRowColor{white} % Row 1
    df.iloc{[}:5,:{]} & return slice of data:all columns first 5 rows \tn % Row Count 3 (+ 2)
    \SetRowColor{LightBackground} % Row 2
    type(df) & DataFrame \tn % Row Count 4 (+ 1)
    \SetRowColor{white} % Row 3
    df.shape & (len, \#ofcols) \tn % Row Count 5 (+ 1)
    \SetRowColor{LightBackground} % Row 4
    df.columns & name of cols \tn % Row Count 6 (+ 1)
    \SetRowColor{white} % Row 5
    df.index & return index column \tn % Row Count 7 (+ 1)
    \SetRowColor{LightBackground} % Row 6
    df.head(3) & return first 3 rows \tn % Row Count 8 (+ 1)
    \SetRowColor{white} % Row 7
    df.iloc{[}-5:,:{]} & return last 5 rows \tn % Row Count 9 (+ 1)
    \SetRowColor{LightBackground} % Row 8
    df.tail() & return last 5 rows \tn % Row Count 10 (+ 1)
    \SetRowColor{white} % Row 9
    df.info() & return index, column types, \# of row, \# of not null cols \tn % Row Count 13 (+ 3)
    \SetRowColor{LightBackground} % Row 10
    type(df{[}'low'{]}) & Series \tn % Row Count 14 (+ 1)
    \SetRowColor{white} % Row 11
    type(df{[}'low'{]}.values) & numpy.ndarray \tn % Row Count 16 (+ 2)
    \SetRowColor{LightBackground} % Row 12
    np.log10(df{[}'low'{]}) & return Series \tn % Row Count 18 (+ 2)
    \SetRowColor{white} % Row 13
    np.log10(df{[}'low'{]}.values) & return numpy.ndarray \tn % Row Count 20 (+ 2)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
    \SetRowColor{LightBackground}
    \mymulticolumn{2}{x{5.377cm}}{Each column in pandas is a Series. \newline You can run numpy on df or a col of df} \tn
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------- Statistical Data Analysis
\begin{tabularx}{5.377cm}{x{1.84149 cm} x{3.13551 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Statistical Data Analysis}} \tn
    \SetRowColor{LightBackground} % Row 0
    df.describe() & count, mean,std,max, quartiles for each col of non-null rows \tn % Row Count 3 (+ 3)
    \SetRowColor{white} % Row 1
    df{[}'low'{]}.count() & return \# of not null rows \tn % Row Count 5 (+ 2)
    \SetRowColor{LightBackground} % Row 2
    df{[}cols{]}.count() & return a series \tn % Row Count 7 (+ 2)
    \SetRowColor{white} % Row 3
    df{[}'low'{]}.mean() & return mean ignoring nulls \tn % Row Count 9 (+ 2)
    \SetRowColor{LightBackground} % Row 4
    \mymulticolumn{2}{x{5.377cm}}{df.std()} \tn % Row Count 10 (+ 1)
    \SetRowColor{white} % Row 5
    \mymulticolumn{2}{x{5.377cm}}{df.median()} \tn % Row Count 11 (+ 1)
    \SetRowColor{LightBackground} % Row 6
    \seqsplit{df.quantile(q)} & q=.5:median q={[}.25,.75{]}:IQrange \tn % Row Count 13 (+ 2)
    \SetRowColor{white} % Row 7
    df{[}'low'{]}.min() & alphabetic order for non-numerics \tn % Row Count 15 (+ 2)
    \SetRowColor{LightBackground} % Row 8
    df{[}'low'{]}.max() & alphabetic order for non-numerics \tn % Row Count 17 (+ 2)
    \SetRowColor{white} % Row 9
    \seqsplit{df.mean(axis='columns')} & mean of all columns for each row \tn % Row Count 19 (+ 2)
    \SetRowColor{LightBackground} % Row 10
    df.low & df{[}'low'{]} \tn % Row Count 20 (+ 1)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ------------------------------------------------------------ Time series
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Time series}} \tn
    \SetRowColor{LightBackground} % Row 0
    \mymulticolumn{2}{x{5.377cm}}{index\_col='Date', parse\_dates=True} \tn % Row Count 1 (+ 1)
    \SetRowColor{white} % Row 1
    df.loc{[}'2015-2'{]} & return for all days \tn % Row Count 2 (+ 1)
    \SetRowColor{LightBackground} % Row 2
    df.loc{[}'2015-2-20'{]} & return all rows with this date \tn % Row Count 4 (+ 2)
    \SetRowColor{white} % Row 3
    df.loc{[}'2015-2-20': '2015-3'{]} & range \tn % Row Count 6 (+ 2)
    \SetRowColor{LightBackground} % Row 4
    newD = \seqsplit{pd.to\_datetime('Date'} ) & y-m-d h:m:s \tn % Row Count 8 (+ 2)
    \SetRowColor{white} % Row 5
    df.reindex(newD) & reindexing with matching dates. if doesn't match,new rows w. null value \tn % Row Count 12 (+ 4)
    \SetRowColor{LightBackground} % Row 6
    df.reindex(newD,method='ffill') & fill empty values forward fill:value of previous rows \tn % Row Count 15 (+ 3)
    \SetRowColor{white} % Row 7
    method='bfill' & backward fill: value of later rows \tn % Row Count 17 (+ 2)
    \SetRowColor{LightBackground} % Row 8
    \seqsplit{df.resample('D').mean()} & daily mean \tn % Row Count 19 (+ 2)
    \SetRowColor{white} % Row 9
    'H', 'min', '2W' & hour, minute, 2 weeks \tn % Row Count 22 (+ 3)
    \SetRowColor{LightBackground} % Row 10
    'Y', 'Q', 'M', 'B' & year, quarter, month, business day \tn % Row Count 24 (+ 2)
    \SetRowColor{white} % Row 11
    \seqsplit{df.resample('W').sum().max()} & max of weekly sum \tn % Row Count 26 (+ 2)
    \SetRowColor{LightBackground} % Row 12
    \seqsplit{df.resample('4h').ffill()} & every 4hours. fill nan w. previous values \tn % Row Count 31 (+ 5)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

% ----------------------------------------------------- Time series (cont)
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Time series (cont)}} \tn
    \SetRowColor{LightBackground} % Row 13
    \mymulticolumn{2}{x{5.377cm}}{df1+df2} \tn % Row Count 1 (+ 1)
    \SetRowColor{white} % Row 14
    df{[}'Temperature'{]}{[}'2010-august'{]} & select temp col of aug. \tn % Row Count 3 (+ 2)
    \SetRowColor{LightBackground} % Row 15
    df{[}'Temperature'{]}{[}'2010-2'{]} & select temp col of feb. \tn % Row Count 5 (+ 2)
    \SetRowColor{white} % Row 16
    \seqsplit{unsmooth.rolling(window=24).mean()} & moving average 24h \tn % Row Count 7 (+ 2)
    \SetRowColor{LightBackground} % Row 17
    df{[}'type'{]}.str.upper() & return a column converted to uppercase \tn % Row Count 9 (+ 2)
    \SetRowColor{white} % Row 18
    df{[}'product'{]}.str.contains('ware') & return boolean if substring 'ware' exists \tn % Row Count 12 (+ 3)
    \SetRowColor{LightBackground} % Row 19
    True+True & 2 \tn % Row Count 13 (+ 1)
    \SetRowColor{white} % Row 20
    False + False & 0 \tn % Row Count 14 (+ 1)
    \SetRowColor{LightBackground} % Row 21
    df{[}'product'{]}.str.contains('ware').sum() & \# of rows contains substring 'ware' \tn % Row Count 16 (+ 2)
    \SetRowColor{white} % Row 22
    df{[}'date'{]}.dt.hour & return hour of each row 0-23 \tn % Row Count 18 (+ 2)
    \SetRowColor{LightBackground} % Row 23
    df{[}'date'{]}.dt.tz\_localize('US/Central') & set timezone \tn % Row Count 20 (+ 2)
    \SetRowColor{white} % Row 24
    \mymulticolumn{2}{x{5.377cm}}{df{[}'date'{]}.dt.tz\_convert('US/Eastern')} \tn % Row Count 21 (+ 1)
    \SetRowColor{LightBackground} % Row 25
    df{[}'date'{]}.resample('A').first() & yearly from the initial date in data (1960-12-31) \tn % Row Count 24 (+ 3)
    \SetRowColor{white} % Row 26
    df{[}'date'{]}.resample('A').first().interpolate('linear') & replace nan with interpolation \tn % Row Count 27 (+ 3)
    \SetRowColor{LightBackground} % Row 27
    \seqsplit{df.columns.str.strip()} & removes space from df.columns \tn % Row Count 29 (+ 2)
    \SetRowColor{white} % Row 28
    \mymulticolumn{2}{x{5.377cm}}{df.set\_index('Date', inplace=True)} \tn % Row Count 30 (+ 1)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

% ----------------------------------------------------- Time series (cont)
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Time series (cont)}} \tn
    \SetRowColor{LightBackground} % Row 29
    % strftime codes: %m = month, %d = day, %M = minute.
    \mymulticolumn{2}{x{5.377cm}}{newD = \seqsplit{pd.to\_datetime('Date\_list'}, format='\%Y-\%m-\%d \%H:\%M')} \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 30
    \seqsplit{pd.Series(Columns\_list}, index=newD) & Construct a pandas Series c \tn % Row Count 4 (+ 2)
    \SetRowColor{LightBackground} % Row 31
    ts2\_interp = \seqsplit{ts2.reindex(ts1.index).interpolate(how='linear')} & Reset the index of ts2 to ts1, and then use linear interpolation to fill in the NaNs: ts2\_interp \tn % Row Count 9 (+ 5)
    \SetRowColor{white} % Row 32
    \seqsplit{timezone.dt.tz\_localize('US/Central')} & localize the local time timezone to 'US/Central' \tn % Row Count 12 (+ 3)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% --------------------------------------------------------------- Build DF
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Build DF}} \tn
    \SetRowColor{LightBackground} % Row 0
    \seqsplit{df=pd.read\_csv("filepath"}, index\_col=0) & add index column 0-len(inp) \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 1
    \mymulticolumn{2}{x{5.377cm}}{\seqsplit{index\_col='nameofacolumn'}} \tn % Row Count 3 (+ 1)
    \SetRowColor{LightBackground} % Row 2
    df.index={[}'A', 'B', ...{]} & assign index to df. len(index)==len(df) \tn % Row Count 5 (+ 2)
    \SetRowColor{white} % Row 3
    pd.DataFrame(\{'id':{[}1,2,3{]}, 'gen':'M'\}) & key: columns, values: row \tn % Row Count 7 (+ 2)
    \SetRowColor{LightBackground} % Row 4
    \mymulticolumn{2}{x{5.377cm}}{\seqsplit{pd.DataFrame(dict\_of\_lists)}} \tn % Row Count 8 (+ 1)
    \SetRowColor{white} % Row 5
    \mymulticolumn{2}{x{5.377cm}}{\seqsplit{zipped=list(zip(list\_labels}, list\_values))} \tn % Row Count 9 (+ 1)
    \SetRowColor{LightBackground} % Row 6
    \seqsplit{pd.DataFrame(dict(zipped))} & list\_labels, list\_values = list of list \tn % Row Count 11 (+ 2)
    \SetRowColor{white} % Row 7
    \seqsplit{pd.read\_csv("filepath"}, header=None) & no header \tn % Row Count 13 (+ 2)
    \SetRowColor{LightBackground} % Row 8
    \seqsplit{pd.read\_csv("filepath"}, options) & col\_n:list of column names \tn % Row Count 15 (+ 2)
    \SetRowColor{white} % Row 9
    header=0, names=col\_n & rename the header \tn % Row Count 17 (+ 2)
    \SetRowColor{LightBackground} % Row 10
    header=None, names=col\_n & no header in file \& header is col\_n \tn % Row Count 19 (+ 2)
    \SetRowColor{white} % Row 11
    na\_values='-1' & convert specific value (-1) to a nan \tn % Row Count 21 (+ 2)
    \SetRowColor{LightBackground} % Row 12
    % '{}' keeps the two apostrophes of the empty string from forming the '' ligature.
    na\_values=\{'colname':{[}'-1', '{}'{]}\} & define a dic for each col \tn % Row Count 23 (+ 2)
    \SetRowColor{white} % Row 13
    parse\_dates={[}{[}0,1,2{]}{]} & convert 3 columns of date to one col \tn % Row Count 25 (+ 2)
    \SetRowColor{LightBackground} % Row 14
    parse\_dates=True & convert column with date to dateformat \tn % Row Count 27 (+ 2)
    \SetRowColor{white} % Row 15
    \mymulticolumn{2}{x{5.377cm}}{delimiter=' '} \tn % Row Count 28 (+ 1)
    \SetRowColor{LightBackground} % Row 16
    header=3 & header is in index 3 \tn % Row Count 29 (+ 1)
    \SetRowColor{white} % Row 17
    comment='\#' & ignore all lines start with '\#' in the input \tn % Row Count 32 (+ 3)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

% -------------------------------------------------------- Build DF (cont)
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Build DF (cont)}} \tn
    \SetRowColor{LightBackground} % Row 18
    index\_col = 'dates' & set a column as index \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 19
    df{[}cols{]} & take specific columns \tn % Row Count 4 (+ 2)
    \SetRowColor{LightBackground} % Row 20
    \mymulticolumn{2}{x{5.377cm}}{df.to\_csv('outputpath')} \tn % Row Count 5 (+ 1)
    \SetRowColor{white} % Row 21
    \mymulticolumn{2}{x{5.377cm}}{\seqsplit{df.to\_excel('outputpath')}} \tn % Row Count 6 (+ 1)
    \SetRowColor{LightBackground} % Row 22
    pd.DataFrame(\{'smoothed':smoothed, 'unsmoothed':unsmoothed\}) & create df.if they have index, will merge based on index \tn % Row Count 9 (+ 3)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ------------------------------------------------------------ categorical
\begin{tabularx}{5.377cm}{x{1.84149 cm} x{3.13551 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{categorical}} \tn
    \SetRowColor{LightBackground} % Row 0
    df{[}'type'{]}.describe() & count not null,\# of unique,top item,freq. of top \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 1
    df{[}'type'{]}.unique() & \#of unique items \tn % Row Count 4 (+ 2)
    \SetRowColor{LightBackground} % Row 2
    df.loc{[}df{[}'type'{]}==x,:{]} & df{[}df{[}'type'{]}==x{]} \tn % Row Count 6 (+ 2)
    \SetRowColor{white} % Row 3
    del df{[}'type'{]} & delete a column \tn % Row Count 8 (+ 2)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% --------------------------------------------------------------- Numpy+Df
\begin{tabularx}{5.377cm}{x{1.24425 cm} x{3.73275 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Numpy+Df}} \tn
    \SetRowColor{LightBackground} % Row 0
    df.values & Create array of DataFrame values \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 1
    df{[}colname{]}=0 & create a columns with zero elements in df \tn % Row Count 4 (+ 2)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% --------------------------------------------------------------- Cleaning
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Cleaning}} \tn
    \SetRowColor{LightBackground} % Row 0
    df\_dropped = \seqsplit{df.drop(list\_to\_drop}, axis='columns') & Remove the appropriate columns list\_to\_drop \tn % Row Count 3 (+ 3)
    \SetRowColor{white} % Row 1
    \seqsplit{df.set\_index(colname)} & Set colname as the index \tn % Row Count 5 (+ 2)
    \SetRowColor{LightBackground} % Row 2
    pd.to\_numeric() & It converts a Series of values to floating-point values. Furthermore, by specifying the keyword argument errors='coerce', you can force strings like 'M' to be interpreted as NaN. \tn % Row Count 14 (+ 9)
    \SetRowColor{white} % Row 3
    df.reset\_index(){[}colname{]} & Extract the colname column from df using .reset\_index() \tn % Row Count 17 (+ 3)
    \SetRowColor{LightBackground} % Row 4
    df.loc{[}df{[}colname{]}=='sth'{]} & choose the rows in df for df{[}colname{]}='sth' \tn % Row Count 20 (+ 3)
    \SetRowColor{white} % Row 5
    df.loc{[}df{[}colname{]}.str.contains('sth'){]} & choose the rows in df where the column df{[}colname{]} contain 'sth' \tn % Row Count 24 (+ 4)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ------------------------------------------------------------------- Plot
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Plot}} \tn
    \SetRowColor{LightBackground} % Row 0
    \mymulticolumn{2}{x{5.377cm}}{import matplotlib.pyplot as plt} \tn % Row Count 1 (+ 1)
    \SetRowColor{white} % Row 1
    plt.plot(df{[}'low'{]}.values) & x axis= index of value \tn % Row Count 3 (+ 2)
    \SetRowColor{LightBackground} % Row 2
    plt.show() & show the image \tn % Row Count 4 (+ 1)
    \SetRowColor{white} % Row 3
    plt.plot(df{[}'low'{]}) & x axis is index of df (eg date) \tn % Row Count 6 (+ 2)
    \SetRowColor{LightBackground} % Row 4
    df{[}'low'{]}.plot() & plot series directly. has also x label \tn % Row Count 8 (+ 2)
    \SetRowColor{white} % Row 5
    df.plot() & show all columns in df with legend \tn % Row Count 10 (+ 2)
    \SetRowColor{LightBackground} % Row 6
    plt.yscale('log') & log scale on vertical axis \tn % Row Count 12 (+ 2)
    \SetRowColor{white} % Row 7
    \mymulticolumn{2}{x{5.377cm}}{df{[}'low'{]}.plot(color='b',style='.-', legend=True)} \tn % Row Count 13 (+ 1)
    \SetRowColor{LightBackground} % Row 8
    plt.axis((minx, maxx,miny,maxy)) & zoom \tn % Row Count 15 (+ 2)
    \SetRowColor{white} % Row 9
    \mymulticolumn{2}{x{5.377cm}}{plt.title('title')} \tn % Row Count 16 (+ 1)
    \SetRowColor{LightBackground} % Row 10
    \mymulticolumn{2}{x{5.377cm}}{plt.ylabel('label')} \tn % Row Count 17 (+ 1)
    \SetRowColor{white} % Row 11
    \mymulticolumn{2}{x{5.377cm}}{plt.xlabel('xlabel')} \tn % Row Count 18 (+ 1)
    \SetRowColor{LightBackground} % Row 12
    \mymulticolumn{2}{x{5.377cm}}{plt.savefig('a.pdf')} \tn % Row Count 19 (+ 1)
    \SetRowColor{white} % Row 13
    \mymulticolumn{2}{x{5.377cm}}{plt.savefig('a.jpg')} \tn % Row Count 20 (+ 1)
    \SetRowColor{LightBackground} % Row 14
    \seqsplit{df.plot(subplots=True)} & Draw each column in one subplot. \tn % Row Count 22 (+ 2)
    \SetRowColor{white} % Row 15
    df.plot(x='colname',y='colname',kind='scatter') & plot 2 columns \tn % Row Count 25 (+ 3)
    \SetRowColor{LightBackground} % Row 16
    kind = 'box' & box plot \tn % Row Count 26 (+ 1)
    \SetRowColor{white} % Row 17
    kind = 'hist' & histogram \tn % Row Count 27 (+ 1)
    \SetRowColor{LightBackground} % Row 18
    \mymulticolumn{2}{x{5.377cm}}{kind='area'} \tn % Row Count 28 (+ 1)
    \SetRowColor{white} % Row 19
    bins=30 & integer:\#of bins \tn % Row Count 29 (+ 1)
    \SetRowColor{LightBackground} % Row 20
    range=(4,8) & tuple (min,max) \tn % Row Count 30 (+ 1)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

% ------------------------------------------------------------ Plot (cont)
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Plot (cont)}} \tn
    \SetRowColor{LightBackground} % Row 21
    normed=True & boolean. normalize to one for hist \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 22
    cumulative=True & boolean for hist \tn % Row Count 3 (+ 1)
    \SetRowColor{LightBackground} % Row 23
    alpha=0.3 & visibility of several histograms \tn % Row Count 5 (+ 2)
    \SetRowColor{white} % Row 24
    s=sizes & sizes= array of size of each circle in scatter plot \tn % Row Count 8 (+ 3)
    \SetRowColor{LightBackground} % Row 25
    \mymulticolumn{2}{x{5.377cm}}{fig, axes=plt.subplots(nrows=1,ncols=1)} \tn % Row Count 9 (+ 1)
    \SetRowColor{white} % Row 26
    df{[}'low'{]}.plot(ax=axes{[}0{]}, ...) & ...: kind, bins, normed,cumulative \tn % Row Count 11 (+ 2)
    \SetRowColor{LightBackground} % Row 27
    \mymulticolumn{2}{x{5.377cm}}{df.plot(y='colname',kind='box')} \tn % Row Count 12 (+ 1)
    \SetRowColor{white} % Row 28
    style='k.-' & color,marker,line type \tn % Row Count 14 (+ 2)
    \SetRowColor{LightBackground} % Row 29
    plt.clf() & clears the entire current figure with all its axes, but leaves the window opened, such that it may be reused for other plots \tn % Row Count 21 (+ 7)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% --------------------------------------------------------------- Indexing
\begin{tabularx}{5.377cm}{x{2.38896 cm} x{2.58804 cm} }
    \SetRowColor{DarkBackground}
    \mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Indexing}} \tn
    \SetRowColor{LightBackground} % Row 0
    df{[}'colname'{]}{[}'rowname'{]} & rowname is index\_col \tn % Row Count 2 (+ 2)
    \SetRowColor{white} % Row 1
    \mymulticolumn{2}{x{5.377cm}}{df.colname{[}'rowname'{]}} \tn % Row Count 3 (+ 1)
    \SetRowColor{LightBackground} % Row 2
    \mymulticolumn{2}{x{5.377cm}}{df.loc{[}'rowname','colname'{]}} \tn % Row Count 4 (+ 1)
    \SetRowColor{white} % Row 3
    df.loc{[}'rownstart':'rownend',:{]} & row names are inclusive. \tn % Row Count 6 (+ 2)
    \SetRowColor{LightBackground} % Row 4
    df{[}{[}'low'{]}{]} & returns a single column data frame \tn % Row Count 8 (+ 2)
    \SetRowColor{white} % Row 5
    df{[}'low'{]} & returns a series with index of df \tn % Row Count 10 (+ 2)
    \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}