\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
% \author stores the author name for the title block; \pdfinfo writes the
% Title/Creator/Author/Subject entries of the PDF document-information dictionary.
% NOTE(review): \pdfinfo is a pdfTeX primitive -- presumably this file is only
% ever built with pdflatex; confirm before switching engines.
\author{elhamsh}
\pdfinfo{
  /Title (data-science.pdf)
  /Creator (Cheatography)
  /Author (elhamsh)
  /Subject (Data Science Cheat Sheet)
}

% Lengths and widths
% The text block is widened by 6cm and shifted 3cm to the left, so the extra
% width is taken equally from both margins. It is also shortened by 1cm and
% moved up by 2cm.
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
% Break points inserted by \seqsplit: a plain break opportunity in math mode,
% a discretionary hyphen (\-) in text mode.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: store the colour name globally in \RowColorName
% (done inside \noalign so it may sit at the very start of a row), then
% colour the current row with it.
\newcommand{\SetRowColor}[1]{%
  \noalign{\gdef\RowColorName{#1}}%
  \rowcolor{\RowColorName}%
}
% \mymulticolumn{<n>}{<colspec>}{<text>}: a \multicolumn whose cell is
% filled with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{%
  \multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}%
}
% x{<width>}: paragraph column of the given width, set ragged-right.
\newcolumntype{x}[1]{>{\raggedright}p{#1}}
% \tn: row terminator; needed instead of \\ because of the ragged-right column type.
\newcommand{\tn}{\tabularnewline}

% Font and Colours
% Colours are given in the HTML (hex RGB) model.
% HeadBackground: defined here but not referenced elsewhere in this file.
\definecolor{HeadBackground}{HTML}{333333}
% FootBackground: title rows of the three footer boxes.
\definecolor{FootBackground}{HTML}{666666}
% TextColor: colour selected for the document text below.
\definecolor{TextColor}{HTML}{333333}
% DarkBackground: table title rows, table bottom rules and the header text.
\definecolor{DarkBackground}{HTML}{A3A3A3}
% LightBackground: shading of alternate table rows.
\definecolor{LightBackground}{HTML}{F3F3F3}
% Use the sans-serif family as the document default.
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Left header: the Cheatography logo in the first column, then the sheet
% title and the author/URL line in the second column of a multicols layout.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        % The logo sits at a server-absolute path. \includegraphics stops with
        % a fatal error when the file is missing, so include it only when it
        % exists; on other machines the header is typeset without the logo.
        \IfFileExists{/web/www.cheatography.com/public/images/cheatography_logo.pdf}%
            {\hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}%
            {}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    % \large and \normalsize are switches, not one-argument commands; they are
    % left unbraced so they keep acting on the whole cell as before.
    % \textbf replaces the obsolete \bf.
    \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Data Science Cheat Sheet}}} \\
    \normalsize by \textcolor{DarkBackground}{elhamsh} via \textcolor{DarkBackground}{\uline{cheatography.com/31327/cs/13764/}}
\end{tabulary}
\end{multicols}}

% Left footer, set in \footnotesize: three boxes in a three-column multicols
% layout -- author, sheet status with page counter, and sponsor. Each box is
% a tabulary whose title row is coloured FootBackground with white bold text.
% \bfseries replaces the obsolete \bf in the title rows.
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}elhamsh \\
  \uline{cheatography.com/elhamsh} \\
  \end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}}  \\
   \vspace{-2pt}Not Yet Published.\\
   Updated 20th December, 2017.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Set the body font size (currently \footnotesize). Switch to any size
% command listed on this page to resize the cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{x{1.94103 cm} x{3.03597 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Pandas}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{import pandas as pd} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
df.iloc{[}:5,:{]} & return slice of data:all columns first 5 rows \tn 
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
type(df) & DataFrame \tn 
% Row Count 4 (+ 1)
% Row 3
\SetRowColor{white}
df.shape & (len, \#ofcols) \tn 
% Row Count 5 (+ 1)
% Row 4
\SetRowColor{LightBackground}
df.columns & name of cols \tn 
% Row Count 6 (+ 1)
% Row 5
\SetRowColor{white}
df.index & return index column \tn 
% Row Count 7 (+ 1)
% Row 6
\SetRowColor{LightBackground}
df.head(3) & return first 3 rows \tn 
% Row Count 8 (+ 1)
% Row 7
\SetRowColor{white}
df.iloc{[}-5:,:{]} & return last 5 rows \tn 
% Row Count 9 (+ 1)
% Row 8
\SetRowColor{LightBackground}
df.tail() & return last 5 rows \tn 
% Row Count 10 (+ 1)
% Row 9
\SetRowColor{white}
df.info() & return index, column types, \# of row, \# of not null cols \tn 
% Row Count 13 (+ 3)
% Row 10
\SetRowColor{LightBackground}
type(df{[}'low'{]}) & Series \tn 
% Row Count 14 (+ 1)
% Row 11
\SetRowColor{white}
type(df{[}'low'{]}.values) & numpy.ndarray \tn 
% Row Count 16 (+ 2)
% Row 12
\SetRowColor{LightBackground}
np.log10(df{[}'low'{]}) & return a Series \tn
% Row Count 18 (+ 2)
% Row 13
\SetRowColor{white}
np.log10(df{[}'low'{]}.values) & return a numpy array \tn
% Row Count 20 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Each column in pandas is a Series. \newline You can run numpy on df or a col of df}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{1.84149 cm} x{3.13551 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Statistical Data Analysis}}  \tn
% Row 0
\SetRowColor{LightBackground}
df.describe() & count, mean,std,max, quartiles for each col of non-null rows \tn 
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
df{[}'low'{]}.count() & return \# of not null rows \tn 
% Row Count 5 (+ 2)
% Row 2
\SetRowColor{LightBackground}
df{[}cols{]}.count() & return a series \tn 
% Row Count 7 (+ 2)
% Row 3
\SetRowColor{white}
df{[}'low'{]}.mean() & return mean ignoring nulls \tn 
% Row Count 9 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.std()} \tn 
% Row Count 10 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df.median()} \tn 
% Row Count 11 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\seqsplit{df.quantile(q)} & q=.5:median q={[}.25,.75{]}:IQrange \tn 
% Row Count 13 (+ 2)
% Row 7
\SetRowColor{white}
df{[}'low'{]}.min() & alphabetic order for non-numerics \tn 
% Row Count 15 (+ 2)
% Row 8
\SetRowColor{LightBackground}
df{[}'low'{]}.max() & alphabetic order for non-numerics \tn 
% Row Count 17 (+ 2)
% Row 9
\SetRowColor{white}
\seqsplit{df.mean(axis='columns')} & mean of all columns for each row \tn 
% Row Count 19 (+ 2)
% Row 10
\SetRowColor{LightBackground}
df.low & df{[}'low'{]} \tn 
% Row Count 20 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Time series}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{index\_col='Date', parse\_dates=True} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
df.loc{[}'2015-2'{]} & return for all days \tn 
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
df.loc{[}'2015-2-20'{]} & return all rows with this date \tn 
% Row Count 4 (+ 2)
% Row 3
\SetRowColor{white}
df.loc{[}'2015-2-20': '2015-3'{]} & range \tn 
% Row Count 6 (+ 2)
% Row 4
\SetRowColor{LightBackground}
newD = \seqsplit{pd.to\_datetime('Date'} ) & y-m-d h:m:s \tn 
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
df.reindex(newD) & reindexing with matching dates. if doesn't match,new rows w. null value \tn 
% Row Count 12 (+ 4)
% Row 6
\SetRowColor{LightBackground}
df.reindex(newD,method='ffill') & fill empty values forward fill:value of previous rows \tn 
% Row Count 15 (+ 3)
% Row 7
\SetRowColor{white}
method='bfill' & backward fill: value of later rows \tn 
% Row Count 17 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\seqsplit{df.resample('D').mean()} & daily mean \tn 
% Row Count 19 (+ 2)
% Row 9
\SetRowColor{white}
'H', 'min', '2W' & hour, minute, 2 weeks \tn
% Row Count 22 (+ 3)
% Row 10
\SetRowColor{LightBackground}
'Y', 'Q', 'M', 'B' & year, quarter, month, business day \tn 
% Row Count 24 (+ 2)
% Row 11
\SetRowColor{white}
\seqsplit{df.resample('W').sum().max()} & max of weekly sum \tn 
% Row Count 26 (+ 2)
% Row 12
\SetRowColor{LightBackground}
\seqsplit{df.resample('4h').ffill()} & every 4 hours. fill nan w. previous values \tn
% Row Count 31 (+ 5)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Time series (cont)}}  \tn
% Row 13
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df1+df2} \tn 
% Row Count 1 (+ 1)
% Row 14
\SetRowColor{white}
df{[}'Temperature'{]}{[}'2010-august'{]} & select temp col of aug. \tn 
% Row Count 3 (+ 2)
% Row 15
\SetRowColor{LightBackground}
df{[}'Temperature'{]}{[}'2010-2'{]} & select temp col of feb. \tn 
% Row Count 5 (+ 2)
% Row 16
\SetRowColor{white}
\seqsplit{unsmooth.rolling(window=24).mean()} & moving average 24h \tn 
% Row Count 7 (+ 2)
% Row 17
\SetRowColor{LightBackground}
df{[}'type'{]}.str.upper() & return a column converted to uppercase \tn 
% Row Count 9 (+ 2)
% Row 18
\SetRowColor{white}
df{[}'product'{]}.str.contains('ware') & return boolean if substring 'ware' exists \tn 
% Row Count 12 (+ 3)
% Row 19
\SetRowColor{LightBackground}
True+True & 2 \tn 
% Row Count 13 (+ 1)
% Row 20
\SetRowColor{white}
False + False & 0 \tn 
% Row Count 14 (+ 1)
% Row 21
\SetRowColor{LightBackground}
df{[}'product'{]}.str.contains('ware').sum() & \# of rows contains substring 'ware' \tn 
% Row Count 16 (+ 2)
% Row 22
\SetRowColor{white}
df{[}'date'{]}.dt.hour & return hour of each row 0-23 \tn 
% Row Count 18 (+ 2)
% Row 23
\SetRowColor{LightBackground}
df{[}'date'{]}.dt.tz\_localize('US/Central') & set timezone \tn 
% Row Count 20 (+ 2)
% Row 24
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df{[}'date'{]}.dt.tz\_convert('US/Eastern')} \tn 
% Row Count 21 (+ 1)
% Row 25
\SetRowColor{LightBackground}
df{[}'date'{]}.resample('A').first() & yearly from the initial date in data (1960-12-31) \tn 
% Row Count 24 (+ 3)
% Row 26
\SetRowColor{white}
df{[}'date'{]}.resample('A').first().interpolate('linear') & replace nan with interpolation \tn 
% Row Count 27 (+ 3)
% Row 27
\SetRowColor{LightBackground}
\seqsplit{df.columns.str.strip()} & removes space from df.columns \tn 
% Row Count 29 (+ 2)
% Row 28
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df.set\_index('Date', inplace=True)} \tn 
% Row Count 30 (+ 1)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Time series (cont)}}  \tn
% Row 29
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{newD = \seqsplit{pd.to\_datetime('Date\_list'},  format='\%Y-\%m-\%d \%H:\%M')} \tn
% Row Count 2 (+ 2)
% Row 30
\SetRowColor{white}
\seqsplit{pd.Series(Columns\_list}, index=newD) & Construct a pandas Series \tn
% Row Count 4 (+ 2)
% Row 31
\SetRowColor{LightBackground}
ts2\_interp = \seqsplit{ts2.reindex(ts1.index).interpolate(how='linear')} & Reset the index of ts2 to ts1, and then use linear interpolation to fill in the NaNs: ts2\_interp \tn 
% Row Count 9 (+ 5)
% Row 32
\SetRowColor{white}
\seqsplit{timezone.dt.tz\_localize('US/Central')} & localize the local time to the 'US/Central' timezone \tn
% Row Count 12 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Build DF}}  \tn
% Row 0
\SetRowColor{LightBackground}
\seqsplit{df=pd.read\_csv("filepath"}, index\_col=0) & add index column 0-len(inp) \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{index\_col='nameofacolumn'}} \tn 
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
df.index={[}'A', 'B', ...{]} & assign index to df. len(index)==len(df) \tn 
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
pd.DataFrame(\{'id':{[}1,2,3{]}, 'gen':'M'\}) & key: columns, values: row \tn 
% Row Count 7 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{pd.DataFrame(dict\_of\_lists)}} \tn 
% Row Count 8 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{zipped=list(zip(list\_labels}, list\_values))} \tn 
% Row Count 9 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\seqsplit{pd.DataFrame(dict(zipped))} & list\_labels, list\_values = list of list \tn 
% Row Count 11 (+ 2)
% Row 7
\SetRowColor{white}
\seqsplit{pd.read\_csv("filepath"}, header=None) & no header \tn 
% Row Count 13 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\seqsplit{pd.read\_csv("filepath"}, options) & col\_n:list of column names \tn 
% Row Count 15 (+ 2)
% Row 9
\SetRowColor{white}
header=0, names=col\_n & rename  the header \tn
% Row Count 17 (+ 2)
% Row 10
\SetRowColor{LightBackground}
header=None, names=col\_n & no header in file \& header is col\_n \tn 
% Row Count 19 (+ 2)
% Row 11
\SetRowColor{white}
na\_values='-1' & convert specific value (-1) to a nan \tn 
% Row Count 21 (+ 2)
% Row 12
\SetRowColor{LightBackground}
na\_values=\{'colname':{[}'-1', ''{]}\} & define a dic for each col \tn 
% Row Count 23 (+ 2)
% Row 13
\SetRowColor{white}
parse\_dates={[}{[}0,1,2{]}{]} & convert 3 columns of date to one col \tn
% Row Count 25 (+ 2)
% Row 14
\SetRowColor{LightBackground}
parse\_dates=True & convert column with date to dateformat \tn 
% Row Count 27 (+ 2)
% Row 15
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{delimiter=' '} \tn 
% Row Count 28 (+ 1)
% Row 16
\SetRowColor{LightBackground}
header=3 & header is in index 3 \tn 
% Row Count 29 (+ 1)
% Row 17
\SetRowColor{white}
comment='\#' & ignore all lines start with '\#' in the input \tn 
% Row Count 32 (+ 3)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Build DF (cont)}}  \tn
% Row 18
\SetRowColor{LightBackground}
index\_col = 'dates' & set a column as index \tn 
% Row Count 2 (+ 2)
% Row 19
\SetRowColor{white}
df{[}cols{]} & take specific columns \tn 
% Row Count 4 (+ 2)
% Row 20
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.to\_csv('outputpath')} \tn 
% Row Count 5 (+ 1)
% Row 21
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{df.to\_excel('outputpath')}} \tn 
% Row Count 6 (+ 1)
% Row 22
\SetRowColor{LightBackground}
pd.DataFrame(\{'smoothed':smoothed, 'unsmoothed':unsmoothed\}) & create df.if they have index, will merge based on index \tn 
% Row Count 9 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{1.84149 cm} x{3.13551 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{categorical}}  \tn
% Row 0
\SetRowColor{LightBackground}
df{[}'type'{]}.describe() & count not null,\# of unique,top item,freq. of top \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
df{[}'type'{]}.unique() & unique items (.nunique(): \# of unique items) \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
df.loc{[}df{[}'type'{]}==x,:{]} & df{[}df{[}'type'{]}==x{]} \tn 
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
del df{[}'type'{]} & delete a column \tn
% Row Count 8 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{1.24425 cm} x{3.73275 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Numpy+Df}}  \tn
% Row 0
\SetRowColor{LightBackground}
df.values & Create array of DataFrame values \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
df{[}colname{]}=0 & create a column filled with zeros in df \tn
% Row Count 4 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Cleaning}}  \tn
% Row 0
\SetRowColor{LightBackground}
df\_dropped = \seqsplit{df.drop(list\_to\_drop}, axis='columns') & Remove the appropriate columns list\_to\_drop \tn 
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\seqsplit{df.set\_index(colname)} & Set colname as the index \tn 
% Row Count 5 (+ 2)
% Row 2
\SetRowColor{LightBackground}
pd.to\_numeric() & It converts a Series of values to floating-point values. Furthermore, by specifying the keyword argument errors='coerce', you can force strings like 'M' to be interpreted as NaN. \tn 
% Row Count 14 (+ 9)
% Row 3
\SetRowColor{white}
df.reset\_index(){[}colname{]} & Extract the colname column from df using .reset\_index() \tn 
% Row Count 17 (+ 3)
% Row 4
\SetRowColor{LightBackground}
df.loc{[}df{[}colname{]}=='sth'{]} & choose the rows in df where df{[}colname{]}=='sth' \tn
% Row Count 20 (+ 3)
% Row 5
\SetRowColor{white}
df.loc{[}df{[}colname{]}.str.contains('sth'){]} & choose the rows in df where the column df{[}colname{]} contains 'sth' \tn
% Row Count 24 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Plot}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{import matplotlib.pyplot as plt} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
plt.plot(df{[}'low'{]}.values) & x axis= index of value \tn 
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
plt.show() & show the image \tn 
% Row Count 4 (+ 1)
% Row 3
\SetRowColor{white}
plt.plot(df{[}'low'{]}) & x axis is index of df (eg date) \tn 
% Row Count 6 (+ 2)
% Row 4
\SetRowColor{LightBackground}
df{[}'low'{]}.plot() & plot series directly. has also x label \tn 
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
df.plot() & show all columns in df with legend \tn 
% Row Count 10 (+ 2)
% Row 6
\SetRowColor{LightBackground}
plt.yscale('log') & log scale on vertical axis \tn 
% Row Count 12 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df{[}'low'{]}.plot(color='b',style='.-', legend=True)} \tn 
% Row Count 13 (+ 1)
% Row 8
\SetRowColor{LightBackground}
plt.axis((minx, maxx,miny,maxy)) & zoom \tn 
% Row Count 15 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{plt.title('title')} \tn 
% Row Count 16 (+ 1)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{plt.ylabel('label')} \tn
% Row Count 17 (+ 1)
% Row 11
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{plt.xlabel('xlabel')} \tn 
% Row Count 18 (+ 1)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{plt.savefig('a.pdf')} \tn 
% Row Count 19 (+ 1)
% Row 13
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{plt.savefig('a.jpg')} \tn 
% Row Count 20 (+ 1)
% Row 14
\SetRowColor{LightBackground}
\seqsplit{df.plot(subplots=True)} & Draw each column in one subplot. \tn 
% Row Count 22 (+ 2)
% Row 15
\SetRowColor{white}
df.plot(x='colname',y='colname',kind='scatter') & plot 2 columns \tn 
% Row Count 25 (+ 3)
% Row 16
\SetRowColor{LightBackground}
kind = 'box' & box plot \tn 
% Row Count 26 (+ 1)
% Row 17
\SetRowColor{white}
kind = 'hist' & histogram \tn 
% Row Count 27 (+ 1)
% Row 18
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{kind='area'} \tn 
% Row Count 28 (+ 1)
% Row 19
\SetRowColor{white}
bins=30 & integer:\#of bins \tn 
% Row Count 29 (+ 1)
% Row 20
\SetRowColor{LightBackground}
range=(4,8) & tuple (min,max) \tn 
% Row Count 30 (+ 1)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Plot (cont)}}  \tn
% Row 21
\SetRowColor{LightBackground}
normed=True & boolean. normalize to one for hist \tn 
% Row Count 2 (+ 2)
% Row 22
\SetRowColor{white}
cumulative=True & boolean for hist \tn 
% Row Count 3 (+ 1)
% Row 23
\SetRowColor{LightBackground}
alpha=0.3 & visibility of several histograms \tn 
% Row Count 5 (+ 2)
% Row 24
\SetRowColor{white}
s=sizes & sizes= array of size of each circle in scatter plot \tn 
% Row Count 8 (+ 3)
% Row 25
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{fig, axes=plt.subplots(nrows=1,ncols=1)} \tn
% Row Count 9 (+ 1)
% Row 26
\SetRowColor{white}
df{[}'low'{]}.plot(ax=axes{[}0{]}, ...) & ...: kind, bins, normed,cumulative \tn 
% Row Count 11 (+ 2)
% Row 27
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.plot(y='colname',kind='box')} \tn 
% Row Count 12 (+ 1)
% Row 28
\SetRowColor{white}
style='k.-' & color,marker,line type \tn 
% Row Count 14 (+ 2)
% Row 29
\SetRowColor{LightBackground}
plt.clf() & clears the entire current figure with all its axes, but leaves the window opened, such that it may be reused for other plots \tn 
% Row Count 21 (+ 7)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.38896 cm} x{2.58804 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Indexing}}  \tn
% Row 0
\SetRowColor{LightBackground}
df{[}'colname'{]}{[}'rowname'{]} & rowname is index\_col \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df.colname{[}'rowname'{]}} \tn 
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.loc{[}'rowname','colname'{]}} \tn 
% Row Count 4 (+ 1)
% Row 3
\SetRowColor{white}
df.loc{[}'rowstart':'rowend',:{]} & row names are inclusive. \tn
% Row Count 6 (+ 2)
% Row 4
\SetRowColor{LightBackground}
df{[}{[}'low'{]}{]} & returns a single column data frame \tn 
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
df{[}'low'{]} & returns a series with index of df \tn 
% Row Count 10 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}