\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
\author{Justin1209 (Justin1209)}
\pdfinfo{
  /Title (pandas.pdf)
  /Creator (Cheatography)
  /Author (Justin1209 (Justin1209))
  /Subject (Pandas Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% This two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{5FD4CC}
\definecolor{LightBackground}{HTML}{EBF9F8}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Pandas Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Justin1209 (Justin1209)} via \textcolor{DarkBackground}{\uline{cheatography.com/101982/cs/21202/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}Justin1209 (Justin1209) \\
  \uline{cheatography.com/justin1209} \\
  \end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}}  \\
   \vspace{-2pt}Published 23rd November, 2019.\\
   Updated 31st January, 2020.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Import the Pandas Module}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{import}} {\emph{pandas}} {\bf{as}} {\emph{pd}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Create a DataFrame}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Method 1}} \newline df1 = pd.DataFrame(\{ \newline     {\emph{'name'}}: {[}'John Smith', 'Jane Doe'{]}, \newline     {\emph{'address'}}: {[}'13 Main St.', '46 Maple Ave.'{]}, \newline     {\emph{'age'}}: {[}34, 28{]} \newline \}) \newline  \newline {\bf{\# Method 2}} \newline df2 = pd.DataFrame({[} \newline     {[}'John Smith', '123 Main St.', 34{]}, \newline     {[}'Jane Doe', '456 Maple Ave.', 28{]}, \newline     {[}'Joe Schmo', '9 Broadway', 51{]} \newline     {]}, \newline     {\bf{columns}}={[}{\emph{'name', 'address', 'age'}}{]})} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Loading and Saving CSVs}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Load a CSV File in to a DataFrame}} \newline df = \seqsplit{pd.read\_csv('my-csv-file.csv')} \newline  \newline {\bf{\# Saving DataFrame to a CSV File}} \newline df.to\_csv('new-csv-file.csv') \newline  \newline {\bf{\# Load DataFrame in Chunks (For large Datasets)}} \newline \# Initialize reader object: urb\_pop\_reader \newline urb\_pop\_reader = \seqsplit{pd.read\_csv('ind\_pop\_data.csv'}, chunksize=1000) \newline  \newline \# Get the first DataFrame chunk: df\_urb\_pop \newline df\_urb\_pop = next(urb\_pop\_reader)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{0.9954 cm} x{3.9816 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Inspect a DataFrame}}  \tn
% Row 0
\SetRowColor{LightBackground}
\seqsplit{df.head(5)} & First 5 rows \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{df.info()} & Statistics of columns {\emph{(row count, null values, datatype)}} \tn 
% Row Count 4 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Reshape (for Scikit)}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{nums = np.array(range(1, 11)) \newline -\textgreater{} {[} 1 2 3 4 5 6 7 8 9 10{]} \newline  \newline nums = nums.reshape(-1, 1) \newline -\textgreater{} {[} {[}1{]}, \newline {[}2{]}, \newline {[}3{]}, \newline {[}4{]}, \newline {[}5{]}, \newline {[}6{]}, \newline {[}7{]}, \newline {[}8{]}, \newline {[}9{]}, \newline {[}10{]}{]}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{You can think of {\bf{reshape()}} as rotating this array. Rather than one big row of numbers, nums is now a big column of numbers - there's one number in each row.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Converting Datatypes}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\# {\emph{Convert argument to numeric type}} \newline % Row Count 1 (+ 1)
`pandas.to\_numeric(arg, errors="raise")` \newline % Row Count 2 (+ 1)
{\emph{errors:}} \newline % Row Count 3 (+ 1)
"raise" -\textgreater{} raise an exception \newline % Row Count 4 (+ 1)
"coerce" -\textgreater{} invalid parsing will be set as NaN% Row Count 5 (+ 1)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{DataFrame for Select Columns / Rows}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df = pd.DataFrame({[} \newline   {[}'January', 100, 100, 23, 100{]}, \newline   {[}'February', 51, 45, 145, 45{]}, \newline   {[}'March', 81, 96, 65, 96{]}, \newline   {[}'April', 80, 80, 54, 180{]}, \newline   {[}'May', 51, 54, 54, 154{]}, \newline   {[}'June', 112, 109, 79, 129{]}{]}, \newline  \newline   columns={[}'month', 'east', 'north', 'south', 'west'{]} \newline )} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Select Columns}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Select one Column}} \newline {\bf{clinic\_north = df.north}} \newline -{}-\textgreater{} Reshape values for Scikit learn: clinic\_north.{\bf{values.reshape(-1, 1)}} \newline  \newline {\bf{\# Select multiple Columns}} \newline {\bf{clinic\_north\_south}} = df{[}{[}'north', 'south'{]}{]}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Make sure that you have a {\emph{double set of brackets}} {\bf{{[}{[} {]}{]}}}, or this command won't work!}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Select Rows}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Select one Row}} \newline march = df.iloc{[}2{]} \newline  \newline  \newline {\bf{\# Select multiple Rows}} \newline jan\_feb\_march = df.iloc{[}:3{]} \newline feb\_march\_april = df.iloc{[}1:4{]} \newline may\_june = df.iloc{[}-2:{]} \newline  \newline  \newline {\bf{\# Select Rows with Logic}} \newline january = df{[}df.month {\bf{==}} 'January'{]} \newline {\bf{-\textgreater{} \textless{}, \textgreater{}, \textless{}=, \textgreater{}=, !=, ==}} \newline  \newline march\_april = df{[}(df.month == 'March') | (df.month == 'April'){]} \newline {\bf{-\textgreater{} \&, |}} \newline  \newline january\_february\_march = df{[}df{\bf{.month.isin}}({[}'January', 'February', 'March'{]}){]} \newline {\bf{-\textgreater{} column\_name.isin({[}" ", " "{]})}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Selecting a Subset of a Dataframe often results in {\bf{non-consecutive indices.}} \newline  \newline Using {\bf{.reset\_index()}} will create a {\emph{new DataFrame}} move the old indices into a new colum called {\emph{index}}.  \newline  \newline Use {\bf{.reset\_index(drop=True)}} if you dont need the {\emph{index}} column. \newline Use {\bf{.reset\_index(inplace=True)}} to prevent a {\emph{new DataFrame}} from brein created.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Adding a Column}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df = pd.DataFrame({[} \newline   {[}1, '3 inch screw', 0.5, 0.75{]}, \newline   {[}2, '2 inch nail', 0.10, 0.25{]}, \newline   {[}3, 'hammer', 3.00, 5.50{]}, \newline   {[}4, 'screwdriver', 2.50, 3.00{]} \newline {]}, \newline   columns={[}'Product ID', 'Description', 'Cost to Manufacture', 'Price'{]} \newline ) \newline  \newline {\bf{\# Add a Column with specified row-values}} \newline df{[}'Sold in Bulk?'{]} = {[}'Yes', 'Yes', 'No', 'No'{]} \newline  \newline {\bf{\# Add a Column with same value in every row}} \newline df{[}'Is taxed?'{]} = 'Yes' \newline  \newline {\bf{\# Add a Column with calculation}} \newline df{[}'Revenue'{]} = df{[}'Price'{]} - df{[}'Cost to Manufacture'{]}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Performing Column Operation}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df = pd.DataFrame({[} \newline   {[}'JOHN SMITH', 'john.smith@gmail.com'{]}, \newline   {[}'Jane Doe', 'jdoe@yahoo.com'{]}, \newline   {[}'joe schmo', 'joeschmo@hotmail.com'{]} \newline {]}, \newline columns={[}'Name', 'Email'{]}) \newline  \newline {\bf{\# Changing a column with an Operation}} \newline df{[}'Name'{]} = df.Name.{\bf{apply(lower)}} \newline {\emph{-\textgreater{} lower, upper}} \newline  \newline {\bf{\# Perform a lambda Operation on a Column}} \newline get\_last\_name = {\bf{lambda x: x.split(" "){[}-1{]}}} \newline df{[}'last\_name'{]} = df.Name.{\bf{apply(get\_last\_name)}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Performing a Operation on Multiple Columns}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df = pd.DataFrame({[} \newline   {[}"Apple", 1.00, "No"{]}, \newline   {[}"Milk", 4.20, "No"{]}, \newline   {[}"Paper Towels", 5.00, "Yes"{]}, \newline   {[}"Light Bulbs", 3.75, "Yes"{]}, \newline {]}, \newline   columns={[}"Item", "Price", "Is taxed?"{]}) \newline  \newline  \newline {\bf{\# Lambda Function}} \newline df{[}'Price with Tax'{]} = df.apply(lambda row: \newline      row{[}'Price'{]} * 1.075 \newline      if row{[}'Is taxed?'{]} == 'Yes' \newline      else row{[}'Price'{]}, \newline      {\bf{axis=1}} \newline )} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{We apply a {\bf{lambda to rows}}, as opposed to columns, when we want to perform functionality that needs to access more than one column at a time.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Rename Columns}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Method 1}} \newline df{\bf{.columns}} = {[}'NewName\_1', 'NewName\_2, 'NewName\_3', '...'{]} \newline  \newline {\bf{\# Method 2}} \newline df{\bf{.rename}}(columns=\{ \newline     'OldName\_1': 'NewName\_1', \newline     'OldName\_2': 'NewName\_2' \newline \}, {\bf{inplace=True}})} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Using {\bf{inplace=True}} lets us edit the original DataFrame.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Series vs. Dataframes}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Dataframe and Series}} \newline % Row Count 1 (+ 1)
\seqsplit{print(type(clinic\_north)):} \newline % Row Count 2 (+ 1)
{\emph{\#  \textless{}class 'pandas.core.series.Series'\textgreater{}}} \newline % Row Count 3 (+ 1)
print(type(df)): \newline % Row Count 4 (+ 1)
{\emph{\#  \textless{}class 'pandas.core.frame.DataFrame'\textgreater{}}} \newline % Row Count 5 (+ 1)
\seqsplit{print(type(clinic\_north\_south))} \newline % Row Count 6 (+ 1)
{\emph{\# \textless{}class 'pandas.core.frame.DataFrame'\textgreater{}}}% Row Count 7 (+ 1)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{In Pandas  \newline - a {\bf{series is a one-dimensional object}} that contains any type of data. \newline  \newline - a {\bf{dataframe is a two-dimensional object}} that can hold multiple columns of different types of data. \newline  \newline A {\emph{single column of a dataframe is a series}}, and a {\bf{dataframe is a container of two or more series objects}}.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.28942 cm} x{2.68758 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Column Statistics}}  \tn
% Row 0
\SetRowColor{LightBackground}
Mean = Average & df.{\emph{column}}.{\bf{mean()}} \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
Median & df.{\emph{column}}.{\bf{median()}} \tn 
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
Minimal Value & df.{\emph{column}}.{\bf{min()}} \tn 
% Row Count 5 (+ 1)
% Row 3
\SetRowColor{white}
Maximum Value & df.{\emph{column}}.{\bf{max()}} \tn 
% Row Count 6 (+ 1)
% Row 4
\SetRowColor{LightBackground}
Number of Values & df.{\emph{column}}.{\bf{count()}} \tn 
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
Unique Values & df.{\emph{column}}.{\bf{nunique()}} \tn 
% Row Count 10 (+ 2)
% Row 6
\SetRowColor{LightBackground}
Standard Deviation & df.{\emph{column}}.{\bf{std()}} \tn 
% Row Count 11 (+ 1)
% Row 7
\SetRowColor{white}
List of Unique Values & df.{\emph{column}}.{\bf{unique()}} \tn 
% Row Count 13 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{{\bf{Dont't forget reset\_index()}} at the end of a {\bf{groupby}} operation}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Calculating Aggregate Functions}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{\# Group By}} \newline grouped = df.{\bf{groupby({[}'col1', 'col2'{]})}}.col3 \newline .{\emph{measurement()}}.{\bf{reset\_index()}} \newline {\emph{\# -\textgreater{} group by column1 and column2, calculate values of column3}} \newline  \newline {\bf{\# Percentile}} \newline high\_earners = \seqsplit{df.groupby('category').wage} \newline     .{\bf{apply(lambda}} x: np.percentile(x, 75){\bf{)}} \newline     .{\bf{reset\_index()}} \newline {\emph{\# np.percentile can calculate any percentile over an array of values}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Don't forget reset.index()}}}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Pivot Tables}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{orders = \seqsplit{pd.read\_csv('orders.csv')} \newline  \newline shoe\_counts = orders. \newline groupby({[}'shoe\_type', 'shoe\_color'{]}). \newline id.count().reset\_index() \newline  \newline shoe\_counts\_pivot = shoe\_counts.pivot( \newline {\bf{index}} = 'shoe\_type', \newline {\bf{columns}} = 'shoe\_color', \newline {\bf{values}} = 'id').reset\_index()} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{We have to build a temporary table where we group by the columns we want to include in the pivot table}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Merge (Same Column Name)}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{sales = \seqsplit{pd.read\_csv('sales.csv')} \newline targets = \seqsplit{pd.read\_csv('targets.csv')} \newline men\_women = \seqsplit{pd.read\_csv('men\_women\_sales.csv')} \newline  \newline {\bf{\# Method 1}} \newline sales\_targets = pd{\bf{.merge}}(sales, targets, {\bf{how=}}" ") \newline {\emph{\# how: "inner"(default), "outer", "left", "right"}} \newline  \newline {\bf{\#Method 2 (Method Chaining)}} \newline all\_data = sales{\bf{.merge}}(targets){\bf{.merge}}(men\_women)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Inner Merge (Different Column Name)}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{orders = \seqsplit{pd.read\_csv('orders.csv')} \newline products = \seqsplit{pd.read\_csv('products.csv')} \newline  \newline {\bf{\# Method 1: Rename Columns}} \newline orders\_products = pd{\bf{.merge}}(orders, products{\bf{.rename}}(columns=\{'id':'product\_id'\}), {\bf{how=" "}}) \newline {\bf{.reset\_index()}} \newline {\emph{\# how: "inner"(default), "outer", "left", "right"}} \newline  \newline {\bf{\# Method 2: }} \newline orders\_products =  \newline pd{\bf{.merge}}(orders, products, \newline                 {\bf{left\_on}}="product\_id", \newline                 {\bf{right\_on}}="id", \newline                 {\bf{suffixes}}={[}"\_orders","\_products"{]})} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Method 2: \newline If we use this syntax, we'll end up with {\bf{two columns called id.}}  \newline Pandas won't let you have two columns with the same name, so it will change them to id\_x and id\_y. \newline We can help make them more useful by using the keyword {\bf{suffixes.}}}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Concatenate}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{bakery = \seqsplit{pd.read\_csv('bakery.csv')} \newline ice\_cream = \seqsplit{pd.read\_csv('ice\_cream.csv')} \newline  \newline menu = pd{\bf{.concat}}({[}bakery, ice\_cream{]})} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Melt}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`pandas.melt(DataFrame, id\_vars, value\_vars, var\_name, value\_name='value')` \newline % Row Count 2 (+ 2)
{\emph{id\_vars:}} Column(s) to use as identifier variables. \newline % Row Count 4 (+ 2)
{\emph{value\_vars:}} Column(s) to unpivot. If not specified, uses all columns that are not set as id\_vars. \newline % Row Count 6 (+ 2)
{\emph{var\_name:}} Name to use for the 'variable' column. \newline % Row Count 8 (+ 2)
{\emph{value\_name:}} Name to use for the 'value' column.% Row Count 10 (+ 2)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Assert Statements}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\# Test if country is of type object \newline {\bf{assert gapminder.country.dtypes == np.object}} \newline  \newline \# Test if year is of type int64 \newline {\bf{assert gapminder.year.dtypes == np.int64}} \newline  \newline \# Test if life\_expectancy is of type float64 \newline {\bf{assert \seqsplit{gapminder.life\_expectancy.dtypes} == np.float64}} \newline  \newline \# Assert that country does not contain any missing values \newline {\bf{assert \seqsplit{pd.notnull(gapminder.country)}.all()}} \newline  \newline \# Assert that year does not contain any missing values \newline {\bf{assert \seqsplit{pd.notnull(gapminder.year)}.all()}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}