\documentclass[10pt,a4paper]{article} % Packages \usepackage{fancyhdr} % For header and footer \usepackage{multicol} % Allows multicols in tables \usepackage{tabularx} % Intelligent column widths \usepackage{tabulary} % Used in header and footer \usepackage{hhline} % Border under tables \usepackage{graphicx} % For images \usepackage{xcolor} % For hex colours %\usepackage[utf8x]{inputenc} % For unicode character support \usepackage[T1]{fontenc} % Without this we get weird character replacements \usepackage{colortbl} % For coloured tables \usepackage{setspace} % For line height \usepackage{lastpage} % Needed for total page number \usepackage{seqsplit} % Splits long words. %\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely. \usepackage[normalem]{ulem} % For underlining links % Most of the following are not required for the majority % of cheat sheets but are needed for some symbol support. \usepackage{amsmath} % Symbols \usepackage{MnSymbol} % Symbols \usepackage{wasysym} % Symbols %\usepackage[english,german,french,spanish,italian]{babel} % Languages % Document Info \author{Cheto} \pdfinfo{ /Title (essential-pandas.pdf) /Creator (Cheatography) /Author (Cheto) /Subject (Essential Pandas Cheat Sheet) } % Lengths and widths \addtolength{\textwidth}{6cm} \addtolength{\textheight}{-1cm} \addtolength{\hoffset}{-3cm} \addtolength{\voffset}{-2cm} \setlength{\tabcolsep}{0.2cm} % Space between columns \setlength{\headsep}{-12pt} % Reduce space between header and content \setlength{\headheight}{85pt} % If less, LaTeX automatically increases it \renewcommand{\footrulewidth}{0pt} % Remove footer line \renewcommand{\headrulewidth}{0pt} % Remove header line \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit % This two commands together give roughly % the right line height in the tables \renewcommand{\arraystretch}{1.3} \onehalfspacing % Commands \newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{36A2C9} \definecolor{LightBackground}{HTML}{F2F9FB} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Essential Pandas Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{Cheto} via \textcolor{DarkBackground}{\uline{cheatography.com/162578/cs/34032/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}Cheto \\ \uline{cheatography.com/cheto} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Published 6th September, 2022.\\ Updated 6th September, 2022.\\ Page {\thepage} of \pageref{LastPage}. \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{2} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Loading Pandas}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Import Pandas Module with the alias {\emph{pd}}}} \{\{nl\}\} `import pandas as pd`} \tn % Row Count 2 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Creating Dataframes From Files}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{From a csv file}} \{\{nl\}\} `df = \seqsplit{pd.read\_csv('file.csv')`}} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{From a python dictionary}}\{\{nl\}\}`df = pd.DataFrame.from\_dict(\textless{}dict\textgreater{})`} \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Displaying Dataframe Info}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display first five rows in dataframe}}\{\{nl\}\}`df.head()`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display last five rows in dataframe}}\{\{nl\}\}`df.tail()`} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show all column names}}\{\{nl\}\}`df.columns`} \tn % Row Count 5 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show all object types in dataframe}}\{\{nl\}\}`df.dtypes`} \tn % Row Count 7 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show statistics for all int and float columns}}\{\{nl\}\}`df.describe()`} \tn % Row Count 9 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show statistics for 'object' type columns}}\{\{nl\}\}`df.describe(include='object')`} \tn % Row Count 11 (+ 2) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show number of rows and columns}}\{\{nl\}\}`df.shape`} \tn % Row Count 13 (+ 2) % Row 7 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display True for each NaN value, False otherwise}}\{\{nl\}\}`df.isnull()`} \tn % Row Count 15 (+ 2) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display a table with the number of NaN values for each column}}\{\{nl\}\}`df.isnull().sum()`} \tn % Row Count 17 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Updating}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Delete all rows containing {\emph{NaN}} values in the {\emph{df}} Dataframe}}\{\{nl\}\}` df.dropna(inplace=True)`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Delete {\emph{'col\_name'}} column}}\{\{nl\}\}` df.drop('col\_name', axis=1)`} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Example of a calculated column}}\{\{nl\}\}`df{[}'new\_col'{]} = df{[}'col\_1'{]} + df{[}'col\_2'{]}`} \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Update the entire column to value {\emph{\textless{}value\textgreater{}}}}}\{\{nl\}\}` df{[}'new\_col'{]} = \textless{}value\textgreater{}`} \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Update the cell at {\emph{(a,b)}} to {\emph{\textless{}value\textgreater{}}}}}\{\{nl\}\}` df.iloc{[}a,b{]} = \textless{}value\textgreater{}`} \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Update (or creates) {\emph{'col\_a'}} with the result of lambda function applied to {\emph{'col\_b'}}}}\{\{nl\}\}`df{[}'col\_a'{]} = df{[}'col\_b'{]}.apply(\textless{}lambda function\textgreater{})`} \tn % Row Count 13 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Filtering Columns}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display an entire column as a series}}\{\{nl\}\}`df{[}'column\_name'{]}`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display all columns in the given list}}\{\{nl\}\}`df{[}{[}'col\_1', 'col\_2', ... 'col\_n' {]}{]}`} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show all unique elements in 'column\_name'}}\{\{nl\}\}`df{[}'column\_name'{]}.unique()`} \tn % Row Count 6 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Filtering Rows}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display all rows satisfying \textless{}condition\textgreater{}}}\{\{nl\}\}`df{[}\textless{}condition\textgreater{}{]}`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Display all rows where `df{[}'col\_name'{]} == \textless{}value\textgreater{}`}}\{\{nl\}\}`df{[}df{[}'col\_name'{]} == \textless{}value\textgreater{}{]}`} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Show all rows satisfying both conditions}}\{\{nl\}\}`df{[}(\textless{}condition\_1\textgreater{}{]}) \& (\textless{}condition\_2\textgreater{}){]}`} \tn % Row Count 6 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Indexing with iloc}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Displays the entire row indexed n}}\{\{nl\}\}`df.iloc{[}n{]}`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Displays the element in row {\emph{n}} \& column {\emph{m}}}}\{\{nl\}\}`df.iloc{[}n, m{]}`} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Displays a slice of rows: from row {\emph{a}} to row {\emph{b}}}}\{\{nl\}\}`df.iloc{[}a:b{]}`} \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Displays rows {\emph{a}} to {\emph{b}} only in the columns {\emph{c}} to {\emph{d}}}}\{\{nl\}\}`df{[}a:b, c:d{]}`} \tn % Row Count 8 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Indexing with loc}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Shows all rows indexed with `\textless{}ind\textgreater{}`}}\{\{nl\}\}`df.loc{[}\textless{}ind\textgreater{}{]}`} \tn % Row Count 2 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Manipulating Dataframes}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Create a copy of the dataframe}}\{\{nl\}\}`new\_df = df.copy()`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Set {\emph{'column\_name'}} as the index}}\{\{nl\}\}`df.set\_index('column\_name', inplace=True)`} \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Delete / Output}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Output to {\emph{csv}} file}}\{\{nl\}\}`df.to\_csv('output.csv')`} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Output to {\emph{json}} file}}\{\{nl\}\}`df.to\_json()`} \tn % Row Count 3 (+ 1) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Output to {\emph{html}} file}}\{\{nl\}\}`df.to\_html()`} \tn % Row Count 4 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Delete a Dataframe}}\{\{nl\}\}`del df`} \tn % Row Count 5 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}