\documentclass[10pt,a4paper]{article}

% ---------------------------------------------------------------
% Packages (load order is kept exactly as generated)
% ---------------------------------------------------------------
\usepackage{fancyhdr}        % page header and footer
\usepackage{multicol}        % multicols environment (header, footer, body)
\usepackage{tabularx}        % tables set to a fixed total width
\usepackage{tabulary}        % tables used inside the header and footer
\usepackage{hhline}          % coloured rule under each table
\usepackage{graphicx}        % \includegraphics
\usepackage{xcolor}          % colours given as HTML hex values
%\usepackage[utf8x]{inputenc} % Unicode input support (disabled)
\usepackage[T1]{fontenc}     % avoids wrong glyph substitutions
\usepackage{colortbl}        % \rowcolor / \columncolor in tables
\usepackage{setspace}        % line-spacing commands (\onehalfspacing)
\usepackage{lastpage}        % LastPage label for "Page x of y"
\usepackage{seqsplit}        % lets long unbroken words split
%\usepackage{opensans}        % preferred font; disabled, did not work
\usepackage[normalem]{ulem}  % \uline for underlined links

% The next packages only supply extra symbols; most cheat sheets
% do not need them, some do.
\usepackage{amsmath}
\usepackage{MnSymbol}
\usepackage{wasysym}
%\usepackage[english,german,french,spanish,italian]{babel} % languages (disabled)

% ---------------------------------------------------------------
% Document metadata
% ---------------------------------------------------------------
\author{LasseBoe (Lasse1618)}
\pdfinfo{
  /Title (python-pandas.pdf)
  /Creator (Cheatography)
  /Author (LasseBoe (Lasse1618))
  /Subject (Python Pandas Cheat Sheet)
}

% ---------------------------------------------------------------
% Page geometry
% ---------------------------------------------------------------
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm}       % horizontal padding between table columns
\setlength{\headsep}{-12pt}         % pull the content up towards the header
\setlength{\headheight}{85pt}       % generator note: LaTeX enlarges it if set lower
\renewcommand{\footrulewidth}{0pt}  % no rule above the footer
\renewcommand{\headrulewidth}{0pt}  % no rule below the header
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % seqsplit: hyphenate in text, plain break in math

% These two settings together give roughly the right line height
% inside the tables.
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% ---------------------------------------------------------------
% Table helper macros
% ---------------------------------------------------------------
% \SetRowColor{<colour>}: stores the colour name globally in
% \RowColorName and colours the current row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}}
% \mymulticolumn{<n>}{<colspec>}{<text>}: a \multicolumn whose cell is
% filled with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}}
% x{<width>}: ragged-right paragraph column of the given width.
\newcolumntype{x}[1]{>{\raggedright}p{#1}}
% \tn: row terminator; required because the custom x column type is in use.
\newcommand{\tn}{\tabularnewline}
% \inlinecode{<text>}: inline code fragment. Replaces the Markdown-style
% backticks that were previously printed literally in the table cells.
\newcommand*{\inlinecode}[1]{\texttt{#1}}

% ---------------------------------------------------------------
% Font and colours
% ---------------------------------------------------------------
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{000987}
\definecolor{LightBackground}{HTML}{F7F7FB}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% ---------------------------------------------------------------
% Header and footer
% ---------------------------------------------------------------
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank

% Header: logo in the first column, title and author line in the second.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    % NOTE(review): absolute path taken from the generating server; point
    % it at a local copy of the logo to compile this file elsewhere.
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Python Pandas Cheat Sheet}}} \\
    \normalsize by \textcolor{DarkBackground}{LasseBoe (Lasse1618)} via \textcolor{DarkBackground}{\uline{cheatography.com/110079/cs/21581/}}
\end{tabulary}
\end{multicols}}

% Footer: three boxes (author, sheet status and page count, sponsor).
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}LasseBoe (Lasse1618) \\
  \uline{cheatography.com/lasse1618} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}}  \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 23rd November, 2020.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

% ---------------------------------------------------------------
% Document body: three-column sheet, one table per topic
% ---------------------------------------------------------------
\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% ----- Basics -----
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Basics}} \tn
% Row 0
\SetRowColor{LightBackground}
\%matplotlib inline & plots into notebook \tn
% Row 1
\SetRowColor{white}
df = pd.read\_csv({\emph{path}}, index\_col ='{\emph{name}}') & loads dataframe \tn
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.head()} \tn
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df.tail()} \tn
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.values} \tn
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df.plot()\newline df.plot(style='.')} \tn
% Row 6
\SetRowColor{LightBackground}
df.index & returns row indexes \tn
% Row 7
\SetRowColor{white}
df{[}{\emph{col}}{]}.loc{[}{\emph{index}}{]} & returns value with given column and index \tn
% Row 8
\SetRowColor{LightBackground}
df.loc{[}:, 'col\_n+1'{]} = x & referring to a col that doesn't exist creates a new one \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ----- Author's progress marker (table with a single empty row) -----
\begin{tabularx}{5.377cm}{p{0.4977 cm} p{0.4977 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{STOPPED FILLING AT LECTURE 6 LINE 122}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ----- Basic Dataframe Analysis -----
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Basic Dataframe Analysis}} \tn
% Row 0
\SetRowColor{LightBackground}
df.isnull() & returns bool \tn
% Row 1 (stray "[" removed, call parentheses added)
\SetRowColor{white}
df.isnull().sum() & returns sum of trues \tn
% Row 2
\SetRowColor{LightBackground}
df.isnull().any() & checks whether there is a true \tn
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\inlinecode{df{[}{\emph{col}}{]}.}max()\newline \inlinecode{df{[}{\emph{col}}{]}.}min()} \tn
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\inlinecode{df{[}{\emph{col}}{]}.}idxmax()\newline \inlinecode{df{[}{\emph{col}}{]}.}idxmin()} \tn
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\inlinecode{df{[}{\emph{col}}{]}.}median()} \tn
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\inlinecode{df{[}{\emph{col}}{]}.}mean()} \tn
% Row 7
\SetRowColor{white}
\inlinecode{df{[}{\emph{col}}{]}.}describe() & gives statistic analysis \tn
% Row 8
\SetRowColor{LightBackground}
\inlinecode{df{[}{\emph{col}}{]}.}quantile(.5) & 50\% quantile \tn
% Row 9
\SetRowColor{white}
df.boxplot(by = '{\emph{col}}') & boxplot grouped by column \tn
% Row 10
\SetRowColor{LightBackground}
df.hist(bins = 20) & histogram in 20 bars \tn
% Row 11
\SetRowColor{white}
df.plot.scatter(x = '{\emph{name}}', y = '{\emph{name}}') & scatterplot \tn
% Row 12
\SetRowColor{LightBackground}
\seqsplit{pd.plotting.scatter\_matrix(df)} & multiple scatterplots \tn
% Row 13
\SetRowColor{white}
\seqsplit{pd.plotting.parallel\_coordinates(df}, '{\emph{name}}') & lines drawn connecting dimensions of an entry \tn
% Row 14
\SetRowColor{LightBackground}
df{[}'col\_name'{]}.unique & returns list of singled entries \tn
% Row 15
\SetRowColor{white}
pd.get\_dummies(df, columns={[}'Name'{]}) & dummy column (0 or 1) that indicates whether the entry in another column is a certain entry \tn
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% ----- Basic Dataframe Analysis (continued in the next column) -----
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Basic Dataframe Analysis (cont)}} \tn
% Row 16 (Python boolean literal is False, not false)
\SetRowColor{LightBackground}
np.random.choice(n, x, replace=False) & selects random set \tn
% Row 17
\SetRowColor{white}
np.setdiff1d(set\_1, set\_2) & New set with only the differing entries \tn
% Row 18
\SetRowColor{LightBackground}
df.to\_numpy() & gives array of entries \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ----- Working with a Dataframe -----
\begin{tabularx}{5.377cm}{x{1.89126 cm} x{3.08574 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Working with a Dataframe}} \tn
% Row 0
\SetRowColor{LightBackground}
df{[}'{\emph{col1}}'{]} == x & bool if entry is x \tn
% Row 1
\SetRowColor{white}
df{[}df == x{]} = y & replace all values of a kind \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ----- Label-based indexing with .loc / .iloc -----
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Label-based indexing with .loc / .iloc}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{df.loc{[}{\emph{rowindex}}, {\emph{columnname}}{]}} \tn
% Row 1
\SetRowColor{white}
df.loc{[}3, {\emph{col1}}{]}\newline df.loc{[}3:6, {[}'{\emph{col1}}', '{\emph{col2}}'{]}{]} & 3rd entry of 1st column \tn
% Row 2
\SetRowColor{LightBackground}
df.loc{[}:, '{\emph{col1}}' {]} == '{\emph{name}}' & column with t/f whether entry in col1 is name \tn
% Row 3 (intentionally empty spacer row)
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{} \tn
% Row 4
\SetRowColor{LightBackground}
df.iloc{[}3:-1, 2:{]} & {[}rows, columns{]} \tn
% Row 5
\SetRowColor{white}
df.iloc{[}:, {[}3, 1{]}{]} & columns with index 3 \& 1 \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
% Footnote row of the table
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\inlinecode{.loc} is label based, \inlinecode{.iloc} is integer index based} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ----- Series -----
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Series}} \tn
% Row 0
\SetRowColor{LightBackground}
s1 = pd.Series({[}1, 2, 3{]}, index={[}'a', 'b', 'c'{]}) & creates a pandas series \tn
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{s1.add(s2, fill\_value=0)} \tn
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{s.isnull() ; s.notnull()} \tn
% Row 3
\SetRowColor{white}
s.dropna() & drops all rows with missing values \tn
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{s.fillna(x)} \tn
% Row 5
\SetRowColor{white}
s = pd.DataFrame(\{'Size':s1, 'Weight':s2\}) & Best way to define dataframe out of series:\newline Give dict out of columns \tn
% Row 6
\SetRowColor{LightBackground}
'e' in s1 & returns bool \tn
% Row 7
\SetRowColor{white}
s.name = 'str' & names series \tn
% Row 8
\SetRowColor{LightBackground}
s.index.name = 'str' & names index\newline If s doesn't exist, this creates a df \tn
% Row 9
% NOTE(review): "s.columns['Red', 'Green']" is kept as written; it was
% presumably meant as an assignment (s.columns = [...]) -- confirm.
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{s.columns{[}'Red', 'Green'{]}\newline s.columns.name = 'Color'} \tn
% Row 10 (bracket and parenthesis were transposed)
\SetRowColor{LightBackground}
s.reindex({[}'m', 'n', 'o'{]}, method = 'ffill') & ffill = forward fill \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}