\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Multi-column page layout (multicols environment)
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
% \author stores the author name for \maketitle. No \maketitle call appears
% in this file, so the name is not printed by this command.
\author{rjyurk100}
% Entries for the PDF document-information dictionary: title, creating
% application, author and subject.
% NOTE(review): \pdfinfo is a pdfTeX primitive -- presumably this file is
% always built with pdflatex; confirm before switching engines.
\pdfinfo{
  /Title (exam-3.pdf)
  /Creator (Cheatography)
  /Author (rjyurk100)
  /Subject (exam 3 Cheat Sheet)
}

% Lengths and widths
% Horizontal: widen the text block by 6cm and move its left edge 3cm to the
% left, so the extra width is split evenly between both sides.
\addtolength{\hoffset}{-3cm}
\addtolength{\textwidth}{6cm}
% Vertical: move the page content up by 2cm and shorten the text block by 1cm.
\addtolength{\voffset}{-2cm}
\addtolength{\textheight}{-1cm}
% Header box: reserve 85pt (if this is too small it gets increased
% automatically) and use a negative separation to pull the body up
% towards the header.
\setlength{\headheight}{85pt}
\setlength{\headsep}{-12pt}
% No rule under the header and none above the footer.
\renewcommand{\headrulewidth}{0pt}
\renewcommand{\footrulewidth}{0pt}
% Table spacing: \tabcolsep is the space between columns. The row stretch
% and \onehalfspacing together give roughly the right line height in the
% tables.
\setlength{\tabcolsep}{0.2cm}
\renewcommand{\arraystretch}{1.3}
\onehalfspacing
% Break points that \seqsplit inserts between characters: a plain break
% in math mode, a discretionary hyphen in text.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi}

% Commands
% \SetRowColor{<colour>}: remember the colour name globally in \RowColorName
% and colour the current table row with it. Must be the first thing in a row.
\newcommand{\SetRowColor}[1]{%
  \noalign{\gdef\RowColorName{#1}}%
  \rowcolor{\RowColorName}%
}
% \mymulticolumn{<span>}{<column spec>}{<text>}: a \multicolumn cell that is
% filled with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{%
  \multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}%
}
% Column type x{<width>}: ragged-right paragraph column of the given width.
\newcolumntype{x}[1]{%
  >{\raggedright}p{#1}%
}
% Row terminator; needed because the x column type redefines \\ via \raggedright.
\newcommand{\tn}{\tabularnewline}

% Font and Colours
% Sans-serif is the default font family for the whole document.
\renewcommand{\familydefault}{\sfdefault}
% Palette, given as hex RGB values: body text first, then the backgrounds.
\definecolor{TextColor}{HTML}{333333}
\definecolor{LightBackground}{HTML}{F3F3F3}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{HeadBackground}{HTML}{333333}
% Make TextColor the default text colour.
\color{TextColor}

% Header and Footer
% Logo shown in the page header. The path is absolute, so the file only
% exists on a machine laid out like the original build server.
\newcommand*{\HeaderLogoFile}{/web/www.cheatography.com/public/images/cheatography_logo.pdf}
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Left header field: a three-column multicols block holding the logo table
% followed by the title/attribution table.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
% Logo cell. \IfFileExists makes the logo optional: when the file is missing
% the cell is left empty instead of aborting the build with a
% "file not found" error. When the file exists the output is unchanged.
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\IfFileExists{\HeaderLogoFile}{\includegraphics[width=5.8cm]{\HeaderLogoFile}}{}}
    }
\end{tabulary}
\columnbreak
% Title row and attribution row. \large and \normalsize are switches, not
% commands with an argument; the braces after them only form a group.
% \bfseries replaces the obsolete \bf.
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries\textcolor{DarkBackground}{\textrm{exam 3 Cheat Sheet}}} \\
    \normalsize{by \textcolor{DarkBackground}{rjyurk100} via \textcolor{DarkBackground}{\uline{cheatography.com/208728/cs/45067/}}}
\end{tabulary}
\end{multicols}}

% Left footer field, set in \footnotesize: a three-column multicols block
% with one small table per column (author, sheet status, sponsor).
% Each table starts with a heading row in white bold text on the
% FootBackground colour; \bfseries replaces the obsolete \bf there.
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
% Column 1: author name and profile link.
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}rjyurk100 \\
  \uline{cheatography.com/rjyurk100} \\
  \end{tabulary}
\vfill
\columnbreak
% Column 2: publication status, update date and "page X of Y"
% (the LastPage label comes from the lastpage package).
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}}  \\
   \vspace{-2pt}Not Yet Published.\\
   Updated 21st November, 2024.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
% Column 3: sponsor text; the sponsor image is commented out.
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Set the body font size (currently \footnotesize). Switch to any size
% command listed on this page to resize the cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Body font size.

\begin{multicols*}{2}

% Panel "14": defining and calling Python functions.
% Row 1 is the heading (white bold text on DarkBackground), row 2 the body
% (LightBackground), closed by a DarkBackground rule.
% The body previously contained a stray {\emph{...}} produced from the
% asterisks in "base ** exponent" and "*parameters"; the asterisks are
% restored as literal text. \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{14}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Define a function = \newline  \newline def hello(): \newline  \newline print( 'Hello World' ) \newline  \newline Then invoke it with = hello() \newline  \newline Define a function with a PARAMETER (argument) \newline  \newline Def welcome(name): \newline  \newline Print ( f ' Hello, \{name\} ' ) \newline  \newline Invoke it with = welcome( 'Amy' ) \newline  \newline Two required Parameters: \newline  \newline Def welcome\_greeting(name, greeting\_text): \newline  \newline print( f ' Hey, \{name\}. \{greeting\_text\} ') \newline  \newline Invoke it with = welcome\_greeting( 'Liz' , 'How are you?' ) - these are known as KWARG \newline  \newline Define a method to do a calculation \newline  \newline Def exponent(base, exponent): \newline  \newline Power = base ** exponent \newline  \newline Return power \newline  \newline Num1 = 2 \newline  \newline Num2 = 3 \newline  \newline Answer = exponent(num1, num2) \newline  \newline print(answer) \newline  \newline print( exponent(2,3)) \newline  \newline Def sum\_of\_numbers( *parameters): \newline  \newline Total = 0 \newline  \newline For each\_number in parameters: \newline  \newline Total = total + each\_number \newline  \newline Return total \newline  \newline Sum = sum\_of\_numbers(1,2,3,4,5,6) \newline  \newline print(sum)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Panel "16": loading a CSV into a pandas DataFrame and inspecting it
% (columns, unique values, simple statistics, missing values, query).
% Heading row on DarkBackground, body row on LightBackground, closing rule.
% \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{16}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Df = pd.read\_csv(url) \newline  \newline print(df.to\_string()) \newline  \newline Null is null, a null is something = a special creation to indicate the absence of a value - it's a made-up value \newline  \newline Df.shape = not a method does not need () \newline  \newline Look at only one column = df{[}'School Name'{]} \newline  \newline print( df{[} 'School Name' {]}.to\_string( )) \newline  \newline Find the unique names= df{[}'School Name'{]}.unique() \newline  \newline type(unique\_schools) shows the type.. This is not a data frame \newline  \newline Statistics: \newline  \newline df{[}'Starting Salary'{]}.max() or df{[}'Starting Salary'{]}.mean() or df{[}'Starting Salary'{]}.min() \newline  \newline Find the NAs = df{[}'Starting Salary'{]}.isna() then to count the trues = na\_rows.sum() \newline  \newline Based on a condition \newline  \newline Df2 = df.query(" `Starting Salary` \textgreater{} 75000 ")} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Panel "20": changing individual DataFrame values and converting a
% column to a numeric type (including errors='coerce').
% Heading row on DarkBackground, body row on LightBackground, closing rule.
% \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{20}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Change individual values \newline  \newline Df.loc{[}20, 'Starting Salary'{]} = ' ' \newline  \newline Convert to numeric type = df{[}'Starting Salary'{]} = pd.to\_numeric( df{[}'Starting Salary'{]} ) \newline  \newline Df.loc{[}139, 'Starting Salary'{]} = 46000 \newline  \newline \#Convert starting salary to numeric FORCE CONVERT or "COERCE" conversion \newline  \newline Error\_columns = pd.to\_numeric( df{[}'Starting Salary'{]}, errors= 'coerce') \newline  \newline print(error\_columns) \newline  \newline \#find the NAs \newline  \newline Nas = error\_columns.isna() \newline  \newline print(Nas) \newline  \newline Df{[}20:25{]} \newline  \newline \#fix columns \newline  \newline Df.loc{[}70, 'Starting Salary'{]} = 42600 \newline  \newline df{[}Nas{]} \newline  \newline Save it to the original by overwriting \newline  \newline df{[}'Starting Salary'{]} = pd.to\_numeric(df{[}'Starting Salary'{]})} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Panel "15": building pandas Series and DataFrames from lists and dicts.
% Heading row on DarkBackground, body row on LightBackground, closing rule.
% Content fixes: the constructor is spelled pd.Series (pandas names are
% case-sensitive), and the last pd.DataFrame(...) call gets its missing
% closing parenthesis. \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{15}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Import pandas as pd \newline  \newline Data\_list = {[}45, 74, 78{]} \newline  \newline Series\_of\_numbers = pd.Series(data\_list) \newline  \newline print(series\_of\_numbers{[}1{]}) \newline  \newline Years = {[}2021, 2022, 2023{]} \newline  \newline Create series with labels and use KWARG \newline  \newline Series\_of\_numbers = \seqsplit{pd.Series(data=data\_list}, index=years) \newline  \newline print(series\_of\_numbers) \newline  \newline Show me the value for 2021 \newline  \newline print(series\_of\_numbers{[}2021{]}) \newline  \newline Create a series with integrated data labels \newline  \newline Grade\_distribution = \{'A' : 34, 'B' : 56\} \newline  \newline Convert the dictionary \textasciicircum{} to a series = grade\_series = \seqsplit{pd.Series(data=grade\_distribution)} \newline  \newline print(grade\_series) or print(grade\_series{[}'A'{]}) \newline  \newline 2 dimensional data - in multiple lists \newline  \newline Quiz\_scores = \{ \newline  \newline 'Quiz1' : {[}32, 56, 56{]} , \newline  \newline 'Quiz2' : {[}78, 34, 32{]}\} \newline  \newline Df = \seqsplit{pd.DataFrame(data=quiz\_scores)} \newline  \newline print(df) \newline  \newline Overwrite the df like this: \newline  \newline Df = \seqsplit{pd.DataFrame(data=quiz\_scores}, index={[}'Mike' , 'Susan', 'Amy'{]}) \newline  \newline df.head() = top 5 rows df.tail() = bottom 5 rows df{[}40:60{]} = select row} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Panel "18": querying rows by value and dropping columns, rows and
% rows with missing data.
% Heading row on DarkBackground, body row on LightBackground, closing rule.
% \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{18}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Find all the schools with the name Pitt \newline  \newline Df2 = df.query( " `School Name` == 'Pitt' ") \newline  \newline df2.head() \newline  \newline Remove a column: \seqsplit{df.drop(columns='Starting} Salary', inplace = True) \newline  \newline Or df = \seqsplit{df.drop(columns='Starting} Salary') \newline  \newline Drop a row df.drop(index=2, inplace = True) \newline  \newline Delete entire row of data when one column had missing data df=df.dropna()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Panel "19": descriptive statistics, tab-separated input, replace/fillna,
% duplicates, string filters and sorting.
% Heading row on DarkBackground, body row on LightBackground, closing rule.
% Content fixes: "delimitted" -> "delimited", "instead on inplace" ->
% "instead of inplace". \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{19}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Load descriptives for the df = df.describe() \newline  \newline Load tab-delimited file \newline  \newline Df2 = pd.read\_csv(URL, sep='\textbackslash{}t' ) \newline  \newline Replace function: \newline  \newline Df{[} 'School Name'{]}.replace('-', ' -', regex=True, inplace=True) \newline  \newline Fillnas = df{[}'Starting Salary'{]}.fillna(0, inplace=True) \newline  \newline How many unique school names are there: \newline  \newline len( df{[}'School Name'{]}.unique()) \newline  \newline Show only the rows in which df are duplicate: \newline  \newline Duplicates = df.duplicated(subset= 'School Name') \newline  \newline Boolean series = df{[}duplicates{]} \newline  \newline Df2 = \seqsplit{df.drop\_duplicates(subset='School} Name', keep='first') \newline  \newline Find out schools with specified \newline  \newline PA\_schools = df2{[}'School Name'{]}.str.contains('Pennsylvania') \newline  \newline Use a boolean series df2{[}PA\_schools{]} \newline  \newline Overwrite instead of inplace \newline  \newline Df2 = \seqsplit{df2.sort\_values('Starting} Salary', ascending = False) \newline  \newline Fix one bad value: \newline  \newline Df2.loc{[}2, 'Starting Salary'{]} = df2{[}'Starting Salary'{]}.mean()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Second panel headed "19": descriptive statistics, tab-separated input,
% replace/fillna, duplicates, string filters and sorting.
% NOTE(review): in the original file this panel has the same heading and
% body text as the panel directly before it -- confirm whether the
% repetition is intended or whether other content was meant to go here.
% Content fixes: "delimitted" -> "delimited", "instead on inplace" ->
% "instead of inplace". \bfseries replaces the obsolete \bf.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{19}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Load descriptives for the df = df.describe() \newline  \newline Load tab-delimited file \newline  \newline Df2 = pd.read\_csv(URL, sep='\textbackslash{}t' ) \newline  \newline Replace function: \newline  \newline Df{[} 'School Name'{]}.replace('-', ' -', regex=True, inplace=True) \newline  \newline Fillnas = df{[}'Starting Salary'{]}.fillna(0, inplace=True) \newline  \newline How many unique school names are there: \newline  \newline len( df{[}'School Name'{]}.unique()) \newline  \newline Show only the rows in which df are duplicate: \newline  \newline Duplicates = df.duplicated(subset= 'School Name') \newline  \newline Boolean series = df{[}duplicates{]} \newline  \newline Df2 = \seqsplit{df.drop\_duplicates(subset='School} Name', keep='first') \newline  \newline Find out schools with specified \newline  \newline PA\_schools = df2{[}'School Name'{]}.str.contains('Pennsylvania') \newline  \newline Use a boolean series df2{[}PA\_schools{]} \newline  \newline Overwrite instead of inplace \newline  \newline Df2 = \seqsplit{df2.sort\_values('Starting} Salary', ascending = False) \newline  \newline Fix one bad value: \newline  \newline Df2.loc{[}2, 'Starting Salary'{]} = df2{[}'Starting Salary'{]}.mean()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}