\documentclass[10pt,a4paper]{article}

% NOTE: every statement sits on its own line on purpose. A `%' comment runs
% to the end of the physical line, so joining lines silently disables
% whatever code follows a comment.

% Packages
\usepackage{fancyhdr}        % For header and footer
\usepackage{multicol}        % Allows multicols in tables
\usepackage{tabularx}        % Intelligent column widths
\usepackage{tabulary}        % Used in header and footer
\usepackage{hhline}          % Border under tables
\usepackage{graphicx}        % For images
\usepackage{xcolor}          % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc}     % Without this we get weird character replacements
\usepackage{colortbl}        % For coloured tables
\usepackage{setspace}        % For line height
\usepackage{lastpage}        % Needed for total page number
\usepackage{seqsplit}        % Splits long words.
%\usepackage{opensans}       % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}  % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}         % Symbols
\usepackage{MnSymbol}        % Symbols
\usepackage{wasysym}         % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{rjyurk100}
\pdfinfo{
  /Title (exam-3.pdf)
  /Creator (Cheatography)
  /Author (rjyurk100)
  /Subject (exam 3 Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName
% and colours the current row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<span>}{<column spec>}{<content>}: a \multicolumn whose
% background is the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author/link next to it.
% No blank lines inside the argument: a blank line would insert a \par.
\fancyhead[L]{
  \noindent
  \begin{multicols}{3}
  \begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
      \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
  \end{tabulary}
  \columnbreak
  \begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries{\textcolor{DarkBackground}{\textrm{exam 3 Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{rjyurk100} via \textcolor{DarkBackground}{\uline{cheatography.com/208728/cs/45067/}}}
  \end{tabulary}
  \end{multicols}}
% Footer: three columns (author, sheet status and page count, sponsor).
\fancyfoot[L]{ \footnotesize
  \noindent
  \begin{multicols}{3}
  \begin{tabulary}{5.8cm}{LL}
    \SetRowColor{FootBackground}
    \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
    \vspace{-2pt}rjyurk100 \\
    \uline{cheatography.com/rjyurk100} \\
  \end{tabulary}
  \vfill
  \columnbreak
  \begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
    \vspace{-2pt}Not Yet Published.\\
    Updated 21st November, 2024.\\
    Page {\thepage} of \pageref{LastPage}.
  \end{tabulary}
  \vfill
  \columnbreak
  \begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
    \SetRowColor{white}
    \vspace{-5pt}
    %\includegraphics[width=48px,height=48px]{dave.jpeg}
    Measure your website readability!\\
    www.readability-score.com
  \end{tabulary}
  \end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{2}

% Block "14": defining and invoking functions.
% The `**' and `*' in this block are literal Python operators, not emphasis markers.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{14}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Define a function = \newline \newline
  def hello(): \newline \newline
  print( 'Hello World' ) \newline \newline
  Then invoke it with = hello() \newline \newline
  Define a function with a PARAMETER (argument) \newline \newline
  Def welcome(name): \newline \newline
  Print ( f ' Hello, \{name\} ' ) \newline \newline
  Invoke it with = welcome( 'Amy' ) \newline \newline
  Two required Parameters: \newline \newline
  Def welcome\_greeting(name, greeting\_text): \newline \newline
  print( f ' Hey, \{name\}. \{greeting\_text\} ') \newline \newline
  Invoke it with = welcome\_greeting( 'Liz' , 'How are you?' ) - these are known as KWARG \newline \newline
  Define a method to do a calculation \newline \newline
  Def exponent(base, exponent): \newline \newline
  Power = base ** exponent \newline \newline
  Return power \newline \newline
  Num1 = 2 \newline \newline
  Num2 = 3 \newline \newline
  Answer = exponent(num1, num2) \newline \newline
  print(answer) \newline \newline
  print( exponent(2,3)) \newline \newline
  Def sum\_of\_numbers( *parameters): \newline \newline
  Total = 0 \newline \newline
  For each\_number in parameters: \newline \newline
  Total = total + each\_number \newline \newline
  Return total \newline \newline
  Sum = sum\_of\_numbers(1,2,3,4,5,6) \newline \newline
  print(sum)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Block "16": reading a CSV and inspecting columns.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{16}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Df = pd.read\_csv(url) \newline \newline
  print(df.to\_string()) \newline \newline
  Null is null, a null is something = a special creation to indicate the absence of a value - it's a made-up value \newline \newline
  Df.shape = not a method does not need () \newline \newline
  Look at only one column = df{[}'School Name'{]} \newline \newline
  print( df{[} 'School Name' {]}.to\_string( )) \newline \newline
  Find the unique names= df{[}'School Name'{]}.unique() \newline \newline
  type(unique\_schools) shows the type.. This is not a data frame \newline \newline
  Statistics: \newline \newline
  df{[}'Starting Salary'{]}.max() or df{[}'Starting Salary'{]}.mean() or df{[}'Starting Salary'{]}.min() \newline \newline
  Find the NAs = df{[}'Starting Salary'{]}.isna() then to count the trues = na\_rows.sum() \newline \newline
  Based on a condition \newline \newline
  Df2 = df.query(" `Starting Salary` \textgreater{} 75000 ")} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Block "20": changing values and converting a column to numeric.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{20}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Change individual values \newline \newline
  Df.loc{[}20, 'Starting Salary'{]} = ' ' \newline \newline
  Convert to numeric type = df{[}'Starting Salary'{]} = pd.to\_numeric( df{[}'Starting Salary'{]} ) \newline \newline
  Df.loc{[}139, 'Starting Salary'{]} = 46000 \newline \newline
  \#Convert starting salary to numeric FORCE CONVERT or "COERCE" conversion \newline \newline
  Error\_columns = pd.to\_numeric( df{[}'Starting Salary'{]}, errors= 'coerce') \newline \newline
  print(error\_columns) \newline \newline
  \#find the NAs \newline \newline
  Nas = error\_columns.isna() \newline \newline
  print(Nas) \newline \newline
  Df{[}20:25{]} \newline \newline
  \#fix columns \newline \newline
  Df.loc{[}70, 'Starting Salary'{]} = 42600 \newline \newline
  df{[}Nas{]} \newline \newline
  Save it to the original by overwriting \newline \newline
  df{[}'Starting Salary'{]} = pd.to\_numeric(df{[}'Starting Salary'{]})} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Block "15": pandas series and data frames.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{15}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Import pandas as pd \newline \newline
  Data\_list = {[}45, 74, 78{]} \newline \newline
  Series\_of\_numbers = pd.series(data\_list) \newline \newline
  print(series\_of\_numbers{[}1{]}) \newline \newline
  Years = {[}2021, 2022, 2023{]} \newline \newline
  Create series with labels and use KWARG \newline \newline
  Series\_of\_numbers = \seqsplit{pd.series(data=data\_list}, index=years) \newline \newline
  print(series\_of\_numbers) \newline \newline
  Show me the value for 2021 \newline \newline
  print(series\_of\_numbers{[}2021{]}) \newline \newline
  Create a series with integrated data labels \newline \newline
  Grade\_distribution = \{'A' : 34, 'B' : 56\} \newline \newline
  Convert the dictionary \textasciicircum{} to a series = grade\_series = \seqsplit{pd.series(data=grade\_distribution)} \newline \newline
  print(grade\_series) or print(grade\_series{[}'A'{]}) \newline \newline
  2 dimensional data - in multiple lists \newline \newline
  Quiz\_scores = \{ \newline \newline
  'Quiz1' : {[}32, 56, 56{]} , \newline \newline
  'Quiz2' : {[}78, 34, 32{]}\} \newline \newline
  Df = \seqsplit{pd.DataFrame(data=quiz\_scores)} \newline \newline
  print(df) \newline \newline
  Overwrite the df like this: \newline \newline
  Df = \seqsplit{pd.DataFrame(data=quiz\_scores}, index={[}'Mike' , 'Susan', 'Amy'{]} \newline \newline
  df.head() = top 5 rows df.tail() = bottom 5 rows df{[}40:60{]} = select row} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Block "18": querying and dropping rows/columns.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{18}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Find all the schools with the name Pitt \newline \newline
  Df2 = df.query( " `School Name` == 'Pitt' ") \newline \newline
  df2.head() \newline \newline
  Remove a column: \seqsplit{df.drop(columns='Starting} Salary', inplace = True) \newline \newline
  Or df = \seqsplit{df.drop(columns='Starting} Salary') \newline \newline
  Drop a row df.drop(index=2, inplace = True) \newline \newline
  Delete entire row of data when one column had missing data df=df.dropna()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Block "19": descriptives, replacing/filling values, duplicates, sorting.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{19}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Load descriptives for the df = df.describe() \newline \newline
  Load tab-delimited file \newline \newline
  Df2 = pd.read\_csv(URL, sep='\textbackslash{}t' ) \newline \newline
  Replace function: \newline \newline
  Df{[} 'School Name'{]}.replace('-', ' -', regex=True, inplace=True) \newline \newline
  Fillnas = df{[}'Starting Salary'{]}.fillna(0, inplace=True) \newline \newline
  How many unique school names are there: \newline \newline
  len( df{[}'School Name'{]}.unique()) \newline \newline
  Show only the rows in which df are duplicate: \newline \newline
  Duplicates = df.duplicated(subset= 'School Name') \newline \newline
  Boolean series = df{[}duplicates{]} \newline \newline
  Df2 = \seqsplit{df.drop\_duplicates(subset='School} Name', keep='first') \newline \newline
  Find out schools with specified \newline \newline
  PA\_schools = df2{[}'School Name'{]}.str.contains('Pennsylvania') \newline \newline
  Use a boolean series df2{[}PA\_schools{]} \newline \newline
  Overwrite instead of inplace \newline \newline
  Df2 = \seqsplit{df2.sort\_values('Starting} Salary', ascending = False) \newline \newline
  Fix one bad value: \newline \newline
  Df2.loc{[}2, 'Starting Salary'{]} = df2{[}'Starting Salary'{]}.mean()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% NOTE(review): this second block "19" repeats the one directly above it
% word for word. It is kept as-is; confirm whether the repetition is intended.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{19}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{%
  Load descriptives for the df = df.describe() \newline \newline
  Load tab-delimited file \newline \newline
  Df2 = pd.read\_csv(URL, sep='\textbackslash{}t' ) \newline \newline
  Replace function: \newline \newline
  Df{[} 'School Name'{]}.replace('-', ' -', regex=True, inplace=True) \newline \newline
  Fillnas = df{[}'Starting Salary'{]}.fillna(0, inplace=True) \newline \newline
  How many unique school names are there: \newline \newline
  len( df{[}'School Name'{]}.unique()) \newline \newline
  Show only the rows in which df are duplicate: \newline \newline
  Duplicates = df.duplicated(subset= 'School Name') \newline \newline
  Boolean series = df{[}duplicates{]} \newline \newline
  Df2 = \seqsplit{df.drop\_duplicates(subset='School} Name', keep='first') \newline \newline
  Find out schools with specified \newline \newline
  PA\_schools = df2{[}'School Name'{]}.str.contains('Pennsylvania') \newline \newline
  Use a boolean series df2{[}PA\_schools{]} \newline \newline
  Overwrite instead of inplace \newline \newline
  Df2 = \seqsplit{df2.sort\_values('Starting} Salary', ascending = False) \newline \newline
  Fix one bad value: \newline \newline
  Df2.loc{[}2, 'Starting Salary'{]} = df2{[}'Starting Salary'{]}.mean()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
% The two closing commands below must stay on their own lines: placed after
% a `%' comment on the same line they would be commented out.
\end{multicols*}
\end{document}