Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{3} \begin{tabularx}{5.377cm}{x{1.09494 cm} x{3.88206 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Phases of Data Analysis}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Ask}} & Define the problem you are trying to solve. \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} {\bf{Prepare}} & What data do I need to solve this problem? Do I have access to obtain it? \tn % Row Count 5 (+ 3) % Row 2 \SetRowColor{LightBackground} {\bf{Process}} & Clean the data of errors and inaccuracies. \tn % Row Count 7 (+ 2) % Row 3 \SetRowColor{white} {\bf{Analyze}} & Perform calculations to tell a data story. Exploratory Analysis, Statistical modelling \tn % Row Count 10 (+ 3) % Row 4 \SetRowColor{LightBackground} {\bf{Share}} & Clear visuals of the data and solution. This includes the reproducible code. \tn % Row Count 13 (+ 3) % Row 5 \SetRowColor{white} {\bf{Act}} & Provide recommendations based on data. 
\tn % Row Count 15 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.23965 cm} x{2.73735 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{File Manipulation}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Get Working Directory}} & getwd() \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} {\bf{Set Working Directory}} & setwd() \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} {\bf{See Directory Contents}} & dir() \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} {\bf{Create Folder}} & dir.create("tFolder") \tn % Row Count 7 (+ 1) % Row 4 \SetRowColor{LightBackground} {\bf{Create File}} & \seqsplit{file.create("test.csv")} \tn % Row Count 9 (+ 2) % Row 5 \SetRowColor{white} {\bf{Copy File}} & file.copy("test.csv", "tFolder") \tn % Row Count 11 (+ 2) % Row 6 \SetRowColor{LightBackground} {\bf{Edit File}} & file.edit("test.R") \tn % Row Count 12 (+ 1) % Row 7 \SetRowColor{white} {\bf{Delete File}} & unlink("test.csv") \tn % Row Count 13 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.53367 cm} x{1.44333 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Structure \& Dimensions}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Structure}} & str(data) \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} {\bf{Get \# of Rows \& Columns}} & dim(data) \tn % Row Count 2 (+ 1) % Row 2 \SetRowColor{LightBackground} {\bf{Return \# of Rows}} & nrow(data) \tn % Row Count 3 (+ 1) % Row 3 \SetRowColor{white} {\bf{Return \# of Cols}} & ncol(data) \tn % Row Count 4 (+ 1) % Row 4 \SetRowColor{LightBackground} {\bf{Return 1st 6 Rows}} & head(data) \tn % Row Count 5 (+ 1) % Row 5 \SetRowColor{white} {\bf{Get Class Type}} & \seqsplit{class(data)} \tn % Row Count 6 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} 
\begin{tabularx}{5.377cm}{x{1.89126 cm} x{3.08574 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Data}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Web Scraping}} & con = \seqsplit{url("http://google.com")} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} & htmlCode = readLines(con) \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} & close(con) \tn % Row Count 5 (+ 1) % Row 3 \SetRowColor{white} {\bf{Remote File}} & fileUrl \textless{}- \seqsplit{"https://website.com/data.csv"} \tn % Row Count 7 (+ 2) % Row 4 \SetRowColor{LightBackground} & download.file(fileUrl, destfile = "./myData.csv", method = "curl") \tn % Row Count 10 (+ 3) % Row 5 \SetRowColor{white} {\bf{Import Data as Table}} & inData \textless{}- read.table("data.csv", sep = " ", header = TRUE) \tn % Row Count 13 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Applying Functions}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Apply a function over an array}} & {\bf{apply}}(data,Margin,Function) \#1=Rows 2=Cols \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} {\bf{Apply a function to each element of list, vector, or DF and return a list}} & {\bf{lapply}}(data, Function) \tn % Row Count 7 (+ 4) % Row 2 \SetRowColor{LightBackground} {\bf{Same as lapply, but returns a vector instead}} & {\bf{sapply}}(data, Function) \tn % Row Count 10 (+ 3) % Row 3 \SetRowColor{white} {\bf{Apply a function to a subset specified by the FactorList}} & {\bf{tapply}}(vector, factorList, Function) \tn % Row Count 13 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{1.9908 cm} x{2.9862 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Clean \& Test Data}} \tn % Row 0 
\SetRowColor{LightBackground} {\bf{Check for NAs}} & colSums(is.na(data)) \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} {\bf{Logical NA Test}} & \seqsplit{all(colSums(is.na(data))} == 0) \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} {\bf{Trim Whitespace}} & trimws(charVector) \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} {\bf{Verify Data Type}} & class(data) {\bf{or}} str(data) \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} {\bf{Find Specific}} & test{[}test\$someCol \%in\% \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} & c("abcdefg", "hello"),{]} \tn % Row Count 11 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.18988 cm} x{2.78712 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{String Manipulation}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Uppercase}} & \seqsplit{toupper(names(charVector))} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} {\bf{Lowercase}} & \seqsplit{tolower(names(charVector))} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} {\bf{String Split}} & \seqsplit{strsplit(names(charVector)}, "\textbackslash{}\textbackslash{}.") \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} {\bf{Find \& Replace 1st}} & sub("\_", "", names(charVector)) \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} {\bf{Find \& Replace All}} & gsub("\_", "", names(charVector)) \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} {\bf{Get Location of Value}} & grep("F", LETTERS) \tn % Row Count 12 (+ 2) % Row 6 \SetRowColor{LightBackground} {\bf{Get Value from location}} & grep("F", LETTERS, value=TRUE) \tn % Row Count 14 (+ 2) % Row 7 \SetRowColor{white} {\bf{Table Count Instances}} & table(grepl("F", LETTERS)) \tn % Row Count 16 (+ 2) % Row 8 \SetRowColor{LightBackground} {\bf{Get Substring}} & substr(charData, 1, 7) \tn % Row Count 17 (+ 1) % Row 9 \SetRowColor{white} {\bf{Paste with 
Space}} & paste("Test", "Message") \tn % Row Count 19 (+ 2) % Row 10 \SetRowColor{LightBackground} {\bf{Paste Without Space}} & paste0("Test", "Message") \tn % Row Count 21 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.63781 cm} x{2.33919 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Statistics}} \tn % Row 0 \SetRowColor{LightBackground} {\bf{Statistical Summary}} & summary(data) \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} {\bf{Mean}} & mean(vector) \tn % Row Count 3 (+ 1) % Row 2 \SetRowColor{LightBackground} {\bf{Standard Deviation}} & sd(vector) \tn % Row Count 5 (+ 2) % Row 3 \SetRowColor{white} {\bf{Variance}} & var(vector) \tn % Row Count 6 (+ 1) % Row 4 \SetRowColor{LightBackground} {\bf{Range}} & range(vector) \tn % Row Count 7 (+ 1) % Row 5 \SetRowColor{white} \mymulticolumn{2}{x{5.377cm}}{} \tn % Row Count 7 (+ 0) % Row 6 \SetRowColor{LightBackground} {\bf{Normal Distribution}} & rnorm(n, mean, sd) \tn % Row Count 9 (+ 2) % Row 7 \SetRowColor{white} {\bf{Binomial Distribution}} & rbinom(n, size, prob) \tn % Row Count 11 (+ 2) % Row 8 \SetRowColor{LightBackground} {\bf{Poisson Distribution}} & rpois(n, lambda) \tn % Row Count 13 (+ 2) % Row 9 \SetRowColor{white} {\bf{Uniform Distribution}} & runif(n, min=0, max=10) \tn % Row Count 15 (+ 2) % Row 10 \SetRowColor{LightBackground} {\bf{Exponential Distribution}} & rexp(n) \tn % Row Count 17 (+ 2) % Row 11 \SetRowColor{white} \mymulticolumn{2}{x{5.377cm}}{} \tn % Row Count 17 (+ 0) % Row 12 \SetRowColor{LightBackground} {\bf{K-Means Clustering}} & kmeans(data, centers = 3) \tn % Row Count 19 (+ 2) % Row 13 \SetRowColor{white} {\bf{Hierarchical Clustering}} & \seqsplit{hclust(dist(data))} \tn % Row Count 21 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} 