\documentclass[10pt,a4paper]{article}

% ---------------------------------------------------------------------------
% Preamble: packages, PDF metadata, page geometry and table line spacing.
% Every statement sits on its own line so that a trailing `%` comment can
% never swallow the command that follows it.
% ---------------------------------------------------------------------------

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Multi-column page layout (multicols environment)
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{jbjoker}
\pdfinfo{
  /Title (data-analysis-with-r.pdf)
  /Creator (Cheatography)
  /Author (jbjoker)
  /Subject (Data Analysis with R Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName and
% colours the current row with it, so \mymulticolumn can reuse the same colour.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose background is the
% colour most recently set by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{0D12A3}
\definecolor{LightBackground}{HTML}{F7F7FC}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author on the right.
% NOTE(review): the logo is referenced by an absolute path; the build will only
% succeed on a machine where that exact file exists -- confirm or vendor the image.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
  \SetRowColor{DarkBackground}
  \vspace{-7pt}
  {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
    \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
  }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
  \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Data Analysis with R Cheat Sheet}}} \\
  \normalsize by \textcolor{DarkBackground}{jbjoker} via \textcolor{DarkBackground}{\uline{cheatography.com/181883/cs/37815/}}
\end{tabulary}
\end{multicols}}
% Footer: three columns (author, sheet status and page count, sponsor).
\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\textbf{\textcolor{white}{Cheatographer}}} \\
  \vspace{-2pt}jbjoker \\
  \uline{cheatography.com/jbjoker} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Cheat Sheet}}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 30th May, 2023.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Sponsor}}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% Table: Phases of Data Analysis
% (No blank lines inside the tables: a \par before \SetRowColor's \noalign
% would start a cell and break the row colouring.)
\begin{tabularx}{5.377cm}{x{1.09494 cm} x{3.88206 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Phases of Data Analysis}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Ask} & Define the problem you are trying to solve. \tn
  \SetRowColor{white}
  \textbf{Prepare} & What data do I need to solve this problem? Do I have access to obtain it? \tn
  \SetRowColor{LightBackground}
  \textbf{Process} & Clean the data of errors and inaccuracies. \tn
  \SetRowColor{white}
  \textbf{Analyze} & Perform calculations to tell a data story. Exploratory Analysis, Statistical modelling \tn
  \SetRowColor{LightBackground}
  \textbf{Share} & Clear visuals of the data and solution. This includes the reproducible code. \tn
  \SetRowColor{white}
  \textbf{Act} & Provide recommendations based on data. \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: File Manipulation
\begin{tabularx}{5.377cm}{x{2.23965 cm} x{2.73735 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{File Manipulation}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Get Working Directory} & getwd() \tn
  \SetRowColor{white}
  \textbf{Set Working Directory} & setwd() \tn
  \SetRowColor{LightBackground}
  \textbf{See Directory Contents} & dir() \tn
  \SetRowColor{white}
  \textbf{Create Folder} & dir.create("tFolder") \tn
  \SetRowColor{LightBackground}
  \textbf{Create File} & \seqsplit{file.create("test.csv")} \tn
  \SetRowColor{white}
  \textbf{Copy File} & file.copy("test.csv", "tFolder") \tn
  % NOTE(review): myedit() does not look like a base R function; the author may
  % have meant file.edit("test.R") -- confirm before publishing.
  \SetRowColor{LightBackground}
  \textbf{Edit File} & myedit(test.R) \tn
  \SetRowColor{white}
  \textbf{Delete File} & unlink("test.csv") \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: Structure & Dimensions
\begin{tabularx}{5.377cm}{x{3.53367 cm} x{1.44333 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Structure \& Dimensions}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Structure} & str(data) \tn
  \SetRowColor{white}
  \textbf{Get \# of Rows \& Columns} & dim(data) \tn
  \SetRowColor{LightBackground}
  \textbf{Return \# of Rows} & nrow(data) \tn
  \SetRowColor{white}
  \textbf{Return \# of Cols} & ncol(data) \tn
  \SetRowColor{LightBackground}
  \textbf{Return 1st 6 Rows} & head(data) \tn
  \SetRowColor{white}
  \textbf{Get Class Type} & \seqsplit{class(data)} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
% Table: Importing Data
% (No blank lines inside the tables: a \par before \SetRowColor's \noalign
% would start a cell and break the row colouring.)
\begin{tabularx}{5.377cm}{x{1.89126 cm} x{3.08574 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Importing Data}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Web Scraping} & con = \seqsplit{url("http://google.com")} \tn
  % R is case-sensitive: the function is readLines(), not readlines().
  \SetRowColor{white}
  & htmlCode = readLines(con) \tn
  \SetRowColor{LightBackground}
  & close(con) \tn
  \SetRowColor{white}
  \textbf{Remote File} & fileUrl \textless{}- \seqsplit{"https://website.com/data.csv"} \tn
  \SetRowColor{LightBackground}
  & download.file(fileUrl, destfile = "./myData.csv", method = "curl") \tn
  \SetRowColor{white}
  \textbf{Import Data as Table} & inData \textless{}- read.table("data.csv", sep = " ", header = TRUE) \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: Applying Functions
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Applying Functions}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Apply a function over an array} & \textbf{apply}(data,Margin,Function) \#1=Rows 2=Cols \tn
  \SetRowColor{white}
  \textbf{Apply a function to each element of list, vector, or DF and return a list} & \textbf{lapply}(data, Function) \tn
  \SetRowColor{LightBackground}
  \textbf{Same as lapply, but returns a vector instead} & \textbf{sapply}(data, Function) \tn
  \SetRowColor{white}
  \textbf{Apply a function to a subset specified by the FactorList} & \textbf{tapply}(vector, factorList, Function) \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: Clean & Test Data
\begin{tabularx}{5.377cm}{x{1.9908 cm} x{2.9862 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Clean \& Test Data}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Check for NAs} & colSums(is.na(data)) \tn
  \SetRowColor{white}
  \textbf{Logical NA Test} & \seqsplit{all(colSums(is.na(data))} == 0) \tn
  \SetRowColor{LightBackground}
  \textbf{Trim Whitespace} & trimws(charVector) \tn
  \SetRowColor{white}
  \textbf{Verify Data Type} & class(data) \textbf{or} str(data) \tn
  \SetRowColor{LightBackground}
  \textbf{Find Specific} & test{[}test\$someCol \%in\% \tn
  \SetRowColor{white}
  & c("abcdefg", "hello"),{]} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: String Manipulation
\begin{tabularx}{5.377cm}{x{2.18988 cm} x{2.78712 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{String Manipulation}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Uppercase} & \seqsplit{toupper(names(charVector))} \tn
  \SetRowColor{white}
  \textbf{Lowercase} & \seqsplit{tolower(names(charVector))} \tn
  \SetRowColor{LightBackground}
  \textbf{String Split} & \seqsplit{strsplit(names(charVector)}, "\textbackslash{}\textbackslash{}.") \tn
  \SetRowColor{white}
  \textbf{Find \& Replace 1st} & sub("\_", "", names(charVector)) \tn
  \SetRowColor{LightBackground}
  \textbf{Find \& Replace All} & gsub("\_", "", names(charVector)) \tn
  \SetRowColor{white}
  \textbf{Get Location of Value} & grep("F", LETTERS) \tn
  \SetRowColor{LightBackground}
  \textbf{Get Value from location} & grep("F", LETTERS, value=TRUE) \tn
  \SetRowColor{white}
  \textbf{Table Count Instances} & table(grepl("F", LETTERS)) \tn
  \SetRowColor{LightBackground}
  \textbf{Get Substring} & substr(charData, 1, 7) \tn
  \SetRowColor{white}
  \textbf{Paste with Space} & paste("Test", "Message") \tn
  \SetRowColor{LightBackground}
  \textbf{Paste Without Space} & paste0("Test", "Message") \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: Statistics
% The empty \mymulticolumn rows act as separators between the summary
% statistics, the random-number generators and the clustering functions.
\begin{tabularx}{5.377cm}{x{2.63781 cm} x{2.33919 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Statistics}}} \tn
  \SetRowColor{LightBackground}
  \textbf{Statistical Summary} & summary(data) \tn
  \SetRowColor{white}
  \textbf{Mean} & mean(data) \tn
  \SetRowColor{LightBackground}
  \textbf{Standard Deviation} & sd(vector) \tn
  \SetRowColor{white}
  \textbf{Variance} & var(vector) \tn
  \SetRowColor{LightBackground}
  \textbf{Range} & range(vector) \tn
  \SetRowColor{white}
  \mymulticolumn{2}{x{5.377cm}}{} \tn
  \SetRowColor{LightBackground}
  \textbf{Normal Distribution} & rnorm(n, mean, sd) \tn
  \SetRowColor{white}
  \textbf{Binomial Distribution} & rbinom(n, size, prob) \tn
  % rpois() is parameterised by the Poisson mean `lambda`; it has no `size`.
  \SetRowColor{LightBackground}
  \textbf{Poisson Distribution} & rpois(n, lambda) \tn
  \SetRowColor{white}
  \textbf{Uniform Distribution} & runif(n, min=0, max=10) \tn
  \SetRowColor{LightBackground}
  \textbf{Exponential Distribution} & rexp(n) \tn
  \SetRowColor{white}
  \mymulticolumn{2}{x{5.377cm}}{} \tn
  \SetRowColor{LightBackground}
  \textbf{K-Means Clustering} & kmeans(data, centers = 3) \tn
  \SetRowColor{white}
  \textbf{Hierarchical Clustering} & \seqsplit{hclust(dist(data))} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}