% Preamble for a three-column cheat sheet (pdfLaTeX: uses \pdfinfo and px units).
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Multi-column page layout (multicols environment)
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{xeonkai}
\pdfinfo{
  /Title (data-management-in-r.pdf)
  /Creator (Cheatography)
  /Author (xeonkai)
  /Subject (Data management in R Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% --- Helper macros used by every table below ---
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName
% and applies it to the current row with \rowcolor.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose cell background is
% the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use
% \CheatItem: indented vertical bar that introduces a sub-entry inside a cell.
\newcommand*{\CheatItem}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}}

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{EF5252}
\definecolor{LightBackground}{HTML}{FEF4F4}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author/link next to it.
% NOTE(review): the logo uses an absolute path - adjust if compiling elsewhere.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries{\textcolor{DarkBackground}{\textrm{Data management in R Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{xeonkai} via \textcolor{DarkBackground}{\uline{cheatography.com/31513/cs/9676/}}}
\end{tabulary}
\end{multicols}}
% Footer: three boxes - author, publication status with page count, sponsor.
\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}xeonkai \\
  \uline{cheatography.com/xeonkai} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 8th November, 2016.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% ---------------------------------------------------------------
% Manipulating dataframes
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Manipulating dataframes}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Adding new columns} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `mydata \textless{}- transform(mydata, sumx = x1 + x2, meanx = (x1 + x2)/2)`} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `total \textless{}- cbind(A,B)` - each object to have same no. of rows and sorted in same order} \tn
% Row Count 5 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `merge(dfA, dfB, by = c("ID","Country"))`} \tn
% Row Count 6 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Adding new rows} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `total \textless{}- rbind(A, B)` - each object to have same variables} \tn
% Row Count 9 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Recoding variables} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `leadership \textless{}- within(leadership, \{ agecat \textless{}- NA agecat{[}age \textgreater{} 75{]} \textless{}- "Elder" agecat{[}age \textgreater{}= 55 \& age \textless{}=75{]} \textless{}- "Middle Aged" agecat{[}age \textless{} 55{]} \textless{}- "Young"\})`} \tn
% Row Count 14 (+ 5)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem Other recoding functions: `car` package - `recode()`, `doBy` package - `recodeVar()`, `cut()` in R} \tn
% Row Count 17 (+ 3)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Renaming variables} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `reshape` package - `rename()`} \tn
% Row Count 19 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `rename(dataframe, c(oldname="newname", oldname="newname",...))`} \tn
% Row Count 21 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `names()` - `names(leadership){[}6:8{]} \textless{}- c("item1","item2","item3")`} \tn
% Row Count 23 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Missing values} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `is.na()`, `na.rm=TRUE`, `na.omit()` - deletes any row with missing data} \tn
% Row Count 26 (+ 3)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Date values} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `as.Date(x, "input\_format")`} \tn
% Row Count 28 (+ 2)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem Default format: yyyy-mm-dd} \tn
% Row Count 29 (+ 1)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `Sys.Date()`, `date()`, `difftime(date1, date2, units="weeks")`} \tn
% Row Count 31 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Manipulating dataframes (cont)}} \tn
% Row 13
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem Converting character to dates: `help(as.Date)`, `help(strftime)`} \tn
% Row Count 2 (+ 2)
% Row 14
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem Formatting dates and time: `help(ISOdatetime)`} \tn
% Row Count 3 (+ 1)
% Row 15
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `lubridate` and `fcalendar` package} \tn
% Row Count 4 (+ 1)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Sorting data} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `order()`: default ascending, prepend sorting variable with `-` for descending} \tn
% Row Count 7 (+ 3)
% Row 17
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem e.g. `df2 \textless{}- df{[}order(df\$gender, -df\$age),{]}`} \tn
% Row Count 8 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Date formats (conversion codes, meaning, example output)
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{p{0.54924 cm} x{3.06659 cm} x{0.96117 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{3}{x{5.377cm}}{\bfseries\textcolor{white}{Date formats}} \tn
% Row 0
\SetRowColor{LightBackground}
`\%d` & Day as a number (01-31) & 01-31 \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`\%a` & Abbreviated weekday & Mon \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
`\%A` & Unabbreviated weekday & Monday \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
`\%m` & Month (01-12) & 01-12 \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
`\%b` & Abbreviated month & Jan \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
`\%B` & Unabbreviated month & January \tn
% Row Count 6 (+ 1)
% Row 6
\SetRowColor{LightBackground}
`\%y` & 2-digit year & 07 \tn
% Row Count 7 (+ 1)
% Row 7
\SetRowColor{white}
`\%Y` & 4-digit year & 2007 \tn
% Row Count 8 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}---}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Type conversions (test function / conversion function)
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Type conversions}} \tn
% Row 0
\SetRowColor{LightBackground}
`is.numeric()` & `as.numeric()` \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`is.character()` & `as.character()` \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
`is.vector()` & `as.vector()` \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
`is.matrix()` & `as.matrix()` \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
`is.data.frame()` & `as.data.frame()` \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
`is.factor()` & `as.factor()` \tn
% Row Count 6 (+ 1)
% Row 6
\SetRowColor{LightBackground}
`is.logical()` & `as.logical()` \tn
% Row Count 7 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Subsetting datasets
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Subsetting datasets}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Selecting variables} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}, c(6:10){]}`} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}c("q1","q2","q3"){]}`} \tn
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `myvars \textless{}- paste("q", 1:3, sep="")`} \tn
% Row Count 4 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}myvars{]}`} \tn
% Row Count 5 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Excluding variables} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `myvars \textless{}- names(leadership) \%in\% c("q1","q2")`} \tn
% Row Count 7 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}!myvars{]}`} \tn
% Row Count 8 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}c(-1,-2){]}`} \tn
% Row Count 9 (+ 1)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `df\$q1 \textless{}- NULL`} \tn
% Row Count 10 (+ 1)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Selecting observations} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}1:3,{]}`} \tn
% Row Count 12 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `new \textless{}- df{[}which(df\$q1=="M" \& df\$q2 \textgreater{}30),{]}`} \tn
% Row Count 13 (+ 1)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Random Samples} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `mysample \textless{}- df{[}sample(1:nrow(df), 3, replace=FALSE),{]}`} \tn
% Row Count 16 (+ 3)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{} \tn
\mymulticolumn{1}{x{5.377cm}}{\CheatItem `sampling` and `survey` package} \tn
% Row Count 17 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`subset()` function \newline e.g. \newline `new \textless{}- subset(df, age \textgreater{}=35 | age \textless{} 24, select = c(q1, q2, q3))` \newline `new \textless{}- subset(df, gender == "M" \& age \textgreater{}25, select = gender:q3)`} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% SQL in R
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{SQL in R}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`sqldf` package \newline % Row Count 1 (+ 1)
`library(sqldf)` \newline % Row Count 2 (+ 1)
`new \textless{}- sqldf("select * from mtcars where carb=1 order by mpg", row.names=TRUE)` \newline % Row Count 4 (+ 2)
`sqldf("select avg(mpg) as avg\_mpg, avg(disp) as avg\_disp, gear from mtcars where cyl in (4,6) group by gear")`% Row Count 7 (+ 3)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Mathematical functions
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{2.14011 cm} x{2.83689 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Mathematical functions}} \tn
% Row 0
\SetRowColor{LightBackground}
`abs(x)` & Absolute value \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`sqrt(x)` & Square root. Same as `x\textasciicircum{}(0.5)`. \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
`ceiling(x)` & Smallest integer not less than x \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
`floor(x)` & Largest integer not greater than x \tn
% Row Count 7 (+ 2)
% Row 4
\SetRowColor{LightBackground}
`trunc(x)` & Integer formed by truncating values in `x` towards 0 \tn
% Row Count 10 (+ 3)
% Row 5
\SetRowColor{white}
`round(x, digits=n)` & Round `x` to the specified number of decimal places \tn
% Row Count 13 (+ 3)
% Row 6
\SetRowColor{LightBackground}
`signif(x, digits=n)` & Round `x` to the specified number of significant digits \tn
% Row Count 16 (+ 3)
% Row 7
\SetRowColor{white}
`cos(x), sin(x), tan(x)` & Cosine, sine, and tangent \tn
% Row Count 18 (+ 2)
% Row 8
\SetRowColor{LightBackground}
`acos(x), asin(x), atan(x)` & Arc-cosine, arc-sine and arc-tangent \tn
% Row Count 20 (+ 2)
% Row 9
\SetRowColor{white}
`cosh(x), sinh(x), tanh(x)` & Hyperbolic cosine, sine, and tangent \tn
% Row Count 22 (+ 2)
% Row 10
\SetRowColor{LightBackground}
`acosh(x), asinh(x), atanh(x)` & Hyperbolic arc-cosine, arc-sine, and arc-tangent \tn
% Row Count 25 (+ 3)
% Row 11
\SetRowColor{white}
`log(x, base=n)` & Logarithm of `x` to the base `n` \tn
% Row Count 27 (+ 2)
% Row 12
\SetRowColor{LightBackground}
`log(x)` & Natural logarithm \tn
% Row Count 28 (+ 1)
% Row 13
\SetRowColor{white}
`log10(x)` & Common logarithm \tn
% Row Count 29 (+ 1)
% Row 14
\SetRowColor{LightBackground}
`exp(x)` & Exponential function \tn
% Row Count 30 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Statistical functions
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{2.33919 cm} x{2.63781 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Statistical functions}} \tn
% Row 0
\SetRowColor{LightBackground}
`mean(x)` & Mean \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`median(x)` & Median \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
`sd(x)` & Standard deviation \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
`var(x)` & Variance \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
`mad(x)` & Median absolute deviation \tn
% Row Count 6 (+ 2)
% Row 5
\SetRowColor{white}
`quantile(x, probs)` & Quantiles where `x` is the numeric vector for which quantiles are desired and `probs` is a numeric vector with probabilities in {[}0,1{]} \tn
% Row Count 12 (+ 6)
% Row 6
\SetRowColor{LightBackground}
 & `y \textless{}- quantile(x, c(.3,.84))` \tn
% Row Count 14 (+ 2)
% Row 7
\SetRowColor{white}
`range(x)` & Range \tn
% Row Count 15 (+ 1)
% Row 8
\SetRowColor{LightBackground}
 & `diff(range(x))` returns difference between extreme values \tn
% Row Count 18 (+ 3)
% Row 9
\SetRowColor{white}
`sum(x)` & Sum \tn
% Row Count 19 (+ 1)
% Row 10
\SetRowColor{LightBackground}
`diff(x, lag=n)` & Lagged differences, with `lag` indicating which lag to use. Default lag is 1. \tn
% Row Count 23 (+ 4)
% Row 11
\SetRowColor{white}
`min(x)` & Minimum \tn
% Row Count 24 (+ 1)
% Row 12
\SetRowColor{LightBackground}
`max(x)` & Maximum \tn
% Row Count 25 (+ 1)
% Row 13
\SetRowColor{white}
`scale(x, center=TRUE, scale=TRUE)` & Column center (`center=TRUE`) or standardize (`center=TRUE, scale=TRUE`) data object `x`, i.e. to a mean of 0 and std of 1 \tn
% Row Count 31 (+ 6)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Trimmed mean - dropping top and lowest 5\% and missing values \newline `y \textless{}- mean(x, trim=0.05, na.rm=TRUE)`} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Probability functions (distribution abbreviation / name)
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{1.44333 cm} x{3.53367 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Probability functions}} \tn
% Row 0
\SetRowColor{LightBackground}
`beta` & Beta \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`binom` & Binomial \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
`cauchy` & Cauchy \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
`chisq` & Chi-squared (noncentral) \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
`exp` & Exponential \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
`f` & F \tn
% Row Count 6 (+ 1)
% Row 6
\SetRowColor{LightBackground}
`gamma` & Gamma \tn
% Row Count 7 (+ 1)
% Row 7
\SetRowColor{white}
`geom` & Geometric \tn
% Row Count 8 (+ 1)
% Row 8
\SetRowColor{LightBackground}
`hyper` & Hypergeometric \tn
% Row Count 9 (+ 1)
% Row 9
\SetRowColor{white}
`lnorm` & Lognormal \tn
% Row Count 10 (+ 1)
% Row 10
\SetRowColor{LightBackground}
`logis` & Logistic \tn
% Row Count 11 (+ 1)
% Row 11
\SetRowColor{white}
`multinom` & Multinomial \tn
% Row Count 12 (+ 1)
% Row 12
\SetRowColor{LightBackground}
`nbinom` & Negative binomial \tn
% Row Count 13 (+ 1)
% Row 13
\SetRowColor{white}
`norm` & Normal \tn
% Row Count 14 (+ 1)
% Row 14
\SetRowColor{LightBackground}
`pois` & Poisson \tn
% Row Count 15 (+ 1)
% Row 15
\SetRowColor{white}
`signrank` & Wilcoxon Signed Rank \tn
% Row Count 16 (+ 1)
% Row 16
\SetRowColor{LightBackground}
`t` & T \tn
% Row Count 17 (+ 1)
% Row 17
\SetRowColor{white}
`unif` & Uniform \tn
% Row Count 18 (+ 1)
% Row 18
\SetRowColor{LightBackground}
`weibull` & Weibull \tn
% Row Count 19 (+ 1)
% Row 19
\SetRowColor{white}
`wilcox` & Wilcoxon Rank Sum \tn
% Row Count 20 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{General form of probability function: `{[}dpqr{]}distribution\_abbreviation()` \newline `d` = density \newline `p` = distribution function \newline `q` = quantile function \newline `r` = random generation (random deviates)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Character functions
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Character functions}} \tn
% Row 0
\SetRowColor{LightBackground}
`nchar(x)` & Counts the no. of characters of `x` \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
`substr(x, start, stop)` & Extract or replace substrings in a character vector \tn
% Row Count 5 (+ 3)
% Row 2
\SetRowColor{LightBackground}
 & `x \textless{}- "abcdef"` \tn
% Row Count 6 (+ 1)
% Row 3
\SetRowColor{white}
 & `substr(x, 2, 4)` returns `"bcd"` \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
 & `substr(x, 2, 4) \textless{}- "22222"` produces `"a222ef"` \tn
% Row Count 11 (+ 3)
% Row 5
\SetRowColor{white}
`grep(pattern, x, ignore.case=FALSE, fixed=FALSE)` & Search for pattern in `x`. `fixed=FALSE` - `pattern` is regex. `fixed=TRUE` - `pattern` is text string. Returns matching indices. \tn
% Row Count 18 (+ 7)
% Row 6
\SetRowColor{LightBackground}
`sub(pattern, replacement, x, ignore.case=FALSE, fixed=FALSE)` & Find `pattern` in `x` and substitute with `replacement` text \tn
% Row Count 22 (+ 4)
% Row 7
\SetRowColor{white}
`strsplit(x, split, fixed=FALSE)` & Split the elements of `x` at `split` \tn
% Row Count 24 (+ 2)
% Row 8
\SetRowColor{LightBackground}
 & `y \textless{}- strsplit("abc", "")` returns 1-component, 3-element list containing `"a" "b" "c"`. \tn
% Row Count 29 (+ 5)
% Row 9
\SetRowColor{white}
 & `unlist(y){[}2{]}` and `sapply(y, "{[}", 2)` both return "b". \tn
% Row Count 32 (+ 3)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Character functions (cont)}} \tn
% Row 10
\SetRowColor{LightBackground}
`paste(..., sep="")` & Concatenate strings after using `sep` string to separate them \tn
% Row Count 4 (+ 4)
% Row 11
\SetRowColor{white}
 & `paste("x", 1:3, sep="M")` returns `c("xM1","xM2","xM3")` \tn
% Row Count 7 (+ 3)
% Row 12
\SetRowColor{LightBackground}
`toupper(x)` & Uppercase \tn
% Row Count 8 (+ 1)
% Row 13
\SetRowColor{white}
`tolower(x)` & Lowercase \tn
% Row Count 9 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Other useful functions
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{2.43873 cm} x{2.53827 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Other useful functions}} \tn
% Row 0
\SetRowColor{LightBackground}
`length(x)` & Length of object `x` \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`seq(from, to, by)` & Generate a sequence \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
`rep(x, n)` & Repeat `x` `n` times \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
`cut(x, n)` & Divide continuous variable `x` into factor with `n` levels. `ordered\_result = TRUE` creates an ordered factor. \tn
% Row Count 9 (+ 6)
% Row 4
\SetRowColor{LightBackground}
`pretty(x, n)` & Create pretty breakpoints. Divide a continuous variable `x` into `n` intervals, by selecting `n+1` equally spaced rounded values. Often used in plotting. \tn
% Row Count 17 (+ 8)
% Row 5
\SetRowColor{white}
`cat(..., file="myfile", append=FALSE)` & Concatenates the objects in ... and outputs them to the screen or to a file \tn
% Row Count 21 (+ 4)
% Row 6
\SetRowColor{LightBackground}
`apply(x, MARGIN, FUN, ...)` & Apply function to data objects \tn
% Row Count 23 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Escape characters: \newline `\textbackslash{}n` - new lines \newline `\textbackslash{}t` - tabs \newline `\textbackslash{}'` - single quote \newline `\textbackslash{}b` - backspace} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Control flow
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{p{0.79632 cm} x{4.18068 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Control flow}} \tn
% Row 0
\SetRowColor{LightBackground}
FOR & `for (var in seq) statement` \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
WHILE & `while (cond) statement` \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{IF-ELSE} & `if (cond) statement` \tn
% Row Count 4 (+ 2)
% Row 3
\SetRowColor{white}
 & `if (cond) statement1 else statement2` \tn
% Row Count 6 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\seqsplit{IFELSE} & `ifelse(cond, statement1, statement2)` \tn
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
\seqsplit{SWITCH} & `switch(expr, ...)` \tn
% Row Count 9 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{`statement` - single R statement or compound statement enclosed in \{\} and separated by ; \newline `cond` - expression that resolves to `TRUE` or `FALSE` \newline `expr` - statement that evaluates to number or character string \newline `seq` - sequence of numbers or character strings} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Example for `switch`
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Example for `switch`}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{feelings \textless{}- c("sad","afraid") \newline for (i in feelings) \newline print( \newline switch(i, \newline happy = "I am glad you are happy", \newline afraid = "There is nothing to fear", \newline sad = "Cheer up", \newline angry = "Calm down now" \newline ) \newline )} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% Aggregation and restructuring
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{x{1.84149 cm} x{3.13551 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Aggregation and restructuring}} \tn
% Row 0
\SetRowColor{LightBackground}
`t()` & Transpose \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`aggregate(x, by, FUN)` & Aggregate (`by` variables must be a list) \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
 & `new \textless{}- aggregate(mtcars, by=list(Group.cyl=cyl, Group.gears=gear), FUN=mean, na.rm=TRUE)` \tn
% Row Count 7 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------
% `Reshape` package (single image cell)
% NOTE(review): the image uses an absolute path - adjust if compiling elsewhere.
% ---------------------------------------------------------------
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{`Reshape` package}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/xeonkai_1478598360_Capture.JPG}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}