% R - Subsetting Cheat Sheet (Cheatography export, author: bwaldo).
%
% Layout: a three-column page built from one-column tabularx "boxes", with a
% fancyhdr header/footer that is itself laid out as three multicol columns.
% Each box is a dark title row followed by alternating light/white rows.
%
% NOTE: every statement lives on its own line. A `%' comment runs to the end
% of the physical line, so joining lines silently comments out live code.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{bwaldo}
\pdfinfo{
  /Title (r-subsetting.pdf)
  /Creator (Cheatography)
  /Author (bwaldo)
  /Subject (R - Subsetting Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: colours the current row and stores the colour name
% globally in \RowColorName so that \mymulticolumn can reuse it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose cell background
% is the colour last set by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{378735}
\definecolor{LightBackground}{HTML}{F2F7F2}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% NOTE(review): the logo below is referenced by an absolute path; the file
% must exist at that location on the build machine or compilation stops.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{R - Subsetting Cheat Sheet}}} \\
    \normalsize by \textcolor{DarkBackground}{bwaldo} via \textcolor{DarkBackground}{\uline{cheatography.com/35095/cs/11019/}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}bwaldo \\
  \uline{cheatography.com/bwaldo} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Published 27th February, 2017.\\
  Updated 27th February, 2017.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% ---------------------------------------------------------------- Basics
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Basics}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{There are a number of operators that can be used to extract subsets of R objects.}\{\{nl\}\} * {[} - always returns an object of the same class as the original; can be used to select more than one element (there is one exception)\{\{nl\}\} * {[}{[} - is used to extract elements of a list or a data frame; it can only be used to extract a single element and the class of the returned object will not necessarily be a list or data frame\{\{nl\}\} * \$ - is used to extract elements of a list or data frame by name, semantics are similar to that of {[}{[}.\{\{nl\}\}} \tn
% Row Count 11 (+ 11)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{x \textless{}- c("a","b","c", "c","d","a") * character vector called x}\{\{nl\}\} * \textgreater{} x{[}1{]}\{\{nl\}\} * {[}1{]} "a"\{\{nl\}\} * \textgreater{} x{[}2{]}\{\{nl\}\} * {[}1{]} "b"\{\{nl\}\} * \textgreater{} x{[}1:4{]}\{\{nl\}\} * {[}1{]} "a" "b" "c" "c"\{\{nl\}\}} \tn
% Row Count 15 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{x{[}x\textgreater{}"a"{]} \# subset that gets every element greater than "a"}\{\{nl\}\} {[}1{]} "b" "c" "c" "d"\{\{nl\}\}} \tn
% Row Count 17 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{u \textless{}- x \textgreater{} "a" \# create a logical vector called "u"}\{\{nl\}\} \textgreater{} u\{\{nl\}\} {[}1{]} FALSE TRUE TRUE TRUE TRUE FALSE\{\{nl\}\}} \tn
% Row Count 20 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{x{[}u{]} \# subset the vector "x" with this u vector and get out all elements that are greater than "a"}\{\{nl\}\} {[}1{]} "b" "c" "c" "d"\{\{nl\}\}} \tn
% Row Count 23 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{So in conclusion there are 2 types of indices that were used above\{\{nl\}\} * the first type with the numeric index\{\{nl\}\} * the second type was the logical index\{\{nl\}\}} \tn
% Row Count 27 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ------------------------------------------------ Removing missing values
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Removing missing values}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{A common task is to remove missing values (NAs)}} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{create a logical vector which tells you where the NA's are so you can remove them}\{\{nl\}\} \textgreater{} x \textless{}- c(1,2,NA,4,NA,5) * here we have a vector x\{\{nl\}\} \textgreater{} bad \textless{}- is.na(x) * tell me which elements are NA and store them in the bad vector\{\{nl\}\} \textgreater{} x{[}!bad{]} * give me the elements that are NOT missing or NA\{\{nl\}\} {[}1{]} 1 2 4 5\{\{nl\}\}} \tn
% Row Count 9 (+ 7)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{What if there are multiple things and you want to take the subset with no missing values?}\{\{nl\}\} \textgreater{} x \textless{}- c(1,2,NA,4,NA,5)\{\{nl\}\} \textgreater{} y \textless{}- c("a","b", NA, "d", NA, "f")\{\{nl\}\} \textgreater{} good \textless{}- complete.cases(x,y)\{\{nl\}\} \textgreater{} good\{\{nl\}\} {[}1{]} TRUE TRUE FALSE TRUE FALSE TRUE\{\{nl\}\} \textgreater{} x{[}good{]} * subset x\{\{nl\}\} {[}1{]} 1 2 4 5\{\{nl\}\} \textgreater{} y{[}good{]} * subset y\{\{nl\}\} {[}1{]} "a" "b" "d" "f"\{\{nl\}\}} \tn
% Row Count 17 (+ 8)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ----------------------------------------------------------------- Lists
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Lists}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{The nice thing about being able to subset an element using its name is that you don't have to remember where it is in the list}} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x \textless{}- list(foo = 1:4, bar = 0.6) \# list of 2 elements foo and bar\{\{nl\}\} \textgreater{} x\{\{nl\}\} \$foo\{\{nl\}\} {[}1{]} 1 2 3 4\{\{nl\}\} \$bar\{\{nl\}\} {[}1{]} 0.6\{\{nl\}\}} \tn
% Row Count 6 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x{[}1{]} * list that contained the sequence 1 thru 4\{\{nl\}\} \$foo\{\{nl\}\} {[}1{]} 1 2 3 4\{\{nl\}\}} \tn
% Row Count 8 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x{[}{[}1{]}{]} \# just the sequence\{\{nl\}\} {[}1{]} 1 2 3 4\{\{nl\}\}} \tn
% Row Count 10 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x\$bar \# give me the element that is associated with the name bar\{\{nl\}\} {[}1{]} 0.6\{\{nl\}\}} \tn
% Row Count 12 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x{[}{[}"bar"{]}{]}\{\{nl\}\} {[}1{]} 0.6\{\{nl\}\}} \tn
% Row Count 13 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{if you want to extract multiple elements of a list then you need to use the single bracket}\{\{nl\}\} \textgreater{} x \textless{}- list(foo = 1:4, bar = 0.6, baz = "hello")\{\{nl\}\} \textgreater{} x{[}c(1,3){]} \# give me the 1st and 3rd element of the list x\{\{nl\}\} \$foo\{\{nl\}\} {[}1{]} 1 2 3 4\{\{nl\}\} \$baz\{\{nl\}\} {[}1{]} "hello"\{\{nl\}\}} \tn
% Row Count 19 (+ 6)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{{[}{[} to index a list where the index itself was computed}\{\{nl\}\} \textgreater{} x \textless{}- list(foo = 1:4, bar = 0.6, baz = "hello")\{\{nl\}\} \textgreater{} name \textless{}- "foo"\{\{nl\}\} \textgreater{} x{[}{[}name{]}{]} * computed index for 'foo'\{\{nl\}\} {[}1{]} 1 2 3 4\{\{nl\}\} \textgreater{} x\$name * element 'name' doesn't exist\{\{nl\}\} NULL\{\{nl\}\} \textgreater{} x\$foo * element 'foo' does exist\{\{nl\}\} {[}1{]} 1 2 3 4\{\{nl\}\}} \tn
% Row Count 26 (+ 7)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{The {[}{[} indicator can take an integer sequence}\{\{nl\}\} \textgreater{} x \textless{}- list(a = list(10,12,14), b = c(3.14,2.81))\{\{nl\}\} * I want to extract 14, that is really the 3rd element of the 1st element so it's the 3rd element of the list which happens to be the first element of the other list.\{\{nl\}\} \textgreater{} x{[}{[}c(1,3){]}{]}\{\{nl\}\} {[}1{]} 14\{\{nl\}\} \textgreater{} x{[}{[}1{]}{]}{[}{[}3{]}{]}\{\{nl\}\} {[}1{]} 14\{\{nl\}\} \textgreater{} x{[}{[}c(2,1){]}{]} * extract the first element of the second element by passing the vector 2,1\{\{nl\}\} {[}1{]} 3.14\{\{nl\}\}} \tn
% Row Count 36 (+ 10)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% -------------------------------------------------------------- Matrices
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Matrices}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Matrices can be subsetted in the usual way with (i,j) type indices}\{\{nl\}\} \textgreater{} x \textless{}- matrix(1:6, 2, 3) \# create a 2x3 matrix with the number sequence of 1 thru 6\{\{nl\}\} \textgreater{} x{[}1,2{]} \# give me the first row and second column\{\{nl\}\} {[}1{]} 3\{\{nl\}\} \textgreater{} x{[}2,1{]}\{\{nl\}\} {[}1{]} 2\{\{nl\}\}} \tn
% Row Count 6 (+ 6)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Indices can also be missing}\{\{nl\}\} \textgreater{} x{[}1,{]} * i want the entire first row\{\{nl\}\} {[}1{]} 1 3 5\{\{nl\}\} \textgreater{} x{[},2{]} * i want just the second column\{\{nl\}\} {[}1{]} 3 4\{\{nl\}\}} \tn
% Row Count 10 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{By default, when a single element of a matrix is retrieved, it is returned as a vector of length 1 rather than a 1x1 matrix. This behavior can be turned off by setting drop = FALSE.}\{\{nl\}\} \textgreater{} x \textless{}- matrix (1:6,2,3)\{\{nl\}\} \textgreater{} x{[}1,2{]}\{\{nl\}\} {[}1{]} 3\{\{nl\}\} \textgreater{} x{[}1,2, drop = FALSE{]} * this preserves the dimension of the object\{\{nl\}\} {[},1{]}\{\{nl\}\} {[}1,{]} 3\{\{nl\}\}} \tn
% Row Count 17 (+ 7)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Similarly, subsetting a single column or a single row will give you a vector, not a matrix (by default)}\{\{nl\}\} \textgreater{} x \textless{}- matrix (1:6,2,3)\{\{nl\}\} \textgreater{} x{[}1,{]}\{\{nl\}\} {[}1{]} 1 3 5\{\{nl\}\} \textgreater{} x{[}1,,drop=FALSE{]}\{\{nl\}\} {[},1{]} {[},2{]} {[},3{]}\{\{nl\}\} {[}1,{]} 1 3 5\{\{nl\}\}} \tn
% Row Count 22 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ------------------------------------------------------ Partial Matching
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Partial Matching}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Partial matching of names is allowed with {[}{[} and \$.}} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x \textless{}- list(aardvark = 1:5) \# create a list x which has the element aardvark that is a seq 1 thru 5\{\{nl\}\} \textgreater{} x\$a * instead of typing aardvark every time, search for a name in the list that matches 'a'\{\{nl\}\} {[}1{]} 1 2 3 4 5\{\{nl\}\} \textgreater{} x{[}{[}"a"{]}{]} * the {[}{[} expects a name with an exact match, so no partial matching\{\{nl\}\} NULL\{\{nl\}\} \textgreater{} x{[}{[}"a", exact = FALSE{]}{]} * specify exact = FALSE then the return will be below.\{\{nl\}\} {[}1{]} 1 2 3 4 5\{\{nl\}\}} \tn
% Row Count 11 (+ 9)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ------------------------------------------------- Vectorized operations
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Vectorized operations}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Many operations in R are vectorized making code more efficient, concise, and easier to read}} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x \textless{}- 1:4; y \textless{}- 6:9 * 2 vectors\{\{nl\}\} \textgreater{} x + y * add the 1st element of x to the 1st element of y etc (1+6, 2+7 etc)\{\{nl\}\} {[}1{]} 7 9 11 13\{\{nl\}\} \textgreater{} x \textgreater{} 2\{\{nl\}\} {[}1{]} FALSE FALSE TRUE TRUE\{\{nl\}\} \textgreater{} x \textgreater{}= 2\{\{nl\}\} {[}1{]} FALSE TRUE TRUE TRUE\{\{nl\}\} \textgreater{} y == 8\{\{nl\}\} {[}1{]} FALSE FALSE TRUE FALSE\{\{nl\}\} \textgreater{} x * y\{\{nl\}\} {[}1{]} 6 14 24 36\{\{nl\}\} \textgreater{} x / y\{\{nl\}\} {[}1{]} 0.1666667 0.2857143 0.3750000 0.4444444\{\{nl\}\}} \tn
% Row Count 10 (+ 8)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Similarly you can do the same with matrices}\{\{nl\}\} \textgreater{} x \textless{}- matrix(1:4, 2, 2); y \textless{}- matrix(rep(10,4), 2, 2) \{\{nl\}\} * x is a matrix 1 thru 4 so it's a 2x2 matrix\{\{nl\}\} * y is a matrix of all 10's, it's also a 2x2 matrix\{\{nl\}\}} \tn
% Row Count 15 (+ 5)
% Row 3
% The R operators `*' and `%*%' are written literally here; the exporter had
% mistaken their asterisks for emphasis markers and wrapped the row in \emph.
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{} x * y * \textbf{element-wise multiplication}\{\{nl\}\} {[},1{]} {[},2{]}\{\{nl\}\} {[}1,{]} 10 30\{\{nl\}\} {[}2,{]} 20 40\{\{nl\}\} \textgreater{} x / y\{\{nl\}\} {[},1{]} {[},2{]}\{\{nl\}\} {[}1,{]} 0.1 0.3\{\{nl\}\} {[}2,{]} 0.2 0.4\{\{nl\}\} \textgreater{} x \%*\% y * \textbf{true matrix multiplication}\{\{nl\}\} {[},1{]} {[},2{]}\{\{nl\}\} {[}1,{]} 40 40\{\{nl\}\} {[}2,{]} 60 60\{\{nl\}\}} \tn
% Row Count 21 (+ 6)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}