% Preamble, part 1: document class, package loading, PDF metadata and page
% geometry for the three-column cheat sheet.
% Every statement sits on its own line so that a trailing `%` comment only
% covers the remainder of that statement's line.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{Icey\_cey}
\pdfinfo{
  /Title (data-visualization-in-r-for-gr5293.pdf)
  /Creator (Cheatography)
  /Author (Icey\_cey)
  /Subject (Data Visualization in R for GR5293 Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% Helper macros, colour palette, running header/footer and the document body.
% The body is a three-column multicols* layout holding one tabularx per topic.
% Every statement sits on its own line so that a trailing `%` comment only
% covers the remainder of that statement's line. No blank lines are used inside
% the header/footer arguments or table cells, so no extra \par is introduced.

% \SetRowColor stores the colour name globally in \RowColorName and colours the
% row; \mymulticolumn reuses \RowColorName for the background of a spanning cell.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{7CA370}
\definecolor{LightBackground}{HTML}{F6F9F6}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo in the first column, sheet title and author link in the second.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Data Visualization in R for GR5293 Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Icey\_cey} via \textcolor{DarkBackground}{\uline{cheatography.com/145663/cs/31390/}}}
\end{tabulary}
\end{multicols}}
% Footer: three small tables (author, sheet status with page count, sponsor).
\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
    \SetRowColor{FootBackground}
    \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\
    \vspace{-2pt}Icey\_cey \\
    \uline{cheatography.com/icey-cey} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\
    \vspace{-2pt}Not Yet Published.\\
    Updated 1st April, 2022.\\
    Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\
    \SetRowColor{white}
    \vspace{-5pt}
    %\includegraphics[width=48px,height=48px]{dave.jpeg}
    Measure your website readability!\\
    www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% Histogram: title row, usage row, argument notes.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Histogram}}(L4)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{hist(x, col = "lightblue", ylim = c(a,b), xlim=c(a,b), xlab = "Lab for x axis", right = TRUE, main="Title for the histogram", breaks = seq(m,n,p))} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{x}}: the vector to visualize \newline
    {\bf{col=}}: change the color of the histogram \newline
    {\bf{xlim=/ylim=}}: define the range of x/y axis \newline
    {\bf{xlab=/ylab=}}: rename the label for x/y axis \newline
    {\bf{right=TRUE/FALSE}}: "TRUE" stands for the right-closed (left-opened) interval. "FALSE" stands for the right-opened (left-closed) interval \newline
    {\bf{main=}}: name the title for the histogram \newline
    {\bf{breaks=}}: set up the value of x axis} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Single boxplot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Single Boxplot}}(L5)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{boxplot(x, horizontal=TRUE, log="x")} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{x}}: the vector to visualize \newline
    {\bf{horizontal=TRUE/FALSE}}: make the boxplot horizontally or vertically \newline
    {\bf{log=}}: if the x value is in the log scale} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Multiple boxplot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Multiple Boxplot}}(L5)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(x= ,y=)) \newline
    +geom\_boxplot() \newline
    +labs() \newline
    + theme(legend.position = "bottom")} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{dataset}}: the dataset to visualize \newline
    {\bf{aes(x= ,y=)}}: plot by x \& y \newline
    {\bf{labs()}}: label the element in the boxplot \newline
    {\bf{theme(legend.position)}}: assign the position of the legend} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Violin plot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Violin Plot}}(L5)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(x= ,y= )) \newline
    +geom\_violin() \newline
    +coord\_flip() \newline
    +labs() \newline
    +theme()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{dataset}}: the dataset to visualize \newline
    {\bf{aes(x= ,y=)}}: plot by x \& y \newline
    {\bf{geom\_violin}}: get the violin plot \newline
    {\bf{coord\_flip()}}: flip the x and y coordinate \newline
    {\bf{theme()}}: customize the non-data component} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Ridgeline plot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Ridgeline Plot}}(L5)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(x= ,y= ))+ \newline
    geom\_density\_ridges(fill="blue",alpha= ,scale= )} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{dataset}}: the dataset to visualize \newline
    {\bf{aes(x= ,y= )}}: plot by x \& y \newline
    {\bf{geom\_density\_ridges()}}: get the Ridgeline plot \newline
    {\bf{fill= }}: fill the Ridgeline with specific color \newline
    {\bf{alpha= }}: set the transparency of the area under the Ridgeline} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Q-Q plot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Q-Q plot (Quantile-Quantile)}}(L6)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{qqnorm(x) \newline
    qqline(x, col="red")} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{qqnorm()}}: produce a normal QQ plot of the values in x \newline
    {\bf{qqline()}}: add a line to a "theoretical", by default normal, quantile-quantile plot} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Types of data: two-column table with alternating row colours.
\begin{tabularx}{5.377cm}{x{1.74195 cm} x{3.23505 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Types of data}}(L8)}} \tn
% Row 0
\SetRowColor{LightBackground}
{\bf{Numerical data}} & {\bf{Categorical data}} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\textasciitilde{}Discrete & \textasciitilde{}Nominal - no fixed category order \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\textasciitilde{}Continuous & \textasciitilde{}Ordinal - fixed category order \tn
% Row Count 6 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Tidy data helpers.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Tidy Data}}(L10)}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{pivot\_longer(data, cols = , names\_to = ,values\_to = )}}: move selected columns' name to "name" column, and move values to a single "value" column} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{pivot\_wider(data, names\_from = , values\_from = )}}: use the name from a column as the column name, and use the value from select column to be the value in the final Dataframe} \tn
% Row Count 7 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{rownames\_to\_column()}}: add the column name to the rowname in the Dataframe} \tn
% Row Count 9 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Parallel coordinates.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Parallel Coordinates}}(L13)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggparcoord(dataset, columns = ,scale = ,alphaLines= ,splineFactor= ,groupColumn = )} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{dataset}}: the dataset to visualize \newline
    {\bf{columns= }}: select columns of data that will include in the plot \newline
    {\bf{scale= }}: method to scale the data (default is "std") \newline
    {\bf{alphaLines= }}: value of alpha scaler for the lines of the parcoord plot or a column name of the data \newline
    {\bf{splineFactor= }}: logical or numeric operator indicating whether spline interpolation should be used \newline
    {\bf{groupColumn = }}: a single variable to group (color) by} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Biplot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{ Biplot }}(L14)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{pca\textless{}- prcomp(dataset) \newline
    biplot(pca) \newline
    \newline
    draw\_biplot(dataset)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{prcomp()}}: perform a principal components analysis on the given data matrix \newline
    {\bf{draw\_biplot()}}: perform PCA on a data frame and draw a biplot} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Cleveland dot plot.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{Cleveland dot plot}}(L15)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(x = , y = fct\_reorder())) \newline
    +geom\_point(color = ) \newline
    +theme\_linedraw()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_reorder()}}: reorder factor levels by sorting along the variables \newline
    {\bf{geom\_point()}}: create scatterplots \newline
    {\bf{theme\_linedraw()}}: add black lines of various widths on white backgrounds} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Multivariate data: three-column comparison table.
\begin{tabularx}{5.377cm}{x{1.55618 cm} x{1.51041 cm} x{1.51041 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{3}{x{5.377cm}}{\bf\textcolor{white}{{\bf{ Multivariate Data}}(L15)}} \tn
% Row 0
\SetRowColor{LightBackground}
{\bf{Stacked bar chart}} & {\bf{Grouped bar chart}} & {\bf{Mosaic plot (two variables)}} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
ggplot(data, aes(x= , fill = \seqsplit{))+geom\_bar()+scale\_fill\_manual()} &
ggplot(data, aes(x= ,fill= \seqsplit{))+geom\_bar(position} = \seqsplit{"dodge")+scale\_fill\_manual()} &
mosaic(x\textasciitilde{}y, direction = c("v","h"),highlighting\_fill= ) \tn
% Row Count 9 (+ 6)
% Row 2
\SetRowColor{LightBackground}
\textasciitilde{}plot x with different fill in different color &
\textasciitilde{}bar plot grouped x filling with different color &
\textasciitilde{}direction stands for the direction of different variables. \seqsplit{highlighting\_fill} used for distinguish different group \tn
% Row Count 18 (+ 9)
\hhline{>{\arrayrulecolor{DarkBackground}}---}
\end{tabularx}
\par\addvspace{1.3em}

% Alluvial diagram.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Alluvial diagram(L16)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(axis1 = , axis2 = , y = )) + \newline
    geom\_alluvium(color = ) + \newline
    geom\_stratum() + \newline
    geom\_text(stat = "stratum", aes(label = \seqsplit{paste(after\_stat(stratum)}, "\textbackslash{}n", after\_stat(count)))) + \newline
    \seqsplit{scale\_x\_discrete(limits} = )} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{geom\_alluvium()}}: plot both the lodes themselves, using geom\_lode(), and the flows between them, using geom\_flow() \newline
    {\bf{geom\_stratum()}}: plot rectangles for these strata of a provided width \newline
    {\bf{geom\_text()}}: add only text to the plot \newline
    {\bf{scale\_x\_discrete()}}: set the values for discrete x scale aesthetics} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Heatmap.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Heatmap(L17)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(x= , y= )) + \newline
    geom\_tile(aes(fill = ), color = ) + \newline
    coord\_fixed()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{geom\_rect()}}: use the locations of the four corners (xmin, xmax, ymin and ymax) \newline
    {\bf{geom\_tile()}}: use the center of the tile and its size (x, y, width, height) \newline
    {\bf{geom\_raster()}}: a high performance special case for when all the tiles are the same size \newline
    {\bf{coord\_fixed()}}: a fixed scale coordinate system forces a specified ratio between data units on the axes} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Time series.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{ Time series}}(L20)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{ggplot(dataset, aes(x= ,y= ,color= )) \newline
    +geom\_line()+ \newline
    geom\_smooth(method= ,span= )} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{ggplot(dataset, aes(x= ,y= ,color= ))}}: plot multiple time series by different colors \newline
    {\bf{geom\_smooth()}}: add a smooth line according to the data \newline
    {\bf{method= }}: smoothing method (function) to use \newline
    {\bf{span= }}: control the amount of smoothing for the default loess smoother} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Factor helpers: one function per row with alternating row colours.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{{\bf{ Factor in R}}}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_recode()}}: change the name of the factor} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_inorder()}}: display by each factor in the original order} \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_relevel(x, "G1", "G2", after = 3)}}: move the factor "G1", "G2" after the third item in factor x} \tn
% Row Count 6 (+ 3)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_reorder(color, count, .desc=TRUE)}}: order by decreasing frequency count} \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_infreq()}}: display by number of observations with each level (default is decreasing order of frequency)} \tn
% Row Count 11 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_rev()}}: reverse the order of factor levels} \tn
% Row Count 12 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{fct\_explicit\_na()}}: turn NAs into a real factor level} \tn
% Row Count 14 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}