% Preamble for a Cheatography-generated cheat sheet: document class,
% package loading, PDF metadata, page geometry and table spacing.
% One statement per line so that each trailing `%` comment only
% comments out its own remark and not the commands that follow it.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links

% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{Niki (worlddoit)}
\pdfinfo{
  /Title (r-ml-library.pdf)
  /Creator (Cheatography)
  /Author (Niki (worlddoit))
  /Subject (r ml library Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit

% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{6200A3} \definecolor{LightBackground}{HTML}{F5EFF9} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{r ml library Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{Niki (worlddoit)} via \textcolor{DarkBackground}{\uline{cheatography.com/170195/cs/36005/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}Niki (worlddoit) \\ \uline{cheatography.com/worlddoit} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Not Yet Published.\\ Updated 11th December, 2022.\\ Page {\thepage} of \pageref{LastPage}. 
\end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{3} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Intro}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{Additional material about top libraries for Machine Learning in R.% Row Count 2 (+ 2) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(**Basics)} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{1. 
XML}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{{\bf{You can read an XML file in R using the "XML" package.}} \newline % Row Count 2 (+ 2) `install.packages("XML")`\{\{noshy\}\} \newline % Row Count 3 (+ 1) `\# Also load the other required package.`\{\{noshy\}\} \newline % Row Count 5 (+ 2) `library("methods")`\{\{noshy\}\} \newline % Row Count 6 (+ 1) `\# Give the input file name to the function.`\{\{noshy\}\} \newline % Row Count 8 (+ 2) `result \textless{}- xmlParse(file = "input.xml")`\{\{noshy\}\} \newline % Row Count 9 (+ 1) `\# Extract the root node from the xml file.`\{\{noshy\}\} \newline % Row Count 11 (+ 2) `rootnode \textless{}- xmlRoot(result)`\{\{noshy\}\} \newline % Row Count 12 (+ 1) `\# Find number of nodes in the root.`\{\{noshy\}\} \newline % Row Count 13 (+ 1) `rootsize \textless{}- xmlSize(rootnode)`\{\{noshy\}\}% Row Count 14 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://www.tutorialspoint.com/r/r\_xml\_files.htm"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{2. dplyr}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{{\bf{dplyr}} is a grammar of data manipulation, providing a consistent set of verbs that help you solve the most common data manipulation challenges: \newline % Row Count 3 (+ 3) `mutate()` \{\{noshy\}\} adds new variables that are functions of existing variables \newline % Row Count 5 (+ 2) `select()` \{\{noshy\}\} picks variables based on their names. \newline % Row Count 7 (+ 2) `filter()` \{\{noshy\}\} picks cases based on their values. 
\newline % Row Count 9 (+ 2) `summarise()` \{\{noshy\}\} reduces multiple values down to a single summary. \newline % Row Count 11 (+ 2) `arrange()` changes the ordering of the rows. \newline % Row Count 12 (+ 1) These all combine naturally with `group\_by()` \{\{noshy\}\} which allows you to perform any operation "by group". \newline % Row Count 15 (+ 3) Example: \newline % Row Count 16 (+ 1) `starwars \%\textgreater{}\%`\{\{noshy\}\} \newline % Row Count 17 (+ 1) ` group\_by(species) \%\textgreater{}\%`\{\{noshy\}\} \newline % Row Count 18 (+ 1) ` summarise( n = n(), mass = mean(mass, na.rm = TRUE)`\{\{noshy\}\} \newline % Row Count 20 (+ 2) ` ) \%\textgreater{}\%`\{\{noshy\}\} \newline % Row Count 21 (+ 1) ` filter( n \textgreater{} 1, mass \textgreater{} 50)`\{\{noshy\}\}% Row Count 22 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://dplyr.tidyverse.org/"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{3. 
xgboost}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{`library(xgboost)`\{\{noshy\}\} \newline % Row Count 1 (+ 1) `\# load data`\{\{noshy\}\} \newline % Row Count 2 (+ 1) `data(agaricus.train, package = 'xgboost')`\{\{noshy\}\} \newline % Row Count 4 (+ 2) `data(agaricus.test, package = 'xgboost')`\{\{noshy\}\} \newline % Row Count 6 (+ 2) `train \textless{}- agaricus.train`\{\{noshy\}\} \newline % Row Count 7 (+ 1) `test \textless{}- agaricus.test`\{\{noshy\}\} \newline % Row Count 8 (+ 1) `\# fit model`\{\{noshy\}\} \newline % Row Count 9 (+ 1) `bst \textless{}- xgboost(data = train\$data, label = train\$label, max\_depth = 2, eta = 1, nrounds = 2, `\{\{noshy\}\} \newline % Row Count 12 (+ 3) ` nthread = 2, objective = "binary:logistic")`\{\{noshy\}\} \newline % Row Count 14 (+ 2) `\# predict`\{\{noshy\}\} \newline % Row Count 15 (+ 1) `pred \textless{}- predict(bst, test\$data)`\{\{noshy\}\}% Row Count 16 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://xgboost.ai/about"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{4. mlr3}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{Lots of functionality, you can deal with clustering, regression, classification, and survival analysis. 
\newline % Row Count 3 (+ 3) `library(mlr3)`\{\{noshy\}\} \newline % Row Count 4 (+ 1) `\# create learning task`\{\{noshy\}\} \newline % Row Count 5 (+ 1) `task\_penguins = \seqsplit{as\_task\_classif(species} \textasciitilde{} ., data = palmerpenguins::penguins)`\{\{noshy\}\} \newline % Row Count 7 (+ 2) `\# load learner and set hyperparameter`\{\{noshy\}\} \newline % Row Count 8 (+ 1) `learner = lrn("classif.rpart", cp = .01)`\{\{noshy\}\} \newline % Row Count 10 (+ 2) `\# train/test split`\{\{noshy\}\} \newline % Row Count 11 (+ 1) `split = partition(task\_penguins, ratio = 0.67)`\{\{noshy\}\} \newline % Row Count 13 (+ 2) `\# train the model`\{\{noshy\}\} \newline % Row Count 14 (+ 1) \seqsplit{`learner\$train(task\_penguins}, split\$train\_set)`\{\{noshy\}\} \newline % Row Count 16 (+ 2) `\# predict data`\{\{noshy\}\} \newline % Row Count 17 (+ 1) `prediction = \seqsplit{learner\$predict(task\_penguins}, split\$test\_set)`\{\{noshy\}\} \newline % Row Count 19 (+ 2) `\# calculate performance`\{\{noshy\}\} \newline % Row Count 20 (+ 1) `prediction\$confusion`\{\{noshy\}\} \newline % Row Count 21 (+ 1) `measure = msr("classif.acc")`\{\{noshy\}\} \newline % Row Count 22 (+ 1) `prediction\$score(measure)`\{\{noshy\}\} \newline % Row Count 23 (+ 1) `\# 3-fold cross validation`\{\{noshy\}\} \newline % Row Count 24 (+ 1) `resampling = rsmp("cv", folds = 3L)`\{\{noshy\}\} \newline % Row Count 25 (+ 1) `\# run experiments`\{\{noshy\}\} \newline % Row Count 26 (+ 1) `rr = resample(task\_penguins, learner, resampling)`\{\{noshy\}\} \newline % Row Count 28 (+ 2) `\# access results`\{\{noshy\}\} \newline % Row Count 29 (+ 1) `rr\$score(measure){[}, .(task\_id, learner\_id, iteration, classif.acc){]}`\{\{noshy\}\} \newline % Row Count 31 (+ 2) } \tn \end{tabularx} \par\addvspace{1.3em} \vfill \columnbreak \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{4. 
mlr3 (cont)}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{`rr\$aggregate(measure)`\{\{noshy\}\}% Row Count 1 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://mlr3.mlr-org.com/"\}\}2\{\{/popup\}\}, \{\{popup="https://cran.r-project.org/web/packages/mlr3/mlr3.pdf"\}\}3\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{4. mlr3 additional}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/worlddoit_1670702500_ экрана 2022-12-10 в 21.01.23.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional)} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{5. knitr}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{It is reproducible, used for report creation, and integrates with various types of code structures like LaTeX, HTML, Markdown, LyX, etc. \newline % Row Count 3 (+ 3) This package is an amazing one, you can make a beautiful pdf report and editable pdf forms with the help of latex coding. 
\newline % Row Count 6 (+ 3) `kable`\{\{noshy\}\} \newline % Row Count 7 (+ 1) `xtable`\{\{noshy\}\} \newline % Row Count 8 (+ 1) `tikzDevice`\{\{noshy\}\}% Row Count 9 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://sachsmc.github.io/knit-git-markr-guide/knitr/knit.html"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{6. plotly}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{Plotly's R graphing library makes interactive, publication-quality graphs. Example of how to make line plots, scatter plots, area charts, bar charts, error bars, box plots, histograms, heatmaps, subplots, multiple-axes, and 3D (WebGL based) charts. 
\newline % Row Count 5 (+ 5) `library(plotly)`\{\{noshy\}\} \newline % Row Count 6 (+ 1) `\# volcano is a numeric matrix that ships with R`\{\{noshy\}\} \newline % Row Count 8 (+ 2) `fig \textless{}- plot\_ly(z = \textasciitilde{}volcano) \%\textgreater{}\% add\_surface(`\{\{noshy\}\} \newline % Row Count 10 (+ 2) ` contours = list(`\{\{noshy\}\} \newline % Row Count 11 (+ 1) ` z = list( show=TRUE, usecolormap=TRUE,`\{\{noshy\}\} \newline % Row Count 13 (+ 2) ` \seqsplit{highlightcolor="\#ff0000"}, project=list(z=TRUE) ) ) )`\{\{noshy\}\} \newline % Row Count 15 (+ 2) `fig \textless{}- fig \%\textgreater{}\% layout(`\{\{noshy\}\} \newline % Row Count 16 (+ 1) ` scene = list( camera= list(`\{\{noshy\}\} \newline % Row Count 17 (+ 1) ` eye = list(x=1.87, y=0.88, z=-0.64) )))`\{\{noshy\}\} \newline % Row Count 19 (+ 2) `fig`\{\{noshy\}\}% Row Count 20 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://plotly.com/r/"\}\}1\{\{/popup\}\}, \{\{popup="https://plotly-r.com/"\}\}2\{\{/popup\}\}, \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}3\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{6. plotly: Output}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/worlddoit_1670748653_ экрана 2022-12-11 в 9.09.34.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional)} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{7. 
e1071}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{If you are dealing with clustering, Fourier Transform, Naive Bayes, SVM, and other types of modeling data analysis, then you can't avoid e1071. \newline % Row Count 3 (+ 3) Example: \newline % Row Count 4 (+ 1) `\#Author DataFlair`\{\{noshy\}\} \newline % Row Count 5 (+ 1) `library("e1071")`\{\{noshy\}\} \newline % Row Count 6 (+ 1) `data("iris")`\{\{noshy\}\} \newline % Row Count 7 (+ 1) `head(iris)`\{\{noshy\}\} \newline % Row Count 8 (+ 1) `x \textless{}- iris{[},-5{]}`\{\{noshy\}\} \newline % Row Count 9 (+ 1) `y \textless{}- iris{[}5{]}`\{\{noshy\}\} \newline % Row Count 10 (+ 1) `model\_svm \textless{}- svm(Species \textasciitilde{} ., data=iris)`\{\{noshy\}\} \newline % Row Count 12 (+ 2) `summary(model\_svm)`\{\{noshy\}\} \newline % Row Count 13 (+ 1) `pred \textless{}- predict(model\_svm,x)`\{\{noshy\}\} \newline % Row Count 14 (+ 1) `confusionMatrix(pred,y\$Species)`\{\{noshy\}\}% Row Count 15 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://data-flair.training/blogs/e1071-in-r/"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{8. tidyverse}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{For data manipulation. Covered in lectures (see previous sections). 
\newline % Row Count 2 (+ 2) Example: \newline % Row Count 3 (+ 1) `library(tidyverse)`\{\{noshy\}\} \newline % Row Count 4 (+ 1) `library(lubridate)`\{\{noshy\}\} \newline % Row Count 5 (+ 1) `library(nycflights13)`\{\{noshy\}\} \newline % Row Count 6 (+ 1) Create a new column based on a condition \newline % Row Count 7 (+ 1) `flights \%\textgreater{}\%`\{\{noshy\}\} \newline % Row Count 8 (+ 1) ` mutate(long\_flight = (air\_time \textgreater{}= 6 * 60)) \%\textgreater{}\%`\{\{noshy\}\} \newline % Row Count 10 (+ 2) ` View()`\{\{noshy\}\} \newline % Row Count 11 (+ 1) Randomly Shuffle the data \newline % Row Count 12 (+ 1) `flights \%\textgreater{}\%`\{\{noshy\}\} \newline % Row Count 13 (+ 1) ` slice\_sample(n = 15)`\{\{noshy\}\}% Row Count 14 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://finnstats.com/index.php/2021/04/02/tidyverse-in-r/"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{9. caret}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{If you are dealing with classification and regression problems then caret is one of the essential packages. \newline % Row Count 3 (+ 3) An extension of `caret` is caretEnsemble, which is used for combining different models. 
\newline % Row Count 6 (+ 3) Example: \newline % Row Count 7 (+ 1) Visualization of feature distribution by class \newline % Row Count 8 (+ 1) `featurePlot(x = GermanCredit{[},c("EmploymentDuration", "Age"){]},`\{\{noshy\}\} \newline % Row Count 10 (+ 2) ` y = GermanCredit\$Class,`\{\{noshy\}\} \newline % Row Count 11 (+ 1) ` plot = "box")`\{\{noshy\}\} \newline % Row Count 12 (+ 1) Pre-processing: imputation of missing data, one-hot encoding, and normalization \newline % Row Count 14 (+ 2) `set.seed(355)`\{\{noshy\}\} \newline % Row Count 15 (+ 1) `bagMissing \textless{}- preProcess(trainingSet, method = "bagImpute")`\{\{noshy\}\} \newline % Row Count 17 (+ 2) `trainingSet \textless{}- predict(bagMissing, newdata = trainingSet)`\{\{noshy\}\} \newline % Row Count 19 (+ 2) `dummyModel \textless{}- dummyVars(Class \textasciitilde{} ., data = trainingSet)`\{\{noshy\}\} \newline % Row Count 21 (+ 2) `trainingSetX \textless{}- \seqsplit{as.data.frame(predict(dummyModel}, newdata = trainingSet))`\{\{noshy\}\}% Row Count 23 (+ 2) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://rstudio-pubs-static.s3.amazonaws.com/867468\_4fc8916a0b2f4732b2a04fd20c7b5b70.html"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{10. shiny}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{If you are thinking about an interactive and beautiful web interface then Shiny is the solution. \newline % Row Count 2 (+ 2) Shiny interfaces are directly written in R and provide a customizable slider widget that has built-in support for animation. 
\newline % Row Count 5 (+ 3) Example: \newline % Row Count 6 (+ 1) `library(shiny)`\{\{noshy\}\} \newline % Row Count 7 (+ 1) `\# See above for the definitions of ui and server`\{\{noshy\}\} \newline % Row Count 9 (+ 2) `ui \textless{}- ...`\{\{noshy\}\} \newline % Row Count 10 (+ 1) `server \textless{}- ...`\{\{noshy\}\} \newline % Row Count 11 (+ 1) `shinyApp(ui = ui, server = server)`\{\{noshy\}\}% Row Count 12 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://shiny.rstudio.com/tutorial/written-tutorial/lesson1/"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{10. shiny: Output}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/worlddoit_1670751729_ экрана 2022-12-11 в 10.41.52.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://data-flair.training/blogs/e1071-in-r/"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{11. tidyquant}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{`tidyquant` is considered as a financial package that is used to carry out quantitative financial analysis. 
\newline % Row Count 3 (+ 3) Package tidyquant is also widely used for importing, analyzing, and visualizing data. \newline % Row Count 5 (+ 2) Example: \newline % Row Count 6 (+ 1) `library(tidyquant)`\{\{noshy\}\} \newline % Row Count 7 (+ 1) `google \textless{}- tq\_get(x = "GOOG")`\{\{noshy\}\} \newline % Row Count 8 (+ 1) `tq\_get\_options()`\{\{noshy\}\} \newline % Row Count 9 (+ 1) `?tq\_get`\{\{noshy\}\} \newline % Row Count 10 (+ 1) `tq\_exchange\_options()`\{\{noshy\}\} \newline % Row Count 11 (+ 1) `nyse \textless{}- tq\_exchange("NYSE")`\{\{noshy\}\}% Row Count 12 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://quantdev.ssri.psu.edu/sites/qdev/files/tidyquant\_tutorial\_Gray.html"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{12. tidyr}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{`tidyr` is a new package that makes it easy to "tidy" your data. tidyr package is an evolution of Reshape2. \newline % Row Count 3 (+ 3) The data is considered tidy when each column represents a variable and each row represents an observation. \newline % Row Count 6 (+ 3) `gather()` makes "wide" data longer \newline % Row Count 7 (+ 1) `spread()` makes "long" data wider \newline % Row Count 8 (+ 1) `separate()` splits a single column into multiple columns \newline % Row Count 10 (+ 2) `unite()` combines multiple columns into a single column \newline % Row Count 12 (+ 2) and More... 
\newline % Row Count 13 (+ 1) Example: \newline % Row Count 14 (+ 1) `wide\_DF \textless{}- unite\_DF \%\textgreater{}\% spread(Quarter, Revenue)`\{\{noshy\}\} \newline % Row Count 16 (+ 2) `head(wide\_DF, 24)`\{\{noshy\}\}% Row Count 17 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://uc-r.github.io/tidyr"\}\}2\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{13. ggraph}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{Takes away all the limitations of ggplot2. \newline % Row Count 1 (+ 1) Example for Networks: \newline % Row Count 2 (+ 1) `ggraph(ig\_loc, layout="fr") + `\{\{noshy\}\} \newline % Row Count 3 (+ 1) ` \seqsplit{geom\_edge\_link(aes(color} = factor(to), width = log(weight)), alpha = 0.5, `\{\{noshy\}\} \newline % Row Count 5 (+ 2) ` start\_cap = circle(2, 'mm'), end\_cap = circle(2, 'mm')) + `\{\{noshy\}\} \newline % Row Count 7 (+ 2) ` scale\_edge\_width(range = c(0.5, 2.5)) + `\{\{noshy\}\} \newline % Row Count 9 (+ 2) ` geom\_node\_point(color = V(ig\_loc)\$color, size = 5, alpha = 0.5) +`\{\{noshy\}\} \newline % Row Count 11 (+ 2) ` \seqsplit{geom\_node\_text(aes(label} = name), repel = TRUE) +`\{\{noshy\}\} \newline % Row Count 13 (+ 2) ` theme\_void() + `\{\{noshy\}\} \newline % Row Count 14 (+ 1) ` theme(legend.position = "none") `\{\{noshy\}\}% Row Count 15 (+ 1) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, 
\{\{popup="https://rpubs.com/neloe/ggraph\_intro"\}\}2\{\{/popup\}\}, \newline \{\{popup="https://www.data-imaginist.com/2017/ggraph-introduction-layouts/"\}\}3\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{13. ggraph: Output}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/worlddoit_1670753329_ экрана 2022-12-11 в 11.05.43.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional)} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{14. ggplot2}} \tn \SetRowColor{white} \mymulticolumn{1}{x{5.377cm}}{`ggplot2` is one of the most popular visualization packages in R. \newline % Row Count 2 (+ 2) It is famous for its functionality and high-quality graphs that set it apart from other visualization packages. 
\newline % Row Count 5 (+ 3) Example: \newline % Row Count 6 (+ 1) `ggplot(chic, aes(x = o3, y = temp))+`\{\{noshy\}\} \newline % Row Count 7 (+ 1) ` labs(x = "Ozone Level", y = "Temperature (°F)") +`\{\{noshy\}\} \newline % Row Count 9 (+ 2) ` geom\_smooth(`\{\{noshy\}\} \newline % Row Count 10 (+ 1) ` method = "lm",`\{\{noshy\}\} \newline % Row Count 11 (+ 1) ` formula = y \textasciitilde{} x + I(x\textasciicircum{}2) + I(x\textasciicircum{}3) + I(x\textasciicircum{}4) + I(x\textasciicircum{}5),`\{\{noshy\}\} \newline % Row Count 13 (+ 2) ` color = "black",`\{\{noshy\}\} \newline % Row Count 14 (+ 1) ` fill = "firebrick") +`\{\{noshy\}\} \newline % Row Count 15 (+ 1) ` geom\_point(color = "gray40", alpha = .3)`\{\{noshy\}\}% Row Count 17 (+ 2) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional) Resources: \{\{popup="https://www.r-bloggers.com/2021/04/15-essential-packages-in-r-for-data-science/"\}\}1\{\{/popup\}\}, \{\{popup="https://www.cedricscherer.com/2019/08/05/a-ggplot2-tutorial-for-beautiful-plotting-in-r/"\}\}2\{\{/popup\}\}, \{\{popup="http://r-statistics.co/Top50-Ggplot2-Visualizations-MasterList-R-Code.html"\}\}3\{\{/popup\}\}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{14. ggplot2: Output}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/worlddoit_1670754604_ экрана 2022-12-11 в 11.10.46.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{5.377cm}}{(*Additional)} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}