\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}   % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}  % Languages

% Document Info
\author{anaischia2014}
\pdfinfo{
  /Title (m5-machine-learning.pdf)
  /Creator (Cheatography)
  /Author (anaischia2014)
  /Subject (M5 Machine Learning Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm}  % Space between columns
\setlength{\headsep}{-12pt}  % Reduce space between header and content
\setlength{\headheight}{85pt}  % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt}  % Remove footer line
\renewcommand{\headrulewidth}{0pt}  % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi}  % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}}  % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}}  % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}}  % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline}  % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{4393A3}
\definecolor{LightBackground}{HTML}{F3F8F9}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{}  % Set header to blank
\fancyfoot{}  % Set footer to blank
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{M5 Machine Learning Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{anaischia2014} via \textcolor{DarkBackground}{\uline{cheatography.com/217080/cs/47591/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}anaischia2014 \\
\uline{cheatography.com/anaischia2014} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 18th January, 2026.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Supervised learning}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Uses {\bf{labelled training data}}, in which features are mapped to known labels/targets, to predict outcomes for new unseen data (the test set)} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Classification}}: predicts categorical outcomes} \tn
% Row Count 4 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\emph{Logistic regression}}: a {\bf{parametric}} classifier; passes a linear combination of inputs through a {\bf{logistic (sigmoid)}} function; the {\bf{decision boundary}} classifies everything on one side as 0 and everything on the other as 1; if the data is {\bf{not linearly separable}}, the error rate is non-zero} \tn
% Row Count 10 (+ 6)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Regression}}: predicts continuous outcomes} \tn
% Row Count 11 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Cross-validation}}: for tuning hyperparameters and choosing between models; prevents overfitting and data leakage by keeping the test data separate} \tn
% Row Count 14 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Scaling}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Brings features into comparable ranges, leading to faster and more stable model convergence, e.g. for distance-based algorithms} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Normalisation}}: constrains values to a fixed range e.g. {[}0,1{]} or {[}-1,1{]}; MinMaxScaler() or Normalizer()} \tn
% Row Count 6 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Standardisation}}: transforms the mean to 0 and variance/sd to 1 (z-scoring), making the data unitless; StandardScaler()} \tn
% Row Count 9 (+ 3)
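% Row 3 (added example)
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Minimal sketch (X is a placeholder numeric feature array):\{\{nl\}\}from sklearn.preprocessing import MinMaxScaler\{\{nl\}\}mms = MinMaxScaler(feature\_range=(0, 1))\{\{nl\}\}X\_scaled = mms.fit\_transform(X)  \# each column now in {[}0, 1{]}} \tn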
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Evaluation metrics (linear regression)}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{R\textsuperscript{2}}}: proportion of variance explained by model features; closer to 1 is better} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{MAE}}: average magnitude of errors; easily interpretable (same units as target); robust to outliers; smaller is better} \tn
% Row Count 5 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{MSE/RMSE}}: average squared difference between predicted and actual values; sensitive to outliers; smaller is better} \tn
% Row Count 8 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Evaluation metrics (classification)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/anaischia2014_1768741321_precision-recall.jpeg}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{False Positives}} are misdiagnoses, so {\bf{precision}} = TP/(TP+FP) measures how many predicted positives are {\emph{actually}} positive \newline {\bf{False Negatives}} are missed diagnoses, so {\bf{recall/sensitivity}} = TP/(TP+FN) measures how many actual positives are {\emph{identified}} \newline {\bf{ROC-AUC}}: true positive rate vs false positive rate across thresholds; closer to 1 is better \newline {\bf{Specificity}}: TN/(TN+FP)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Supervised Learning Pipeline}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{X = data.drop(columns='target')\{\{nl\}\}y = data{[}{[}'target'{]}{]}\{\{nl\}\}x\_train, x\_test, y\_train, y\_test = \{\{nl\}\}train\_test\_split(X, y, test\_size = 0.20, ...)\{\{nl\}\}scaler = StandardScaler()\{\{nl\}\}x\_train = scaler.fit\_transform(x\_train)\{\{nl\}\}x\_test = scaler.transform(x\_test)\{\{nl\}\}model = LinearRegression()\{\{nl\}\}model.fit(x\_train, y\_train)\{\{nl\}\}y\_pred = model.predict(x\_test)} \tn
% Row Count 8 (+ 8)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{K-fold cross validation}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/anaischia2014_1768744088_k-fold cross validation.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Splits dataset into 'k' equal-sized folds, using k-1 folds for training and the remaining fold for validation, repeating k times to get an {\bf{average performance score}}; {\bf{useful when data is limited}} because every data point is used for both training and validation; {\bf{leave-one-out cross-validation}} (LOOCV) is the special case where k equals the number of data points} \tn
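\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Sketch with scikit-learn (model, X and y as defined in the pipeline above):\{\{nl\}\}from sklearn.model\_selection import cross\_val\_score\{\{nl\}\}scores = cross\_val\_score(model, X, y, cv=5)  \# 5-fold CV\{\{nl\}\}print(scores.mean(), scores.std())} \tn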
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{K-Nearest Neighbours (KNN)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/anaischia2014_1768747174_knn_algorithm.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Non-parametric classifier}} that looks at the K points in the training set nearest to the test input x, then takes a majority vote over these neighbours (an average, for regression); {\bf{memory-based/instance-based learning}}; works well given a good distance metric ({\bf{Euclidean}}) and sufficient training data; {\bf{poor performance under high dimensionality}}; \seqsplit{KNeighborsClassifier(n\_neighbors=3)}.fit(X,y)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{L1 vs L2 regularisation in regression}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{L1 (Lasso)}}: sets some coefficients to 0 (feature selection); may jeopardise accuracy in small datasets} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{L2 (Ridge)}}: shrinks coefficients towards 0 (without zeroing them) and penalises large weights} \tn
% Row Count 5 (+ 2)
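\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Sketch (alpha values are placeholders; larger alpha = stronger penalty):\{\{nl\}\}from sklearn.linear\_model import Lasso, Ridge\{\{nl\}\}lasso = Lasso(alpha=0.1).fit(x\_train, y\_train)  \# some coefs become 0\{\{nl\}\}ridge = Ridge(alpha=1.0).fit(x\_train, y\_train)  \# coefs shrunk, not zeroed} \tn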
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Parametric vs Nonparametric models}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Parametric models}}: fixed number of parameters, set by the model form, e.g. one coefficient per feature in regression or Naive Bayes, or a fixed number of centroids in k-means clustering; {\bf{faster performance but stronger assumptions}}} \tn
% Row Count 5 (+ 5)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Non-parametric models}}: make few assumptions about the dataset; the number of parameters grows with the amount of training data e.g. KNN, decision trees, random forest, kernel SVMs; {\bf{flexible but computationally expensive}}} \tn
% Row Count 10 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Unsupervised Learning}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{K-means clustering}}: uses Euclidean distance (scale features!) and iteratively {\bf{minimises inertia}} (within-cluster sum-of-squares)} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{k cluster centroids chosen at random $\rightarrow$ each datapoint assigned to the cluster with the nearest centroid $\rightarrow$ each centroid updated by taking the mean of all points assigned to that cluster} \tn
% Row Count 7 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Elbow method}}: plot inertia against the number of clusters and pick the k where the decrease levels off (the 'elbow') to determine the optimal number of clusters} \tn
% Row Count 9 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{kmeans = KMeans(n\_clusters = 3, init = 'k-means++', max\_iter = 300, ...)\{\{nl\}\}y\_kmeans = \seqsplit{kmeans.fit\_predict(feat\_array)}} \tn
% Row Count 12 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Marginalisation}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{$P(X{=}x) = \sum_{y} P(X{=}x, Y{=}y)$, i.e. sum the joint probability of $X{=}x$ over all possible values of $Y$} \tn
% Row Count 2 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Conditional probability}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/anaischia2014_1768751894_Screenshot 2026-01-18 at 3.57.20 PM.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Bayes Rule}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/anaischia2014_1768752416_Screenshot 2026-01-18 at 4.02.11 PM.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Base-rate fallacy}}: ignoring the prior probability (base rate) when interpreting a positive result, so the share of false positives is underestimated; also report precision or a confusion matrix} \tn
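\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Worked example (illustrative numbers): prevalence P(D) = 1\%, sensitivity 90\%, false positive rate 5\% \newline $P(D \mid +) = \frac{0.9 \times 0.01}{0.9 \times 0.01 + 0.05 \times 0.99} \approx 0.15$ \newline so most positives are still false positives despite the 90\% sensitive test} \tn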
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Naive Bayes}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/anaischia2014_1768757679_Screenshot 2026-01-18 at 5.18.13 PM.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Assumes features are {\bf{independent}}; requires only a {\bf{small amount of training data}} to estimate parameters; aim is to predict {\bf{P(label | features)}}; fast, but a poor probability estimator} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Gaussian Naive Bayes classifier}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{GNB = \seqsplit{GaussianNB(var\_smoothing=0.5)}} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{GNB.fit(x\_train, y\_train)} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{y\_pred = GNB.predict(x\_test)} \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{y\_pred\_probs = \seqsplit{GNB.predict\_proba(x\_test)}} \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Compute {\bf{calibration curves}} and the {\bf{Brier score}} (lower is better), vary the classification {\bf{decision threshold}} (typically 0.5) and assess AUC, use GridSearchCV to tune the var\_smoothing parameter} \tn
% Row Count 8 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Support Vector Machines (SVMs)}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Supervised classifier that attempts to separate classes of data using a {\bf{hyperplane}}, assuming the two categories are {\bf{linearly separable}} (possibly after a kernel transformation)} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Optimal hyperplane}}: maximises the {\bf{margin}} to the nearest training points (the support vectors), minimising hinge loss and sensitivity to noise and preventing {\bf{overfitting}}} \tn
% Row Count 6 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Types of kernels}}: linear, poly, rbf, sigmoid; higher capacity and overfitting risk with more complex kernels} \tn
% Row Count 9 (+ 3)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{svc = SVC(kernel='linear', ...)\{\{nl\}\}svc.fit(X\_train, y\_train)} \tn
% Row Count 11 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Compute accuracy and plot decision boundaries; use GridSearchCV to tune the kernel and C hyperparameters ({\bf{large C = small margin}}), as in the sketch in the last row of this block} \tn
% Row Count 14 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Pros}}: {\bf{high-dimensional spaces}}; {\bf{memory-efficient}} as only the support vectors are used; {\bf{versatile}} with different kernels} \tn
% Row Count 17 (+ 3)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Limitations}}: do not provide direct probability estimates; poor performance/overfitting risk if the number of features \textgreater{} the number of samples} \tn
% Row Count 20 (+ 3)
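\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Tuning sketch (grid values are placeholders):\{\{nl\}\}from sklearn.model\_selection import GridSearchCV\{\{nl\}\}params = \{'kernel': {[}'linear', 'rbf'{]}, 'C': {[}0.1, 1, 10{]}\}\{\{nl\}\}gs = GridSearchCV(SVC(), params, cv=5)\{\{nl\}\}gs.fit(X\_train, y\_train)\{\{nl\}\}print(gs.best\_params\_, gs.best\_score\_)} \tn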
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Decision Trees (DTs)}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Nonparametric}} supervised learning for both classification and regression} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Selects features iteratively based on a {\bf{criterion}}: lowest {\emph{entropy}}/highest information gain, or {\emph{Gini impurity}}, i.e. how mixed the classes are within a node} \tn
% Row Count 6 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{node = feature, branch = choice, leaves = outcome}}} \tn
% Row Count 8 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{dt = \seqsplit{DecisionTreeClassifier(...);} dt.fit(X\_train, y\_train); Compute accuracy and decision boundaries; Use GridSearchCV to tune the criterion and tree depth parameters} \tn
% Row Count 12 (+ 4)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Limitations}}: prone to overfitting; poor generalisability; high variance/unstable: slight changes in the dataset can drastically change the splits, complicating interpretation; errors at the top affect lower splits due to the hierarchical nature; biased if the dataset is unbalanced} \tn
% Row Count 18 (+ 6)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Random Forest}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Ensemble model that consists of {\bf{multiple trees/base estimators}}; overcomes limitations of DTs} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Averaging}}: build several independent estimators and average their predictions, {\bf{reducing variance and overfitting}} in the combined estimator} \tn
% Row Count 5 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\emph{Pasting}}: random subsets of the dataset are drawn as random subsets of samples} \tn
% Row Count 7 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\emph{Bagging/bootstrapping}}: samples are drawn with replacement} \tn
% Row Count 9 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\emph{Random Subspaces}}: random subsets of the dataset are drawn as random subsets of features} \tn
% Row Count 11 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\emph{Random Patches}}: base estimators are built on subsets of both samples and features} \tn
% Row Count 13 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Boosting}}: base estimators built sequentially, with each new estimator trying to {\bf{reduce the bias and underfitting}} of the combined estimator} \tn
% Row Count 16 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\emph{Gradient Boosting}} fits each new tree to the residuals of the previous ones (sequential/iterative); {\emph{XGBoost}} is an optimised gradient boosting library that parallelises the construction of each tree; see the boosting sketch in the last row of this block} \tn
% Row Count 18 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{rf = \seqsplit{RandomForestClassifier();} rf.fit(X\_train, y\_train); Use a pipeline for heterogeneous ensembles} \tn
% Row Count 20 (+ 2)
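% Row 9 (added example)
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Boosting sketch (hyperparameter values are placeholders):\{\{nl\}\}from sklearn.ensemble import GradientBoostingClassifier\{\{nl\}\}gb = GradientBoostingClassifier(n\_estimators=100, learning\_rate=0.1)\{\{nl\}\}gb.fit(X\_train, y\_train)  \# trees added sequentially to correct previous errors} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}
\end{document}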