\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
% \author stores the author name for \maketitle. This file never calls
% \maketitle, so the name only reaches the output through the header/footer
% text and the PDF metadata set below.
\author{HockeyPlay21}
% Title, Creator, Author and Subject entries for the PDF's document
% information.
% NOTE(review): \pdfinfo is an engine-level primitive, presumably pdfTeX
% here -- confirm the build engine before switching to XeLaTeX/LuaLaTeX.
\pdfinfo{
  /Title (data-mining.pdf)
  /Creator (Cheatography)
  /Author (HockeyPlay21)
  /Subject (Data Mining Cheat Sheet)
}

% Lengths and widths
% Page geometry is adjusted relative to the article/a4paper defaults:
% the text block becomes 6cm wider and is shifted 3cm to the left, so the
% extra width is taken equally from both side margins; it becomes 1cm
% shorter and is shifted 2cm upwards.
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
% Break marker that seqsplit inserts between characters: a plain break
% opportunity in math mode, a discretionary hyphen (\-) in text mode.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: remembers <colour> globally in \RowColorName and
% colours the current table row with it via \rowcolor. The \gdef is wrapped
% in \noalign so the assignment happens between rows rather than inside a
% cell. Must be the first thing in a row.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn spanning <n> columns
% whose column spec is prefixed with \columncolor{\RowColorName}, so the
% spanning cell is filled with the colour last set by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
% x{<width>}: paragraph (p) column of the given width, set ragged-right.
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
% \tn: row terminator used throughout the tables. \raggedright inside the
% x column changes the meaning of \\, so rows are ended with
% \tabularnewline instead.
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
% Named colours (hex RGB) used by the header, footer and tables.
% HeadBackground is defined but not referenced anywhere in this file.
\definecolor{HeadBackground}{HTML}{333333}
% Title-row background of the three footer tables.
\definecolor{FootBackground}{HTML}{666666}
% Default colour of all body text (activated by \color below).
\definecolor{TextColor}{HTML}{333333}
% Table title rows, table bottom rules and the highlighted header text.
\definecolor{DarkBackground}{HTML}{A3A3A3}
% Shaded rows; multi-row tables alternate it with white.
\definecolor{LightBackground}{HTML}{F3F3F3}
% Use the sans-serif family as the document default.
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
% All pages use the fancyhdr "fancy" style. Every header/footer field is
% cleared first and then only the left field of each is filled.
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: a 3-column multicols layout of which two columns are filled
% (there is a single \columnbreak): the logo on the left, then the sheet
% title with author and URL.
% NOTE(review): the logo is loaded from an absolute server path, so the
% file only compiles where that path exists -- confirm before building
% elsewhere.
% NOTE(review): \large and \normalsize are declarations, not one-argument
% commands; the braces after them merely group, and the size stays active
% until the table cell ends. \bf is the obsolete LaTeX 2.09 font switch.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Data Mining Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{HockeyPlay21} via \textcolor{DarkBackground}{\uline{cheatography.com/36862/cs/11602/}}}
\end{tabulary}
\end{multicols}}

% Footer: three side-by-side 5.8cm tables in a 3-column multicols layout,
% set in \footnotesize. Each table starts with a FootBackground-coloured
% title row in white bold text:
%   1. "Cheatographer" -- author name and profile URL
%   2. "Cheat Sheet"   -- publication/update dates and "Page x of y"
%                         (the total comes from the LastPage label of the
%                         lastpage package)
%   3. "Sponsor"       -- sponsor text
% The \vfill + \columnbreak pairs end one column and start the next.
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}HockeyPlay21 \\
  \uline{cheatography.com/hockeyplay21} \\
  \end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}}  \\
   \vspace{-2pt}Published 30th April, 2017.\\
   Updated 30th April, 2017.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}}  \\
  % Reset the row colour to white so the sponsor text row is not shaded.
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Body font size for the cheat sheet text. Replace the size command
% below with any other size listed on this page to resize the text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{2}

\begin{tabularx}{8.4cm}{x{3.04 cm} x{4.96 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Data Mining Steps}}  \tn
% Row 0
\SetRowColor{LightBackground}
1. Data Cleaning & Removal of noise and inconsistent records \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
2. Data Integration & Combining multiple sources \tn 
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
3. Data Selection & Only data relevant for the task are retrieved from the database \tn 
% Row Count 7 (+ 3)
% Row 3
\SetRowColor{white}
4. Data Transformation & Converting data into a form more appropriate for mining \tn 
% Row Count 10 (+ 3)
% Row 4
\SetRowColor{LightBackground}
5. Data Mining & Application of intelligent methods to extract data patterns \tn 
% Row Count 13 (+ 3)
% Row 5
\SetRowColor{white}
6. Model Evaluation & Identification of truly interesting patterns representing knowledge \tn 
% Row Count 16 (+ 3)
% Row 6
\SetRowColor{LightBackground}
7. Knowledge Presentation & Visualization or other knowledge presentation techniques \tn 
% Row Count 19 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{8.4cm}}{Data mining could also be called Knowledge Discovery in Databases (see kdnuggets.com)}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{x{1.36 cm} x{6.64 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Types of Attributes}}  \tn
% Row 0
\SetRowColor{LightBackground}
\seqsplit{Nominal} & e.g., ID numbers, eye color, zip codes \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{Ordinal} & e.g., rankings, grades, height \tn 
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{Interval} & e.g., calendar dates, temperatures \tn 
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
Ratio & e.g., length, time, counts \tn 
% Row Count 7 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Distance Measures}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493565206_Distance.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Manhattan = City Block \newline  \newline Jaccard coefficient, Hamming, Cosine are similarity / dissimilarity measures}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Measures of Node Impurity}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493266681_Impurity.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Model Evaluation}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493271522_ModelEval.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Kappa = (observed agreement - chance agreement) / (1 - chance agreement) \newline  \newline Kappa = (Dreal - Drandom) / (Dperfect - Drandom), where D indicates the sum of the values on the diagonal of the confusion matrix}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{K-Nearest Neighbor}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{{\emph{ Compute distance between two points \newline  \newline }} Determine the class from nearest neighbor list \newline     {\emph{ Take the majority vote of class labels  \newline       among the k-nearest neighbors \newline  \newline     }} Weigh the vote according to distance \newline         * weight factor, w = 1 / d\textasciicircum{}2} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Rule-based Classification}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Classify records by using a collection of  \newline % Row Count 1 (+ 1)
``if\dots then\dots'' rules \newline % Row Count 2 (+ 1)
{\bf{Rule:}} (Condition) -{}-\textgreater{} y \newline % Row Count 3 (+ 1)
{\emph{where:}} \newline % Row Count 4 (+ 1)
* Condition is a conjunction of attributes \newline % Row Count 5 (+ 1)
* y is the class label \newline % Row Count 6 (+ 1)
{\bf{LHS:}} rule antecedent or condition \newline % Row Count 7 (+ 1)
{\bf{RHS:}} rule consequent \newline % Row Count 8 (+ 1)
{\bf{Examples of classification rules:}} \newline % Row Count 9 (+ 1)
(Blood Type=Warm) \textasciicircum{} (Lay Eggs=Yes) -{}-\textgreater{} Birds \newline % Row Count 10 (+ 1)
(Taxable Income \textless{} 50K) \textasciicircum{} (Refund=Yes) -{}-\textgreater{} Evade=No \newline % Row Count 12 (+ 2)
Sequential covering is a rule-based classifier.% Row Count 13 (+ 1)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Rule Evaluation}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493267726_RuleEval.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Bayesian Classification}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493268606_Bayes.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{p(a,b) is the probability that both a and b happen.  \newline  \newline p(a|b) is the probability that a happens, knowing that b has already happened.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{x{3.68 cm} x{4.32 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Terms}}  \tn
% Row 0
\SetRowColor{LightBackground}
Association Analysis & Min-Apriori, LIFT, Simpson's Paradox, Anti-monotone property \tn 
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
Ensemble Methods & Stacking, Random Forest \tn 
% Row Count 5 (+ 2)
% Row 2
\SetRowColor{LightBackground}
Decision Trees & C4.5, Pessimistic estimate, Occam's Razor, Hunt's Algorithm \tn 
% Row Count 8 (+ 3)
% Row 3
\SetRowColor{white}
Model Evaluation & Cross-validation, Bootstrap, Leave-one-out (C-V), Misclassification error rate, Repeated holdout, Stratification \tn 
% Row Count 14 (+ 6)
% Row 4
\SetRowColor{LightBackground}
Bayes & Probabilistic classifier \tn 
% Row Count 16 (+ 2)
% Row 5
\SetRowColor{white}
Data Visualization & Chernoff faces, Data cube, Percentile plots, Parallel coordinates \tn 
% Row Count 20 (+ 4)
% Row 6
\SetRowColor{LightBackground}
Nonlinear Dimensionality Reduction & Principal components, ISOMAP, Multidimensional scaling \tn 
% Row Count 23 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Ensemble Techniques}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493270652_Ensemble.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Manipulate training data:}} bagging and boosting (ensemble of ``experts'', each specializing in different portions of the instance space) \newline  \newline {\bf{Manipulate output values:}} error-correcting output coding (ensemble of ``experts'', each predicting 1 bit of the \{multibit\} full class label) \newline  \newline {\bf{Methods:}} BAGGing, Boosting, AdaBoost}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Rules Analysis}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493273003_RuleAnaly.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Apriori Algorithm}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Let k=1 \newline  \newline Generate frequent itemsets of length 1 \newline  \newline Repeat until no new frequent itemsets are identified \newline  \newline     Generate length (k+1) candidate itemsets from  \newline     length k frequent itemsets \newline  \newline     Prune candidate itemsets containing subsets  \newline     of length k that are infrequent \newline  \newline     Count the support of each candidate by  \newline     scanning the DB \newline  \newline     Eliminate candidates that are infrequent,  \newline     leaving only those that are frequent} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{K-means Clustering}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Select K points as the initial centroids \newline  \newline {\bf{repeat}} \newline     Form K Clusters by assigning all points to the closest centroid \newline  \newline     Recompute the centroid of each cluster \newline  \newline {\bf{until}} the centroids don't change} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Closeness}} is measured by distance (e.g., Euclidean), similarity (e.g., Cosine), correlation. \newline  \newline {\bf{Centroid}} is typically the mean of the points in the cluster}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Hierarchical Clustering}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Single-Link or MIN}} \newline % Row Count 1 (+ 1)
Similarity of two clusters is based on the two most similar (closest / minimum) points in the different clusters \newline % Row Count 4 (+ 3)
Determined by one pair of points, i.e., by one link in the proximity graph. \newline % Row Count 6 (+ 2)
{\bf{Complete or MAX}} \newline % Row Count 7 (+ 1)
Similarity of two clusters is based on the two least similar (most distant, maximum) points in the different clusters \newline % Row Count 10 (+ 3)
Determined by all pairs of points in the two clusters \newline % Row Count 12 (+ 2)
{\bf{Group Average}} \newline % Row Count 13 (+ 1)
Proximity of two clusters is the average of pairwise proximity between points in the two clusters% Row Count 15 (+ 2)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Agglomerative}} clustering starts with points as individual clusters and merges closest clusters until only one cluster is left. \newline  \newline {\bf{Divisive}} clustering starts with one, all-inclusive cluster and splits a cluster until each cluster only has one point.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Dendrogram Example}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/hockeyplay21_1493565164_Dendrogram.png}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Dataset:}} \{7, 10, 20, 28, 35\}}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Density-Based Clustering}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{current\_cluster\_label \textless{}-{}- 1 \newline  \newline {\bf{for}} all core points {\bf{do}} \newline     {\bf{if}} the core point has no cluster label {\bf{then}} \newline         current\_cluster\_label \textless{}-{}- current\_cluster\_label +1 \newline         Label the current core point with the cluster label \newline     {\bf{end if}} \newline     {\bf{for}} all points in the Eps-neighborhood, except the i-th point itself {\bf{do}} \newline         {\bf{if}} the point does not have a cluster label {\bf{then}} \newline             Label the point with cluster label \newline         {\bf{end if}} \newline     {\bf{end for}} \newline {\bf{end for}}} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{DBSCAN is a popular algorithm \newline  \newline Density = number of points within a specified radius (Eps) \newline  \newline A point is a core point if it has more than a specified number of points (MinPts) within Eps \newline  \newline These are points that are at the interior of a cluster \newline  \newline A border point has fewer than MinPts within Eps, but is in the neighborhood of a core point  \newline  \newline A noise point is any point that is not a core point or a border point}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Other Clustering Methods}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Fuzzy}} is a partitional clustering method. {\bf{Fuzzy clustering}} (also referred to as {\bf{soft clustering}}) is a form of clustering in which each data point can belong to more than one cluster. \newline % Row Count 4 (+ 4)
{\bf{Graph-based}} methods: Jarvis-Patrick, Shared-Near Neighbor (SNN, Density),  Chameleon \newline % Row Count 6 (+ 2)
{\bf{Model-based}} methods: Expectation-Maximization% Row Count 7 (+ 1)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Regression Analysis}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{* Linear Regression \newline % Row Count 1 (+ 1)
~~|~Least squares \newline % Row Count 2 (+ 1)
* Subset selection \newline % Row Count 3 (+ 1)
* Stepwise selection \newline % Row Count 4 (+ 1)
* Regularized regression \newline % Row Count 5 (+ 1)
~~|~Ridge \newline % Row Count 6 (+ 1)
~~|~Lasso \newline % Row Count 7 (+ 1)
~~|~Elastic Net% Row Count 8 (+ 1)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Anomaly Detection}}  \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Anomaly is a pattern in the data that does not conform to the expected behavior (e.g., outliers, exceptions, peculiarities, surprise) \newline % Row Count 3 (+ 3)
{\bf{Types of Anomaly}} \newline % Row Count 4 (+ 1)
~~{\emph{Point:}} An individual data instance is anomalous w.r.t. the data \newline % Row Count 6 (+ 2)
~~{\emph{Contextual:}} An individual data instance is anomalous within a context \newline % Row Count 8 (+ 2)
~~{\emph{Collective:}} A collection of related data instances is anomalous \newline % Row Count 10 (+ 2)
{\bf{Approaches}} \newline % Row Count 11 (+ 1)
~~* Graphical (e.g., boxplots, scatter plots) \newline % Row Count 13 (+ 2)
~~* Statistical (e.g., normal distribution, likelihood) \newline % Row Count 15 (+ 2)
~~~~| Parametric Techniques \newline % Row Count 16 (+ 1)
~~~~| Non-parametric Techniques \newline % Row Count 18 (+ 2)
~~* Distance (e.g., nearest-neighbor, density, clustering)% Row Count 20 (+ 2)
} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{{\bf{Local outlier factor (LOF)}} is a density-based distance approach \newline  \newline {\bf{Mahalanobis Distance}} is a clustering-based distance approach}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}