\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{mvyjayanti}
\pdfinfo{
  /Title (data-mining.pdf)
  /Creator (Cheatography)
  /Author (mvyjayanti)
  /Subject (data mining Cheat Sheet)
}

% Lengths and widths
% Enlarge the text area relative to the a4paper defaults (4-column layout).
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit

% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{A3A3A3} \definecolor{LightBackground}{HTML}{F3F3F3} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{data mining Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{mvyjayanti} via \textcolor{DarkBackground}{\uline{cheatography.com/72036/cs/18263/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}mvyjayanti \\ \uline{cheatography.com/mvyjayanti} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Not Yet Published.\\ Updated 13th December, 2018.\\ Page {\thepage} of \pageref{LastPage}. 
\end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{4} \begin{tabularx}{3.833cm}{x{1.81949 cm} x{1.61351 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Naive Bayes and LogReg}} \tn % Row 0 \SetRowColor{LightBackground} P(A|C) = (P(C|A)P(A))/P(C) & predicts T/F, "S" shaped, from 0-1 \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} posterior = (likelihood x prior)/normalizing constant & log(odds) = log(p/(1-p)) \tn % Row Count 5 (+ 3) % Row 2 \SetRowColor{LightBackground} pros: easy/fast, assuming independence, categorical & z = estimated intercept/std error \tn % Row Count 8 (+ 3) % Row 3 \SetRowColor{white} cons: if not in set -\textgreater{} 0\% -{}- can use Laplace estimation (add 1), bad estimator, independent predictor assumption -\textgreater{} unlikely & y = log(F)B1 + log(T/F)B2 \tn % Row Count 14 (+ 6) % Row 4 \SetRowColor{LightBackground} LR: p = e\textasciicircum{}log(odds))\textasciicircum{}/(1+e\textasciicircum{}log(odds)\textasciicircum{}) & likelihood = mul. 
all T x all (1-F) \tn
% Row Count 16 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{3.833cm}}{log(L) = sum i to n(log(Tn)) + sum(log(Fn)) \newline R\textasciicircum{}2\textasciicircum{}=(SS(mean) - SS(fit))/SS(mean)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{ANNs}} \tn
% Row 0
\SetRowColor{LightBackground}
neuron = things that hold number from 0 to 1 & boolean: T=1, F=0 \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
ŷ = 1 if \seqsplit{w₁x₁+w₂x₂+...wnxn-t(bias} factor) \textgreater{}0\{\{noshy\}\} & , -1 if \textless{}0 \tn
% Row Count 7 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{ŷ=sign(w₁x₁+w₂x₂+}...wnxn-t=sign({\bf{w}}•{\bf{x}})\{\{nl\}\}λ=learning rate\{\{nl\}\}xij=val of jth attribute of training example xi & for weight update: wj\textasciicircum{}(k+1)\textasciicircum{} = weight param associated w/ i\textasciicircum{}th\textasciicircum{} input link after k\textasciicircum{}th\textasciicircum{} iteration \tn
% Row Count 14 (+ 7)
% Row 3
\SetRowColor{white}
wj\textasciicircum{}(k+1)\textasciicircum{}=wj\textasciicircum{}(k)+λ(yi-ŷi\textasciicircum{}k\textasciicircum{})xij & error = y - ŷ \tn
% Row Count 16 (+ 2)
% Row 4
\SetRowColor{LightBackground}
if error = 2, inc w of +ves & if error = -2, in w of -ves \tn
% Row Count 18 (+ 2)
% Row 5
\SetRowColor{white}
Error E = ΣEk | k∊outputs & Ek = 1/2(tk-ok)\textasciicircum{}2\textasciicircum{} \tn
% Row Count 20 (+ 2)
% Row 6
\SetRowColor{LightBackground}
output oi = 1/(1+e\textasciicircum{}-net i\textasciicircum{}) & net i = Σwij*oi \tn
% Row Count 22 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Inductive Bias, No Free Lunch}} \tn
% Row 0
\SetRowColor{LightBackground}
IB: anything influencing hypothesis choice other than training set & part of language accessible, method of choosing \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
NFL: for any 2 algorithms A\&B, there exists a dataset for which A outperforms B & assuming uniform P(x,y)→\#of datasets for which A\textgreater{}B = \# B\textgreater{}A \tn
% Row Count 8 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{SVM}} \tn
% Row 0
\SetRowColor{LightBackground}
frontier that best segregates 2 classes by margins & polynomial kernel: k(x,y)=(x*y+1)\textasciicircum{}d\textasciicircum{} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
RBF kernel:k(x,y)=\{\{nl\}\}e\textasciicircum{}-𝛄(||x-y||$^{\textrm{2}}$)\textasciicircum{} & tune by k-fold cross-val (k=5) \tn
% Row Count 6 (+ 3)
% Row 2
\SetRowColor{LightBackground}
adv: high dimension spaces, \#of dimensions \textgreater{} \#of samples & diff kernel functions for diff decisions \tn
% Row Count 9 (+ 3)
% Row 3
\SetRowColor{white}
k1+k2 = even more complex\{\{br\}\} & \{\{bt\}\}dis: if \#features \textgreater{} \#samples, CV \tn
% Row Count 11 (+ 2)
% Row 4
\SetRowColor{LightBackground}
min(||w||\textasciicircum{}2\textasciicircum{}) for linear\{\{nl\}\}ξ: how far ptᵢ is from correct side & wxᵢ+b\textgreater{}=1-ξ if yᵢ=1\{\{nl\}\}wxᵢ+b\textgreater{}=-1+ξ if yᵢ=-1 \tn
% Row Count 15 (+ 4)
% Row 5
\SetRowColor{white}
min(||w||+C(Σi=1→nξᵢ))\{\{noshy\}\} & max((Σλᵢ) -1/2(λᵢλⱼyᵢyⱼxᵢxⱼ))\{\{noshy\}\} \tn
% Row Count 18 (+ 3)
% Row 6
\SetRowColor{LightBackground}
dist btw parallel planes = 2/||w|| & ||w|| = sqrt(w₁\textasciicircum{}2\textasciicircum{}+w₂\textasciicircum{}2\textasciicircum{}...)
\tn % Row Count 20 (+ 2) % Row 7 \SetRowColor{white} generalization error\textless{}= p(bar)(1-s\textasciicircum{}2\textasciicircum{})/s\textasciicircum{}2\textasciicircum{} & p(bar) = avg correlation, s=strength \tn % Row Count 23 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Errors}} \tn % Row 0 \SetRowColor{LightBackground} P(+) = 1/(1+ e\textasciicircum{}-(w0+w1x1...)\textasciicircum{}) & error= misclassification \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} For new cases, predict: & 1 if (w0+w1x1+...)\textgreater{}=1, 0 else \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} if w0 inc as x inc, p(+) inc & error = (FP+FN)/All \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} sensitivity = TP/(TP+FN) & specificity = TN/(TN+FP) \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} +ve predicted val = TP/(TP+FP) & -ve predicted val = TN/(TN+FN) \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} true error: error on true underlying distribution (unmeasurable) & apparent error: error on example used to train model (underestimates TE) \tn % Row Count 14 (+ 4) % Row 6 \SetRowColor{LightBackground} generalization: ability to predict unseen cases & Occam's Razor: should not be multiplied beyond necessity \tn % Row Count 17 (+ 3) % Row 7 \SetRowColor{white} Overfitting: memorizing training set & test error: error on ex. 
held out of training \tn % Row Count 20 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.92248 cm} x{1.51052 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{KNN}} \tn % Row 0 \SetRowColor{LightBackground} select k: sqrt(n), if n is even, choose odd & Ri = \{x:d(x,xi)\textless{} d(x,x2), i!=j\} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{2}{x{3.833cm}}{euclidean distance =sqrt((x-x1)\textasciicircum{}2\textasciicircum{}+(y-y1)\textasciicircum{}2\textasciicircum{})} \tn % Row Count 3 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Model Eval}} \tn % Row 0 \SetRowColor{LightBackground} Holdout & train on 2/3, test on 1/3, one is validation set (high variance on estimate) \tn % Row Count 4 (+ 4) % Row 1 \SetRowColor{white} Leave-one-out & train on N-1, test on 1 (good estimate) \tn % Row Count 6 (+ 2) % Row 2 \SetRowColor{LightBackground} K-folds Cross Val & divide set into k parts, LOO each, repeat N times, compute mean and std dev for each \tn % Row Count 11 (+ 5) % Row 3 \SetRowColor{white} Bootstrapping\{\{noshy\}\} & randomly draw N points (can repeat), train, test on S - S1 \tn % Row Count 14 (+ 3) % Row 4 \SetRowColor{LightBackground} Compare 2 methods: H0: meanLR = meanNB, H1: meanLR\textless{}meanNB & t=(meanNB-meanLR)/S (S:pooled variance), reject H0 if t\textgreater{}t alpha \tn % Row Count 18 (+ 4) % Row 5 \SetRowColor{white} OR H1: meanLR!=meanNB & 2-tailed t. t alpha/2. \seqsplit{((meanVar)x(sqrt(n)))} / S \tn % Row Count 21 (+ 3) % Row 6 \SetRowColor{LightBackground} OR H1: meanLR!=meanNB & 2-tailed t. t alpha/2. 
\seqsplit{((meanVar)x(sqrt(n)))} / S \tn
% Row Count 24 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{3.833cm}}{Better: stratify each fold to contain same \% of positives and negatives} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Decision Trees}} \tn
% Row 0
\SetRowColor{LightBackground}
asks a question: classifies based on T/F & root, internal(arrows to and from), external(arrows to)(leaves) \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
break into categories & T/F and Y/N for each \tn
% Row Count 6 (+ 2)
% Row 2
\SetRowColor{LightBackground}
P(Y|T), P(N|T), P(Y|F), P(N|F) & GI\textasciicircum{}2\textasciicircum{}= 1-(Y|F/(Y|F + N|F))\textasciicircum{}2\textasciicircum{}-(N|F/(Y|F + N|F))\textasciicircum{}2\textasciicircum{}\{\{noshy\}\} \tn
% Row Count 9 (+ 3)
% Row 3
\SetRowColor{white}
GI\textasciicircum{}1\textasciicircum{} = & 1-(Y|T/(Y|T + N|T))\textasciicircum{}2\textasciicircum{}-(N|T/(Y|T + N|T))\textasciicircum{}2\textasciicircum{}\{\{noshy\}\} \tn
% Row Count 12 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{3.833cm}}{GI\textasciicircum{}all\textasciicircum{}=(T/(T+F) x GI\textasciicircum{}1\textasciicircum{})+(F/(T+F) x GI\textasciicircum{}2\textasciicircum{})} \tn
% Row Count 13 (+ 1)
% Row 5
\SetRowColor{white}
entropy: \seqsplit{H(S)=-P(y)log2(P(y))-P(n)log2(P(n))} & find H(S\textasciicircum{}true\textasciicircum{}) and H(S\textasciicircum{}false\textasciicircum{}), H(S)-w1H(S\textasciicircum{}true\textasciicircum{})-w2H(S\textasciicircum{}false\textasciicircum{})\{\{noshy\}\} \tn
% Row Count 17 (+ 4)
% Row 6
\SetRowColor{LightBackground}
w1 = T instances/all\{\{noshy\}\} & w2 = F instances/all \tn
% Row Count 19 (+ 2)
% Row 7
\SetRowColor{white}
w1 = T instances/all\{\{noshy\}\} & w2 = F
instances/all \tn % Row Count 21 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{3.833cm}}{largest info gain, least GI} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{ROC and Lift Curves}} \tn % Row 0 \SetRowColor{LightBackground} ROC: sensitivity vs. (1-specificity), higher val the better, \{\{nl\}\}flatter line the worse & sens: TP rate,\{\{nl\}\} 1-spec: FP rate \tn % Row Count 5 (+ 5) % Row 1 \SetRowColor{white} Lift curves: find \% of each total response from sum of all & find \% of each +ve responses from total +ve responses \tn % Row Count 8 (+ 3) % Row 2 \SetRowColor{LightBackground} y = +ve \% / \% of total & x = \% of total \tn % Row Count 10 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{k-means clustering}} \tn % Row 0 \SetRowColor{LightBackground} user choose k, initialize k centers, loop: assign pts nearest those centers, move centroid of assigned pts & center in dense regions or random, optimizing (total distance)\textasciicircum{}2\textasciicircum{} \tn % Row Count 6 (+ 6) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{3.833cm}}{returns local solution} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Ensembles}} \tn % Row 0 \SetRowColor{LightBackground} Bagging: bootstrap aggregating\{\{noshy\}\} & \{\{bl\}\}Boosting: changing weights on pts and building series of classifiers, start w=1 \tn % Row Count 5 (+ 5) % Row 1 
\SetRowColor{white} \{\{bt\}\}incorrect pts weighed by \# that is inversely proportional to training error\{\{noshy\}\} & w inc if misclassified, dec else\{\{nl\}\}classifiers combined by weighting-accuracy of training set \tn % Row Count 10 (+ 5) % Row 2 \SetRowColor{LightBackground} \{\{bt\}\}Arcing(Adaptive resample\&combine):\{\{nl\}\}like boosting but change w by update method & eg. Arc x4: w(x) = 1+e(x)\textasciicircum{}4\textasciicircum{} \{\{nl\}\}e(x)=times x has been misclassified so far\{\{bt\}\} \tn % Row Count 15 (+ 5) % Row 3 \SetRowColor{white} \{\{noshy\}\}depends on: strength(perf of individuals), diversity (uncorrelated errors) & bagging error: from reducing var\{\{nl\}\}boosting can reduce bias\&var | bagging is \textgreater{} base classifier \tn % Row Count 20 (+ 5) % Row 4 \SetRowColor{LightBackground} boosting better or overfit noisy & Random forests: for tree,choose pts,for node, features subset w/ best IG,split, end,recurse,end \tn % Row Count 25 (+ 5) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{feature selection}} \tn % Row 0 \SetRowColor{LightBackground} removing irrelevant info for a better, faster model & drop missing values or encode them \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} drop: if all values are the same & if highly correlated, one of them \tn % Row Count 5 (+ 2) % Row 2 \SetRowColor{LightBackground} if low correlation with target|\{\{nl\}\}trees with least info gain & forward, backward, stepwise selection: best model with f1, then keep going until validation error stops dropping \tn % Row Count 11 (+ 6) % Row 3 \SetRowColor{white} beam or heuristic search & for computation interpretability |\{\{nl\}\}genetic algorithms \tn % Row Count 14 (+ 3) % Row 4 \SetRowColor{LightBackground} 1) filters: all above + other correlation & 2) wrappers: build a 
classifier with a subset+eval on validation data. but 2\textasciicircum{}d\textasciicircum{} possible subsets \tn
% Row Count 19 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{Bias and Var}} \tn
% Row 0
\SetRowColor{LightBackground}
PCA : dimensionality reduction & linear combo of OG features \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
max. variance: smallest \# until 90\% var explained & μ=E(y|x)=T(uk)\{\{nl\}\}ŷ=f(x,Ө) \tn
% Row Count 5 (+ 3)
% Row 2
\SetRowColor{LightBackground}
error: MSE = (ŷ-μ)\textasciicircum{}2\textasciicircum{}\{\{nl\}\}var: E(ŷ-E(ŷ))\textasciicircum{}2\textasciicircum{}\{\{nl\}\}bias:(E(ŷ)-μ)\textasciicircum{}2\textasciicircum{}+noise & \textasciicircum{}best estimate of y given x and fixed params Ө \tn
% Row Count 9 (+ 4)
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{3.833cm}}{KNN,ANN,DT: low bias, high var} \tn
% Row Count 10 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{3.833cm}}{var: how much does my estimate var across datasets| bias: systematic error prediction, inability to fit} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{3.833cm}{x{1.7165 cm} x{1.7165 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{3.833cm}}{\bf\textcolor{white}{EM Expectation Maximization clust.}} \tn
% Row 0
\SetRowColor{LightBackground}
hard clustering: each pt only belongs to one cluster & soft clustering: can belong to more than one cluster by \% \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
EM: automatically discover all params for k "sources"→but we may not know source\{\{nl\}\} if we know μ,σ, can find likeliness & mixture models: probabilistic way of soft clustering\{\{nl\}\}each cluster Gaussian or multinomial \tn
% Row Count 10 (+ 7)
% Row 2
\SetRowColor{LightBackground} 1/sqrt(2πσ\textasciicircum{}2\textasciicircum{})*exp(-(xᵢ - μᵦ)\textasciicircum{}2\textasciicircum{}/2σᵦ\textasciicircum{}2\textasciicircum{})\{\{nl\}\}aᵢ=1-bᵢ=P(aᵢ) & Bayesian posterior: bᵢ = P(b|xᵢ) = (P(xᵢ|b)P(b)) / (P(xᵢ|b)P(b) + P(xᵢ|a)P(a)) \tn % Row Count 15 (+ 5) % Row 3 \SetRowColor{white} σᵦ\textasciicircum{}2\textasciicircum{}=(b₁(x₁-μᵦ)\textasciicircum{}2\textasciicircum{}+...) /(b₁+b₂+...) & μᵦ = \seqsplit{(b₁x₁+b₂x₂+..)} / (b₁+b₂+...) \tn % Row Count 18 (+ 3) % Row 4 \SetRowColor{LightBackground} em: places randomly,for each pt P(b|xᵢ): does it look like it came from b & Working to adjust (μₐ, σₐ\textasciicircum{}2\textasciicircum{}) and (μᵦ, σᵦ\textasciicircum{}2\textasciicircum{}) to fit points assigned \tn % Row Count 22 (+ 4) % Row 5 \SetRowColor{white} Iterate until convergence\{\{nl\}\}P(a) = 1- P(b) & Could also estimate priors: P(b) = (b₁+b₂+...)/n \tn % Row Count 25 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{3.833cm}}{"What proportion of the data is each distribution describing"} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}