\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
\author{Damini}
\pdfinfo{
  /Title (scikit-learn-for-machine-learning.pdf)
  /Creator (Cheatography)
  /Author (Damini)
  /Subject (SciKit Learn for Machine Learning Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% This two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{0BDED7}
\definecolor{LightBackground}{HTML}{EFFCFC}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{SciKit Learn for Machine Learning Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Damini} via \textcolor{DarkBackground}{\uline{cheatography.com/121435/cs/22218/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}Damini \\
  \uline{cheatography.com/damini} \\
  \end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}}  \\
   \vspace{-2pt}Not Yet Published.\\
   Updated 31st March, 2020.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Loading the data}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{}\textgreater{}\textgreater{} import numpy as np  \newline \textgreater{}\textgreater{}\textgreater{} X = np.random.random((10,5))  \newline \textgreater{}\textgreater{}\textgreater{} y = np.array({[}'M','M','F','F','M','F','M','M','F','F','F'{]})  \newline \textgreater{}\textgreater{}\textgreater{} X{[}X \textless{} 0.7{]} = 0} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Training and Test data}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textgreater{}\textgreater{}\textgreater{} from sklearn.model\_selection import train\_test\_split  \newline \textgreater{}\textgreater{}\textgreater{} X\_train, X\_test, y\_train, y\_test = train\_test\_split(X, y, random\_state=0)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Prediction}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Supervised Estimators}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} y\_pred = \seqsplit{svc.predict(np.random.random((2},5))) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} y\_pred = lr.predict(X\_test) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} y\_pred = \seqsplit{knn.predict\_proba(X\_test)}} \tn 
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Unsupervised Estimators}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} y\_pred = \seqsplit{k\_means.predict(X\_test)}} \tn 
% Row Count 6 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Pre-processing the data}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Standardization}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from sklearn.preprocessing import StandardScaler \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} scaler = \seqsplit{StandardScaler().fit(X\_train)} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} standardized\_X = \seqsplit{scaler.transform(X\_train)} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} standardized\_X\_test = \seqsplit{scaler.transform(X\_test)}} \tn 
% Row Count 6 (+ 6)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Normalization}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from sklearn.preprocessing import Normalizer \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} scaler = \seqsplit{Normalizer().fit(X\_train)} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} normalized\_X = \seqsplit{scaler.transform(X\_train)} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} normalized\_X\_test = \seqsplit{scaler.transform(X\_test)}} \tn 
% Row Count 12 (+ 6)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Encoding Categorical Features}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from sklearn.preprocessing import LabelEncoder \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} enc = LabelEncoder() \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} y = enc.fit\_transform(y)} \tn 
% Row Count 16 (+ 4)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Imputting Missing Values}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from sklearn.preprocessing import Imputer \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} imp = \seqsplit{Imputer(missing\_values=0}, strategy='mean', axis=0) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{imp.fit\_transform(X\_train)}} \tn 
% Row Count 21 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Model Fitting}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Supervised Learning}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} lr.fit(X, y) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} knn.fit(X\_train, y\_train) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} svc.fit(X\_train, y\_train)} \tn 
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Unsupervised Learning}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} k\_means.fit(X\_train) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} pca\_model = \seqsplit{pca.fit\_transform(X\_train)}} \tn 
% Row Count 6 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Create model}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Supervised Learning Estimators}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}{\bf{Linear Regression}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.linear\_model import LinearRegression \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} lr = \seqsplit{LinearRegression(normalize=True)} \{\{nl\}\}\{\{nl\}\}{\bf{Support Vector Machines (SVM)}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.svm import SVC \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} svc = SVC(kernel='linear') \{\{nl\}\}\{\{nl\}\}{\bf{Naive Bayes}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.naive\_bayes import GaussianNB \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} gnb = GaussianNB() \{\{nl\}\}\{\{nl\}\}{\bf{KNN}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn import neighbors \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} knn = \seqsplit{neighbors.KNeighborsClassifier(n\_neighbors=5)}} \tn 
% Row Count 12 (+ 12)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Unsupervised Learning Estimators}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}{\bf{Principal Component Analysis (PCA)}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.decomposition import PCA \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} pca = PCA(n\_components=0.95) \{\{nl\}\}\{\{nl\}\}{\bf{K Means}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.cluster import KMeans \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} k\_means = KMeans(n\_clusters=3, random\_state=0)} \tn 
% Row Count 19 (+ 7)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Tune Your Model}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Grid Search}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from sklearn.grid\_search import GridSearchCV \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} params = \{"n\_neighbors": np.arange(1,3), "metric": {[}"euclidean", "cityblock"{]}\} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} grid = \seqsplit{GridSearchCV(estimator=knn}, param\_grid=params) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} grid.fit(X\_train, y\_train) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(grid.best\_score\_)} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(grid.best\_estimator\_.n\_neighbors)}} \tn 
% Row Count 8 (+ 8)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Randomized Parameter Optimization}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from sklearn.grid\_search import RandomizedSearchCV \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} params = \{"n\_neighbors": range(1,5), "weights": {[}"uniform", "distance"{]}\} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} rsearch = \seqsplit{RandomizedSearchCV(estimator=knn}, \seqsplit{param\_distributions=params}, cv=4, n\_iter=8, random\_state=5) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} rsearch.fit(X\_train, y\_train) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(rsearch.best\_score\_)}} \tn 
% Row Count 16 (+ 8)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Evaluate Your Model's Performance}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Classification Metrics}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}{\bf{Accuracy Score}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} knn.score(X\_test, y\_test) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import accuracy\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} accuracy\_score(y\_test, y\_pred) \{\{nl\}\}\{\{nl\}\}{\bf{Classification Report}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import classification\_report \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(classification\_report(y\_test}, y\_pred)) \{\{nl\}\}\{\{nl\}\}{\bf{Confusion Matrix}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import confusion\_matrix \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(confusion\_matrix(y\_test}, y\_pred))} \tn 
% Row Count 11 (+ 11)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Regression Metrics}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}{\bf{Mean Absolute Error}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import mean\_absolute\_error \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} y\_true = {[}3, -0.5, 2{]} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{mean\_absolute\_error(y\_true}, y\_pred)\{\{nl\}\}\{\{nl\}\} {\bf{Mean Squared Error}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import mean\_squared\_error \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{mean\_squared\_error(y\_test}, y\_pred)\{\{nl\}\}\{\{nl\}\} {\bf{R$^{\textrm{2}}$ Score}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import r2\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} r2\_score(y\_true, y\_pred)} \tn 
% Row Count 21 (+ 10)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Clustering Metrics}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}{\bf{Adjusted Rand Index}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import adjusted\_rand\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{adjusted\_rand\_score(y\_true}, y\_pred)\{\{nl\}\}\{\{nl\}\} {\bf{Homogeneity}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import homogeneity\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{homogeneity\_score(y\_true}, y\_pred)\{\{nl\}\}\{\{nl\}\} {\bf{V-measure}} \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import v\_measure\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{metrics.v\_measure\_score(y\_true}, y\_pred)} \tn 
% Row Count 31 (+ 10)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Evaluate Your Model's Performance (cont)}}  \tn
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Cross-Validation}}} \tn 
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}\textgreater{}\textgreater{}\textgreater{} from \seqsplit{sklearn.cross\_validation} import cross\_val\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(cross\_val\_score(knn}, X\_train, y\_train, cv=4)) \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{print(cross\_val\_score(lr}, X, y, cv=2))} \tn 
% Row Count 5 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}