% Scikit Learn Cheat Sheet (Cheatography layout).
%
% Structure of this file:
%   1. Preamble: packages, PDF metadata, page geometry, helper macros, colours.
%   2. fancyhdr header/footer, each laid out as a three-column multicols block.
%   3. Body: a two-column multicols* page made of one-column tabularx "boxes".
%      Every box has a dark title row, one or more content rows, and a closing
%      coloured rule drawn with \hhline.
%
% \pdfinfo and the `px' unit used below are pdfTeX features, so this file is
% intended to be compiled with pdfLaTeX.
%
% Keep one statement per line: a `%' comment runs to the end of the physical
% line, so joining lines silently comments out whatever follows.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}        % For header and footer
\usepackage{multicol}        % Multi-column page layout (multicols environment)
\usepackage{tabularx}        % Intelligent column widths
\usepackage{tabulary}        % Used in header and footer
\usepackage{hhline}          % Border under tables
\usepackage{graphicx}        % For images
\usepackage{xcolor}          % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc}     % Without this we get weird character replacements
\usepackage{colortbl}        % For coloured tables
\usepackage{setspace}        % For line height
\usepackage{lastpage}        % Needed for total page number
\usepackage{seqsplit}        % Splits long words.
%\usepackage{opensans}       % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}  % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}         % Symbols
\usepackage{MnSymbol}        % Symbols
\usepackage{wasysym}         % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{Daryabi}
\pdfinfo{
  /Title (scikit-learn.pdf)
  /Creator (Cheatography)
  /Author (Daryabi)
  /Subject (Scikit Learn Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt}   % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName
% and colours the current table row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<span>}{<colspec>}{<text>}: a \multicolumn cell whose
% background is the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, then title and author/URL line.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Scikit Learn Cheat Sheet}}} \\
    \normalsize{by \textcolor{DarkBackground}{Daryabi} via \textcolor{DarkBackground}{\uline{cheatography.com/181709/cs/37825/}}}
\end{tabulary}
\end{multicols}}

% Footer: three boxes (author, sheet status with page count, sponsor).
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}Daryabi \\
  \uline{cheatography.com/daryabi} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 21st March, 2023.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{2}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Data Preprocessing}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.preprocessing import \textless{}classname\textgreater{} \newline
  \newline
  StandardScaler, MinMaxScaler, RobustScaler \newline
  QuantileTransformer , PowerTransformer, FunctionTransformer \newline
  KBinsDiscretizer , PolynomialFeatures , Normalizer \newline
  scaler = StandardScaler() \newline
  \newline
  \newline
  \# Apply a user-defined function to the data \newline
  transformer = \seqsplit{FunctionTransformer(np.log1p)} \newline
  \newline
  \# Discretize features into k bins \newline
  discretizer = \seqsplit{KBinsDiscretizer(n\_bins=3}, encode='ordinal', strategy='uniform') \newline
  \newline
  poly\_features = \seqsplit{PolynomialFeatures(degree=2)} \newline
  \newline
  X\_scaled = \textless{}object\textgreater{}.fit\_transform(X)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Encoding Categorical Data}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.preprocessing import \textless{}classname\textgreater{} \newline
  \newline
  LabelEncoder , OneHotEncoder , OrdinalEncoder , LabelBinarizer \newline
  \newline
  tb = OneHotEncoder() \newline
  le = LabelEncoder() \newline
  lb = LabelBinarizer() \newline
  \newline
  y = le.fit\_transform({[}'Yes', 'No', 'Yes'{]}) \newline
  y = lb.fit\_transform({[}'Yes', 'No', 'Yes'{]}) \newline
  \newline
  X\_encoded = tb.fit\_transform(X)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
% NOTE(review): scikit-learn expects enable_iterative_imputer to be imported
% BEFORE IterativeImputer; the rendered order below is the reverse -- confirm
% with the author whether the text should be reordered.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Handling missing values}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.impute import SimpleImputer, KNNImputer , IterativeImputer , MissingIndicator \newline
  from sklearn.experimental import \seqsplit{enable\_iterative\_imputer} \newline
  \newline
  imputer = \seqsplit{SimpleImputer(strategy='mean')} \newline
  imputer = \seqsplit{KNNImputer(n\_neighbors=2)} \newline
  imputer = \seqsplit{IterativeImputer(random\_state=0)} \newline
  indicator = MissingIndicator() \newline
  \newline
  X\_imputed = \seqsplit{imputer.fit\_transform(X)}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Feature Selection:}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from \seqsplit{sklearn.feature\_selection} import \newline
  \newline
  SelectKBest ,SelectPercentile, SelectFromModel, VarianceThreshold, RFE, RFECV, \newline
  SequentialFeatureSelector} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
% NOTE(review): TSNE is provided by sklearn.manifold, not
% sklearn.decomposition -- confirm with the author whether the rendered
% text should mention the extra import.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Dimensionality Reduction}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.decomposition import \newline
  \newline
  PCA, IncrementalPCA, TruncatedSVD, KernelPCA, NMF, FastICA, \seqsplit{LatentDirichletAllocation} \newline
  \newline
  pca = PCA(n\_components=2) \newline
  kpca = \seqsplit{KernelPCA(n\_components=2}, kernel='rbf') \newline
  tsne = TSNE(n\_components=2) \newline
  \newline
  X\_new = any.fit\_transform(X)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
% NOTE(review): ColumnTransformer is provided by sklearn.compose, not
% sklearn.pipeline -- confirm with the author whether the rendered text
% should be adjusted.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Pipelines:}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.pipeline import \newline
  \newline
  Pipeline \newline
  FeatureUnion \newline
  ColumnTransformer} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
% Each "Row N" below is a heading cell plus an indented cell that starts
% with a small vertical bar (\rule) acting as a bullet.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Supervised Learning Models:}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Linear Models:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}LinearRegression, Ridge, Lasso, ElasticNet, LogisticRegression, SGDClassifier, SGDRegressor, Perceptron} \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Naive Bayes:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}GaussianNB, BernoulliNB, MultinomialNB,} \tn
% Row Count 6 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Tree-Based Models:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}DecisionTreeClassifier, DecisionTreeRegressor,} \tn
% Row Count 8 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Support Vector Machines (SVM):} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM} \tn
% Row Count 11 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Nearest Neighbors:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}KNeighborsClassifier, KNeighborsRegressor, \seqsplit{RadiusNeighborsClassifier}, RadiusNeighborsRegressor} \tn
% Row Count 14 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Neural Networks:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}MLPClassifier, MLPRegressor} \tn
% Row Count 16 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Ensemble} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}RandomForestClassifier, RandomForestRegressor, \seqsplit{GradientBoostingClassifier}, \seqsplit{GradientBoostingRegressor}, ExtraTreesClassifier, ExtraTreesRegressor, AdaBoostClassifier, AdaBoostRegressor} \tn
% Row Count 21 (+ 5)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{xgboost} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}XGBClassifier, XGBRegressor} \tn
% Row Count 23 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{lightgbm} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}LGBMClassifier, LGBMRegressor} \tn
% Row Count 25 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{catboost} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}CatBoostClassifier, CatBoostRegressor,} \tn
% Row Count 27 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.linear\_model import ,from sklearn.naive\_bayes , from sklearn.tree \newline
  from sklearn.ensemble , from xgboost import , from lightgbm import \newline
  from catboost import , from sklearn.svm \newline
  from sklearn.neighbors \newline
  from sklearn.neural\_network import} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Semi-Supervised Learning:}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{LabelPropagation \newline
  LabelSpreading} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Unsupervised Learning Models}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Clustering:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}KMeans, AgglomerativeClustering ,DBSCAN, Birch, SpectralClustering} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Dimensionality Reduction:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}PCA, IncrementalPCA, TruncatedSVD, KernelPCA, NMF, FastICA, \seqsplit{LatentDirichletAllocation}} \tn
% Row Count 6 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Clustering}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{KMeans \newline
  AgglomerativeClustering \newline
  DBSCAN \newline
  Birch \newline
  SpectralClustering} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---------------------------------------------------------------- Box
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bfseries\textcolor{white}{Model Evaluation Metrics}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Regression Metrics:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}mean\_squared\_error, r2\_score, mean\_absolute\_error, \seqsplit{explained\_variance\_score}, median\_absolute\_error, \seqsplit{mean\_squared\_log\_error}} \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Classification Metrics:} \tn
\mymulticolumn{1}{x{8.4cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}accuracy\_score, precision\_score, recall\_score, f1\_score, roc\_auc\_score, \seqsplit{average\_precision\_score}, log\_loss, confusion\_matrix, classification\_report} \tn
% Row Count 9 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.metrics import} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}