\documentclass[10pt,a4paper]{article} % Packages \usepackage{fancyhdr} % For header and footer \usepackage{multicol} % Allows multicols in tables \usepackage{tabularx} % Intelligent column widths \usepackage{tabulary} % Used in header and footer \usepackage{hhline} % Border under tables \usepackage{graphicx} % For images \usepackage{xcolor} % For hex colours %\usepackage[utf8x]{inputenc} % For unicode character support \usepackage[T1]{fontenc} % Without this we get weird character replacements \usepackage{colortbl} % For coloured tables \usepackage{setspace} % For line height \usepackage{lastpage} % Needed for total page number \usepackage{seqsplit} % Splits long words. %\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely. \usepackage[normalem]{ulem} % For underlining links % Most of the following are not required for the majority % of cheat sheets but are needed for some symbol support. \usepackage{amsmath} % Symbols \usepackage{MnSymbol} % Symbols \usepackage{wasysym} % Symbols %\usepackage[english,german,french,spanish,italian]{babel} % Languages % Document Info \author{satwik dondapati (sati)} \pdfinfo{ /Title (scikit-learn-cyber-security.pdf) /Creator (Cheatography) /Author (satwik dondapati (sati)) /Subject (Scikit-Learn(Cyber-security) Cheat Sheet) } % Lengths and widths \addtolength{\textwidth}{6cm} \addtolength{\textheight}{-1cm} \addtolength{\hoffset}{-3cm} \addtolength{\voffset}{-2cm} \setlength{\tabcolsep}{0.2cm} % Space between columns \setlength{\headsep}{-12pt} % Reduce space between header and content \setlength{\headheight}{85pt} % If less, LaTeX automatically increases it \renewcommand{\footrulewidth}{0pt} % Remove footer line \renewcommand{\headrulewidth}{0pt} % Remove header line \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit % This two commands together give roughly % the right line height in the tables \renewcommand{\arraystretch}{1.3} \onehalfspacing % Commands 
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{26A318} \definecolor{LightBackground}{HTML}{F1F9F0} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Scikit-Learn(Cyber-security) Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{satwik dondapati (sati)} via \textcolor{DarkBackground}{\uline{cheatography.com/121228/cs/22124/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}satwik dondapati (sati) \\ \uline{cheatography.com/sati} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Published 22nd March, 2020.\\ Updated 22nd March, 2020.\\ Page {\thepage} of \pageref{LastPage}. 
\end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{2} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Definition}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Scikit-learn}} is an open source Python library that implements a range of machine learning, preprocessing, cross-validation and visualization algorithms using a unified interface} \tn % Row Count 4 (+ 4) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Splitting Data}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.model\_selection import train\_test\_split} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{X\_train, X\_test, y\_train, y\_test = train\_test\_split(X, y, random\_state=7)} \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Handling Missing Data}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.impute import SimpleImputer \{\{nl\}\}} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{missingvalues = \seqsplit{SimpleImputer(missing\_values} = np.nan, strategy = 'mean') 
\{\{nl\}\} missingvalues = missingvalues.fit(X{[}:, 1:3{]}) \{\{nl\}\} X{[}:, 1:3{]}=missingvalues.transform(X{[}:, 1:3{]})} \tn % Row Count 5 (+ 4) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Linear Regression}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.linear\_model import {\bf{LinearRegression}}} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{linear\_reg = LinearRegression() \{\{nl\}\} linear\_reg.fit( X , y )} \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Decision Tree and Random forest}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.tree import {\bf{DecisionTreeRegressor}}\{\{nl\}\}from sklearn.ensemble import RandomForestRegressor} \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{regressor = \seqsplit{DecisionTreeRegressor(random\_state} = 0)\{\{nl\}\} regressor.fit(X,y)} \tn % Row Count 5 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{regressor2 = \seqsplit{RandomForestRegressor(n\_estimators} = 100,random\_state=0)\{\{nl\}\}regressor2.fit(X,y)} \tn % Row Count 7 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Cross-Validation}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.datasets import make\_regression\{\{nl\}\} from sklearn.linear\_model import LinearRegression\{\{nl\}\}from sklearn.model\_selection import cross\_validate} \tn % Row Count 4 (+ 4) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{X , y = 
\seqsplit{make\_regression(n\_samples} = 1000, random\_state = 0)\{\{nl\}\}lr = LinearRegression()} \tn
% Row Count 6 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{result = cross\_validate(lr,X,y)\{\{nl\}\}result{[}'test\_score'{]}} \tn
% Row Count 8 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{It is used to know the effectiveness of our Models by re-sampling and applying to models in different iterations.} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{x{3.84 cm} x{4.16 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Pandas functions for importing {\bf{Data}}}} \tn
% Row 0
\SetRowColor{LightBackground}
\seqsplit{pd.read\_csv(filename)} & From a CSV file \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{pd.read\_excel(filename)} & From an Excel file \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
pd.read\_sql(query, \seqsplit{connection\_object)} & Read from a SQL table/database \tn
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
\seqsplit{pd.read\_clipboard()} & Takes the contents of your clipboard and passes it to read\_table() \tn
% Row Count 10 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Visualization using Scikit-learn}} \tn
% Row 0
\SetRowColor{LightBackground}
from sklearn.metrics import plot\_roc\_curve & Importing ``plot\_roc\_curve'' to plot \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
svc\_disp = \seqsplit{plot\_roc\_curve(svc}, X\_test, y\_test) & Plotting {\bf{Receiver operating characteristic}} Curve \tn
% Row Count 6 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{metrics.plot\_confusion\_matrix} & Plotting {\bf{Confusion Matrix}}.
\tn
% Row Count 8 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Clustering metrics}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Adjusted Rand Index \{\{nl\}\} \textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import adjusted\_rand\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{adjusted\_rand\_score(y\_true}, y\_pred) \{\{nl\}\}} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Homogeneity \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import homogeneity\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{homogeneity\_score(y\_true}, y\_pred)} \tn
% Row Count 6 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{V-measure\{\{nl\}\} \textgreater{}\textgreater{}\textgreater{} from sklearn.metrics import v\_measure\_score \{\{nl\}\}\textgreater{}\textgreater{}\textgreater{} \seqsplit{v\_measure\_score(y\_true}, y\_pred)} \tn
% Row Count 9 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{x{2.4 cm} x{5.6 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Pandas Data Cleaning functions}} \tn
% Row 0
\SetRowColor{LightBackground}
pd.isnull() & Checks for null Values, Returns Boolean Array \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{pd.notnull()} & Opposite of pd.isnull() \tn
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
df.dropna() & Drop all rows that contain null values \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
\seqsplit{df.dropna(axis=1)} & Drop all columns that contain null values \tn
% Row Count 7 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\seqsplit{df.fillna(x)} & Replace all null values with x \tn
% Row Count 9 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Numpy Basic Functions}} \tn
% Row 0
\SetRowColor{LightBackground}
import numpy as np & importing numpy \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
example = {[}0,1,2{]}\{\{nl\}\}example = np.array(example) & array({[}0, 1, 2{]}) \tn
% Row Count 4 (+ 3)
% Row 2
\SetRowColor{LightBackground}
np.arange(1,4) & array({[}1,2,3{]}) \tn
% Row Count 5 (+ 1)
% Row 3
\SetRowColor{white}
np.zeros((2,2)) & array({[}{[}0,0{]},{[}0,0{]}{]}) \tn
% Row Count 6 (+ 1)
% Row 4
\SetRowColor{LightBackground}
np.linspace(0,10,2) & array({[}0,10{]}), gives two evenly spaced values \tn
% Row Count 9 (+ 3)
% Row 5
\SetRowColor{white}
np.eye(2) & array({[}{[}1,0{]},{[}0,1{]}{]}), 2*2 Identity Matrix \tn
% Row Count 11 (+ 2)
% Row 6
\SetRowColor{LightBackground}
example.reshape(3,1) & array({[}{[}0{]},{[}1{]},{[}2{]}{]}) \tn
% Row Count 13 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Loading Dataset from local Machine}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{import pandas as pd} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{data = pd.read\_csv(pathname)} \tn
% Row Count 2 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{If the file is in the local directory then we can directly use File name} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Loading Data from Standard datasets}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn import datasets
\{\{nl\}\} iris = datasets.load\_iris() \{\{nl\}\} digits = datasets.load\_digits()} \tn % Row Count 3 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Encoding Categorical Variables}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.preprocessing import LabelEncoder} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{labelencoder\_X = LabelEncoder()} \tn % Row Count 2 (+ 1) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{X{[} : , 0{]} = labelencoder\_X.fit\_transform(X{[} : , 0 {]})} \tn % Row Count 4 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{onehotencoder = \seqsplit{OneHotEncoder(categorical\_features} = {[}0{]}) \{\{nl\}\} X = \seqsplit{onehotencoder.fit\_transform(X).toarray()}} \tn % Row Count 7 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Polynomial Regression}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.preprocessing import {\bf{PolynomialFeatures}}} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{poly\_reg = \seqsplit{PolynomialFeatures(degree} =2)\{\{nl\}\}X\_poly = \seqsplit{poly\_reg.fit\_transform(X)}} \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{It not only checks the relation between X(independent) and y(dependent). But also checks with X\textasciicircum{}2\textasciicircum{} ..X \textasciicircum{}n\textasciicircum{}. 
(n is degree specified by us).} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Evaluation of {\bf{Regression Model}} Performance}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{R\textasciicircum{}2\textasciicircum{} = 1 - SS(residuals)/SS(total)} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{SS(res) = SUM(y{\emph{i}} - \^{y}{\emph{i}})\textasciicircum{}2\textasciicircum{}\{\{nl\}\}SS(Total) = SUM(y{\emph{i}} - y{\emph{avg}})\textasciicircum{}2\textasciicircum{}} \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{from sklearn.metrics import r2\_score\{\{nl\}\} r2\_score(y\_true,y\_pred)} \tn
% Row Count 5 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{The Greater the R\textasciicircum{}2\textasciicircum{} value the better the model is.} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Converting Dataframe to Matrix}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{data = pd.read\_csv("data.csv")} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{X = data.iloc{[} : , :-1{]}.values} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{y = data.iloc{[} : , 3{]}.values} \tn
% Row Count 3 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{y is Dependent parameter} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Feature Scaling}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.preprocessing import StandardScaler} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{sc\_X = StandardScaler()} \tn % Row Count 2 (+ 1) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{X\_train = \seqsplit{sc\_X.fit\_transform(X\_train)} \{\{nl\}\} X\_test = sc\_X.transform(X\_test)} \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Euclidean distance is dominated by the larger numbers and to make all the values on the same scale. hence Scaling should be done. Most of the models do feature scaling by themselves.} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{SVR(Non-linear Regression model)}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.svm import {\bf{SVR}}} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{regressor = {\bf{SVR}}(kernel = 'rbf') \{\{nl\}\}regressor.fit(X,y)} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{y\_prediction = regressor.{\bf{predict}}(values)} \tn % Row Count 4 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Basically, the kernel is selected based on the given problem. If the problem is Linear then {\bf{kernel='linear'}}. 
And if problem is non-linear we can choose either 'poly' or{\bf{ 'rbf'(Gaussian)}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Some Classification Models}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Logistic Regression} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{K-NN (K-nearest neighbours)} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Support Vector Machine(SVM)} \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Naive Bayes} \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Decision Tree Classification} \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Random Forest Classification} \tn
% Row Count 6 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Some Clustering Models}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{K-Means Clustering} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Hierarchical Clustering} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{DB-SCAN} \tn
% Row Count 3 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{8.4cm}{x{2 cm} x{6 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Knowing about Data information with Pandas}} \tn
% Row 0
\SetRowColor{LightBackground}
\seqsplit{df.head(n)} & First n rows of the DataFrame \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\seqsplit{df.tail(n)} & Last n rows of the DataFrame \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground} df.shape & Number of rows and columns \tn % Row Count 3 (+ 1) % Row 3 \SetRowColor{white} df.info() & Index, Datatype and Memory information \tn % Row Count 5 (+ 2) % Row 4 \SetRowColor{LightBackground} \seqsplit{df.describe()} & Summary statistics for numerical columns \tn % Row Count 7 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}