\documentclass[10pt,a4paper]{article} % Packages \usepackage{fancyhdr} % For header and footer \usepackage{multicol} % Allows multicols in tables \usepackage{tabularx} % Intelligent column widths \usepackage{tabulary} % Used in header and footer \usepackage{hhline} % Border under tables \usepackage{graphicx} % For images \usepackage{xcolor} % For hex colours %\usepackage[utf8x]{inputenc} % For unicode character support \usepackage[T1]{fontenc} % Without this we get weird character replacements \usepackage{colortbl} % For coloured tables \usepackage{setspace} % For line height \usepackage{lastpage} % Needed for total page number \usepackage{seqsplit} % Splits long words. %\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely. \usepackage[normalem]{ulem} % For underlining links % Most of the following are not required for the majority % of cheat sheets but are needed for some symbol support. \usepackage{amsmath} % Symbols \usepackage{MnSymbol} % Symbols \usepackage{wasysym} % Symbols %\usepackage[english,german,french,spanish,italian]{babel} % Languages % Document Info \author{SebChw} \pdfinfo{ /Title (explainable-ai.pdf) /Creator (Cheatography) /Author (SebChw) /Subject (Explainable AI Cheat Sheet) } % Lengths and widths \addtolength{\textwidth}{6cm} \addtolength{\textheight}{-1cm} \addtolength{\hoffset}{-3cm} \addtolength{\voffset}{-2cm} \setlength{\tabcolsep}{0.2cm} % Space between columns \setlength{\headsep}{-12pt} % Reduce space between header and content \setlength{\headheight}{85pt} % If less, LaTeX automatically increases it \renewcommand{\footrulewidth}{0pt} % Remove footer line \renewcommand{\headrulewidth}{0pt} % Remove header line \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit % This two commands together give roughly % the right line height in the tables \renewcommand{\arraystretch}{1.3} \onehalfspacing % Commands \newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{9568A3} \definecolor{LightBackground}{HTML}{F8F5F9} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Explainable AI Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{SebChw} via \textcolor{DarkBackground}{\uline{cheatography.com/149723/cs/32502/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}SebChw \\ \uline{cheatography.com/sebchw} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Not Yet Published.\\ Updated 18th June, 2022.\\ Page {\thepage} of \pageref{LastPage}. \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{2} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Before you start - SHAP}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{import xgboost \newline import shap \newline \newline X, y = shap.datasets.boston() \newline model = \seqsplit{xgboost.XGBRegressor().fit(X}, y) \newline \newline explainer = shap.Explainer(model) \newline shap\_values = explainer(X)} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Waterfall Chart}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655121645_boston_waterfall.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\emph{Used to see contributions of different atributes for the prediction. These SHAP values are valid for this observation only. With other data points the SHAP values will change.}} \newline \newline `shap.plots.force(shap\_values{[}0{]})`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Force Plot}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655125036_force_plot.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Exactly the same purpose as the waterfall chart but much more compact \newline \newline `shap.plots.force(shap\_values{[}0{]})`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{SHAP Summaries}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655125085_boston_dataset.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{If you take force plots for all observations, rotate them by 90 degrees and then put next to each other you obtain a SHAP summary plot. This is very useful if you want te see explanations for the entire dataset. \newline \newline `shap.plots.force(shap\_values)`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{SHAP Beeswarm}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655125175_boston_beeswarm.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Useful to see which attributes are the most important. For every feature and every sample we plot a dot. We denote value of the feature with color: big (red) or small (blue). On the X-axis we see the importance. From this plot we see that LSTAT is probably the most important attribute. Also, high value of RM increases the model prediction \newline \newline `shap.plots.beeswarm(shap\_values)`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Feature Interaction}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655125230_boston_scatter.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{This one is helpful to capture feature interaction and how they influence SHAP value for given feature. On X and Y axis we have information about attribute we are interested in. Color represents value of another feature that is interacting with considered. From here we see that if RAD is small then RM have quite big impact on the prediction whereas when RAD is big then this impact is much smaller. \newline \newline `shap.plots.scatter(shap\_values{[}:,"RM"{]}, color=shap\_values)`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{SHAP for text}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655127112_sentiment_analysis_plot.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{We can extend this idea to text and see how particular words influence the prediction.} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{SHAP for images}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/sebchw_1655127159_gradient_imagenet_plot.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{This can be also used for images to see the influence of individual pixels.} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Before you start - OTHER XAI PLOTS}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{from sklearn.linear\_model import LinearRegression \newline \newline import plotly.express as px \newline import dalex as dx \newline \newline linarModel = \seqsplit{LinearRegression().fit(scale(X)}, y) \newline \newline boston\_rf\_exp = dx.Explainer(model, X, y, label="Boston houses RF Pipeline")} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Break down plot}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kord_1655545196_breakDown.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{This plot shows the decomposition of the model's prediction into contributions of different attributes \newline \newline `bd = \seqsplit{boston\_rf\_exp.predict\_parts(house}, type='break\_down')` \newline `bd.plot()`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Permutation importance}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kord_1655547392_permutationImportance.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{This functions function calculates the feature importance of estimators for a given dataset for given evaluation metrics. Can be visualized on bar chart. \newline \newline `r = \seqsplit{permutation\_importance(model}, X, y)`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Tree models feature importance}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kord_1655545825_treeImportance.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Tree algorithms offer importance scores based on the reduction in the evaluation criterion, like Gini or entropy. Can be used either in regression or classification problems in decision trees, random forests or boosting methods. \newline \newline `px.bar(x=X.columns, \seqsplit{y=model.feature\_importances\_)`}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Ceteris paribus profiles (partial dependence plot)}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kord_1655544776_certisParibus.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{This figure shows how different attributes in a new instance can change a prediction of the model. \newline In a nutshell, we held all explanatory variables but one (can increase this but computational const increases by much) constant. Then we change the values of one selected and see how the response changes. \newline \newline `cp = \seqsplit{titanic\_rf\_exp.predict\_profile(house)`} \newline `cp.plot(variables={[}'NOX', 'RM', 'DIS', 'LSTAT'{]})`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Linear model feature importance}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kord_1655550479_linearImportance.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{After scaling features we can measure how each attribute is important for the model \newline \newline `px.bar(y=abs(linarModel.coef\_), x=X.columns)`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Ceteris paribus profiles (partial dependence plot)}} \tn \SetRowColor{LightBackground} \mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kord_1655550658_certisParibus.png}}} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{This figure shows how different attributes in a new instance can change a prediction of the model. \newline In a nutshell, we held all explanatory variables but one (can increase this but computational const increases by much) constant. Then we change the values of one selected and see how the response changes. \newline \newline `cp = \seqsplit{titanic\_rf\_exp.predict\_profile(house)`} \newline `cp.plot(variables={[}'NOX', 'RM', 'DIS', 'LSTAT'{]})`} \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}