\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}   % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}  % Languages

% Document Info
\author{gokug6}
\pdfinfo{
  /Title (python-data-structures-and-algorithms.pdf)
  /Creator (Cheatography)
  /Author (gokug6)
  /Subject (Python Data Structures \& Algorithms Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm}  % Space between columns
\setlength{\headsep}{-12pt}    % Reduce space between header and content
\setlength{\headheight}{85pt}  % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt}  % Remove footer line
\renewcommand{\headrulewidth}{0pt}  % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi}  % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}}  % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}}  % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}}  % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline}  % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{}  % Set header to blank
\fancyfoot{}  % Set footer to blank
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
  \SetRowColor{DarkBackground}
  \vspace{-7pt}
  {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
    \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
  }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
  \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Python Data Structures \& Algorithms Cheat Sheet}}}} \\
  \normalsize{by \textcolor{DarkBackground}{gokug6} via \textcolor{DarkBackground}{\uline{cheatography.com/190865/cs/39709/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\
\vspace{-2pt}gokug6 \\
  \uline{cheatography.com/gokug6} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 7th August, 2023.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{2}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Binary Search: Time - O(log n), Space - O(1)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{def search(nums, target): \newline
start = 0 \newline
end = len(nums) - 1 \newline
mid = 0 \newline
 \newline
while start \textless{}= end: \newline
mid = (start + end) // 2 \newline
 \newline
\# If target is greater, ignore the left half \newline
if nums{[}mid{]} \textless{} target: \newline
start = mid + 1 \newline
 \newline
\# If target is smaller, ignore the right half \newline
elif nums{[}mid{]} \textgreater{} target: \newline
end = mid - 1 \newline
 \newline
\# Otherwise target is present at mid \newline
else: \newline
return mid \newline
 \newline
\# If we reach here, the element was not present \newline
return -1} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
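\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Binary Search via bisect (sketch)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{A minimal, equivalent sketch using the standard-library bisect module; like the loop above, it assumes nums is sorted ascending. \newline
 \newline
from bisect import bisect\_left \newline
 \newline
def search(nums, target): \newline
\# leftmost index where target could be inserted \newline
i = bisect\_left(nums, target) \newline
if i \textless{} len(nums) and nums{[}i{]} == target: \newline
return i \newline
return -1 \# not present} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}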
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{DFS for In-order Tree Traversal}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{def inOrder(root): \newline
current = root \newline
stack = {[}{]} \newline
 \newline
while True: \newline
if current is not None: \newline
\# Reach the leftmost node of this subtree \newline
stack.append(current) \newline
current = current.left \newline
elif stack: \newline
\# Backtrack: visit the node, then go right \newline
current = stack.pop() \newline
print(current.data, end=" ") \newline
current = current.right \newline
else: \newline
break \newline
 \newline
print()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{DFS for Graphs}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{def DFS(self, s): \newline
\# Prints all vertices reachable from source s in DFS order \newline
\# Initially mark all vertices as not visited \newline
visited = {[}False for i in range(self.V){]} \newline
 \newline
\# Create a stack for DFS \newline
stack = {[}{]} \newline
 \newline
\# Push the source node \newline
stack.append(s) \newline
 \newline
while stack: \newline
\# Pop a vertex from the stack \newline
s = stack.pop() \newline
 \newline
\# The stack may contain the same vertex twice, so \newline
\# only print a popped vertex if it has not been \newline
\# visited yet \newline
if not visited{[}s{]}: \newline
print(s, end=' ') \newline
visited{[}s{]} = True \newline
 \newline
\# Get all adjacent vertices of the popped vertex s; \newline
\# if an adjacent vertex has not been visited, \newline
\# push it onto the stack \newline
for node in self.adj{[}s{]}: \newline
if not visited{[}node{]}: \newline
stack.append(node)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Batch Normalization}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{activation\_map\_sample1 = np.array({[} \newline
{[}1, 1, 1{]}, \newline
{[}1, 1, 1{]}, \newline
{[}1, 1, 1{]} \newline
{]}, dtype=np.float32) \newline
 \newline
activation\_map\_sample2 = np.array({[} \newline
{[}1, 2, 3{]}, \newline
{[}4, 5, 6{]}, \newline
{[}7, 8, 9{]} \newline
{]}, dtype=np.float32) \newline
 \newline
activation\_map\_sample3 = np.array({[} \newline
{[}9, 8, 7{]}, \newline
{[}6, 5, 4{]}, \newline
{[}34, 2, 1{]} \newline
{]}, dtype=np.float32) \newline
 \newline
\# get the per-activation mean across the samples in the batch \newline
activation\_mean\_bn = np.mean({[}activation\_map\_sample1, activation\_map\_sample2, activation\_map\_sample3{]}, axis=0) \newline
 \newline
\# get the per-activation standard deviation across the samples in the batch \newline
activation\_std\_bn = np.std({[}activation\_map\_sample1, activation\_map\_sample2, activation\_map\_sample3{]}, axis=0) \newline
 \newline
activation\_map\_sample1\_bn = \seqsplit{(activation\_map\_sample1} - activation\_mean\_bn) / activation\_std\_bn} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{BCE}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{8.4cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/gokug6_1691384751_bce.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
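\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{BCE (reference)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{For reference, the pictured loss is presumably the standard binary cross-entropy over $N$ samples with labels $y_i$ and predicted probabilities $\hat{y}_i$: \newline
$-\frac{1}{N}\sum_{i=1}^{N}\big[y_i\log\hat{y}_i+(1-y_i)\log(1-\hat{y}_i)\big]$ \newline
 \newline
A quick numpy check with made-up values: \newline
 \newline
import numpy as np \newline
y = np.array({[}1, 0, 1{]}) \newline
p = np.array({[}0.9, 0.2, 0.7{]}) \newline
\# mean of the per-sample cross-entropies \newline
bce = -np.mean(y*np.log(p) + (1-y)*np.log(1-p))} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}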
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Linked List}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{class Node: \newline
 \newline
\# Constructor to initialize the node object \newline
def \_\_init\_\_(self, data): \newline
self.data = data \newline
self.next = None \newline
 \newline
 \newline
class LinkedList: \newline
 \newline
\# Function to initialize head \newline
def \_\_init\_\_(self): \newline
self.head = None \newline
 \newline
\# Function to reverse the linked list \newline
def reverse(self): \newline
prev = None \newline
current = self.head \newline
while current is not None: \newline
next\_node = current.next \newline
current.next = prev \newline
prev = current \newline
current = next\_node \newline
self.head = prev \newline
 \newline
\# Function to insert a new node at the beginning \newline
def push(self, new\_data): \newline
new\_node = Node(new\_data) \newline
new\_node.next = self.head \newline
self.head = new\_node \newline
 \newline
\# Standalone helper (not a method): delete the node \newline
\# at the given 1-based position, returning the head \newline
def deleteN(head, position): \newline
temp = head \newline
prev = head \newline
for i in range(0, position): \newline
if i == 0 and position == 1: \newline
head = head.next \newline
else: \newline
if i == position-1 and temp is not None: \newline
prev.next = temp.next \newline
else: \newline
prev = temp \newline
 \newline
\# Position was greater than \newline
\# number of nodes in the list \newline
if prev is None: \newline
break \newline
temp = temp.next \newline
return head \newline
 \newline
def search(self, x): \newline
 \newline
\# Initialize current to head \newline
current = self.head \newline
 \newline
\# Walk the list until current is None \newline
while current is not None: \newline
if current.data == x: \newline
return True \# data found \newline
current = current.next \newline
 \newline
return False \# data not found \newline
 \newline
def getCount(self): \newline
temp = self.head \# Initialise temp \newline
count = 0 \# Initialise count \newline
 \newline
\# Loop until the end of the list is reached \newline
while temp: \newline
count += 1 \newline
temp = temp.next \newline
return count \newline
 \newline
\# Utility function to print the LinkedList \newline
def printList(self): \newline
temp = self.head \newline
while temp: \newline
print(temp.data, end=" ") \newline
temp = temp.next \newline
 \newline
 \newline
\# Driver code \newline
llist = LinkedList() \newline
llist.push(20) \newline
llist.push(4) \newline
llist.push(15) \newline
llist.push(85)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Floor/Ceil}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{3//2 \# floor == 1 \newline
-(-3//2) \# ceil == 2, works because floor(-x) == -ceil(x)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{BFS for Level-order Tree Traversal}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{class TreeNode: \newline
def \_\_init\_\_(self, key): \newline
self.data = key \newline
self.left = None \newline
self.right = None \newline
 \newline
def printLevelOrder(root): \newline
if not root: \newline
return \newline
 \newline
queue = {[}{]} \newline
queue.append(root) \newline
 \newline
while queue: \newline
node = queue.pop(0) \newline
print(node.data) \newline
if node.left is not None: \newline
queue.append(node.left) \newline
 \newline
if node.right is not None: \newline
queue.append(node.right) \newline
 \newline
root = TreeNode(1) \newline
root.left = TreeNode(2) \newline
root.right = TreeNode(3) \newline
root.left.left = TreeNode(4) \newline
root.left.right = TreeNode(5) \newline
 \newline
printLevelOrder(root)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
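\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Level-order with deque (sketch)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{queue.pop(0) above is O(n) per pop; collections.deque pops from the left in O(1). A minimal variant of the same traversal, reusing the TreeNode class above: \newline
 \newline
from collections import deque \newline
 \newline
def printLevelOrder(root): \newline
if not root: \newline
return \newline
q = deque({[}root{]}) \newline
while q: \newline
node = q.popleft() \# O(1), unlike list.pop(0) \newline
print(node.data) \newline
if node.left: \newline
q.append(node.left) \newline
if node.right: \newline
q.append(node.right)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}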
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{ML System Design}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{1. Clarify Requirements \newline % Row Count 1 (+ 1)
What is the goal? Any secondary goal? \newline % Row Count 2 (+ 1)
e.g. for CTR - maximizing the number of clicks is the primary goal. A secondary goal might be the quality of the ads/content \newline % Row Count 5 (+ 3)
Ask questions about the scale of the system - how many users, how much content? \newline % Row Count 7 (+ 2)
2. How the ML system fits into the overall product backend \newline % Row Count 9 (+ 2)
Think/draw a very simple diagram with input/output lines between the system backend and the ML system \newline % Row Count 12 (+ 3)
3. Data Related Activities \newline % Row Count 13 (+ 1)
Data Exploration - what does the dataset look like? \newline % Row Count 15 (+ 2)
Understand the different features and their relationship with the target \newline % Row Count 17 (+ 2)
- Is the data balanced? If not, do you need \seqsplit{oversampling/undersampling}? \newline % Row Count 19 (+ 2)
- Are there missing values? (not an issue for tree-based models) \newline % Row Count 21 (+ 2)
- Are there unexpected values in one or more data columns? How do you know whether it's a typo, and should you ignore it? \newline % Row Count 24 (+ 3)
Feature Importance - partial dependency plot, SHAP values, dataschool video (reference) \newline % Row Count 26 (+ 2)
(ML Pipeline: Data Ingestion) Think of data ingestion services/storage \newline % Row Count 28 (+ 2)
(ML Pipeline: Data Preparation) Feature Engineering - encoding categorical features, embedding generation etc. \newline % Row Count 31 (+ 3)
} \tn
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{ML System Design (cont)}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{ (ML Pipeline - Data Segregation) Data split - train set, validation set, test set (see the split sketch after this table) \newline % Row Count 2 (+ 2)
4. Model Related Activities \newline % Row Count 3 (+ 1)
(ML Pipeline - Model Train and Evaluation) Build a simple model (XGBoost or NN) \newline % Row Count 5 (+ 2)
- How to select a model? Assuming it's a Neural Network: \newline % Row Count 7 (+ 2)
1. NLP/Sequence Model \newline % Row Count 8 (+ 1)
- start: LSTM with 2 hidden layers \newline % Row Count 10 (+ 2)
- see if 3 layers help \newline % Row Count 11 (+ 1)
- improve: check if an Attention-based model can help \newline % Row Count 13 (+ 2)
2. Image Models - (Don't care right now) \newline % Row Count 15 (+ 2)
3. Other \newline % Row Count 16 (+ 1)
- start: Fully connected NN with 2 hidden layers \newline % Row Count 18 (+ 2)
- improve: problem specific \newline % Row Count 20 (+ 2)
(ML Pipeline - Model Train and Evaluation) What are the different hyperparameters (for HPO) in the model you chose, and why? \newline % Row Count 23 (+ 3)
(ML Pipeline - Model Train and Evaluation) Once the simple model is built, do a bias-variance analysis; it will tell you whether the model is overfitting or underfitting, and each calls for a different approach to make the model better. \newline % Row Count 29 (+ 6)
Draw the ML pipeline (reference \#3) \newline % Row Count 30 (+ 1)
} \tn
\end{tabularx}
\par\addvspace{1.3em}
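\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Train/Val/Test Split (sketch)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{A minimal sketch of the data-segregation step above, assuming scikit-learn and feature/label arrays X, y (hypothetical names). train\_test\_split is applied twice to carve out a validation set; stratify preserves class balance for imbalanced data: \newline
 \newline
from \seqsplit{sklearn.model\_selection} import train\_test\_split \newline
 \newline
\# 70\% train, 15\% validation, 15\% test \newline
X\_train, X\_tmp, y\_train, y\_tmp = train\_test\_split(X, y, test\_size=0.3, stratify=y, random\_state=42) \newline
X\_val, X\_test, y\_val, y\_test = train\_test\_split(X\_tmp, y\_tmp, test\_size=0.5, stratify=y\_tmp, random\_state=42)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}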
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{ML System Design (cont)}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{ Model Debug (reference \#1) \newline % Row Count 1 (+ 1)
Model Deployment (reference \#3) \newline % Row Count 2 (+ 1)
(ML Pipeline: Performance Monitoring) Metrics \newline % Row Count 4 (+ 2)
AUC, F1, MSE, Accuracy, NDCG for ranking problems etc. \newline % Row Count 6 (+ 2)
When to use which metrics? \newline % Row Count 7 (+ 1)
5. Scaling% Row Count 8 (+ 1)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{BFS for Graphs}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{\# Flood-fill style BFS that counts the area of one \newline
\# island in a binary grid; expects grid, rows, cols \newline
\# and visited in the enclosing scope, plus \newline
\# import collections \newline
def bfs(r, c): \newline
q = collections.deque() \newline
q.append((r, c)) \newline
visited.add((r, c)) \newline
neighbors = {[}{[}-1,0{]}, {[}0,-1{]}, {[}1,0{]}, {[}0,1{]}{]} \newline
area = 1 \newline
 \newline
while q: \newline
row, col = q.popleft() \newline
for n in neighbors: \newline
curInd = (row + n{[}0{]}, col + n{[}1{]}) \newline
if (curInd{[}0{]} \textgreater{}= 0) and (curInd{[}0{]} \textless{} rows) and (curInd{[}1{]} \textgreater{}= 0) and (curInd{[}1{]} \textless{} cols): \newline
if grid{[}curInd{[}0{]}{]}{[}curInd{[}1{]}{]} and (curInd not in visited): \newline
q.append(curInd) \newline
visited.add(curInd) \newline
area += 1 \newline
return area} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
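\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{BFS for Graphs: driver (sketch)}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{bfs above reads grid, rows, cols and visited from its enclosing scope. A hypothetical driver (these names and values are assumptions, not part of the original) that measures the island containing cell (0, 0): \newline
 \newline
import collections \newline
 \newline
\# 1 = land, 0 = water \newline
grid = {[}{[}1, 1, 0{]}, {[}1, 0, 0{]}, {[}0, 0, 1{]}{]} \newline
rows, cols = len(grid), len(grid{[}0{]}) \newline
visited = set() \newline
print(bfs(0, 0)) \# prints 3 for this grid} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}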
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Softmax Reg PyTorch}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{import time \newline
from torchvision import datasets \newline
from torchvision import transforms \newline
from torch.utils.data import DataLoader \newline
import torch.nn.functional as F \newline
import torch \newline
 \newline
device = torch.device("cuda:0" if \seqsplit{torch.cuda.is\_available()} else "cpu") \newline
 \newline
\# Hyperparameters \newline
random\_seed = 123 \newline
learning\_rate = 0.1 \newline
num\_epochs = 25 \newline
batch\_size = 256 \newline
 \newline
\# Architecture \newline
num\_features = 784 \newline
num\_classes = 10 \newline
 \newline
\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\# \newline
\#\#\# MNIST DATASET \newline
\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\# \newline
 \newline
train\_dataset = \seqsplit{datasets.MNIST(root='data'}, \newline
train=True, \newline
\seqsplit{transform=transforms.ToTensor()}, \newline
download=True) \newline
 \newline
test\_dataset = \seqsplit{datasets.MNIST(root='data'}, \newline
train=False, \newline
\seqsplit{transform=transforms.ToTensor())} \newline
 \newline
train\_loader = \seqsplit{DataLoader(dataset=train\_dataset}, \newline
batch\_size=batch\_size, \newline
shuffle=True) \newline
 \newline
test\_loader = \seqsplit{DataLoader(dataset=test\_dataset}, \newline
batch\_size=batch\_size, \newline
shuffle=False) \newline
 \newline
\# Checking the dataset \newline
for images, labels in train\_loader: \newline
print('Image batch dimensions:', images.shape) \# NCHW \newline
print('Image label dimensions:', labels.shape) \newline
break \newline
 \newline
\#\#\#\#\#\#\# MODEL \#\#\#\#\#\# \newline
 \newline
class \seqsplit{SoftmaxRegression(torch.nn.Module):} \newline
 \newline
def \_\_init\_\_(self, num\_features, num\_classes): \newline
super(SoftmaxRegression, self).\_\_init\_\_() \newline
self.linear = \seqsplit{torch.nn.Linear(num\_features}, num\_classes) \newline
 \newline
\seqsplit{self.linear.weight.detach().zero\_()} \newline
\seqsplit{self.linear.bias.detach().zero\_()} \newline
 \newline
def forward(self, x): \newline
logits = self.linear(x) \newline
probas = F.softmax(logits, dim=1) \newline
return logits, probas \newline
 \newline
class MLP(torch.nn.Module): \newline
 \newline
def \_\_init\_\_(self, num\_features, num\_hidden, num\_classes): \newline
super().\_\_init\_\_() \newline
 \newline
self.num\_classes = num\_classes \newline
 \newline
\#\#\# 1st hidden layer \newline
self.linear\_1 = \seqsplit{torch.nn.Linear(num\_features}, num\_hidden) \newline
\seqsplit{self.linear\_1.weight.detach().normal\_(0.0}, 0.1) \newline
\seqsplit{self.linear\_1.bias.detach().zero\_()} \newline
 \newline
\#\#\# Output layer \newline
self.linear\_out = \seqsplit{torch.nn.Linear(num\_hidden}, num\_classes) \newline
\seqsplit{self.linear\_out.weight.detach().normal\_(0.0}, 0.1) \newline
\seqsplit{self.linear\_out.bias.detach().zero\_()} \newline
 \newline
def forward(self, x): \newline
out = self.linear\_1(x) \newline
out = torch.sigmoid(out) \newline
logits = self.linear\_out(out) \newline
\#probas = torch.softmax(logits, dim=1) \newline
return logits \#, probas \newline
 \newline
\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\# \newline
\#\#\# Model Initialization \newline
\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\# \newline
 \newline
torch.manual\_seed(random\_seed) \newline
\# NB: this MLP is immediately replaced by the \newline
\# SoftmaxRegression below (kept here for reference; \newline
\# its forward returns logits only) \newline
model = MLP(num\_features=28*28, \newline
num\_hidden=100, \newline
num\_classes=10) \newline
 \newline
model = \seqsplit{SoftmaxRegression(num\_features=num\_features}, \newline
\seqsplit{num\_classes=num\_classes)} \newline
 \newline
model.to(device) \newline
 \newline
\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\# \newline
\#\#\# COST AND OPTIMIZER \newline
\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\#\# \newline
 \newline
optimizer = \seqsplit{torch.optim.SGD(model.parameters()}, lr=learning\_rate) \newline
 \newline
torch.manual\_seed(random\_seed) \newline
 \newline
def compute\_accuracy(model, data\_loader): \newline
correct\_pred, num\_examples = 0, 0 \newline
 \newline
for features, targets in data\_loader: \newline
features = features.view(-1, 28*28).to(device) \newline
targets = targets.to(device) \newline
logits, probas = model(features) \newline
\_, predicted\_labels = torch.max(probas, 1) \newline
num\_examples += targets.size(0) \newline
correct\_pred += (predicted\_labels == targets).sum() \newline
 \newline
return correct\_pred.float() / num\_examples * 100 \newline
 \newline
start\_time = time.time() \newline
epoch\_costs = {[}{]} \newline
for epoch in range(num\_epochs): \newline
avg\_cost = 0.
 \newline
for batch\_idx, (features, targets) in \seqsplit{enumerate(train\_loader):} \newline
 \newline
features = features.view(-1, 28*28).to(device) \newline
targets = targets.to(device) \newline
 \newline
\#\#\# FORWARD AND BACK PROP \newline
logits, probas = model(features) \newline
 \newline
\# note that the PyTorch implementation of \newline
\# CrossEntropyLoss works with logits, not \newline
\# probabilities \newline
cost = F.cross\_entropy(logits, targets) \newline
optimizer.zero\_grad() \newline
cost.backward() \newline
\# detach so the running sum does not keep the graph \newline
avg\_cost += cost.detach() \newline
 \newline
\#\#\# UPDATE MODEL PARAMETERS \newline
optimizer.step() \newline
 \newline
\#\#\# LOGGING \newline
if not batch\_idx \% 50: \newline
print('Epoch: \%03d/\%03d | Batch \%03d/\%03d | Cost: \%.4f' \newline
\%(epoch+1, num\_epochs, batch\_idx, \newline
\seqsplit{len(train\_dataset)//batch\_size}, cost)) \newline
 \newline
with \seqsplit{torch.set\_grad\_enabled(False):} \newline
avg\_cost = \seqsplit{avg\_cost/len(train\_dataset)} \newline
\seqsplit{epoch\_costs.append(avg\_cost)} \newline
print('Epoch: \%03d/\%03d training accuracy: \%.2f\%\%' \% ( \newline
epoch+1, num\_epochs, \newline
compute\_accuracy(model, train\_loader))) \newline
print('Time elapsed: \%.2f min' \% ((time.time() - start\_time)/60))} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}