\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Multi-column page layout (multicols environment)
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
% \author is declared here, but no \maketitle appears in this file, so the
% name is shown to the reader only via the page header/footer defined below.
\author{Taissir Boukrouba (taissir2002)}
% \pdfinfo is a pdfTeX primitive: the keys below are written into the PDF's
% document-information dictionary. Parentheses delimit PDF strings, so the
% nested "(taissir2002)" relies on the parentheses being balanced.
\pdfinfo{
  /Title (biopython-basics.pdf)
  /Creator (Cheatography)
  /Author (Taissir Boukrouba (taissir2002))
  /Subject (Biopython Basics Cheat Sheet)
}

% Lengths and widths
% Widen the text block by 6cm and shift it 3cm to the left, so the extra
% width is split evenly between the left and right margins.
\addtolength{\textwidth}{6cm}
% Shorten the text block by 1cm and move the whole page content up by 2cm.
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
% Break points inserted by \seqsplit: a plain \allowbreak in math mode,
% otherwise a discretionary hyphen (\-).
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands

% \SetRowColor{<colour>}
%   Stores <colour> globally in \RowColorName and applies it to the current
%   table row via \rowcolor. Must be the first thing in a row (\noalign).
\newcommand{\SetRowColor}[1]{%
  \noalign{\gdef\RowColorName{#1}}%
  \rowcolor{\RowColorName}%
}

% \mymulticolumn{<n>}{<colspec>}{<text>}
%   A \multicolumn whose cell background is the colour most recently stored
%   by \SetRowColor, so spanning cells keep the row colour.
\newcommand{\mymulticolumn}[3]{%
  \multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}%
}

% x{<width>}: ragged-right paragraph column of the given width.
\newcolumntype{x}[1]{%
  >{\raggedright}p{#1}%
}

% \tn: row terminator. \raggedright in the x column redefines \\, so rows
% are ended with \tabularnewline instead.
\newcommand{\tn}{\tabularnewline}

% Font and Colours
% Named colours (xcolor, HTML hex model).
% HeadBackground is defined but not referenced anywhere in this file;
% FootBackground is the title-row colour of the footer tables.
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
% Colour of regular text (activated by \color{TextColor} below).
\definecolor{TextColor}{HTML}{333333}
% DarkBackground: table title rows, bottom rules and header accents.
% LightBackground: table content rows.
\definecolor{DarkBackground}{HTML}{1138B8}
\definecolor{LightBackground}{HTML}{F0F2FA}
% Use the sans-serif family as the document default.
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
% Page style: fancyhdr with all default header/footer fields cleared, then a
% single left-aligned header built from a three-column multicols layout:
%   - a 5.8cm tabulary holding the Cheatography logo,
%   - an 11cm tabulary holding the sheet title and the author/URL byline.
% NOTE(review): the logo is loaded from an absolute path; the file must exist
% at exactly that location for the document to compile -- presumably this is
% specific to the generating server, confirm before building elsewhere.
% NOTE(review): \large and \normalsize are switches, not one-argument
% commands, and \bf is the obsolete LaTeX 2.09 form; left untouched here
% because whitespace and grouping inside the header affect its layout.
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Biopython Basics Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Taissir Boukrouba (taissir2002)} via \textcolor{DarkBackground}{\uline{cheatography.com/193976/cs/42971/}}}
\end{tabulary}
\end{multicols}}

% Left-aligned footer in \footnotesize, laid out as three multicols columns,
% each a 5.8cm tabulary with a FootBackground title row:
%   1. "Cheatographer" -- author name and profile URL,
%   2. "Cheat Sheet"   -- publication status, update date and
%                         "Page n of N" (N comes from the LastPage label
%                         provided by the lastpage package),
%   3. "Sponsor"       -- sponsor text (the sponsor image is commented out).
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}}  \\
  \vspace{-2pt}Taissir Boukrouba (taissir2002) \\
  \uline{cheatography.com/taissir2002} \\
  \end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}}  \\
   \vspace{-2pt}Not Yet Published.\\
   Updated 7th April, 2024.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Set the body font size (currently \footnotesize). Switch to any
% size command listed on this page to resize the cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Body font size.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Installing Biopython}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{pip install biopython \newline pip install -{}-upgrade biopython \newline  \newline \# import the library  \newline import Bio} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Creating Sequences}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio.Seq import Seq \newline my\_seq = Seq("AATGCACGTTG")} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{To create a sequence we use the \texttt{Seq} class from the \texttt{Bio.Seq} module}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Filling Sequences}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\# filling sequences  \newline fragments = {[}Seq("GTAT"), Seq("TACT"){]} \newline filler = Seq("A"*3) \newline print(filler.join(fragments)) \newline  \newline \#output :  \newline \textgreater{}\textgreater{}\textgreater{} GTATAAATACT} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Slicing Sequences}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\# defining sequences  \newline my\_seq = Seq("AAGTCCAGTGT") \newline my\_seq\_2 = Seq("AAAA") \newline  \newline \# slicing sequences \newline print(my\_seq{[}1:6{]})  \newline print(my\_seq{[}0::2{]}) \newline  \newline \# output :  \newline \textgreater{}\textgreater{}\textgreater{} AGTCC \newline \textgreater{}\textgreater{}\textgreater{} AGCATT} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Slicing a sequence works the same way as slicing a Python list (we use \texttt{{[}{]}})}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Appending Sequences}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{my\_seq = Seq("AAGTCCAGTGT") \newline my\_seq\_2 = Seq("AAAA") \newline  \newline \#appending sequences \newline print(my\_seq + my\_seq\_2) \newline  \newline \# output  \newline \textgreater{}\textgreater{}\textgreater{} AAGTCCAGTGTAAAA} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Appending sequences works the same way as concatenating strings in Python}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Sequence Counting}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio.Seq import Seq \newline \# creating a sequence  \newline seq\_example = Seq("AGTACACTGGT") \newline  \newline seq\_length = len(seq\_example) \newline occ = seq\_example.count("C") \newline  \newline print("The length of the sequence is", seq\_length) \newline print("The number of occurrences for nucleotide C is", occ) \newline  \newline \#output :  \newline The length of the sequence is 11 \newline The number of occurrences for nucleotide C is 2} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{This shows how to get the length of a sequence and the number of occurrences of a specific nucleotide}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Finding Sub-sequence Index}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{my\_seq = Seq("AAGTCCAGTGT") \newline index = my\_seq.find("GTC") \newline print(f"GTC index is \{index\}") \newline  \newline \# output :  \newline GTC index is 2} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{This returns the start index of the selected sub-sequence}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Reading Sequence Files}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio import SeqIO \newline  \newline records = \seqsplit{SeqIO.parse("sequence\_file.fasta"}, "fasta") \newline for record in records : \newline \hspace*{1em}print(record.seq)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{We can also access other attributes from the records :  \newline - \texttt{record.seq} : returns the sequence of the record \newline - \texttt{record.id} : returns the identifier of the sequence \newline - \texttt{record.description} : returns the sequence description}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Writing Sequences into a file}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio import SeqIO \newline  \newline \# Define your sequence as a string \newline sequence = "ATCGATCGATCGATCGATC" \newline  \newline \# Defining file name and format \newline filename = "my\_sequence.fasta" \newline format = "fasta" \newline  \newline \# Create a SeqRecord object from the sequence \newline record = \seqsplit{SeqIO.SeqRecord(SeqIO.Seq(sequence)}, \newline \hspace*{2em}id="my\_id", description="My sequence description") \newline  \newline \# Open the file for writing in text mode \newline with open(filename, "w") as file: \newline \hspace*{1em}\# Write the record to the file using the specified format \newline \hspace*{1em}SeqIO.write(record, file, format)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Converting Files}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\# syntax \newline SeqIO.convert(inp\_file, inp\_format, outp\_file, outp\_format, molecule\_type=None) \newline  \newline \#example  \newline SeqIO.convert("sequence.gbk", "genbank", \seqsplit{"sequence\_converted.fasta"}, "fasta")} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{inp\_file : path to input file  \newline inp\_format : input file format/extension  \newline outp\_file : path to output file  \newline outp\_format : output file format/extension  \newline molecule\_type : specify the molecule type ("DNA", "RNA" or "protein") when the output format needs it and the input format does not provide it}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Sequence Molecular Weight}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio.SeqUtils import molecular\_weight \newline from Bio.Seq import Seq \newline  \newline seq\_example = Seq("TGTACCCTGGT") \newline mw = \seqsplit{molecular\_weight(seq\_example)} \newline  \newline print(mw) \newline  \newline \#output :  \newline \textgreater{}\textgreater{}\textgreater{} 3403.1577} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Molecular weight}} is a way to guess how heavy a tiny building block of life (like a protein or piece of DNA) is compared to a single carbon atom where the bigger the building blocks , the higher the molecular weight}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{GC-Content}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio.SeqUtils import gc\_fraction \newline from Bio.Seq import Seq \newline  \newline \# creating a sequence  \newline seq\_example = Seq("AGATTCACTGGT") \newline gc\_content = \seqsplit{gc\_fraction(seq\_example)} \newline print(gc\_content) \newline  \newline \# output :  \newline \textgreater{}\textgreater{}\textgreater{} 0.4166666666666667} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
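\mymulticolumn{1}{x{5.377cm}}{{\bf{G-C content}} refers to the percentage of {\bf{guanine (G)}} and {\bf{cytosine (C)}} molecules out of all the building blocks (called nucleotides) in a strand of DNA or RNA. Note that \texttt{gc\_fraction} returns it as a fraction between 0 and 1 (multiply by 100 for a percentage).}  \tn 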
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Reverse Complement}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio.Seq import Seq \newline  \newline \#creating a sequence  \newline seq\_example = Seq("AGTACACTGGT") \newline print("Sequence is :",seq\_example) \newline  \newline \# getting the reverse complement \newline rev\_comp = \seqsplit{seq\_example.reverse\_complement()} \newline print("Reverse complement:", rev\_comp) \newline  \newline \#output :  \newline \textgreater{}\textgreater{}\textgreater{} Sequence is : AGTACACTGGT \newline \textgreater{}\textgreater{}\textgreater{} Reverse complement: ACCAGTGTACT} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Reverse complement}} of a DNA sequence is like a mirror image on the opposite strand.  \newline  \newline - {\bf{Reverse:}} Flips the order of the DNA letters (A, C, G, T), so the sequence is read right to left instead of left to right. \newline - {\bf{Complement:}} Swaps each letter according to its pair: A pairs with T, and C pairs with G.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Transcription \& Translation}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{seq\_example = Seq("ATGAAGTTTTAG") \newline transc  = \seqsplit{seq\_example.transcribe()} \newline print("Transcription:", transc) \newline  \newline transl = seq\_example.translate() \newline print("Translation:", transl) \newline  \newline  \newline \#output :  \newline \textgreater{}\textgreater{}\textgreater{} Transcription: AUGAAGUUUUAG \newline \textgreater{}\textgreater{}\textgreater{} Translation: MKF*} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{{\bf{Transcription and translation}} are the two main steps that turn the instructions in our genes (DNA) into the building blocks of life (proteins). \newline  \newline - {\emph{Transcription}} : is going from DNA to RNA ( creating a copy ) \newline - {\emph{Translation}} : is going from RNA to Protein}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Accessing NCBI Database using esearch()}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio import Entrez \newline  \newline handle = \seqsplit{Entrez.esearch(db="nucleotide"}, term="BRCA1 gene", retmax=20) \newline record = Entrez.read(handle)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{- \texttt{db} : The name of the Entrez database to search ("nucleotide", "protein", \dots) \newline - \texttt{term} : The search term (e.g., gene name, protein ID, \dots) \newline - \texttt{retmode} (str, optional): The format (return mode) to return results in (default: "xml"). \newline - \texttt{retmax} (int, optional): Maximum number of IDs to return (default: 20). \newline - \texttt{sort} (str, optional): Sorting criteria for results (default: "relevance")}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{Accessing NCBI Database using efetch()}}  \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{from Bio import Entrez \newline  \newline id\_list = {[}"NM\_007294.3", "NM\_000546.5"{]} \newline handle = \seqsplit{Entrez.efetch(db="nucleotide"}, id=id\_list, rettype="gb", retmode="xml") \newline records = Entrez.read(handle)} \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\texttt{db} : The name of the Entrez database to search ("nucleotide", "protein", \dots) \newline \texttt{id} (list or str): A single ID or a list of IDs to retrieve \newline \texttt{rettype} (str, optional): The type of information to return (e.g. "gb" for GenBank format) \newline \texttt{retmode} (str, optional): The format to return results in; use "xml" when the result is parsed with \texttt{Entrez.read}.}  \tn 
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}