\documentclass[10pt,a4paper]{article}

% Preamble for a Cheatography cheat sheet: loads the layout, table, colour
% and symbol packages, records the PDF metadata and widens the text block.
% Each statement sits on its own line so that a trailing `%` comment only
% comments out its own remark and never the code that follows it.

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Multi-column layout (multicols environment)
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links

% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{Justin1209 (Justin1209)}
% \pdfinfo is a pdfTeX primitive: it writes these entries into the
% PDF document information dictionary.
\pdfinfo{
  /Title (beautiful-soup.pdf)
  /Creator (Cheatography)
  /Author (Justin1209 (Justin1209))
  /Subject (Beautiful Soup Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
% In math mode allow a plain break, in text mode a discretionary hyphen.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit

% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% Table helper macros, colours, page header/footer and the cheat sheet body.
% Each statement sits on its own line so that a trailing `%` comment never
% swallows the code that follows it.
% Bold text uses \textbf instead of the obsolete {\bf ...} switch; unlike
% \bf it combines with \emph, so bold inside an emphasised comment is
% typeset bold italic.

% Shortcut for row colour: stores the colour name globally (so that
% \mymulticolumn can reuse it) and colours the current row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}}
% For coloured multi-cols: a \multicolumn whose background is the colour
% most recently stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}}
% New column type for ragged-right paragraph columns
\newcolumntype{x}[1]{>{\raggedright}p{#1}}
% Required as custom column type in use: \raggedright changes the meaning
% of \\, so rows in `x` columns are ended with \tn instead.
\newcommand{\tn}{\tabularnewline}

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{0B00A3}
\definecolor{LightBackground}{HTML}{F7F7FC}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, title and author line next to it.
% NOTE(review): the logo is referenced by an absolute path; presumably it
% only exists on the machine that generated this file -- confirm before
% compiling elsewhere.
\fancyhead[L]{
  \noindent
  \begin{multicols}{3}
    \begin{tabulary}{5.8cm}{C}
      \SetRowColor{DarkBackground}
      \vspace{-7pt}
      {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
      }
    \end{tabulary}
    \columnbreak
    \begin{tabulary}{11cm}{L}
      \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Beautiful Soup Cheat Sheet}}} \\
      \normalsize by \textcolor{DarkBackground}{Justin1209 (Justin1209)} via \textcolor{DarkBackground}{\uline{cheatography.com/101982/cs/21428/}}
    \end{tabulary}
  \end{multicols}}
% Footer: three columns (author, sheet status with page count, sponsor).
\fancyfoot[L]{
  \footnotesize
  \noindent
  \begin{multicols}{3}
    \begin{tabulary}{5.8cm}{LL}
      \SetRowColor{FootBackground}
      \mymulticolumn{2}{p{5.377cm}}{\textbf{\textcolor{white}{Cheatographer}}} \\
      \vspace{-2pt}Justin1209 (Justin1209) \\
      \uline{cheatography.com/justin1209} \\
    \end{tabulary}
    \vfill
    \columnbreak
    \begin{tabulary}{5.8cm}{L}
      \SetRowColor{FootBackground}
      \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Cheat Sheet}}} \\
      \vspace{-2pt}Not Yet Published.\\
      Updated 18th December, 2019.\\
      Page {\thepage} of \pageref{LastPage}.
    \end{tabulary}
    \vfill
    \columnbreak
    \begin{tabulary}{5.8cm}{L}
      \SetRowColor{FootBackground}
      \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Sponsor}}} \\
      \SetRowColor{white}
      \vspace{-5pt}
      %\includegraphics[width=48px,height=48px]{dave.jpeg}
      Measure your website readability!\\
      www.readability-score.com
    \end{tabulary}
  \end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{2}
%
% --- Import Resources ---------------------------------------------------
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{\textcolor{white}{Import Resources}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{import} \emph{requests} \newline
  \newline
  \textbf{from} \emph{bs4} \textbf{import} \emph{BeautifulSoup}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
%
% --- Make a soup object out of a website --------------------------------
% The sample calls `requests.get`, matching the `import requests` shown in
% the first table, and hands the parser name to BeautifulSoup, which is
% where the explanatory row below says it belongs.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{\textcolor{white}{Make a soup object out of a website}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{\emph{// 1. The HTTP request} \newline
  webpage = requests.get('\textbf{URL}'); \newline
  \newline
  \emph{// 2. Turn the website into a soup object} \newline
  soup = BeautifulSoup(webpage.\textbf{content}, \emph{'html.parser'});} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{"html.parser"} is one option for parsers we could use. There are other options, like "lxml" and "html5lib" that have different advantages and disadvantages.} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
%
% --- Object Types -------------------------------------------------------
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{\textcolor{white}{Object Types}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{\emph{//1. Tags correspond to HTML tags} \newline
  \textbf{Example Code:} \newline
  soup = BeautifulSoup('\textless{}div \emph{id="example"}\textgreater{}An example div\textless{}/div\textgreater{}\textless{}p\textgreater{}An example p tag\textless{}/p\textgreater{}'); \newline
  \newline
  print(soup.\textbf{div}); \newline
  -{}-\textgreater{} \textless{}div id="example"\textgreater{}An example div\textless{}/div\textgreater{} \newline
  -{}-\textgreater{} \emph{gets the \textbf{first} tag of that type on the page} \newline
  \newline
  print(soup.div.\textbf{name}) \newline
  print(soup.div.\textbf{attrs}) \newline
  -{}-\textgreater{} div \newline
  -{}-\textgreater{} \{'id': 'example'\} \newline
  \newline
  //\emph{2. Navigable Strings: Piece of text inside of HTML Tags} \newline
  print(soup.div.\textbf{string}) \newline
  -{}-\textgreater{} An example div} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
%
% --- Navigating by Tags -------------------------------------------------
% Every list item in the HTML sample is closed with </li>.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{\textcolor{white}{Navigating by Tags}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{Example Code:} \newline
  \textless{}h1\textgreater{}World's Best Chocolate Chip Cookies\textless{}/h1\textgreater{} \newline
  \textless{}div class="banner"\textgreater{} \newline
  \textless{}h1\textgreater{}Ingredients\textless{}/h1\textgreater{} \newline
  \textless{}/div\textgreater{} \newline
  \textless{}ul\textgreater{} \newline
  \textless{}li\textgreater{} 1 cup flour \textless{}/li\textgreater{} \newline
  \textless{}li\textgreater{} 1/2 cup sugar \textless{}/li\textgreater{} \newline
  \textless{}li\textgreater{} 2 tbsp oil \textless{}/li\textgreater{} \newline
  \textless{}li\textgreater{} 1/2 tsp baking soda \textless{}/li\textgreater{} \newline
  \textless{}li\textgreater{} $\frac{1}{2}$ cup chocolate chips \textless{}/li\textgreater{} \newline
  \textless{}li\textgreater{} 1/2 tsp vanilla \textless{}/li\textgreater{} \newline
  \textless{}li\textgreater{} 2 tbsp milk \textless{}/li\textgreater{} \newline
  \textless{}/ul\textgreater{} \newline
  \newline
  //\emph{1. Get the children of a tag:} \newline
  \emph{for} \textbf{child} \emph{in} \textbf{soup.ul.children:} \newline
  print(child) \newline
  -{}-\textgreater{} \textless{}li\textgreater{} 1 cup flour \textless{}/li\textgreater{} \newline
  -{}-\textgreater{} \textless{}li\textgreater{} 1/2 cup sugar \textless{}/li\textgreater{} \newline
  ... \newline
  \newline
  //\emph{2. Get the parent of a tag:} \newline
  \emph{for} \textbf{parent} \emph{in} \textbf{soup.li.parents:} \newline
  print(parent)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
%
% --- Find All -----------------------------------------------------------
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{\textcolor{white}{Find All}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{//\emph{1. \textbf{find\_all()}} \newline
  print(soup.\textbf{find\_all}("h1")) \newline
  -{}-\textgreater{} Outputs all \textless{}h1\textgreater{}...\textless{}/h1\textgreater{} on the website \newline
  \newline
  //\emph{1.1. find\_all() with \textbf{regex}} \newline
  import \textbf{re} \newline
  soup.find\_all(\textbf{re.compile("{[}ou{]}l")}) \newline
  -{}-\textgreater{} Outputs all \textless{}ul\textgreater{}...\textless{}/ul\textgreater{} \textbf{and} \textless{}ol\textgreater{}...\textless{}/ol\textgreater{} \newline
  soup.find\_all(\textbf{re.compile("h{[}1-9{]}")}) \newline
  -{}-\textgreater{} Outputs all headings \newline
  \newline
  \newline
  //\emph{1.2. find\_all() with \textbf{lists}} \newline
  soup.find\_all({[}'h1', 'a', 'p'{]}) \newline
  \newline
  \newline
  //\emph{1.3 find\_all() with \textbf{attributes}} \newline
  soup.find\_all(\textbf{attrs=\{'class':'banner', 'id':'jumbotron'\}}); \newline
  \newline
  \newline
  //\emph{1.4 find\_all() with \textbf{functions}} \newline
  def has\_banner\_class\_and\_hello\_world(\textbf{tag}): \newline
  return \textbf{tag.attr('class')} == "banner" \emph{and} \textbf{tag.string} == "Hello world" \newline
  \newline
  soup.find\_all(has\_banner\_class\_and\_hello\_world)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
%
% --- CSS Selectors ------------------------------------------------------
% Each numbered comment line is emphasised on its own; the code lines
% between them stay upright.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\textbf{\textcolor{white}{CSS Selectors}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{//\emph{1. grab CSS classes with \textbf{.select("class\_name")}} \newline
  soup.\textbf{select}(".recipeLink") \newline
  \newline
  //\emph{2. grab CSS IDs with \textbf{.select("\#id\_name")}} \newline
  soup.\textbf{select}("\#selected") \newline
  \newline
  //\emph{3. using a loop} \newline
  for link in soup.select(".recipeLink \textgreater{} a"): \newline
  webpage = requests.get(link) \newline
  new\_soup = BeautifulSoup(webpage)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
%
% That's all folks
\end{multicols*}

\end{document}