% spaCy Cheat Sheet (Cheatography export).
% Layout: a fancyhdr header/footer built from tabulary boxes, and a
% three-column multicols* body made of coloured single-column tabularx boxes.
% Each statement sits on its own line so that no % comment can swallow code.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{LoiVyen}
\pdfinfo{
  /Title (spacy.pdf)
  /Creator (Cheatography)
  /Author (LoiVyen)
  /Subject (spaCy Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName
% and applies it to the current row with \rowcolor.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<cols>}{<colspec>}{<text>}: \multicolumn whose column is
% filled with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{3345A3}
\definecolor{LightBackground}{HTML}{F2F3F9}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% NOTE(review): the logo below is referenced by an absolute path
% (/web/www.cheatography.com/...); confirm it exists on the build machine.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries{\textcolor{DarkBackground}{\textrm{spaCy Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{LoiVyen} via \textcolor{DarkBackground}{\uline{cheatography.com/163788/cs/34314/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}LoiVyen \\
  \uline{cheatography.com/loivyen} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 2nd October, 2022.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% In the code cells below, \newline marks a line break of the listed code and
% \mbox{}~~~~ keeps the leading indentation from being dropped after the break.

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Base Initialization}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`import spacy`} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`nlp = spacy.blank("en")\newline
  nlp = spacy.load("en\_core\_web\_sm")\newline
  nlp = spacy.load("en\_core\_web\_md")\newline
  nlp = spacy.load("en\_core\_web\_lg")\newline
  nlp = \seqsplit{spacy.load("en\_core\_web\_trf")`}} \tn
% Row Count 5 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`doc = nlp("Insert your text here. This string will be used to create a doc object.
  For natural language processing.")`} \tn
% Row Count 8 (+ 3)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`print("tokens: ", {[}token.text for token in doc{]})`} \tn
% Row Count 9 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{output:}\newline
  tokens: {[}'Insert', 'your', 'text', 'here', '.', 'This', 'string', 'will', 'be', 'used', 'to', 'create', 'a', 'doc', 'object', '.', 'For', 'natural', 'language', 'processing', '.'{]}} \tn
% Row Count 13 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Token Attributes applied}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Remove punctuations for tokens} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`words = {[}token.text for token in doc if token.is\_stop != True and token.is\_punct != True{]} \newline
  print (words)`} \tn
% Row Count 4 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{output:} \newline
  {[}'Insert', 'text', 'string', 'create', 'doc', 'object', 'natural', 'language', 'processing'{]}} \tn
% Row Count 7 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{https://spacy.io/api/token}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Lemma}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`for token in doc: \newline
  \mbox{}~~~~print("token: ", token, " lemma: ", token.lemma\_)`} \tn
% Row Count 3 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{token: token object within object \newline
  lemma: 'base' form of token \newline
  (ex. going -{}-\textgreater{} go; was -{}-\textgreater{} be)} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{displaCy Visualizer}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`import spacy \newline
  from spacy import displacy`} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`nlp = \seqsplit{spacy.load("en\_core\_web\_sm")} \newline
  doc = nlp("This is a sentence.") \newline
  displacy.render(doc, style='dep',jupyter=True)`} \tn
% Row Count 4 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Sentence}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`list(doc.sents) \newline
  print(list(text.sents))`} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{output:} \newline
  {[}Insert your text here., This string will be used to create a doc object., For natural language processing.{]}} \tn
% Row Count 4 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Word Frequency}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`from collections import Counter`} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`words = {[}token.text for token in doc if token.is\_stop != True and token.is\_punct != True{]} \newline
  word\_freq = Counter(words) \newline
  common\_words = \seqsplit{word\_freq.most\_common(5)} \newline
  print (common\_words)`} \tn
% Row Count 5 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{output:} \tn
% Row Count 6 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{[}('Insert', 1), ('text', 1), ('string', 1), ('create', 1), ('doc', 1){]}} \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Using textrank component ("custom")}} \tn
% Row Count 9 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{`nlp.add\_pipe("textrank")} \newline
  doc = nlp("Insert your text here. This string will be used to create a doc object. For natural language processing.")`} \tn
% Row Count 12 (+ 3)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`for index, token in \seqsplit{enumerate(doc.\_.phrases):} \newline
  \mbox{}~~~~print("index: ", index, "\textbackslash{}ntext: ", token.text) \newline
  \mbox{}~~~~print("rank:", token.rank, " count:", token.count) \newline
  \mbox{}~~~~print("chunks: ", token.chunks)`} \tn
% Row Count 18 (+ 6)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{output:} \tn
% Row Count 19 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{index: 0 \newline
  text: natural language processing \newline
  rank: 0.22805281953000428 count: 1 \newline
  chunks: {[}natural language processing{]} \newline
  index: 1 \newline
  text: a doc object \newline
  rank: 0.1481208925234081 count: 1 \newline
  chunks: {[}a doc object{]} \newline
  index: 2 \newline
  text: your text \newline
  rank: 0.073534568107334 count: 1 \newline
  chunks: {[}your text{]} \newline
  index: 3 \newline
  text: This string \newline
  rank: 0.054063724281900864 count: 1 \newline
  chunks: {[}This string{]}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Textacy}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`pip install textacy`} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{`import textacy`} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{`metadata = \{ \newline
  \mbox{}~~~~"title": "Natural-language processing", \newline
  \mbox{}~~~~"url": \seqsplit{"https://en.wikipedia.org/wiki/Natural-language\_processing"}, \newline
  \mbox{}~~~~"source": "wikipedia", \newline
  \}doc = \seqsplit{textacy.make\_spacy\_doc((text}, metadata), lang="en\_core\_web\_sm")\newline
  print(doc.\_.meta{[}"title"{]})`} \tn
% Row Count 10 (+ 8)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{{\emph{output:}} Natural-language processing} \tn
% Row Count 11 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{"`\textbf{textacy}` is a Python library for performing a variety of natural language processing (NLP) tasks, built on the high-performance spaCy library" \newline
  https://github.com/chartbeat-labs/textacy \newline
  https://textacy.readthedocs.io/en/latest/ \newline
  \newline
  latest version documentation: \newline
  https://buildmedia.readthedocs.org/media/pdf/textacy/stable/textacy.pdf \newline
  https://textacy.readthedocs.io/en/0.12.0/api\_reference/datasets\_resources.html} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}