\documentclass[10pt,a4paper]{article} % Packages \usepackage{fancyhdr} % For header and footer \usepackage{multicol} % Allows multicols in tables \usepackage{tabularx} % Intelligent column widths \usepackage{tabulary} % Used in header and footer \usepackage{hhline} % Border under tables \usepackage{graphicx} % For images \usepackage{xcolor} % For hex colours %\usepackage[utf8x]{inputenc} % For unicode character support \usepackage[T1]{fontenc} % Without this we get weird character replacements \usepackage{colortbl} % For coloured tables \usepackage{setspace} % For line height \usepackage{lastpage} % Needed for total page number \usepackage{seqsplit} % Splits long words. %\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely. \usepackage[normalem]{ulem} % For underlining links % Most of the following are not required for the majority % of cheat sheets but are needed for some symbol support. \usepackage{amsmath} % Symbols \usepackage{MnSymbol} % Symbols \usepackage{wasysym} % Symbols %\usepackage[english,german,french,spanish,italian]{babel} % Languages % Document Info \author{binca} \pdfinfo{ /Title (spidering.pdf) /Creator (Cheatography) /Author (binca) /Subject (Spidering Cheat Sheet) } % Lengths and widths \addtolength{\textwidth}{6cm} \addtolength{\textheight}{-1cm} \addtolength{\hoffset}{-3cm} \addtolength{\voffset}{-2cm} \setlength{\tabcolsep}{0.2cm} % Space between columns \setlength{\headsep}{-12pt} % Reduce space between header and content \setlength{\headheight}{85pt} % If less, LaTeX automatically increases it \renewcommand{\footrulewidth}{0pt} % Remove footer line \renewcommand{\headrulewidth}{0pt} % Remove header line \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit % This two commands together give roughly % the right line height in the tables \renewcommand{\arraystretch}{1.3} \onehalfspacing % Commands \newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{9C3B11} \definecolor{LightBackground}{HTML}{F8F2F0} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Spidering Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{binca} via \textcolor{DarkBackground}{\uline{cheatography.com/44948/cs/13321/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}binca \\ \uline{cheatography.com/binca} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Not Yet Published.\\ Updated 9th November, 2017.\\ Page {\thepage} of \pageref{LastPage}. \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{2} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Definition}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Also known as "crawling"} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Involves following web links to download a copy of an entire site.} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Analyze offline to discover: potential security weaknesses in code, list of keywords for password-guessing, confidential data, and names, emails, addresses, and phone numbers.} \tn % Row Count 7 (+ 4) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{May spider a site many times w/various tools. Scanning primes many tools.} \tn % Row Count 9 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Manual spidering is done by browsing a site and saving each page.} \tn % Row Count 11 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Automated scans are common but if the site is too complex it may fail.} \tn % Row Count 13 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Robot Control}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Automated tools are often referred to as robots o bots.} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Developers control bots by using a robots.txt file either placed in the root directory of the web app or using meta tags on individual pages.} \tn % Row Count 5 (+ 3) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Robots Exclusion Protocol is an unofficial commonly implemented protocol that uses robots.txt to specify which user-agent types should be disallowed access to certain directories and individual pages.} \tn % Row Count 9 (+ 4) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Tags that prevent page caching: \{\{nl\}\} {\bf{\textless{}META HTTP-EQUIV="PRAGMA" CONTENT="NO-CACHE"\textgreater{}}} \{\{nl\}\} {\bf{\textless{}META HTTP-EQUIV="CACHE CONTROL" CONTENT="NO-CACHE"\textgreater{}}}} \tn % Row Count 13 (+ 4) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Tags that control: \{\{nl\}\} {\bf{\textless{}META NAME="ROBOTS" CONTENT="INDEX, NOFOLLOW"\textgreater{}}} \{\{nl\}\} {\bf{\textless{}META NAME="GOOGLEBOT" CONTENT="NOARCHIVE"\textgreater{}}}} \tn % Row Count 16 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Automated Spidering with ZAP}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{ZAP interception proxy includes spidering capabilities} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Primed by using interception proxy to navigate to the site} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Does well but dynamically generated links on client can cause it to miss pages} \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Has separate AJAX spider for dynamic sites} \tn % Row Count 7 (+ 1) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Will show out of scope targets} \tn % Row Count 8 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Wappalyzer}} \tn \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Provides detailed understanding of technologies running on a web app including OS, web servers, packaged web apps, languages, frameworks, and APIs% Row Count 3 (+ 3) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{ZAP + Wappalyzer}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Leverages Wappalyzer functionality through the use of Technology Detection extension in ZAP marketplace} \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{The extension is passive} \tn % Row Count 4 (+ 1) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Only implements some functionality} \tn % Row Count 5 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Not release quality} \tn % Row Count 6 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{ZAP Forced Browse}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Based on inactive DirBuster Project, a dictionary attack} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Seeks to find unlinked content using a number of default wordlists or one provided by user} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Forced Browse}} = entire site} \tn % Row Count 5 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{Forced Browse Directory}} = focuses on 1 directory} \tn % Row Count 7 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{Forced Browse Directory + Children}} = recursive forced browse against a directory and any discovered sub-directories} \tn % Row Count 10 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Automated Spidering with Burp}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Similar to ZAP and Paros} \tn % Row Count 1 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Automated Spidering with wget}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Console-based web browser that runs on most platforms and has basic spidering capabilities saves retrieved items in a directory} \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Obeys robot.txt unless invoked with the {\bf{-e robots=off option}}} \tn % Row Count 5 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{{\bf{-r}} invokes recursion of discovered links} \tn % Row Count 6 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{{\bf{-l {[}N\}}} specifies max link recursion where N = \#, default is set to 5} \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{It can retrieve via HTTP, HTTPS, and FTP} \tn % Row Count 9 (+ 1) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Popularity due to script inclusion} \tn % Row Count 10 (+ 1) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Syntax: {\bf{wget -r {[}domain{]} -l 3 -P /tmp}}} \tn % Row Count 11 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{CeWL}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Custom word list generator.} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Spiders a website generating a word list from the EXIF data of any images and the site contents} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Syntax: {\bf{./cewl.rb {[}domain{]}}}} \tn % Row Count 4 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Analyzing Results: What to Look For}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{(HTML) Comments that reveal sensitive or useful information} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Commented code and links} \tn % Row Count 3 (+ 1) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Disabled functionality} \tn % Row Count 4 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Linked servers such as content and app servers} \tn % Row Count 5 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{HTML Comments}} \tn \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Contents in the HTML that are included in the server response to the client such as dev notes, explanations of functionality and variables, usernames and passwords. Should move comments to server-side.% Row Count 5 (+ 5) } \tn \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Disabled Functionality}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Reveals previous or future sections of site.} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{In the case of "disabled" functionality may be able to still invoke or it may indicate security weaknesses.} \tn % Row Count 4 (+ 3) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Examples: \{\{nl\}\} Links that have been commented out \{\{nl\}\} Client-side code that has been commented out, which may be server-side code now} \tn % Row Count 7 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}