% Cheat-sheet preamble: document class, packages, PDF metadata and page
% geometry. One statement per line so that a trailing % comment can never
% swallow the code that follows it.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}  % For header and footer
\usepackage{multicol}  % Allows multicols in tables
\usepackage{tabularx}  % Intelligent column widths
\usepackage{tabulary}  % Used in header and footer
\usepackage{hhline}    % Border under tables
\usepackage{graphicx}  % For images
\usepackage{xcolor}    % For hex colours
% The sheet body contains UTF-8 characters (en-dashes, bullets). Declaring the
% input encoding is a no-op on kernels where UTF-8 is already the default and
% keeps older pdfLaTeX installations from printing garbage.
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl}  % For coloured tables
\usepackage{setspace}  % For line height
\usepackage{lastpage}  % Needed for total page number
\usepackage{seqsplit}  % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}   % Symbols
\usepackage{MnSymbol}  % Symbols
\usepackage{wasysym}   % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{depasinre2}
\pdfinfo{
  /Title (data-analytics-for-cyber.pdf)
  /Creator (Cheatography)
  /Author (depasinre2)
  /Subject (DATA ANALYTICS FOR CYBER Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt}   % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% Row-colour helpers.
% \SetRowColor{<colour>} stores the colour name globally in \RowColorName and
% applies it to the current row; \mymulticolumn re-applies that stored colour
% to a \multicolumn cell.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Header logo. The path is absolute and specific to the machine that generated
% this sheet, so the header falls back to plain text when the file is absent
% instead of aborting the build.
\newcommand*{\CheatographyLogoFile}{/web/www.cheatography.com/public/images/cheatography_logo.pdf}

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author on the right.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\IfFileExists{\CheatographyLogoFile}%
            {\includegraphics[width=5.8cm]{\CheatographyLogoFile}}%
            {\textbf{Cheatography}}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{DATA ANALYTICS FOR CYBER Cheat Sheet}}} \\
    \normalsize by \textcolor{DarkBackground}{depasinre2} via \textcolor{DarkBackground}{\uline{cheatography.com/170760/cs/35782/}}
\end{tabulary}
\end{multicols}}
% Footer: author, publication info with page counter, sponsor.
\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
    \SetRowColor{FootBackground}
    \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
    \vspace{-2pt}depasinre2 \\
    \uline{cheatography.com/depasinre2} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
    \vspace{-2pt}Published 29th November, 2022.\\
    Updated 29th November, 2022.\\
    Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
    \SetRowColor{white}
    \vspace{-5pt}
    %\includegraphics[width=48px,height=48px]{dave.jpeg}
    Measure your website readability!\\
    www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{4}

% ---- ML in practice: Malware detection ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{ML in practice: Malware detection}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{JSTAP: a project that does malicious JS detection} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Premise/problem JS can cause: bitcoin mining, abuse browser vulnerabilities} \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Abstract Syntax Tree – Derived from grammar of programming language} \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{JSTAP Principle: Perform static analysis with abstract syntax trees and random forests} \tn
% Row Count 7 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Static Analyses}} \tn
% Row Count 8 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Static analysis - we don't run the code at all, Reverse analysis of the code} \tn
% Row Count 10 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Dynamic analysis - run the code in virtual machine or debugger. Malware writers deliberately obfuscate to defeat static tools. Example: GozNym runs trivial infinite loop in thread, then suspends thread and overwrites code with jump to previously dead code} \tn
% Row Count 16 (+ 6)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Dynamic Analysis pitfalls - 1. Easy to detect you are in a debugger, VM, or running anti-virus – Query registry – IsDebuggerPresent – VM specific instructions. 2. Do long delay in hopes simulator will give up and go away} \tn
% Row Count 21 (+ 5)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Control Flow Graph – Shows program flow (calls, selection, loops)} \tn
% Row Count 23 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Program Dependence Graph – Includes data and control dependencies} \tn
% Row Count 25 (+ 2)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Token - splitting a program into lexical units (words in sentences for English)} \tn
% Row Count 27 (+ 2)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{N-gram - simple way to analyze token sequences} \tn
% Row Count 28 (+ 1)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{JSTAP n-grams}} \tn
% Row Count 29 (+ 1)
% Row 13
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{- Depth-first pre-order traversal of AST} \tn
% Row Count 30 (+ 1)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{ML in practice: Malware detection (cont)}} \tn
% Row 14
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- For CFG, also traverse AST, but only nodes linked by control flow edge. Traverse sub-AST for each node with control flow once} \tn
% Row Count 3 (+ 3)
% Row 15
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{- Similar for PDG, considering data flow} \tn
% Row Count 4 (+ 1)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- Independent n-grams for tokens, AST, CFG, PDG-Data Flow and PDG-Control Flow} \tn
% Row Count 6 (+ 2)
% Row 17
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{- 4 is the best value.} \tn
% Row Count 7 (+ 1)
% Row 18
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- Use chi-squared test to check for correlation (check the n-gram in benign or malicious), keep $\chi^2 \geq 6.63$ (confidence of 99\%)} \tn
% Row Count 10 (+ 3)
% Row 19
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{- if n-gram in both (benign and malicious), throw n-gram away} \tn
% Row Count 12 (+ 2)
% Row 20
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{JSTAP Dataset} - 131448 malicious, 141768 benign} \tn
% Row Count 14 (+ 2)
% Row 21
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{JSTAP Classifier Training • Select 10,000 malicious and benign randomly for training – Additional 5,000 of each for validation • Repeat 5 times and average detection results} \tn
% Row Count 18 (+ 4)
% Row 22
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{JSTAP results • Two step process • First phase – Unanimous voting, classifies 93\% of data with 99.73\% accuracy • Second phase – Unanimous voting, classifies 6.5\% of data with accuracy still over 99\%} \tn
% Row Count 23 (+ 5)
% Row 23
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Evasion techniques - Add more benign features • Copy malicious into larger benign file} \tn
% Row Count 25 (+ 2)
% Row 24
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Extremely abstract OS - learn the sample without implementing the underlying OS. Over-approximation has more behaviors than system S, under-approximation has fewer. Less precise than virtualization or emulation} \tn
% Row Count 30 (+ 5)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{ML in practice: Malware detection (cont)}} \tn
% Row 25
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Abstract execution - A Technique for Efficiently Tracing Programs. In a dynamic analysis, it has Emulator, Extremely abstract OS and paths, Less precise than virtualization or emulation} \tn
% Row Count 4 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- ML in practice: Phishing detection ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{ML in practice: Phishing detection}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Phishing Websites - Often used to collect credentials. Fake website to induce personal info.} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Techniques for finding Phish:} \tn
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- Industrial toolbar-based: e.g.\ SpoofGuard, TrustWatch, Netcraft (found these ineffective)} \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{- User-Interface-based: e.g.\ provide custom image per user, Password manager (Only provides password to certain domains)} \tn
% Row Count 8 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- Web page content-based: Use web page info (URL, links, terms, images, forms) to detect phishing} \tn
% Row Count 10 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{– CANTINA: compute term frequency-inverse document frequency for terms, then Google a few terms to see if current website is a top result – B-APT: Bayesian based on tokens from DOM} \tn
% Row Count 14 (+ 4)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Some definitions: Surface level content - URL, hyperlinks; Textual content - terms or words; Visual content - color, font size, style, location of images} \tn
% Row Count 17 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Textual and visual classification: text classifiers work by examining text within a page to detect whether certain words are more likely in a fraudulent page or not. Image classifiers transform webpage to images and then compare similarity to genuine webpages.} \tn
% Row Count 23 (+ 6)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Steps of Bayesian analysis: 1. Obtain webpage and normalize 2. Compute signature 3. Calculate EMD and similarity between website and protected web page 4. Classify via threshold} \tn
% Row Count 27 (+ 4)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Overall framework: 1. Train text and image classifier, collect similarity measurements for different classifiers 2. Partition similarity into sub-intervals 3. Estimate probs for text classifier 4. Estimate probs for image classifier 5. Classify each test image 6. If different from two classifiers, calculate decision factor 7. Return final classification} \tn
% Row Count 35 (+ 8)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{ML in practice: Phishing detection (cont)}} \tn
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{High quality dataset}:} \tn
% Row Count 1 (+ 1)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{accessibility}: publicly available; \textbf{completeness}: encompass all the breadth within phishing; \textbf{consistency}: range and variance of dataset to make sure data won't be substantively changing; \textbf{integrity}: data and labels are correct, non-corrupted; \textbf{Validity}: data is properly representative; \textbf{interpretability}: data is understandable; \textbf{Timeliness}: data is updated or still valid today and future} \tn
% Row Count 10 (+ 9)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Bagging classifier} is an ensemble meta-estimator that fits base classifiers each on random subsets of the original dataset and then aggregates their individual predictions (either by voting or by averaging) to form a final prediction} \tn
% Row Count 15 (+ 5)
% Row 13
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{boosting classifier}: random forests build each tree independently while gradient boosting builds one tree at a time. This additive model (ensemble) works in a forward stage-wise manner, introducing a weak learner to improve the shortcomings of existing weak learners.} \tn
% Row Count 21 (+ 6)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Social network security - Spam ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{Social network security - Spam}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Spam} - irrelevant messages sent to many, Spamming is the use of messaging systems to send multiple unsolicited messages (spam) to large numbers of recipients} \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Criminal accounts tend to be socially connected, Maybe less discriminating in who they follow – Maybe intentional} \tn
% Row Count 7 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Criminal hubs are more inclined to follow criminal accounts} \tn
% Row Count 9 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{K-anonymity - Publisher decides which attributes public/private – Public are ``quasi-identifiers'' • Every quasi-identifier tuple appears in at least k records in anonymized DB} \tn
% Row Count 13 (+ 4)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Determine if a database is k-anonymous for a particular value of k} - for quasi-identifier, if it appears in at least k records in the db. Every public tuple appears at least twice. We can't uniquely identify someone. A database is 2-anonymous if no click trace is unique} \tn
% Row Count 19 (+ 6)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{how an attacker might deanonymize a database with auxiliary information (background info related to record)}} \tn
% Row Count 22 (+ 3)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- Amplification of background knowledge - Uses Aux(r) close to r on subset of attributes to find r' close to r on all - Extended to a subset} \tn
% Row Count 25 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{1. Compute score(aux, r') for each r' in sample 2. Apply matching criteria 3. Output record or probability distribution for records} \tn
% Row Count 28 (+ 3)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Bystander} - Someone who is ``present but not taking part'' in the photo, Someone who is ``not a subject of the photo and is thus not important for the meaning of the photo''} \tn
% Row Count 32 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{Social network security - Spam (cont)}} \tn
% Row 9
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{How bystander detection could improve privacy: this can stop bystanders from being recorded without knowing or let them know. Self-centered photos can put bystanders in awkward situations, poor posture, or reveal information they don't want on record.} \tn
% Row Count 6 (+ 6)
% Row 10
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Unicity} - Proportion of unique pieces of information. U = 0 is k-anonymous and k\textgreater{}=2. U = 0.25 means 1/4 of the click traces are unique.} \tn
% Row Count 9 (+ 3)
% Row 11
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{How to get \textless{} 10\% unicity • Remove all info pertaining to clients and website visits • Coarsen time to at least hours} \tn
% Row Count 12 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Strategic manipulation, propaganda, and fake news ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{Strategic manipulation, propaganda, and fake news}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{fake news - news that is intentionally false, published by news outlet.} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{challenges in defining ``fake news'' - apart from validity of information, is it satire, actual misinformation, intended for deception, clickbait, rumor etc.} \tn
% Row Count 6 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{automatic fact-checking - compare with knowledge/expert base (references); use base of SPO triples: subject, predicate, object} \tn
% Row Count 9 (+ 3)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{fact extraction: \seqsplit{redundancy(Donaldjohntrump} vs donald-trump), timeliness(Britain, joinIn, EuropeanUnion), conflict, unreliability(TheOnion), incompleteness(May need to infer if something is missing)} \tn
% Row Count 13 (+ 4)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Why temporal analysis may help with fake news detection: time can change the validity of information. Why source analysis may help with fake news detection: is the news satire or credible} \tn
% Row Count 17 (+ 4)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Explain how textual and visual analysis may help with fake news detection} \tn
% Row Count 19 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}
% ---- Fake news, continued (textual/visual analysis and bi-clique labelling) ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{Strategic manipulation, propaganda, and fake news (cont)}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- textual content can determine fake news by quantity, complexity, uncertainty, subjectivity, sentiment, informality, specificity and readability} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{- visual content can determine fake news by clarity, coherence, similarity distribution, diversity and clustering score.} \tn
% Row Count 5 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{- using SVMs and CNNs for text analysis} \tn
% Row Count 6 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{mixed code} - Use of different languages, symbols, scripts, shapes to avoid detection. Text on Document – Defined from standard alphabetic characters • Text in Visual Media – Text in pictures • Text as Art Form – Use symbols not part of the alphabet to depict a simple code} \tn
% Row Count 12 (+ 6)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{term frequency-inverse document frequency} (tf-idf) - used to reflect how important a word is to a document in a collection} \tn
% Row Count 15 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{bi-clique} - bipartite graph where every vertex of first is connected to every vertex of second} \tn
% Row Count 17 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Label bi-partite graph with nodes as articles and users, Edge if user mentions article, Find maximal bi-cliques,} \tn
% Row Count 20 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Find temporal cohesion, And textual cohesion, And create weighted sum, For an article, average its score in all bi-cliques,} \tn
% Row Count 23 (+ 3)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Top 5\% of these are seeded fake, Bottom 5\% are seeded true} \tn
% Row Count 25 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Spread labels if – Part of same bi-cliques – Have a lot of common users – Are textually similar,} \tn
% Row Count 28 (+ 3)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Spread labels based on – Common users – Textually similar} \tn
% Row Count 30 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Dark Web ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{Dark Web}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Deep Web:} (password) consists of internet not indexed on search engines (such as social media)} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Dark Web:} (Tor) overlay networks that use the Internet but require specific software, configurations, or authorization to access – Behind password logins – Encrypted – Not linked – Tor Hidden Services} \tn
% Row Count 8 (+ 5)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{ransomware:} threatens to publish victim's data or holds data hostage unless paid} \tn
% Row Count 10 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Tor browsing}: use many (3) different machines to create onion networks. Each connection is encrypted except at the exit. The exit will appear to be browsing.} \tn
% Row Count 14 (+ 4)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{\textbf{Tor hidden service} - introduction points, directory service and rendezvous point.} \tn
% Row Count 16 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{1. pick introduction points to build encrypted tunnels 2. announce the service into db. 3. User gets back 3 introduction points and creates rendezvous points (3 steps from) and 4. send msg to intro point. 5. now the rend point is 6 hops away from intro.} \tn
% Row Count 22 (+ 6)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{beneficial uses of Tor and anonymous browsing: can prevent control from authoritarian regimes; people cannot be banned from accessing information} \tn
% Row Count 25 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{socially detrimental uses of Tor and anonymous browsing: can be used as a harbor for illegal/illicit things} \tn
% Row Count 28 (+ 3)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{how Tor traffic could be deanonymized by a large organization: they with the computational power can get both an entry and exit point and then be able to decrypt what goes on in between} \tn
% Row Count 32 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{Dark Web (cont)}} \tn
% Row 9
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{how researchers have crawled the dark web: first get access by identifying dark web forms. Then get data thru anon access, then process and identify relationships/link data sources etc. then visualization and reports} \tn
% Row Count 5 (+ 5)
% Row 10
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{why dark web crawling is beneficial for security practitioners - are able to limit the damage of a data breach and take the necessary steps to protect business, employees, customers, etc. from potential attacks. Can be used to detect/collect any leaked information} \tn
% Row Count 11 (+ 6)
% Row 11
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Information gain - reduction of entropy gained by knowing feature x: $\mathit{IG}(y \mid x) = H(y) - H(y \mid x)$} \tn
% Row Count 13 (+ 2)
% Row 12
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Stemming - remove suffixes to get stem word; can be used for handling misspellings with 3-7 n-grams} \tn
% Row Count 15 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% ---- REST (remaining topics) ----
\begin{tabularx}{3.833cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{3.833cm}}{\bfseries\textcolor{white}{REST}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Abstract execution records a small set of events during the traced program's execution. These events serve as input to an abstract version of the program that generates a full trace by re-executing selected portions of the original program.} \tn
% Row Count 5 (+ 5)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{insider threat and accidental insider threat: threats from within (employees, associates): weak passwords, unlocked devices; intentional can be injecting rogue software} \tn
% Row Count 9 (+ 4)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{Techniques for host-based user profiling on Unix and Windows: Markov chain model; Bayes factor to determine if transition is consistent (command A-\textgreater{} command B); windows measures ``properties'' which vote with weights whether an intrusion has occurred} \tn
% Row Count 15 (+ 6)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{3.833cm}}{Advantage of a hidden Markov model over an SVM for classifying command sequences: Markov model creates probability of each transition; this can easily grow very big; pick a K that is small; svm can be very accurate but it does not address concept drift very well} \tn
% Row Count 21 (+ 6)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{3.833cm}}{honeypot: a computer security mechanism set to detect, deflect or counteract attempts at unauthorized use of info systems. Generally consists of data that appears legit with info but is isolated and monitored and blocks or analyses attackers} \tn
% Row Count 26 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}