\documentclass[10pt,a4paper]{article}

% ---------------------------------------------------------------
% Packages
% ---------------------------------------------------------------
\usepackage{fancyhdr}          % page header and footer
\usepackage{multicol}          % multi-column layout (used by header, footer and body)
\usepackage{tabularx}          % fixed-width tables for the body
\usepackage{tabulary}          % tables used inside the header and footer
\usepackage{hhline}            % closing rule underneath tables
\usepackage{graphicx}          % \includegraphics
\usepackage{xcolor}            % colours given as HTML hex values
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc}       % author's note: without it characters get replaced oddly
\usepackage{colortbl}          % \rowcolor / \columncolor in tables
\usepackage{setspace}          % provides \onehalfspacing
\usepackage{lastpage}          % LastPage label for "Page x of y"
\usepackage{seqsplit}          % lets over-long words break
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}    % \uline for links; normalem leaves \emph alone

% Symbol support: not needed by most cheat sheets, but some rely on it.
\usepackage{amsmath}
\usepackage{MnSymbol}
\usepackage{wasysym}
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% ---------------------------------------------------------------
% Document info
% ---------------------------------------------------------------
\author{Robyn.jll}
\pdfinfo{
  /Title (general-stats.pdf)
  /Creator (Cheatography)
  /Author (Robyn.jll)
  /Subject (General (Stats) Cheat Sheet)
}

% ---------------------------------------------------------------
% Lengths and widths
% ---------------------------------------------------------------
% Widen the text block by 6cm and shift it 3cm to the left;
% shorten it by 1cm and shift it 2cm upwards.
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm}   % horizontal padding between table columns
\setlength{\headsep}{-12pt}     % pulls the content up towards the header
\setlength{\headheight}{85pt}   % author's note: LaTeX enlarges it anyway if set lower
\renewcommand{\footrulewidth}{0pt} % no rule above the footer
\renewcommand{\headrulewidth}{0pt} % no rule below the header
% seqsplit break points: a plain break in math mode, a discretionary hyphen in text.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi}

% Together these two settings give roughly the right line height in the tables.
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores <colour> globally in \RowColorName and
% applies it to the current table row with \rowcolor.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}}
% \mymulticolumn{<n>}{<colspec>}{<text>}: a \multicolumn whose cell is filled
% with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}}
% x{<width>}: ragged-right paragraph column of the given width.
\newcolumntype{x}[1]{>{\raggedright}p{#1}}
% \tn: row terminator; needed because the x column type uses \raggedright.
\newcommand{\tn}{\tabularnewline}

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author/link next to it.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\textbf{\textcolor{DarkBackground}{\textrm{General (Stats) Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Robyn.jll} via \textcolor{DarkBackground}{\uline{cheatography.com/146401/cs/31685/}}}
\end{tabulary}
\end{multicols}}
% Footer: three small tables (author, publication/page info, sponsor).
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\textbf{\textcolor{white}{Cheatographer}}} \\
  \vspace{-2pt}Robyn.jll \\
  \uline{cheatography.com/robyn-jll} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Cheat Sheet}}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 14th April, 2022.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Sponsor}}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% NOTE: do not put blank lines between table rows; a paragraph break
% in front of \SetRowColor (which uses \noalign) is an error.
\begin{tabularx}{5.377cm}{x{1.64241 cm} x{3.33459 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Terms}}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Reliability}} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Reliability is about the consistency of a measure} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
Test-retest & The consistency of a measure across time: do you get the same results when you repeat the measurement? \tn
% Row Count 6 (+ 4)
% Row 3
\SetRowColor{white}
Interrater & The consistency of a measure across raters or observers: do you get the same results when different people conduct the same measurement? \tn
% Row Count 12 (+ 6)
% Row 4
\SetRowColor{LightBackground}
Internal consistency & The consistency of the measurement itself: do you get the same results from different parts of a test that are designed to measure the same thing?
\tn
% Row Count 18 (+ 6)
% Remaining rows of the "Terms" table, its continuation, then the
% "Quantitative Data" table and the first rows of "Qualitative Data".
% Row 5
\SetRowColor{white}
Ensuring reliability & Apply your methods consistently, Standardize the conditions of your research \tn
% Row Count 21 (+ 3)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Validity}} \tn
% Row Count 22 (+ 1)
% Row 7
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Validity is about the accuracy of a measure} \tn
% Row Count 23 (+ 1)
% Row 8
\SetRowColor{LightBackground}
Construct & The adherence of a measure to existing theory and knowledge of the concept being measured. \tn
% Row Count 27 (+ 4)
% Row 9
\SetRowColor{white}
Content & The extent to which the measurement covers all aspects of the concept being measured. \tn
% Row Count 31 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{1.64241 cm} x{3.33459 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Terms (cont)}}} \tn
% Row 10
\SetRowColor{LightBackground}
Criterion & The extent to which the result of a measure corresponds to other valid measures of the same concept. \tn
% Row Count 4 (+ 4)
% Row 11
\SetRowColor{white}
Ensuring validity & Choose appropriate methods of measurement, Use appropriate sampling methods to select your subjects \tn
% Row Count 8 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{5.377cm}{x{1.74195 cm} x{3.23505 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Quantitative Data}}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{is the process of collecting and analyzing numerical data.
It can be used to find patterns and averages, make predictions, test causal relationships, and generalize results to wider populations.} \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Research Methods:}} \tn
% Row Count 5 (+ 1)
% Row 2
\SetRowColor{LightBackground}
descriptive research & you simply seek an overall summary of your study variables. \tn
% Row Count 8 (+ 3)
% Row 3
\SetRowColor{white}
correlational research & you investigate relationships between your study variables \tn
% Row Count 11 (+ 3)
% Row 4
\SetRowColor{LightBackground}
experimental research & you systematically examine whether there is a cause-and-effect relationship between variables. \tn
% Row Count 15 (+ 4)
% Row 5
\SetRowColor{white}
Advantages: & Replication, Direct comparison of results, Large Samples, Hypothesis testing \tn
% Row Count 18 (+ 3)
% Row 6
\SetRowColor{LightBackground}
\seqsplit{Disadvantages:} & Superficiality, Narrow focus, Structural bias, Lack of context \tn
% Row Count 21 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{5.377cm}{x{1.59264 cm} x{3.38436 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Qualitative Data}}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Qualitative research involves collecting and analyzing non-numerical data (e.g., text, video, or audio) to understand concepts, opinions, or experiences.
It can be used to gather in-depth insights into a problem or generate new ideas for research.} \tn
% Row Count 5 (+ 5)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Research Methods:}} \tn
% Row Count 6 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{Observations:} & recording what you have seen, heard, or encountered in detailed field notes. \tn
% Row Count 9 (+ 3)
% Row 3
\SetRowColor{white}
Interviews: & personally asking people questions in one-on-one conversations.
\tn
% Row Count 12 (+ 3)
% Remaining rows of "Qualitative Data", then the start of
% "Descriptive Statistics" (up to the Mean row).
% Row 4
\SetRowColor{LightBackground}
Focus groups: & asking questions and generating discussion among a group of people. \tn
% Row Count 15 (+ 3)
% Row 5
\SetRowColor{white}
Surveys: & distributing questionnaires with open-ended questions. \tn
% Row Count 17 (+ 2)
% Row 6
\SetRowColor{LightBackground}
Secondary research: & collecting existing data in the form of texts, images, audio or video recordings, etc. \tn
% Row Count 21 (+ 4)
% Row 7
\SetRowColor{white}
Advantages: & Flexibility, Natural setting, Meaningful insights, Generation of new ideas \tn
% Row Count 24 (+ 3)
% Row 8
\SetRowColor{LightBackground}
\seqsplit{Disadvantages:} & Unreliability, Subjectivity, Limited generalizability, Labor-intensive \tn
% Row Count 27 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{5.377cm}{x{1.64241 cm} x{3.33459 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Descriptive Statistics}}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{summarize and organize characteristics of a data set.
A data set is a collection of responses or observations from a sample or entire population.} \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{3 main types of descriptive statistics:}} \tn
% Row Count 4 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{1. The distribution concerns the frequency of each value (Graphs).}} \tn
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{2. The Measures of central tendency concern the averages of the values}} \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
- Mean & To find the mean, simply add up all response values and divide the sum by the total number of responses.
The total number of responses or observations is called N.
\tn
% Row Count 15 (+ 7)
% Rest of "Descriptive Statistics" (two continuation tables) and the
% start of "Inferential Statistics" (up to its second bullet row).
% Row 5
\SetRowColor{white}
- Median & To find the median, order each response value from the smallest to the biggest.
Then, the median is the number in the middle.
If there are two numbers in the middle, find their mean. \tn
% Row Count 22 (+ 7)
% Row 6
\SetRowColor{LightBackground}
- Mode & To find the mode, order your data set from lowest to highest and find the response that occurs most frequently \tn
% Row Count 27 (+ 5)
% Row 7
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{3. The Measures of variability or dispersion concern how spread out the values are}} \tn
% Row Count 29 (+ 2)
% Row 8
\SetRowColor{LightBackground}
- Range & To find the range, simply subtract the lowest value from the highest value. \tn
% Row Count 33 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{1.64241 cm} x{3.33459 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Descriptive Statistics (cont)}}} \tn
% Row 9
\SetRowColor{LightBackground}
- Standard Deviation & The standard deviation (s) is the average amount of variability in your dataset.
It tells you, on average, how far each score lies from the mean.
The larger the standard deviation, the more variable the data set is. \tn
% Row Count 9 (+ 9)
% Row 10
\SetRowColor{white}
- Variance & The variance (s\textsuperscript{2}) is the average of squared deviations from the mean.
Variance reflects the degree of spread in the data set.
The more spread the data, the larger the variance is in relation to the mean. \tn
% Row Count 17 (+ 8)
% Row 11
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Univariate descriptive statistics}} \tn
% Row Count 18 (+ 1)
% Row 12
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Univariate descriptive statistics focus on only one variable at a time.
It's important to examine data from each variable separately using multiple measures of distribution, central tendency and spread.} \tn
% Row Count 23 (+ 5)
% Row 13
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Bivariate descriptive statistics}} \tn
% Row Count 24 (+ 1)
% Row 14
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{If you've collected data on more than one variable, you can use bivariate or multivariate descriptive statistics to explore whether there are relationships between them.
In bivariate analysis, you simultaneously study the frequency and variability of two variables to see if they vary together.
You can also compare the central tendency of the two variables before performing further statistical tests.} \tn
% Row Count 33 (+ 9)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{1.64241 cm} x{3.33459 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Descriptive Statistics (cont)}}} \tn
% Row 15
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Multivariate analysis}} \tn
% Row Count 1 (+ 1)
% Row 16
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{is the same as bivariate analysis but with more than two variables.} \tn
% Row Count 3 (+ 2)
% Row 17
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Contingency table}} \tn
% Row Count 4 (+ 1)
% Row 18
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{In a contingency table, each cell represents the intersection of two variables.
Usually, an independent variable (e.g., gender) appears along the vertical axis and a dependent one appears along the horizontal axis (e.g., activities).
You read ``across'' the table to see how the independent and dependent variables relate to each other.} \tn
% Row Count 11 (+ 7)
% Row 19
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Scatter plots}} \tn
% Row Count 12 (+ 1)
% Row 20
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{A scatter plot is a chart that shows you the relationship between two or three variables.
It's a visual representation of the strength of a relationship.} \tn
% Row Count 16 (+ 4)
% Row 21
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{In a scatter plot, you plot one variable along the x-axis and another one along the y-axis.
Each data point is represented by a point in the chart.} \tn
% Row Count 19 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
\begin{tabularx}{5.377cm}{x{1.69218 cm} x{3.28482 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Inferential Statistics}}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{help you come to conclusions and make predictions based on your data, to understand the larger population from which the sample is taken.
It's important to use random and unbiased sampling methods.
If your sample isn't representative of your population, then you can't make valid statistical inferences.} \tn
% Row Count 7 (+ 7)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Inferential statistics have two main uses:}} \tn
% Row Count 8 (+ 1)
% Row 2
% \textbullet is used instead of a raw Unicode bullet because inputenc is not loaded.
\SetRowColor{LightBackground}
 & \textbullet{} making estimates about populations (for example, the mean SAT score of all 11th graders in the US). \tn
% Row Count 12 (+ 4)
% Row 3
\SetRowColor{white}
 & \textbullet{} testing hypotheses to draw conclusions about populations (for example, the relationship between SAT scores and family income).
\tn
% Row Count 17 (+ 5)
% "Inferential Statistics": sampling error and estimates, then the first
% continuation table (interval estimate and confidence interval rows).
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Sampling error}} \tn
% Row Count 18 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Since the size of a sample is always smaller than the size of the population, some of the population isn't captured by sample data.
This creates sampling error, which is the difference between the true population values (called parameters) and the measured sample values (called statistics).} \tn
% Row Count 24 (+ 6)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Two important types of estimates you can make about the population}} \tn
% Row Count 26 (+ 2)
% Row 7
\SetRowColor{white}
point estimate & is a single value estimate of a parameter.
For instance, a sample mean is a point estimate of a population mean. \tn
% Row Count 31 (+ 5)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{1.69218 cm} x{3.28482 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Inferential Statistics (cont)}}} \tn
% Row 8
\SetRowColor{LightBackground}
interval estimate & gives you a range of values where the parameter is expected to lie.
A confidence interval is the most common type of interval estimate. \tn
% Row Count 6 (+ 6)
% Row 9
\SetRowColor{white}
- confidence interval & uses the variability around a statistic to come up with an interval estimate for a parameter.
Confidence intervals are useful for estimating parameters because they take sampling error into account.
The confidence interval tells you the uncertainty of the point estimate.
The confidence level tells you the probability (in percentage) of the interval containing the parameter estimate if you repeat the study again.
A 95\% confidence interval means that if you repeat your study with a new sample in exactly the same way 100 times, you can expect your estimate to lie within the specified range of values 95 times.
\tn
% Row Count 30 (+ 24)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
% "Inferential Statistics" continuation: hypothesis testing, parametric and
% non-parametric tests, comparison tests, heading of the correlation tests.
\begin{tabularx}{5.377cm}{x{1.69218 cm} x{3.28482 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Inferential Statistics (cont)}}} \tn
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Hypothesis Testing}} \tn
% Row Count 1 (+ 1)
% Row 11
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{is a formal process of statistical analysis using inferential statistics.
The goal of hypothesis testing is to compare populations or assess relationships between variables using samples.} \tn
% Row Count 5 (+ 4)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Parametric tests} make assumptions that include the following:} \tn
% Row Count 7 (+ 2)
% Row 13
% \textbullet is used instead of a raw Unicode bullet because inputenc is not loaded.
\SetRowColor{white}
 & \textbullet{} the population that the sample comes from follows a normal distribution of scores \tn
% Row Count 11 (+ 4)
% Row 14
\SetRowColor{LightBackground}
 & \textbullet{} the sample size is large enough to represent the population \tn
% Row Count 14 (+ 3)
% Row 15
\SetRowColor{white}
 & \textbullet{} the variances, a measure of spread, of each group being compared are similar \tn
% Row Count 18 (+ 4)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Non-parametric tests} are called ``distribution-free tests'' because they don't assume anything about the distribution of the population data.} \tn
% Row Count 21 (+ 3)
% Row 17
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Comparison tests}} \tn
% Row Count 22 (+ 1)
% Row 18
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{assess whether there are differences in means, medians or rankings of scores of two or more groups} \tn
% Row Count 24 (+ 2)
% Row 19
\SetRowColor{white}
 & T-test, ANOVA, Mood's median, Wilcoxon signed-rank, Mann-Whitney U, Kruskal-Wallis H \tn
% Row Count 28 (+ 4)
% Row 20
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Correlation tests}} \tn
% Row Count 29 (+ 1)
% Correlation-test description, the final continuation table
% (correlation and regression tests) and the end of the document.
% Row 21
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Correlation tests determine the extent to which two variables are associated.} \tn
% Row Count 31 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak
\begin{tabularx}{5.377cm}{x{1.69218 cm} x{3.28482 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{\textcolor{white}{Inferential Statistics (cont)}}} \tn
% Row 22
\SetRowColor{LightBackground}
 & Pearson's r, Spearman's r, Chi-square test of independence \tn
% Row Count 3 (+ 3)
% Row 23
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Regression tests}} \tn
% Row Count 4 (+ 1)
% Row 24
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Regression tests demonstrate whether changes in predictor variables cause changes in an outcome variable.
You can decide which regression test to use based on the number and types of variables you have as predictors and outcomes.
Most of the commonly used regression tests are parametric.
If your data is not normally distributed, you can perform data transformations.} \tn
% Row Count 12 (+ 8)
% Row 25
\SetRowColor{white}
 & Simple linear regression, Multiple linear regression, Logistic regression, Nominal regression, Ordinal regression \tn
% Row Count 17 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}
% That's all folks
\end{multicols*}
\end{document}