% PSCY3000 Cheat Sheet (Cheatography export).
%
% Layout: a fancyhdr header/footer built from tabulary boxes, and a
% three-column body (multicols*) made of fixed-width tabularx "cards".
% Every card row is coloured through \SetRowColor + \mymulticolumn.
%
% The "% Row N" / "% Row Count N (+ k)" comments inside the tables come from
% the exporting tool; presumably they are its running estimate of how many
% text lines each column has used so far -- treat them as hints only.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Allows multicols in tables
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{c-mclaren}
\pdfinfo{
  /Title (pscy3000.pdf)
  /Creator (Cheatography)
  /Author (c-mclaren)
  /Subject (PSCY3000 Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: remembers <colour> globally in \RowColorName (so
% that \mymulticolumn can reuse it) and colours the current table row.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: a \multicolumn whose column spec is
% prefixed with a \columncolor of the colour last set by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{F08CFF}
\definecolor{LightBackground}{HTML}{FBE1FF}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% NOTE(review): the logo below is referenced by an absolute path on the
% exporting server; building elsewhere needs that file made available.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\textbf{\textcolor{DarkBackground}{\textrm{PSCY3000 Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{c-mclaren} via \textcolor{DarkBackground}{\uline{cheatography.com/27441/cs/7951/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}c-mclaren \\
  \uline{cheatography.com/c-mclaren} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 13th May, 2016.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{p{0.4977 cm} p{0.4977 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{F-Distribution}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Skewed right} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Mean is 1} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Only non-negative values} \tn
% Row Count 3 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Gathering Data}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Treatments: experimental conditions which correspond to assigned values of explanatory variable.} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Observational studies: watch and observe values on response variable (non experimental)} \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Advantage of Experiments over Observational Studies: experiments reduce potential for lurking variable by random selection, also experiment is only way to determine causality} \tn
% Row Count 8 (+ 4)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Sample Survey: selects sample from population and gathers data} \tn
% Row Count 10 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Sampling Frame: list of subjects in the population from which the sample is taken} \tn
% Row Count 12 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Simple random sampling: when each possible sample of that size has the same chance of being selected} \tn
% Row Count 14 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{To select a simple random sample: number the subjects in the sampling frame using numbers of the same length (number of digits). select numbers of that length from a table of random numbers or using a random number generator. include in the sample those subjects having numbers equal to the random numbers selected.} \tn
% Row Count 21 (+ 7)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Margin of Error tells us how well the sample estimate predicts the population percentage. Ex.\ A survey result says margin of error is $\pm$3\% MEANS ``it is very likely that the reported sample percentage is no more than 3\% lower/higher than the population percentage''} \tn
% Row Count 27 (+ 6)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Bias: When certain outcomes will occur more often in the sample than they do in the population.} \tn
% Row Count 29 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Sampling bias occurs from using nonrandom samples or having undercoverage.} \tn
% Row Count 31 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Gathering Data (cont)}} \tn
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Nonresponse bias occurs when some sampled subjects cannot be reached or refuse to participate or fail to answer some questions.} \tn
% Row Count 3 (+ 3)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Response bias occurs when the subject gives an incorrect response (perhaps lying) or the way the interviewer asks the questions (or wording of a question in print) is confusing or misleading.} \tn
% Row Count 7 (+ 4)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Large Sample doesn't guarantee unbiased sample} \tn
% Row Count 8 (+ 1)
% Row 13
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Convenience Sample problems: results only apply to observed subjects, unlikely to be representative of population, often severe biases result} \tn
% Row Count 11 (+ 3)
% Row 14
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Key Parts of a Sample Survey: Identify the population of all subjects of interest. Construct a sampling frame which attempts to list all subjects in the population. Use a random sampling design to select n subjects from the sampling frame. Be cautious of sampling bias due to nonrandom samples (such as volunteer samples) and sample undercoverage, response bias from subjects not giving their true response or from poorly worded questions, and nonresponse bias from refusal of subjects to participate.} \tn
% Row Count 21 (+ 10)
% Row 15
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{3 Components of Good Experiment:} \tn
% Row Count 22 (+ 1)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{1. Control Group - placebo, allows to analyze effectiveness} \tn
% Row Count 24 (+ 2)
% Row 17
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{2. Randomization - eliminates researcher bias, balances comparison groups on known and lurking variables} \tn
% Row Count 27 (+ 3)
% Row 18
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{3. Replication: allows to attribute observed effects to tx rather than regular variability} \tn
% Row Count 29 (+ 2)
% Row 19
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Statistically Significant: if observed difference is larger than would be expected by chance} \tn
% Row Count 31 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{Gathering Data (cont)}} \tn
% Row 20
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Can generalize only to population represented by sample} \tn
% Row Count 2 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{p{0.4977 cm} p{0.4977 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{14.1 One-Way Anova: Comparing Several Means}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{One way ANOVA is an ANOVA with a single factor} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Factor: categorical explanatory variable} \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Test analyzes whether differences observed among the {\emph{sample}} means could have reasonably occurred by chance, if the null hypothesis of equal {\emph{population}} means were true} \tn
% Row Count 6 (+ 4)
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Evidence against the null is stronger when the variability between sample means increases and as the sample sizes increase} \tn
% Row Count 9 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Assumptions and the effects of violating them:}} \tn
% Row Count 10 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Population distributions are normal (Moderate violations of the normality assumption are not serious.) These distributions have the same standard deviation. (Moderate violations are not serious.) The data resulted from randomization.} \tn
% Row Count 15 (+ 5)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Misleading results may occur with the F-test if the distributions are highly skewed and the sample size N is small.} \tn
% Row Count 18 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{Misleading results may also occur with the F-test if there are relatively large differences among the standard deviations (the largest sample standard deviation being more than double the smallest one).} \tn
% Row Count 23 (+ 5)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Several T-Tests vs.\ F-test: If separate t tests are used, the significance level applies to each individual comparison, not the overall type I error rate for all the comparisons. However, the F test does not tell us which groups differ or how different they are.} \tn
% Row Count 29 (+ 6)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{One Way ANOVA example}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Question: Three groups, with different French skills, scored on one quiz} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Assumptions} \tn
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}Independent Random Samples, normal population distributions with equal standard deviations} \tn
% Row Count 5 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Hypotheses} \tn
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}H0: u1=u2=u3 Ha: at least two population means are unequal} \tn
% Row Count 8 (+ 3)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Test statistic} \tn
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}F= btwn groups variability/within groups variability \newline df1 = (g-1) df2 = (N-g)} \tn
% Row Count 11 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{P-Value} \tn
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}Right tail probability of above observed F value} \tn
% Row Count 14 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{Conclusion} \tn
\mymulticolumn{1}{x{5.377cm}}{\hspace*{6 px}\rule{2px}{6px}\hspace*{6 px}Interpret in context, reject Ho based on p-value being below or = significant value} \tn
% Row Count 17 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{14.2}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Confidence Intervals Comparing Pairs of Means}} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{s is square root of within groups variance estimate (s\textsuperscript{2})} \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{For 95\% Confidence Interval comparing means ui - uj: when the confidence interval does NOT contain 0, we can infer the population means are different, the interval shows just how different they may be} \tn
% Row Count 8 (+ 5)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Example:} for comparing the very happy and pretty happy categories, the confidence interval for u1 - u2 = (0.7, 5.3) \newline Since the CI contains only positive numbers, this suggests that on average people who are very happy have more friends than people who are pretty happy} \tn
% Row Count 14 (+ 6)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Effects of violating assumptions} \newline When the sample sizes are large and the ratio of the largest standard deviation to the smallest is less than 2, these procedures are robust to violations of these assumptions. \newline If the ratio of the largest standard deviation to the smallest exceeds 2, use the confidence interval formulas that use separate standard deviations for the groups.} \tn
% Row Count 22 (+ 8)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\textbf{Tukey Multiple Comparison}} \tn
% Row Count 23 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Ex.\ Groups: (Very Happy, Pretty Happy) Difference of Means: (u1-u2) 95\% CI: (0.7, 5.3) {\emph{Tukey 95\% Multiple Comparison}} (0.3, 5.7)} \tn
% Row Count 26 (+ 3)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{The Tukey intervals hold with an {\emph{overall}} confidence level of 95\%, this confidence applies to all intervals. Tukey is wider than separate CI's because it uses a higher confidence level to achieve 95\% for all intervals.} \tn
% Row Count 31 (+ 5)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{14.2 (cont)}} \tn
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{The Tukey Confidence interval for u1-u2 contains only positive values so infer that u1\textgreater{}u2, mean number of good friends higher for very happy than pretty happy (but maybe barely so).} \tn
% Row Count 4 (+ 4)
% Row 9
% The exporter left ten literal line-break placeholders after this heading;
% they only printed as stray braces, so they are omitted here.
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{ANOVA and Regression} \tn
% Row Count 6 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{p{0.4977 cm} p{0.4977 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Two-Way ANOVA}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Difference Between 1 and 2 way ANOVA}} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{One way analyzes relationship between mean of quantitative response variable and groups that are categories of a factor} \tn
% Row Count 4 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Two Way ANOVA analyzes quantitative response variable on two categorical explanatory variables} \tn
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Null Hypothesis} In two-way ANOVA, a null hypothesis states that the population means are the same in each category of one factor, at each fixed level of the other factor.} \tn
% Row Count 10 (+ 4)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Ex.\ Ho: Mean corn yield is equal for plots at the low and high levels of manure, for each fixed level of fertilizer. From the output, you can obtain the F-test statistic of 6.88 with its corresponding P-value of 0.018. The small P-value indicates strong evidence that the mean corn yield depends on manure level.} \tn
% Row Count 17 (+ 7)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{\textbf{No interaction} between two factors means that the effect of either factor on the response variable is the same at each category of the other factor.} \tn
% Row Count 21 (+ 4)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Usually test hypothesis that there is no interaction first} \tn
% Row Count 23 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{If the evidence of interaction is not strong (that is, if the P-value is not small), then test the main effects hypotheses and/or construct confidence intervals for those effects.} \tn
% Row Count 27 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{Repeated Measures ANOVA}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\textbf{Sum of Squares in One Way Repeated Measures}} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
Independent Groups: \newline SS Groups (df = g-1) \newline SS Error (df = N - g) & Dependent Groups \newline SS Groups (df = g-1) \newline SS subjects (df = subj - 1) \newline SS error (df = n-g-subj.+1) \tn
% Row Count 7 (+ 6)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{In repeated measures (dependent groups) ANOVA, the variability of the subjects is calculated (as if it was a factor) and is not included in the error sums of squares.} \tn
% Row Count 11 (+ 4)
% Row 3
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{A very important assumption underlying repeated measures ANOVA is sphericity and, relatedly, compound symmetry. When either of these assumptions is violated, the P-values tend to be too small. A Greenhouse-Geisser adjustment to the dfs will accommodate for any potential violations of this assumption.} \tn
% Row Count 18 (+ 7)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{Two-factor studies often have different (i.e., independent) samples on one of the factors and the same (i.e., dependent) samples on the other factor. The factor with different groups of subjects is called the ``between-subjects'' factor and the factor with repeated measures is called the ``within-subjects'' factor.} \tn
% Row Count 25 (+ 7)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}