% Cheat sheet "Statistiek met R" (author: Kaji), Cheatography layout.
% Structure: preamble -> fancyhdr header/footer built from multicols and
% tabulary -> one tabularx per topic in the document body.
% \pdfinfo is used below, so this is presumably built with pdfLaTeX.
%
% NOTE: every `%` comment must end at a real line break. If the file is
% collapsed onto long lines, each comment swallows the code that follows it.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
\usepackage[utf8]{inputenc}     % UTF-8 input (accented letters); replaces the disabled utf8x option
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{Kaji}
\pdfinfo{
  /Title (statistiek-met-r.pdf)
  /Creator (Cheatography)
  /Author (Kaji)
  /Subject (Statistiek met R Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm}       % Space between columns
\setlength{\headsep}{-12pt}         % Reduce space between header and content
\setlength{\headheight}{85pt}       % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt}  % Remove footer line
\renewcommand{\headrulewidth}{0pt}  % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% This two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName and
% colours the current row with it. It expands to \noalign, so it has to be the
% first thing in a table row.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose cell is filled
% with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{B8C2E3}
\definecolor{LightBackground}{HTML}{F6F7FB}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo on the left, sheet title and author/URL line next to it.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries{\textcolor{DarkBackground}{\textrm{Statistiek met R Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Kaji} via \textcolor{DarkBackground}{\uline{cheatography.com/63960/cs/19405/}}}
\end{tabulary}
\end{multicols}}

% Footer: three columns (author, publication/page info, sponsor).
\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}Kaji \\
  \uline{cheatography.com/kaji} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 23rd April, 2019.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

% Do not put blank lines inside the tables below: \SetRowColor starts with
% \noalign and must be the first thing TeX sees in a row.

\begin{tabularx}{17.67cm}{x{10.7074 cm} x{6.5626 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{EXTRA}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  Twee plots naast elkaar & `par(mfrow=c(1,2))` \tn
  % Row Count 2 (+ 2)
  % Row 1
  \SetRowColor{white}
  Plots naast elkaar uitschakelen & `dev.off()` \tn
  % Row Count 4 (+ 2)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{8.635 cm} x{8.635 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Z-toets}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  Standaardnormale verdeling & `dnorm(x (evt, mean=, sd=))` \tn
  % Row Count 2 (+ 2)
  % Row 1
  \SetRowColor{white}
  Standaardnormale verdeling plot & `plot(x, dnorm(x))` \tn
  % Row Count 4 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  Kans berekenen & `pnorm(i, mean, sd)` \tn
  % Row Count 5 (+ 1)
  % Row 3
  \SetRowColor{white}
  \seqsplit{Voorspellingsinterval} 95\% berekenen & `qnorm(c(0.025, 0.975), mean, sd)` \tn
  % Row Count 7 (+ 2)
  % Row 4
  % A two-sided 99% interval leaves 0.5% in each tail, hence 0.005 and 0.995.
  \SetRowColor{LightBackground}
  \seqsplit{Voorspellingsinterval} 99\% berekenen & `qnorm(c(0.005, 0.995), mean, sd)` \tn
  % Row Count 9 (+ 2)
  % Row 5
  \SetRowColor{white}
  \seqsplit{Voorspellingsinterval} 95\% van steekproef berekenen & `qnorm(c(0.025, 0.975), mean, sd/sqrt(j))` \tn
  % Row Count 12 (+ 3)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{17.67cm}}{i = variabel dat je wilt testen \newline j = steekproefgrootte (n)} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{X}
  \SetRowColor{DarkBackground}
  \mymulticolumn{1}{x{17.67cm}}{\bfseries\textcolor{white}{T-test}} \tn
  \SetRowColor{white}
  \mymulticolumn{1}{x{17.67cm}}{Bij een gepaarde test, `t.test(na, voor, paired = T)`% Row Count 2 (+ 2)
  } \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{4.3862 cm} x{6.2419 cm} x{6.2419 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{3}{x{17.67cm}}{\bfseries\textcolor{white}{Hypothesen}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  \seqsplit{Shapiro-Wilk} test & H0: De data is normaal verdeeld. & HA: De data is niet normaal verdeeld. \tn
  % Row Count 3 (+ 3)
  % Row 1
  \SetRowColor{white}
  T-test & H0: Er is geen verschil tussen de \seqsplit{verwachtingswaarde} en het gemiddelde. & HA: Er is wel verschil tussen de \seqsplit{verwachtingswaarde} en het gemiddelde. \tn
  % Row Count 9 (+ 6)
  % Row 2
  \SetRowColor{LightBackground}
  \seqsplit{W-M-W-test} & H0: De populatie distributie van X1 en X2 is hetzelfde. & HA: De populatie distributie van X1 en X2 zijn niet hetzelfde \seqsplit{(verschillend)}. \tn
  % Row Count 15 (+ 6)
  % Row 3
  % NOTE(review): H0 and HA used to carry the identical statement; H0 now reads
  % "geen afhankelijkheid" to match its parenthetical. Confirm with the author.
  \SetRowColor{white}
  \seqsplit{Chi-kwadraat} \seqsplit{homogeniteit} & H0: Er is geen \seqsplit{afhankelijkheid}. (Geen verschil in verdeling) & HA: Er is geen \seqsplit{onafhankelijkheid}. (Wel verschil in verdeling.) \tn
  % Row Count 20 (+ 5)
  % Row 4
  \SetRowColor{LightBackground}
  \seqsplit{Chi-kwadraat} \seqsplit{goodness-of-fit} & H0: Er is geen verband tussen X1 en X2. & HA: Er is een verband tussen X1 en X2. \tn
  % Row Count 23 (+ 3)
  % Row 5
  \SetRowColor{white}
  ANOVA & H0: Er is geen verschil in de \seqsplit{verwachtingswaarden}. & HA: Er is minimaal één van de gemiddelde die significant verschillend is van de andere gemiddelden. \tn
  % Row Count 31 (+ 8)
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{4.3862 cm} x{6.2419 cm} x{6.2419 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{3}{x{17.67cm}}{\bfseries\textcolor{white}{Hypothesen (cont)}} \tn
  % Row 6
  \SetRowColor{LightBackground}
  The Sign test & H0: De distributie van X1 is hetzelfde als X2. & HA: De distributie van X1 is niet hetzelfde als X2. \tn
  % Row Count 4 (+ 4)
  % Row 7
  \SetRowColor{white}
  WSR test & H0: Er is geen verschil in Y tussen X1 en X2. & HA: Er is wel verschil in Y tussen X1 en X2. \tn
  % Row Count 8 (+ 4)
  \hhline{>{\arrayrulecolor{DarkBackground}}---}
  \SetRowColor{LightBackground}
  \mymulticolumn{3}{x{17.67cm}}{Shapiro Wilk test: \newline P \textgreater{} 0.05? Data is normaal verdeeld. H0 niet verwerpen. \newline P \textless{} 0.05? Data niet normaal verdeeld. H0 verwerpen. \newline - De gevonden verschillen berusten niet alleen op toeval.} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}---}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{12.6071 cm} x{4.6629 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Standaard berekeningen}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  Gemiddelde & `mean()` \tn
  % Row Count 1 (+ 1)
  % Row 1
  \SetRowColor{white}
  Mediaan & \seqsplit{`median()`} \tn
  % Row Count 2 (+ 1)
  % Row 2
  \SetRowColor{LightBackground}
  Variantie & `var()` \tn
  % Row Count 3 (+ 1)
  % Row 3
  \SetRowColor{white}
  Standaarddeviatie & `sd()` \tn
  % Row Count 4 (+ 1)
  % Row 4
  \SetRowColor{LightBackground}
  Kwantiel & \seqsplit{`quantile()`} \tn
  % Row Count 6 (+ 2)
  % Row 5
  \SetRowColor{white}
  Gemiddelde, kwartielen en mediaan & \seqsplit{`summary()`} \tn
  % Row Count 8 (+ 2)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{7.0807 cm} x{10.1893 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Binomiale testen}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  Binomiale verdeling voor plot & `dbinom(x, aantal, kans)` \tn
  % Row Count 2 (+ 2)
  % Row 1
  \SetRowColor{white}
  Kans berekenen & `pbinom(x, aantal, kans, lower.tail=TRUE)` \tn
  % Row Count 4 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  Kwantielen & `qbinom(x, aantal, kans, lower.tail=TRUE)` \tn
  % Row Count 6 (+ 2)
  % Row 3
  \SetRowColor{white}
  x & `seq(startwaarde, stopwaarde, stapgrootte)` \tn
  % Row Count 8 (+ 2)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{8.635 cm} x{8.635 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Lineaire Regressie}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  Scatterplot maken & `plot(y\textasciitilde{}x)` \tn
  % Row Count 1 (+ 1)
  % Row 1
  \SetRowColor{white}
  Lineaire regressielijn & `lm(y\textasciitilde{}x)` \tn
  % Row Count 3 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  Regressielijn toevoegen in plot & \seqsplit{`lines(regressie\$fitted}.values\textasciitilde{}data\$x, type="l")` \tn
  % Row Count 6 (+ 3)
  % Row 3
  % No \seqsplit here: it splits token by token and would tear the multi-byte
  % UTF-8 letters apart under pdfLaTeX.
  \SetRowColor{white}
  Correlatie coëfficiënt/bepalen & `cor(x, y)` \tn
  % Row Count 8 (+ 2)
  % Row 4
  \SetRowColor{LightBackground}
  Sterkte van correlatie berekenen & `cor.test(x, y)` \tn
  % Row Count 10 (+ 2)
  % Row 5
  \SetRowColor{white}
  Fitted waarden (verwachte y-waarde) & `fitted(regressie)` \tn
  % Row Count 12 (+ 2)
  % Row 6
  \SetRowColor{LightBackground}
  Residue waarden (verschil tussen fitted en waargenomen) & `resid(regressie)` \tn
  % Row Count 15 (+ 3)
  % Row 7
  \SetRowColor{white}
  Residue waarden in plot weergeven & `segments(x, y, x, fitted(regressie))` \tn
  % Row Count 17 (+ 2)
  % Row 8
  \SetRowColor{LightBackground}
  Verschil fitted waarden tussen prediction interval & \seqsplit{`predict.lm(regressie}, int = 'prediction')` \tn
  % Row Count 20 (+ 3)
  % Row 9
  \SetRowColor{white}
  Verschil fitted waarden tussen confidence interval & \seqsplit{`predict.lm(regressie}, int = 'confidence')` \tn
  % Row Count 23 (+ 3)
  % Row 10
  \SetRowColor{LightBackground}
  Plot van opgestelde model & `abline(slope, interception)` \tn
  % Row Count 25 (+ 2)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{17.67cm}}{x \textless{}- variable met vaste waarden \newline y \textless{}- random \newline Y is somehow afhankelijk van X. \newline `regressie \textless{}- lm(y\textasciitilde{}x)`} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{8.635 cm} x{8.635 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Chi-kwadraat test}} \tn
  % Row 0
  % Chi squared is set in math mode; a raw Greek letter is not available in
  % the T1 text encoding.
  \SetRowColor{LightBackground}
  Chi-kwadraat test ($\chi^2$ berekenen) & chisq.test(x) \tn
  % Row Count 2 (+ 2)
  % Row 1
  \SetRowColor{white}
  Voorspelde waarden bij geen verband & \seqsplit{chisq.test(x)\$expected} \tn
  % Row Count 4 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  Verschil tussen verwachte en gevonden waarden weergeven & \seqsplit{chisq.test(x)\$residuals} \tn
  % Row Count 7 (+ 3)
  % Row 3
  \SetRowColor{white}
  Kwantielen om te plotten/grenswaarde van de grootheid $\chi^2$ & qchisq(0.99, df) \tn
  % Row Count 10 (+ 3)
  % Row 4
  \SetRowColor{LightBackground}
  Chi-kwadraat om te plotten & dchisq() \tn
  % Row Count 12 (+ 2)
  % Row 5
  \SetRowColor{white}
  Conclusie & Er is wel/geen verband in verdeling tussen groepen. \tn
  % Row Count 15 (+ 3)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{17.67cm}}{Deze toets gebruiken voor data met een categorische respons: data die je in categorieën kan verdelen. \newline x = een matrix \newline df = degrees of freedom (aantal rijen -1 ) * (aantal kolommen -1) ! totaal niet meenemen} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{X}
  \SetRowColor{DarkBackground}
  \mymulticolumn{1}{x{17.67cm}}{\bfseries\textcolor{white}{Chi-kwadraat Goodness of Fit}} \tn
  \SetRowColor{LightBackground}
  \mymulticolumn{1}{p{17.67cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kaji_1555962545_chisq.png}}} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}-}
  \SetRowColor{LightBackground}
  \mymulticolumn{1}{x{17.67cm}}{Komt een gevonden verdeling van waarden van één kwalitatief kenmerk overeen met verwachte verdeling? \newline Goodness of fit (aanpassing) \newline bv: dobbelsteen, wordt elke zijde 1/6 van alle keren geworpen?} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{X}
  \SetRowColor{DarkBackground}
  \mymulticolumn{1}{x{17.67cm}}{\bfseries\textcolor{white}{Chi-kwadraat Homogeniteit}} \tn
  \SetRowColor{LightBackground}
  \mymulticolumn{1}{p{17.67cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kaji_1555962793_chisqhg.png}}} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}-}
  \SetRowColor{LightBackground}
  \mymulticolumn{1}{x{17.67cm}}{Is er een verband tussen kwalitatieve kenmerken? \newline independence and homogeneity (onafhankelijkheid en homogeniteit) \newline bv: mannen/vrouwen en rokers/niet-rokers} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{6.7353 cm} x{10.5347 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Niet-parametrische testen}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  Wilcoxon signed rank test & `wilcox.test(data1, data2, paired=TRUE, exact = TRUE, conf.level = (confidence level 0.95/0.99), conf.int = TRUE)` \tn
  % Row Count 5 (+ 5)
  % Row 1
  \SetRowColor{white}
  Wilcoxon Mann-Whitney test & `wilcox.test(data1, data2, alternative = 'greater/less', exact = FALSE, conf.int = TRUE, conf.level = 0.95/0.99)` \tn
  % Row Count 10 (+ 5)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{17.67cm}}{Verschil toetsen bij niet-normaal verdeelde data. \newline Bij onafhankelijke data: WMW. \newline Bij afhankelijke data: sign test of WSR. \newline Alternative alleen gebruiken bij eenzijdige toetsen.} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{8.635 cm} x{8.635 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{ANOVA}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  1) Kijken of elke groep data normaal verdeeld is & `shapiro.test()` \tn
  % Row Count 3 (+ 3)
  % Row 1
  \SetRowColor{white}
  2) 1 kolom met waarden, 1 kolom met indicator & `stack()` \tn
  % Row Count 6 (+ 3)
  % Row 2
  \SetRowColor{LightBackground}
  3.1) One-way ANOVA & `anova(lm(waardes\textasciitilde{}indicator))` \tn
  % Row Count 8 (+ 2)
  % Row 3
  \SetRowColor{white}
  3.2a) Two-way ANOVA per factor & `anova(lm(waardes\textasciitilde{}indicator1 + indicator2))` \tn
  % Row Count 11 (+ 3)
  % Row 4
  \SetRowColor{LightBackground}
  3.2b) Two-way ANOVA interactie testen & `anova(lm(waardes\textasciitilde{}indicator1 : indicator2))` \tn
  % Row Count 14 (+ 3)
  % Row 5
  \SetRowColor{white}
  3.2c) Two-way ANOVA factor + interactie & `anova(lm(waardes\textasciitilde{}indicator1 * indicator2))` \tn
  % Row Count 17 (+ 3)
  % Row 6
  \SetRowColor{LightBackground}
  Boxplot maken van two way ANOVA & `boxplot(waardes \textasciitilde{} indicator1 + indicator2)` \tn
  % Row Count 20 (+ 3)
  % Row 7
  \SetRowColor{white}
  Interactie plot & \seqsplit{`interaction.plot(indicator1}, indicator2, y)` \tn
  % Row Count 23 (+ 3)
  % Row 8
  \SetRowColor{LightBackground}
  * Unstacken om de shapiro test te doen & `unstack(data, form = waardes \textasciitilde{} indicator)` \tn
  % Row Count 26 (+ 3)
  % Row 9
  \SetRowColor{white}
  * Shapiro test voor alle groepen tegelijkertijd & `lapply(unstackdata,shapiro.test)` \tn
  % Row Count 29 (+ 3)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{17.67cm}{x{5.6991 cm} x{11.5709 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{17.67cm}}{\bfseries\textcolor{white}{Post-hoc}} \tn
  % Row 0
  % The R function that takes p.adjust.method is pairwise.t.test.
  \SetRowColor{LightBackground}
  Bonferroni correctie & `pairwise.t.test(values, indicator, \seqsplit{p.adjust.method="bonferroni")`} \tn
  % Row Count 3 (+ 3)
  % Row 1
  \SetRowColor{white}
  Tukey HSD & `TukeyHSD(aov(lm(values\textasciitilde{}indicator)))` \tn
  % Row Count 5 (+ 2)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{17.67cm}}{Gebruik dit na de ANOVA om te kijken welke categorieën precies afwijken. \newline values = y \newline indicators = x} \tn
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\end{document}