% Preamble for a three-column cheat sheet: packages, PDF metadata and page
% geometry. Each statement is on its own line so that a trailing % comment
% ends at the line break and does not swallow the code that follows it.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Multi-column page layout (multicols environment)
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
% NOTE(review): the author name below is CJK text, but no CJK font/encoding
% package is loaded in this preamble -- confirm the build engine handles it.
\author{林漪 (kevin123)}
\pdfinfo{
  /Title (r-me-for-final-exam.pdf)
  /Creator (Cheatography)
  /Author (林漪 (kevin123))
  /Subject (R me for final exam Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% Helper macros, colours, page header/footer and the cheat sheet body.
% Every "% Row ..." / "% Row Count ..." marker is a comment that must end at a
% line break; keep one statement per line so the comments never swallow code.

% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName and
% colours the current table row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose background is the
% colour last stored in \RowColorName.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{A3A3A3}
\definecolor{LightBackground}{HTML}{F3F3F3}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Header: logo in the first column, title and author/link in the second.
% NOTE(review): the logo is referenced by an absolute path; it must exist at
% that location on the build machine.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries{\textcolor{DarkBackground}{\textrm{R me for final exam Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{林漪 (kevin123)} via \textcolor{DarkBackground}{\uline{cheatography.com/148191/cs/32330/}}}
\end{tabulary}
\end{multicols}}

% Footer: three columns (author, publication/page info, sponsor).
\fancyfoot[L]{
\footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
    \SetRowColor{FootBackground}
    \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
    \vspace{-2pt}林漪 (kevin123) \\
    \uline{cheatography.com/kevin123} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
    \vspace{-2pt}Not Yet Published.\\
    Updated 5th June, 2022.\\
    Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
    \SetRowColor{FootBackground}
    \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
    \SetRowColor{white}
    \vspace{-5pt}
    %\includegraphics[width=48px,height=48px]{dave.jpeg}
    Measure your website readability!\\
    www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

% Table: arithmetic operators
\begin{tabularx}{5.377cm}{x{1.69218 cm} x{3.28482 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{四则运算}} \tn
% Row 0
\SetRowColor{LightBackground}
`\textasciicircum{}` & 幂运算 \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
`\%\%` & 取余运算 \tn
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
`\%/\%` & 整除运算 \tn
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
`round()` & 四舍五入取整数 \tn
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
`floor()` & 向下取整 \tn
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
`ceiling()` & 向上取整 \tn
% Row Count 6 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{`round(3.14159,2) ` \# \seqsplit{四舍五入到两位小数} \newline
    \#\# {[}1{]} 3.14} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: pitfalls of logical comparison
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{\seqsplit{逻辑运算应该注意的点}}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{判断两个浮点型对象是否完全相同,不能直接采用==和identical(),而应该用all}.equal()% Row Count 3 (+ 3)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: working with character data
\begin{tabularx}{5.377cm}{x{2.18988 cm} x{2.78712 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{字符型数据的处理}} \tn
% Row 0
\SetRowColor{LightBackground}
`nchar()` & \seqsplit{计算字符串的长度} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{`toupper()`,`tolower()`} & 转换大小写 \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
`substr(x, start, stop)` & \seqsplit{从x中取出start到stop的子串} \tn
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
`substring(x, start)` & \seqsplit{从x中取出start到末尾的子串} \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
`gsub(pattern, replacement, x)` & \seqsplit{x中与pattern对应的字符替换成replacement} \tn
% Row Count 11 (+ 3)
% Row 5
\SetRowColor{white}
`paste()` & \seqsplit{连接两个字符型对象,默认用空格连接} \tn
% Row Count 14 (+ 3)
% Row 6
\SetRowColor{LightBackground}
`strsplit()` & \seqsplit{拆分两个及以上的字符型对象,默认一个个} \tn
% Row Count 17 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{`substr(x, start, \seqsplit{stop)`还可以代替特定位置} \newline
    `nchar("R统计软件", type = "bytes")` \# 以字节为单位 \newline
    \#\# {[}1{]} 9 \newline
    ` nchar("code monkey\textbackslash{}t")` \newline
    \#\# {[}1{]} 12(空格算,\textbackslash{}t算一个) \newline
    ` paste()` 中的` sep` \seqsplit{控制用什么连接,要直接连接用'} ' \newline
    `strsplit()`中的` split` \seqsplit{指示用什么字符拆分}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: date/time types
\begin{tabularx}{5.377cm}{p{0.74655 cm} x{4.23045 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{日期时间类型数据}} \tn
% Row 0
\SetRowColor{LightBackground}
Date & \seqsplit{一般用整数保存,数值为从1970-1-1经过的天数。} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{POSIXct} & \seqsplit{从1970年1月1日零时到该日期时间的时间间隔秒数} \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{POSIXlt} & \seqsplit{一个包含年、月、日、星期、时、分、秒等成分的列表} \tn
% Row Count 7 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
% NOTE(review): the two difftime() calls below are identical but list different
% results (31.33333 vs 365 days); the first example looks mis-transcribed --
% confirm against the original R session.
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{`difftime(as.Date("2021-9-10")}, as.Date("2020-9-10"), units = 'days')` \newline
    \#\# Time difference of 31.33333 days \newline
    `difftime(as.Date("2021-9-10"), as.Date("2020-9-10"), units = 'days')` \newline
    \#\# Time difference of 365 days} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: building dates from strings (content is an image)
% NOTE(review): the image is referenced by an absolute path; it must exist at
% that location on the build machine.
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{\seqsplit{从字符串生成日期数据}}} \tn
\SetRowColor{LightBackground}
\mymulticolumn{1}{p{5.377cm}}{\vspace{1px}\centerline{\includegraphics[width=5.1cm]{/web/www.cheatography.com/public/uploads/kevin123_1654427936_.png}}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: factors
\begin{tabularx}{5.377cm}{x{3.13551 cm} x{1.84149 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{因子类变量}} \tn
% Row 0
\SetRowColor{LightBackground}
`factor(c("男", "女"))` & 男 女 Levels: 男 女 \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
`factor(LETTERS{[}1:3{]}, ordered = TRUE)` & A B C Levels: A \textless{} B \textless{} C \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
`factor(LETTERS{[}1:3{]}, ordered = TRUE, levels = c("C","B","A"))` & A B C Levels: C \textless{} B \textless{} A \tn
% Row Count 7 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{因子的levels()(水平值)属性是一个映射,} 把整数值1,2,映射成这些水平值,因子在保存时会保存成整数值1,2,等与水平值对应的编号。} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: R data types
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{R的数据类型}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{整型(int):如1L \newline % Row Count 1 (+ 1)
\seqsplit{数值型/双整型(numeric}, double):如1,1.1 \newline % Row Count 2 (+ 1)
\seqsplit{逻辑型(logical):只有两个值TRUE和FALSE}, 缺失时为NA。 \newline % Row Count 4 (+ 2)
\seqsplit{字符型(character):存储一小段文本,用双引号包住,其中单个元素称之为字符串(string),如"Hello"}, "1" \newline % Row Count 7 (+ 3)
\seqsplit{复数类型(complex):如1+3i} \newline % Row Count 8 (+ 1)
日期时间类型(Date, POSIXct, POSIXlt):如Sys.time() \newline % Row Count 10 (+ 2)
因子类型(factor) \newline % Row Count 11 (+ 1)
特殊符号:NA(Not Available), NaN(Not a Number), Inf(infinite), NULL% Row Count 13 (+ 2)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{typeof()返回数据类型} \newline
    is.foo()判断是否属于某种类型foo,是返回TRUE,否返回FALSE \newline
    as.foo()强制转换成foo类型} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: matrices
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{矩阵}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{matrix()函数把矩阵元素以向量的形式输入,用} nrow 和 ncol \seqsplit{规定行数和列数,向量元素填入的缺省次序是按列填入,用} byrow=TRUE \seqsplit{选项可转换成按行填入。} \newline % Row Count 4 (+ 4)
rbind(),cbind() \seqsplit{,diag(),dim(),dimnames()} \newline % Row Count 5 (+ 1)
\seqsplit{对两个同形状的矩阵,`} * \seqsplit{`表示两个矩阵对应元素相乘,`/} \seqsplit{`表示两个矩阵对应元素相除} \newline % Row Count 8 (+ 3)
`\%*\%` \seqsplit{矩阵乘法,t()转置,det()行列式,solve()逆} \newline % Row Count 10 (+ 2)
solve(A, b) \seqsplit{返回的是线性方程组} Ax=b的解 \newline % Row Count 12 (+ 2)
\seqsplit{矩阵内积自己记一下} \newline % Row Count 13 (+ 1)
apply(A, i, FUN) 把矩阵 A \seqsplit{的每一列分别输入到函数} FUN \seqsplit{中,得到对应于每一维度的结果,其中} i = 1 \seqsplit{表示对行进行运算,} i = 2 \seqsplit{表示对列进行运算。} \newline % Row Count 17 (+ 4)
\seqsplit{矩阵的下标和子集与向量类似。}% Row Count 18 (+ 1)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: data frames
\begin{tabularx}{5.377cm}{p{0.4977 cm} p{0.4977 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{数据框}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{} \tn
% Row Count 0 (+ 0)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{各列之间允许有不同的类型,同一列中的元素保持相同类型。} \newline
    数据框之中有增加行列,命名,访问,with()的用法,稍后再来搞} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: lists
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{列表}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{不同于之前,列表(list)是用来保存不同类型的数据。} \newline % Row Count 2 (+ 2)
可通过 names() 来命名 \newline % Row Count 3 (+ 1)
`names(dist) \textless{}- c("a", "b", "c")` \newline % Row Count 4 (+ 1)
\seqsplit{也可在一开始定义的时候就命名好} \newline % Row Count 5 (+ 1)
`dist1 \textless{}- list(a = "exponential", b = 7, c = FALSE)` \newline % Row Count 7 (+ 2)
单个列表元素必须用两重方括号格式访问如`dist{[}{[}1{]}{]}` \newline % Row Count 9 (+ 2)
\seqsplit{使用单重方括号对列表取子集结果还是列表而不是列表元素} \newline % Row Count 11 (+ 2)
\seqsplit{直接给列表不存在的元素名定义元素值就添加了新元素} \newline % Row Count 13 (+ 2)
\seqsplit{把某个列表元素赋值为} NULL 就删掉这个元素 \newline % Row Count 15 (+ 2)
\seqsplit{要把已经存在的元素修改为} NULL \seqsplit{值而不是删除此元素,} \seqsplit{或者给列表增加一个取值为} NULL 的元素, \seqsplit{这时需要用单重的方括号取子集,这样的子集会保持其列表类型,给这样的子列表赋值为} list(NULL)。如`dist{[}'was.estimated'{]} \textless{}- list(NULL)` \newline % Row Count 21 (+ 6)
as.list() \seqsplit{将其他转换成列表,unlist()把列表转换成基本向量}% Row Count 23 (+ 2)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: matrix pitfalls
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{矩阵注意点}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{提取A的第一行结果为向量,维数会有不同,drop} = FALSE 可保留原有维度。 \newline % Row Count 2 (+ 2)
\seqsplit{寻找矩阵中的最小元素,并返回其位置} \newline % Row Count 4 (+ 2)
` mat \textless{}- matrix(rnorm(40), 10, 4) which(mat == min(mat, na.rm=TRUE)) \# \seqsplit{返回的是向量的位置`} \newline % Row Count 7 (+ 3)
` which(mat == min(mat, na.rm=TRUE), arr.ind = TRUE) \# 返回行号和列号 `% Row Count 9 (+ 2)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: vector indexing
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{向量下标}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{正整数下标:访问对应位置的元素和子集} \newline % Row Count 2 (+ 2)
\seqsplit{负整数下标:扣除相应的元素后的子集} \newline % Row Count 4 (+ 2)
下标超界返回NA \newline % Row Count 5 (+ 1)
\seqsplit{下标可以是与向量等长的逻辑表达式} \newline % Row Count 6 (+ 1)
\seqsplit{元素名下标:向量可以为每个元素命名,命名后即可用元素名或者元素名向量作为向量的下标。} \newline % Row Count 9 (+ 3)
\seqsplit{重复下标:R在使用整数或元素名作为向量下标时,允许使用重复下标。}% Row Count 11 (+ 2)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: vectors
\begin{tabularx}{5.377cm}{x{3.08574 cm} x{1.89126 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{向量}} \tn
% Row 0
\SetRowColor{LightBackground}
`seq(from = 5, to = 25, by = 5)` & 5 10 15 20 25 \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\seqsplit{`seq(as.Date('2021-10-1')}, by='days', length=2)` & "2021-10-01" "2021-10-02" \tn
% Row Count 4 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{两个不等长向量的四则运算,规则是每次从头重复利用短的一个} \newline
    `seq(as.Date('2021-9-8'),to=as.Date('2022-1-1'),by='2 weeks')` \newline
    可以把向量看成一个集合,对两个向量进行集合运算,如unique(), setdiff(), setequal(), union(), intersect()} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: reading data
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{读入数据}} \tn
% Row 0
\SetRowColor{LightBackground}
`d2 \textless{}- \seqsplit{read.csv("covid19.csv"}, header = TRUE, na.strings = "", row.names = "序号", nrows = 10)` & header = TRUE \seqsplit{包括列名,na.strings} = x 指定 x \seqsplit{为缺失值,row.names} = x 指定列名为 x \seqsplit{的列为行名,skip} = x 跳过前面 x 行,nrows = x 只读取 x 行,若header = TRUE \seqsplit{则不包括列名那一行} \tn
% Row Count 11 (+ 11)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{readLines()可以读文本文件} \newline
    ` idx.na \textless{}- apply(is.na(d), 1, any) d{[}idx.na, {]}`返回有缺失值的行 \newline
    head(x, n) 选择数据框 x 的前 n 行 \newline
    tail(x, n) 选择数据框 x 的倒数 n 行} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: tidying data
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{整理数据}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{行号一样直接合并 \newline % Row Count 1 (+ 1)
行号不一样,用 merge(dat1, dat2, by = x) \seqsplit{按照列名为x来合并,只保留x元素相同的行,即同时在两个数据框中的行} \newline % Row Count 4 (+ 3)
用 merge(dat1, dat2, by.x = x, by.y = y) 把 dat1中的x列和 \seqsplit{dat2中的y列作为合并的标准。} \newline % Row Count 7 (+ 3)
如果想要保留 dat1 \seqsplit{中的所有行,则指定} all.x = TRUE。(?) \newline % Row Count 9 (+ 2)
scale() \seqsplit{把每一列都标准化,} \seqsplit{即每一列都减去该列的平均值,然后除以该列的样本标准差。} \newline % Row Count 12 (+ 3)
scale(x, center=TRUE, scale=FALSE) \seqsplit{仅中心化而不标准化。} \newline % Row Count 14 (+ 2)
\seqsplit{仅适用于数值型的变量}% Row Count 15 (+ 1)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: summarising data
\begin{tabularx}{5.377cm}{x{2.78712 cm} x{2.18988 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{汇总数据}} \tn
% Row 0
\SetRowColor{LightBackground}
总体信息 & \seqsplit{summary(),table()} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
位置度量 & mean() , median() \tn
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\seqsplit{分散程度(变异性)度量} & sd() , IQR() , mad() \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
分位数 & min() , max() , quantile() \tn
% Row Count 7 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{对于因子类型的变量,可以通过table()查看其在每一类的频数分布。} \newline
    可通过 na.rm = TRUE \seqsplit{将其中NA的数值去除来计算平均值、标准差、中位数等。}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: grouped summaries
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{分组汇总数据}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{aggregate() \seqsplit{函数对输入的数据框用指定的分组变量(或交叉分组)} \seqsplit{分组进行概括统计。} \newline % Row Count 3 (+ 3)
`aggregate(d{[},c(3:5,7){]}, by = d{[}c("分型","性别"){]}, mean)` \newline % Row Count 5 (+ 2)
tapply() \seqsplit{函数对向量进行分组概括} \newline % Row Count 6 (+ 1)
`tapply(d{[},"性别"{]}, INDEX = d{[}"分型"{]}, table)` \newline % Row Count 8 (+ 2)
可以通过 useNA = "always" 或useNA = "ifany"来把 NA计算在内 \newline % Row Count 10 (+ 2)
\seqsplit{对两个分类变量进行交叉分组计算频数} \newline % Row Count 12 (+ 2)
`table( d{[},"分型"{]}, d{[},"性别"{]})`% Row Count 13 (+ 1)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Table: random sampling
\begin{tabularx}{5.377cm}{x{2.4885 cm} x{2.4885 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bfseries\textcolor{white}{随机数}} \tn
% Row 0
\SetRowColor{LightBackground}
sample(x, size, replace = FALSE, prob = NULL) & x \seqsplit{用以存储有限集合的向量,size} \seqsplit{指定抽样个数,prob} \seqsplit{=指定以各种权重抽取,默认是等概率。replace} \seqsplit{=指定是否为有放回抽样,TRUE是有放回抽样}, \seqsplit{FALSE是无放回抽样即样本数} \tn
% Row Count 12 (+ 12)
% Row 1
\SetRowColor{white}
set.seed(seed, kind = NULL, normal.kind = NULL, sample.kind = NULL) & seed = k \seqsplit{指定一个编号为} k的种子,kind \seqsplit{=指定后续程序要使用的随机数发生器名称;normal}.kind= \seqsplit{指定要使用的正态分布随机数发生器名称。} \tn
% Row Count 21 (+ 9)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{这是古典概型的例子。} \newline
    随机排序sample(10) sample(letters) \newline
    多项分布的随机抽样 \newline
    `sample(1:3, size = 100, replace = TRUE, prob = c(.2, .3, .5))`} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Table: distribution functions (p/q/d/r prefixes)
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bfseries\textcolor{white}{随机数函数}} \tn
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{每一种分布都有自己的名字,在其前面添加如下的字母分别代表不同的功能} \newline % Row Count 2 (+ 2)
p分布函数 q分位数 d概率密度函数 r随机数 \newline % Row Count 4 (+ 2)
\seqsplit{指数分布的概率密度函数} \newline % Row Count 5 (+ 1)
x \textless{}- seq(0, 8, .05) \newline % Row Count 6 (+ 1)
plot (x, dexp(x), ty="l", main="题目", xlab="x", ylab="f(x)") \newline % Row Count 8 (+ 2)
lines (x, dexp(x, rate=0.5), col="red") \newline % Row Count 9 (+ 1)
lines (x, dexp(x, rate=0.2), col="blue") \newline % Row Count 10 (+ 1)
legend("topright", legend = paste("lambda = ", c(1, 0.5, 0.2)), col=c("black", "red", "blue"), lty=1, inset = .02) \newline % Row Count 13 (+ 3)
指数分布的随机数 \newline % Row Count 14 (+ 1)
x \textless{}- seq(0, 16, .05) \newline % Row Count 15 (+ 1)
hist(rexp(1000, 0.5), freq = FALSE, xlab="x", main="题目") \newline % Row Count 17 (+ 2)
lines (x, dexp(x, 0.5), col="red", lwd=2)% Row Count 18 (+ 1)
} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{runif(n) 产生 n \seqsplit{个标准均匀分布随机数} \newline
    rnorm(n) 产生 n \seqsplit{个标准正态分布随机数}} \tn
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}