\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr}           % For header and footer
\usepackage{multicol}           % Allows multicols in tables
\usepackage{tabularx}           % Intelligent column widths
\usepackage{tabulary}           % Used in header and footer
\usepackage{hhline}             % Border under tables
\usepackage{graphicx}           % For images
\usepackage{xcolor}             % For hex colours
%\usepackage[utf8x]{inputenc}    % For unicode character support
\usepackage[T1]{fontenc}        % Without this we get weird character replacements
\usepackage{colortbl}           % For coloured tables
\usepackage{setspace}           % For line height
\usepackage{lastpage}           % Needed for total page number
\usepackage{seqsplit}           % Splits long words.
%\usepackage{opensans}          % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem}     % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath}            % Symbols
\usepackage{MnSymbol}           % Symbols
\usepackage{wasysym}            % Symbols
%\usepackage[english,german,french,spanish,italian]{babel}              % Languages

% Document Info
\author{cgeeeeh}
\pdfinfo{
  /Title (pandas.pdf)
  /Creator (Cheatography)
  /Author (cgeeeeh)
  /Subject (Pandas Cheat Sheet)
}

% Lengths and widths
% Widen the text block by 6cm and shorten it by 1cm, then shift the page
% 3cm to the left (half of the added width) and 2cm upwards.
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
% In math mode allow a plain line break; in text insert a discretionary hyphen.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% \SetRowColor{<colour>}: store the colour name globally in \RowColorName,
% then paint the current table row with it.
\newcommand{\SetRowColor}[1]{%
  \noalign{\gdef\RowColorName{#1}}%
  \rowcolor{\RowColorName}%
}
% \mymulticolumn{<n>}{<colspec>}{<text>}: a \multicolumn whose cell background
% is the colour most recently stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{%
  \multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}%
}
% Column type x{<width>}: ragged-right paragraph column of the given width.
\newcolumntype{x}[1]{>{\raggedright}p{#1}}
% \tn: ends a table row; needed because \raggedright in the x column type
% changes the meaning of \\ inside the cell.
\newcommand{\tn}{\tabularnewline}

% Font and Colours
% Declared but not referenced in the part of the file visible here.
\definecolor{HeadBackground}{HTML}{333333}
% Background of the title rows in the footer tables.
\definecolor{FootBackground}{HTML}{666666}
% Default colour of the body text (activated by \color{TextColor} below).
\definecolor{TextColor}{HTML}{333333}
% Accent colour: table title rows, table bottom rules and header title text.
\definecolor{DarkBackground}{HTML}{976666}
% Background of every other body row (alternates with white).
\definecolor{LightBackground}{HTML}{F8F5F5}
% Make the sans-serif family the document default.
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
% Page header: logo on the left, sheet title and author/URL line next to it.
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    % The logo is referenced by an absolute path; skip it when the file is
    % not present so the sheet still compiles on other machines.
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\IfFileExists{/web/www.cheatography.com/public/images/cheatography_logo.pdf}%
            {\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}%
            {}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    % \large and \normalsize are declarations (no argument); \textbf replaces
    % the obsolete \bf switch.
    \vspace{-2pt}\large\textbf{\textcolor{DarkBackground}{\textrm{Pandas Cheat Sheet}}} \\
    \normalsize by \textcolor{DarkBackground}{cgeeeeh} via \textcolor{DarkBackground}{\uline{cheatography.com/191000/cs/39717/}}
\end{tabulary}
\end{multicols}}

% Page footer: three columns (author, sheet status / page count, sponsor).
% Title cells use \textbf instead of the obsolete \bf switch.
\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
% Column 1: author name and profile link.
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\textbf{\textcolor{white}{Cheatographer}}}  \\
  \vspace{-2pt}cgeeeeh \\
  \uline{cheatography.com/cgeeeeh} \\
  \end{tabulary}
\vfill
\columnbreak
% Column 2: publication status, last update and "page X of Y"
% (the LastPage label comes from the lastpage package).
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Cheat Sheet}}}  \\
   \vspace{-2pt}Not Yet Published.\\
   Updated 4th December, 2023.\\
   Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
% Column 3: sponsor text.
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\textbf{\textcolor{white}{Sponsor}}}  \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}


\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{3}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{初始化}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{pd.Series(data=\{key:value,key:value\},index,name)。index如果存在key里没有的值，value是NaN; \seqsplit{Name是series的名字，在dataframe里一个series的名字是列名}} \tn 
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{pd.DataFrame(data,index,columns)。给数据用list是按行，比如{[}{[}row1{]}{[}row2{]}{[}row3{]}{]}，用dict是按列，如\{key:{[}{]},key:{[}{]}\}} \tn 
% Row Count 7 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{CSV导入：pd.read\_csv("data.csv"}, sheetname, skiprows, header, nrows, index\_col, names,encoding)。sheetname似乎对csv文件不适用；skiprows按照【下标】跳过某些行，如range(0,6)，给的是下标，比如3就是跳过4行；header是第一行数据所在的【下标】，不建议使用因为中文有些字符包含\textbackslash{}n；nrows是读取多少行；index\_col是索引列的【下标】；names可以重命名列} \tn 
% Row Count 16 (+ 9)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{x{2.63781 cm} x{2.33919 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{属性}}  \tn
% Row 0
\SetRowColor{LightBackground}
df.columns & df.index \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
df.axes & df.values \tn 
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
df.ndim & df.shape \tn 
% Row Count 3 (+ 1)
% Row 3
\SetRowColor{white}
df.size & df.empty \tn 
% Row Count 4 (+ 1)
% Row 4
\SetRowColor{LightBackground}
df.dtypes & df.str \tn 
% Row Count 5 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{df.T} \tn 
% Row Count 6 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{2}{x{5.377cm}}{\seqsplit{df.index.get\_level\_values(0/"index\_name")读取多重索引中的一个}} \tn 
% Row Count 8 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{2}{x{5.377cm}}{仅限dataframe：df.items()-\textgreater{}(colname,series)/df.iterrows()-\textgreater{}(index,series)} \tn 
% Row Count 10 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{初探}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{df.describe()返回summary} statistics/df.info()返回index\&data type} \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df.head(5)/df.tail(5)} \tn 
% Row Count 3 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df.set\_index(col,inplace=True)。将列设置成索引。col是列名，也可以是list（多个列名）} \tn 
% Row Count 6 (+ 3)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{dfc=df.astype(dtype)改变数据类型}} \tn 
% Row Count 7 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df.isnull()} \tn 
% Row Count 8 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df.corr()} \tn 
% Row Count 9 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{Pandas显示设置} \tn 
% Row Count 10 (+ 1)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{pd.set\_option(,)。'max\_colwidth' \seqsplit{value的显示长度，默认50；'display}.max\_columns'+None显示所有列；'display.max\_rows'+None显示所有行} \tn 
% Row Count 13 (+ 3)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{链式赋值的警告：pd}.options.mode.chained\_assignment = None} \tn 
% Row Count 15 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{预处理}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{转数据格式：df{[}"Colname"{]}.astype()} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{字符串转时间再转回来：df{[}"Date"{]}=df{[}"Date"{]}.apply(lambda x: \seqsplit{pd.Timestamp(str(x)).strftime("\%Y-\%m-\%d"))}} \tn 
% Row Count 4 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{转list：series.to\_list()}} \tn 
% Row Count 5 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{判断空值：=df.isnull().sum()/=df.isna().sum()按列求和，返回一个series/=df.notnull()}} \tn 
% Row Count 7 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{填空值：df{[}"colname"{]}.fillna(df{[}'colname'{]}.mode()/"ffill",inplace=True)；ffill是填写上一个非空值} \tn 
% Row Count 10 (+ 3)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{删除空值：dv1.dropna(inplace=True)}} \tn 
% Row Count 11 (+ 1)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{列重命名：df.rename(columns=\{0:"sales"\},inplace=True)} \tn 
% Row Count 13 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{排序：df.sort\_values(by={[}"Colname1","Colname2"{]},ascending=True,inplace)} \tn 
% Row Count 15 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{排序：df{[}"Name"{]}.rank(axis, method, numeric\_only, na\_option, ascending, \seqsplit{pct)。axis=0对行排序，axis=1对列排序;} method有min,max,average,first,dense; \seqsplit{numeric\_only决定是否只对数值排序，false则对所有列排序;} na\_option有keep, top, bottom} \tn 
% Row Count 22 (+ 7)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{重新组合：df.values.reshape(x},x)} \tn 
% Row Count 23 (+ 1)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{堆叠：stack()。把columns堆在index里面，形成多层索引}} \tn 
% Row Count 25 (+ 2)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{透视表：pivot("col1","col2")。把sheet里的数据转换成透视表，第一个是索引，第二个是列。随后列索引会有两层，因此需要只取出第二层} \tn 
% Row Count 29 (+ 4)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{拼接：pd.concat({[}df1,df2{]}, axis, ignore\_index), \seqsplit{axis默认0，0是沿index（上下），1是沿着column（左右）。沿着index} \seqsplit{concat，df2多的列新建，df1有的拼接在下面}} \tn 
% Row Count 33 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{预处理 (cont)}}  \tn
% Row 13
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{join：pd.merge(dv1,df\_rates,on,how)，on没有指定的时候，就是两个df的列的交集，how默认是inner} \tn 
% Row Count 3 (+ 3)
% Row 14
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{删除某几列的重复值：df}.drop\_duplicates(subset={[}'colname'{]})} \tn 
% Row Count 5 (+ 2)
% Row 15
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{将dummy们转化：pd.get\_dummies(df}, columns={[}"col1","col2"{]})} \tn 
% Row Count 7 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{计算}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{max，min索引：df{[}"Test1"{]}.idxmax()/idxmin()} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{count(), mean(), var(), std(), median(), mode(), sum(), unique(), cov()} \tn 
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df.groupby({[}'A'{]}){[}{[}'B','C'{]}{]}.agg(np.mean)/.count()} \tn 
% Row Count 4 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{添加数据}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{行：loc} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{list\_row = {[}"Hyperion", 27000, "60days", 2000{]} df.loc{[}len(df){]} = list\_row} \tn 
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{行：df.append()（pandas 2.0已移除，新版本请用pd.concat）} \tn 
% Row Count 4 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{new\_row = \{'Courses':'Hyperion', 'Fee':24000, 'Duration':'55days', 'Discount':1800\} df2 = df.append(new\_row, ignore\_index=True)} \tn 
% Row Count 7 (+ 3)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{行：append with named index} \tn 
% Row Count 8 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df2 = df.append(pd.DataFrame({[}new\_row{]},index={[}'7'{]},columns=df.columns))} \tn 
% Row Count 10 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{行：append，series。但是竖着的series} \seqsplit{append进dataframe后变成行了}} \tn 
% Row Count 12 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df2 = \seqsplit{df.append(pd.Series(new\_row}, index=df.columns, name='7'))} \tn 
% Row Count 14 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{行：pd.concat()} \tn 
% Row Count 15 (+ 1)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{new\_row = pd.DataFrame(\{'Courses':'Hyperion', 'Fee':24000, 'Duration':'55days', 'Discount':1800\}, index={[}0{]}) df2 = pd.concat({[}new\_row,df.loc{[}:{]}{]}).reset\_index(drop=True)} \tn 
% Row Count 19 (+ 4)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{行：df.loc{[}{]}（可以把index一起加进去么？）} \tn 
% Row Count 21 (+ 2)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df.loc{[}'7', :{]} = {[}'Hive',25000,'45days',1800{]}} \tn 
% Row Count 22 (+ 1)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{行：pd.concat} \tn 
% Row Count 23 (+ 1)
% Row 13
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df\_row\_reindex = pd.concat({[}df1, df2{]}, \seqsplit{ignore\_index)。ignore\_index=True则重新索引，axis=0上下堆，axis=1左右堆}} \tn 
% Row Count 26 (+ 3)
% Row 14
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{pieces = \{'x': df1, 'y': df2\}；df\_piece = pd.concat(pieces)} \tn 
% Row Count 28 (+ 2)
% Row 15
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{列：df1{[}"colname"{]}={[}30,52,50,28{]}} \tn 
% Row Count 29 (+ 1)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{用索引查缺补漏：ser2}.combine\_first(ser1)。ser1里的数据，如果索引ser2没有则补充，如果有则不变} \tn 
% Row Count 32 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{删数据}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{del df1{[}"Colname"{]}} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df.drop(index如{[}"colname","colname2"{]}/{[}3,4,6{]},axis,inplace)。index可以是行或列索引，axis默认为0删除行，1删除列} \tn 
% Row Count 4 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{X=df.drop(columns="Creditability")}} \tn 
% Row Count 5 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{空值：df.dropna(axis=0}, \seqsplit{inplace=True)。axis默认为0删行，1删列}} \tn 
% Row Count 7 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{重复：df.drop\_duplicates(subset=None},keep,inplace,ignore\_index)。subset接受索引，是个list；keep有first，last，False；ignore\_index=False，True则会重排序} \tn 
% Row Count 11 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{查数据}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{单列：df{[}"col\_name"{]}-\textgreater{}series/df{[}{[}"col\_name"{]}{]}-\textgreater{}dataframe，df.loc{[}:,colname{]}} \tn 
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{多列：df.loc{[}:,{[}"Test2","Test3"{]}{]}/df.loc{[}:,"Test2":"Test3"{]}/df.loc{[} : , {[}col for col in df.columns if 'Random' in col{]}{]}} \tn 
% Row Count 5 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{单行：df.loc{[}'John'{]}/df.loc{[}2{]}；df.iloc{[}1{]}。loc和iloc的区别是，查阅索引和下标} \tn 
% Row Count 7 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{多行：df.loc{[}{[}0,1{]}{]}/df.loc{[}{[}'Peter','Mary'{]}{]}/df.loc{[}2:4{]}/df.loc{[}'Peter':'Mary'{]}} \tn 
% Row Count 9 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{某个/片单元格：df.loc{[}{[}"peter","Marry"{]},{[}"test1","test2"{]}{]}/df.iloc{[}1:2,1:3{]}} \tn 
% Row Count 11 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{dv1.loc{[}dv1{[}"MMM"{]}==mths{[}i{]},"MM"{]}="aa"单个值也可以} \tn 
% Row Count 13 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{依据列查询行：df.loc{[}\textasciitilde{} (df{[}"Name"{]}==xxx) | (df.Name.isin({[}"John","Peter"{]})), {[}"A","B","C"{]}{]}} \tn 
% Row Count 15 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{依据行查询列：df.loc{[}:,df.isin({[}54,56{]}).any(){]}/df.loc{[} : , {[}(df{[}col{]} == 30).any() for col in df.columns{]}{]}} \tn 
% Row Count 18 (+ 3)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{依据datatype查询：df}.select\_dtypes('object'/'number')} \tn 
% Row Count 20 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{df{[}{]}{[}{]}被允许，但是第一个参数是行的索引切片（如1：2），第二个参数是列（list）。df{[}1{]}{[}"x"{]}的表述是不允许的} \tn 
% Row Count 23 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{连接}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{只有一列同名列：pd}.merge(frame1,frame2)} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{多列同名：pd.merge(df1},df2,on="colname")} \tn 
% Row Count 2 (+ 1)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{连接不同名的两列：pd}.merge(df1,df2,left\_on="col1",right\_on="col2")} \tn 
% Row Count 4 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{连接多列：pd.merge(df1}, df2, on={[}"col1","col2"{]}, how)。how有right,left,outer} \tn 
% Row Count 6 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{INNER JOIN：only the set of records that match in both A and B} \tn 
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{LEFT JOIN：a complete set of records from A (left DataFrame), with the matching records (where available) in B (right DataFrame). If there is no match, the right side will contain null.} \tn 
% Row Count 12 (+ 4)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{OUTER JOIN：combines the results of both the left and the right outer joins} \tn 
% Row Count 14 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{\seqsplit{索引连接：pd.merge(df1},df2,right\_index=True,left\_index=True)} \tn 
% Row Count 16 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{join with index：df1.join(df2, lsuffix, rsuffix, )} \tn 
% Row Count 18 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{join with key \seqsplit{col：df.set\_index('key').join(other.set\_index('key'))/df.join(other.set\_index('key')}, on='key')} \tn 
% Row Count 21 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

\begin{tabularx}{5.377cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{5.377cm}}{\bf\textcolor{white}{输出到文件}}  \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{df.to\_csv('scores.csv',encoding="utf-8")} \tn 
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{from pandas import ExcelWriter writer = \seqsplit{ExcelWriter('score.xlsx')} df.to\_excel(writer,'Sheet10') writer.close()} \tn 
% Row Count 4 (+ 3)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{5.377cm}}{dictionary = df.to\_dict()} \tn 
% Row Count 5 (+ 1)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{5.377cm}}{string = df.to\_string()} \tn 
% Row Count 6 (+ 1)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}


% That's all folks
\end{multicols*}

\end{document}