% Preamble for a two-column cheat sheet: loads the layout/table/colour
% packages, records PDF metadata and widens the text block.
% NOTE(review): the document body contains CJK text, but no CJK-capable
% font/encoding setup is loaded here (the inputenc line below is commented
% out) -- confirm the intended engine/package configuration.
\documentclass[10pt,a4paper]{article}

% Packages
\usepackage{fancyhdr} % For header and footer
\usepackage{multicol} % Provides the multicols environments used for the page columns
\usepackage{tabularx} % Intelligent column widths
\usepackage{tabulary} % Used in header and footer
\usepackage{hhline} % Border under tables
\usepackage{graphicx} % For images
\usepackage{xcolor} % For hex colours
%\usepackage[utf8x]{inputenc} % For unicode character support
\usepackage[T1]{fontenc} % Without this we get weird character replacements
\usepackage{colortbl} % For coloured tables
\usepackage{setspace} % For line height
\usepackage{lastpage} % Needed for total page number
\usepackage{seqsplit} % Splits long words.
%\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely.
\usepackage[normalem]{ulem} % For underlining links
% Most of the following are not required for the majority
% of cheat sheets but are needed for some symbol support.
\usepackage{amsmath} % Symbols
\usepackage{MnSymbol} % Symbols
\usepackage{wasysym} % Symbols
%\usepackage[english,german,french,spanish,italian]{babel} % Languages

% Document Info
\author{Molly\_6075}
\pdfinfo{
  /Title (machine-learning.pdf)
  /Creator (Cheatography)
  /Author (Molly\_6075)
  /Subject (Machine Learning Cheat Sheet)
}

% Lengths and widths
\addtolength{\textwidth}{6cm}
\addtolength{\textheight}{-1cm}
\addtolength{\hoffset}{-3cm}
\addtolength{\voffset}{-2cm}
\setlength{\tabcolsep}{0.2cm} % Space between columns
\setlength{\headsep}{-12pt} % Reduce space between header and content
\setlength{\headheight}{85pt} % If less, LaTeX automatically increases it
\renewcommand{\footrulewidth}{0pt} % Remove footer line
\renewcommand{\headrulewidth}{0pt} % Remove header line
% Break point inserted between characters by \seqsplit: a plain break in
% math mode, a discretionary hyphen in text.
\renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit
% These two commands together give roughly
% the right line height in the tables
\renewcommand{\arraystretch}{1.3}
\onehalfspacing

% Commands
% ---------------------------------------------------------------------------
% Table helper macros, colour palette, page header/footer and the cheat sheet
% body (a series of two-column tabularx tables inside multicols*).
% ---------------------------------------------------------------------------

% \SetRowColor{<colour>}: stores the colour name globally in \RowColorName
% (inside \noalign, so it is legal at the start of a row) and colours the
% row with it.
\newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour
% \mymulticolumn{<n>}{<colspec>}{<text>}: \multicolumn whose cell is filled
% with the colour last stored by \SetRowColor.
\newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols
\newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns
% \raggedright in the x column changes \\, so rows end with \tabularnewline.
\newcommand{\tn}{\tabularnewline} % Required as custom column type in use

% Font and Colours
\definecolor{HeadBackground}{HTML}{333333}
\definecolor{FootBackground}{HTML}{666666}
\definecolor{TextColor}{HTML}{333333}
\definecolor{DarkBackground}{HTML}{306380}
\definecolor{LightBackground}{HTML}{F2F5F7}
\renewcommand{\familydefault}{\sfdefault}
\color{TextColor}

% Header and Footer
% Header: logo on the left, sheet title / author / URL next to it.
% Footer: three boxes (author, publication status + page count, sponsor).
\pagestyle{fancy}
\fancyhead{} % Set header to blank
\fancyfoot{} % Set footer to blank
\fancyhead[L]{
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{C}
    \SetRowColor{DarkBackground}
    \vspace{-7pt}
    {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent
        \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}}
    }
\end{tabulary}
\columnbreak
\begin{tabulary}{11cm}{L}
    \vspace{-2pt}\large{\bfseries{\textcolor{DarkBackground}{\textrm{Machine Learning Cheat Sheet}}}} \\
    \normalsize{by \textcolor{DarkBackground}{Molly\_6075} via \textcolor{DarkBackground}{\uline{cheatography.com/193986/cs/40757/}}}
\end{tabulary}
\end{multicols}}

\fancyfoot[L]{ \footnotesize
\noindent
\begin{multicols}{3}
\begin{tabulary}{5.8cm}{LL}
  \SetRowColor{FootBackground}
  \mymulticolumn{2}{p{5.377cm}}{\bfseries\textcolor{white}{Cheatographer}} \\
  \vspace{-2pt}Molly\_6075 \\
  \uline{cheatography.com/molly-6075} \\
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Cheat Sheet}} \\
  \vspace{-2pt}Not Yet Published.\\
  Updated 16th October, 2023.\\
  Page {\thepage} of \pageref{LastPage}.
\end{tabulary}
\vfill
\columnbreak
\begin{tabulary}{5.8cm}{L}
  \SetRowColor{FootBackground}
  \mymulticolumn{1}{p{5.377cm}}{\bfseries\textcolor{white}{Sponsor}} \\
  \SetRowColor{white}
  \vspace{-5pt}
  %\includegraphics[width=48px,height=48px]{dave.jpeg}
  Measure your website readability!\\
  www.readability-score.com
\end{tabulary}
\end{multicols}}

\begin{document}
\raggedright
\raggedcolumns

% Set font size to small. Switch to any value
% from this page to resize cheat sheet text:
% www.emerson.emory.edu/services/latex/latex_169.html
\footnotesize % Small font.

\begin{multicols*}{2}

% ---- Boost Classifier ------------------------------------------------------
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Boost Classifier}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{8.4cm}}{AdaBoost} \tn
  % Row Count 1 (+ 1)
  % Row 1
  \SetRowColor{white}
  from sklearn.ensemble import AdaBoostClassifier & 导入AdaBoost \tn
  % Row Count 4 (+ 3)
  % Row 2
  \SetRowColor{LightBackground}
  classifier = \seqsplit{AdaBoostClassifier(n\_estimators=3},learning\_rate=0.2, random\_state=0) & 参数设置 \tn
  % Row Count 9 (+ 5)
  % Row 3
  \SetRowColor{white}
  \seqsplit{classifier.fit(x\_train}, y\_train) & y\_pred = \seqsplit{classifier.predict(x\_test)} \tn
  % Row Count 11 (+ 2)
  % Row 4
  \SetRowColor{LightBackground}
  from \seqsplit{sklearn.model\_selection} import GridSearchCV & 最优参数选择 \tn
  % Row Count 14 (+ 3)
  % Row 5
  \SetRowColor{white}
  param\_grid = \{'n\_estimators': {[}1,10,100{]},'learning\_rate': {[}0.2,0.4,0.6,0.8{]}\} & grid = \seqsplit{GridSearchCV(AdaBoostClassifier()},param\_grid, scoring ='accuracy') \tn
  % Row Count 18 (+ 4)
  % Row 6
  \SetRowColor{LightBackground}
  grid.fit(x\_train,y\_train) & \seqsplit{grid.best\_estimator\_}, grid.best\_params\_, grid.cv\_results\_ \tn
  % Row Count 21 (+ 3)
  % Row 7
  \SetRowColor{white}
  \mymulticolumn{2}{x{8.4cm}}{GradientBoost} \tn
  % Row Count 22 (+ 1)
  % Row 8
  \SetRowColor{LightBackground}
  from sklearn.ensemble import \seqsplit{GradientBoostingClassifier} & 导入GradientBoost \tn
  % Row Count 25 (+ 3)
  % Row 9
  \SetRowColor{white}
  classifier = \seqsplit{GradientBoostingClassifier(n\_estimators=1}, learning\_rate=0.4,max\_depth=1, random\_state=0) & \seqsplit{classifier.fit(x\_train}, y\_train) \tn
  % Row Count 31 (+ 6)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Boost Classifier (cont)}} \tn
  % Row 10
  \SetRowColor{LightBackground}
  param\_grid = \{'n\_estimators': {[}1,10,100{]},'learning\_rate': {[}0.2,0.4,0.6,0.8{]}\} & grid = \seqsplit{GridSearchCV(GradientBoostingClassifier()},param\_grid, scoring ='accuracy') \tn
  % Row Count 5 (+ 5)
  % Row 11
  \SetRowColor{white}
  grid.fit(x\_train,y\_train) & \seqsplit{grid.best\_estimator\_}, grid.best\_params\_, grid.cv\_results\_ \tn
  % Row Count 8 (+ 3)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- SVM -------------------------------------------------------------------
% NOTE(review): rows 5 and 6 show regression.predict before the row that
% creates and fits `regression` -- confirm whether the row order is intended.
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{SVM}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  \seqsplit{标准化处理,将数字型的变量转换为-1到1的区间里} & from \seqsplit{sklearn.preprocessing} import StandardScaler \tn
  % Row Count 4 (+ 4)
  % Row 1
  \SetRowColor{white}
  ss = StandardScaler() & x\_transformd = \seqsplit{ss.fit\_transform(x)} \tn
  % Row Count 6 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  \seqsplit{分割训练集和测试集} & from \seqsplit{sklearn.model\_selection} import train\_test\_split \tn
  % Row Count 9 (+ 3)
  % Row 3
  \SetRowColor{white}
  x\_train,x\_test,y\_train,y\_test = \seqsplit{train\_test\_split(x\_transformd},y,test\_size=0.3,random\_state=0) & \seqsplit{按照训练集70\%划分} \tn
  % Row Count 14 (+ 5)
  % Row 4
  \SetRowColor{LightBackground}
  \seqsplit{支持向量机回归} & from sklearn.svm import SVR \tn
  % Row Count 16 (+ 2)
  % Row 5
  \SetRowColor{white}
  y\_pred\_SVR = \seqsplit{regression.predict(x\_test)} & \seqsplit{用训练集做拟合,测试集预测结果} \tn
  % Row Count 19 (+ 3)
  % Row 6
  \SetRowColor{LightBackground}
  regression = SVR() & \seqsplit{regression.fit(x\_train},y\_train) \tn
  % Row Count 21 (+ 2)
  % Row 7
  \SetRowColor{white}
  \seqsplit{支持向量机分类器} & from sklearn.svm import SVC \tn
  % Row Count 23 (+ 2)
  % Row 8
  \SetRowColor{LightBackground}
  df{[}"y"{]} = pd.cut(x = df.col0, bins={[}0,6,10{]},labels={[}0,1{]}) & \seqsplit{依据col0列的值划分成两组,0-5一组,6-10另一组,并保存在新列中(categorical} \seqsplit{Y适用于分类器)} \tn
  % Row Count 30 (+ 7)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{SVM (cont)}} \tn
  % Row 9
  \SetRowColor{LightBackground}
  classifier = SVC(kernel=kernel,random\_state=0) & \seqsplit{kernel还可选择'linear'},'poly','rbf','sigmoid',准确率随之改变 \tn
  % Row Count 4 (+ 4)
  % Row 10
  \SetRowColor{white}
  \seqsplit{classifier.fit(x\_train},y\_train) & y\_pred = \seqsplit{classifier.predict(x\_test)} \tn
  % Row Count 6 (+ 2)
  % Row 11
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{8.4cm}}{结果评估} \tn
  % Row Count 7 (+ 1)
  % Row 12
  \SetRowColor{white}
  回归适用 & from sklearn.metrics import \seqsplit{mean\_squared\_error}, \seqsplit{mean\_absolute\_error} \tn
  % Row Count 11 (+ 4)
  % Row 13
  \SetRowColor{LightBackground}
  \seqsplit{mean\_absolute\_error(y\_test},y\_pred\_SVR) & \seqsplit{MAE预测值和实际值之间绝对误差的平均值} \tn
  % Row Count 14 (+ 3)
  % Row 14
  \SetRowColor{white}
  \seqsplit{mean\_squared\_error(y\_test},y\_pred\_SVR) & \seqsplit{MSE预测值和实际值之间误差的平方的平均值} \tn
  % Row Count 17 (+ 3)
  % Row 15
  \SetRowColor{LightBackground}
  分类适用 & from sklearn.metrics import confusion\_matrix, accuracy\_score, \seqsplit{classification\_report} \tn
  % Row Count 22 (+ 5)
  % Row 16
  \SetRowColor{white}
  \seqsplit{accuracy\_score(y\_test},y\_pred) & accuracy = \seqsplit{(TP+TN)/(TP+TN+FP+FN)} \tn
  % Row Count 24 (+ 2)
  % Row 17
  \SetRowColor{LightBackground}
  \seqsplit{confusion\_matrix(y\_test}, y\_pred) & 详细的TP, TN, FP, FN的数量 \tn
  % Row Count 26 (+ 2)
  % Row 18
  \SetRowColor{white}
  \seqsplit{classification\_report(y\_test}, y\_pred) & \seqsplit{包括了准确率、召回率、F1分数和支持度等指标} \tn
  % Row Count 30 (+ 4)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Feature Engineering ---------------------------------------------------
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Feature Engineering}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  pip install scikit-image & 安装包 \tn
  % Row Count 2 (+ 2)
  % Row 1
  \SetRowColor{white}
  \seqsplit{读取图片并展示} & from skimage import io \tn
  % Row Count 4 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  food = \seqsplit{io.imread("chips1.jpg")} & io.imshow(food) \tn
  % Row Count 6 (+ 2)
  % Row 3
  \SetRowColor{white}
  变化图片颜色 & from skimage.color import* \tn
  % Row Count 8 (+ 2)
  % Row 4
  \SetRowColor{LightBackground}
  \seqsplit{io.imshow(rgb2gray(food))} & \seqsplit{将彩色图片转换为灰色并展示} \tn
  % Row Count 10 (+ 2)
  % Row 5
  \SetRowColor{white}
  给图片加滤镜 & from skimage.filters import * \tn
  % Row Count 12 (+ 2)
  % Row 6
  \SetRowColor{LightBackground}
  \seqsplit{io.imshow(laplace(food},ksize=3, mask=None)) & 使用Laplace \seqsplit{filter并设置内核大小为3} \tn
  % Row Count 15 (+ 3)
  % Row 7
  \SetRowColor{white}
  更改图片尺寸 & from skimage import transform \tn
  % Row Count 17 (+ 2)
  % Row 8
  \SetRowColor{LightBackground}
  image = \seqsplit{transform.resize(image},(2000,2000)) print(image.shape) & \seqsplit{更改为指定尺寸并检查更改后的大小} \tn
  % Row Count 21 (+ 4)
  % Row 9
  \SetRowColor{white}
  主成分分析 & from \seqsplit{sklearn.decomposition} import PCA \tn
  % Row Count 23 (+ 2)
  % Row 10
  \SetRowColor{LightBackground}
  pca = \seqsplit{PCA(n\_components=30)}.fit(chip) & \seqsplit{降维并保留30个主成分,将PCA模型拟合到chip图像数据上} \tn
  % Row Count 27 (+ 4)
  % Row 11
  \SetRowColor{white}
  x\_new = pca.transform(chip) & \seqsplit{使用已经训练好的PCA模型,将chip图像数据投影到新的特征空间中(包含30个重要特征)} \tn
  % Row Count 33 (+ 6)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Feature Engineering (cont)}} \tn
  % Row 12
  \SetRowColor{LightBackground}
  recdata = \seqsplit{pca.inverse\_transform(x\_new)} & \seqsplit{建了图像数据,只使用了30个主成分来表示原始图像} \tn
  % Row Count 4 (+ 4)
  % Row 13
  \SetRowColor{white}
  os.listdir(".") & \seqsplit{列出当前工作目录内的所有文件名和目录名} \tn
  % Row Count 7 (+ 3)
  % Row 14
  \SetRowColor{LightBackground}
  \seqsplit{os.chdir("directorypath")} & 改变工作目录 \tn
  % Row Count 9 (+ 2)
  % Row 15
  \SetRowColor{white}
  os.getcwd() & \seqsplit{获取当前的工作目录} \tn
  % Row Count 11 (+ 2)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- K-NN ------------------------------------------------------------------
% NOTE(review): row 1 names the model `classifier` while rows 2-3 use
% `classifier2` -- confirm which name is intended.
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{K-NN}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  导入K-NN & from sklearn.neighbors import \seqsplit{KNeighborsClassifier} \tn
  % Row Count 3 (+ 3)
  % Row 1
  \SetRowColor{white}
  classifier = \seqsplit{KNeighborsClassifier(n\_neighbors=6)} & \seqsplit{n\_neighbors的默认值是5} \tn
  % Row Count 6 (+ 3)
  % Row 2
  \SetRowColor{LightBackground}
  \seqsplit{classifier2.fit(x\_train},y\_train) & 训练模型 \tn
  % Row Count 8 (+ 2)
  % Row 3
  \SetRowColor{white}
  \seqsplit{accuracy\_score(y\_test},classifier2.predict(x\_test)) & 得出准确率 \tn
  % Row Count 11 (+ 3)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Text Analytics --------------------------------------------------------
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Text Analytics}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  s.strip & \seqsplit{删除尾端全部空格,s为字符串名} \tn
  % Row Count 3 (+ 3)
  % Row 1
  \SetRowColor{white}
  a.upper( )/ a.lower & \seqsplit{将字符串a转换成大写、小写形式} \tn
  % Row Count 6 (+ 3)
  % Row 2
  \SetRowColor{LightBackground}
  分词 Tokenization & import nltk \tn
  % Row Count 7 (+ 1)
  % Row 3
  \SetRowColor{white}
  tokens = \seqsplit{nltk.word\_tokenize(text)} & \seqsplit{读取长文本并根据空格和标点分词} \tn
  % Row Count 10 (+ 3)
  % Row 4
  \SetRowColor{LightBackground}
  \seqsplit{打开文件并读取} & with open ('sample.txt','r',encoding='utf-8') as f: tokens = \seqsplit{nltk.word\_tokenize(f.read())} \tn
  % Row Count 15 (+ 5)
  % Row 5
  \SetRowColor{white}
  \seqsplit{词性标签Part-of-speech(POS)} Tagging & \seqsplit{nltk.download('averaged\_perceptron\_tagger')} \tn
  % Row Count 18 (+ 3)
  % Row 6
  \SetRowColor{LightBackground}
  tagged = \seqsplit{nltk.pos\_tag(tokens)} & \seqsplit{给分词后的每个单词加个标签} \tn
  % Row Count 20 (+ 2)
  % Row 7
  \SetRowColor{white}
  删除前后缀 \seqsplit{Stemming(可能产生invalid} word) & from nltk.stem import PorterStemmer \tn
  % Row Count 23 (+ 3)
  % Row 8
  \SetRowColor{LightBackground}
  ps = PorterStemmer() & \seqsplit{print(ps.stem('campaigning'))} \tn
  % Row Count 25 (+ 2)
  % Row 9
  \SetRowColor{white}
  词形还原 Lemmatization (generally valid) & from nltk.stem import WordNetLemmatizer \tn
  % Row Count 28 (+ 3)
  % Row 10
  \SetRowColor{LightBackground}
  wnl = WordNetLemmatizer( ) & \seqsplit{wnl.lemmatize('beaten'},'v') \tn
  % Row Count 30 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Text Analytics (cont)}} \tn
  % Row 11
  \SetRowColor{LightBackground}
  情感分析 & from \seqsplit{nltk.sentiment.vader} import \seqsplit{SentimentIntensityAnalyzer} \tn
  % Row Count 3 (+ 3)
  % Row 12
  \SetRowColor{white}
  \seqsplit{nltk.download('vader\_lexicon')} & analyzer = \seqsplit{SentimentIntensityAnalyzer(} ) \tn
  % Row Count 5 (+ 2)
  % Row 13
  \SetRowColor{LightBackground}
  analyzer.polarity\_scores(text){[}'compound'{]} & \seqsplit{分析长文本的情感色彩并得出综合分数} \tn
  % Row Count 8 (+ 3)
  % Row 14
  \SetRowColor{white}
  for index, row in df.iterrows( ): compound\_score = analyzer.polarity\_scores(row{[}'clean\_text'{]}){[}'compound'{]} & \seqsplit{dataframe中按行读取cleaned\_data列的每一条数据,并得出综合得分} \tn
  % Row Count 14 (+ 6)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Time Series -----------------------------------------------------------
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Time Series}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{8.4cm}}{时间格式统一} \tn
  % Row Count 1 (+ 1)
  % Row 1
  \SetRowColor{white}
  df{[}'Date'{]} = {[}datetime.strptime(x, "\%b-\%y") for x in df.Period{]} & \seqsplit{新建一列存放时间,依据原本period列中} \seqsplit{Jan-2021的日期形式改成2021-01-01的标准形式} \tn
  % Row Count 7 (+ 6)
  % Row 2
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{8.4cm}}{df\_b{[}'Date'{]} = {[}datetime.strptime(x, "\%d/\%m/\%y") for x in df\_b.iloc{[}:,0{]}{]}} \tn
  % Row Count 9 (+ 2)
  % Row 3
  \SetRowColor{white}
  \mymulticolumn{2}{x{8.4cm}}{列名操作} \tn
  % Row Count 10 (+ 1)
  % Row 4
  \SetRowColor{LightBackground}
  df.columns = {[}"Period", "GoldPrice", "BondYield"{]} & 更改列名 \tn
  % Row Count 13 (+ 3)
  % Row 5
  \SetRowColor{white}
  df.columns = {[}each.strip() for each in df.columns{]} & \seqsplit{去除列名前后的空格} \tn
  % Row Count 16 (+ 3)
  % Row 6
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{8.4cm}}{数据提取} \tn
  % Row Count 17 (+ 1)
  % Row 7
  \SetRowColor{white}
  df\_gold = df{[}df.Gold.isna() == False{]}.loc{[}:, {[}"Top Producers","Gold"{]}{]} & \seqsplit{取指定列的非空行} \tn
  % Row Count 21 (+ 4)
  % Row 8
  \SetRowColor{LightBackground}
  \mymulticolumn{2}{x{8.4cm}}{绘图} \tn
  % Row Count 22 (+ 1)
  % Row 9
  \SetRowColor{white}
  \seqsplit{df.groupby("Date").Gold.mean().plot(kind="line")} & \seqsplit{按日期分组,统计黄金的均值,并绘制折线图} \tn
  % Row Count 25 (+ 3)
  % Row 10
  \SetRowColor{LightBackground}
  df.loc{[}:, {[}"Gold", "Silver","Date"{]}{]}.groupby("Date").mean().plot() & \seqsplit{按日期分组,同时统计黄金、银的均值,并绘制折线图} \tn
  % Row Count 29 (+ 4)
  % Row 11
  \SetRowColor{white}
  df.groupby("month"){[}{[}"column0","column1","column2"{]}{]}.mean().plot() & 写法2 \tn
  % Row Count 33 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Time Series (cont)}} \tn
  % Row 12
  \SetRowColor{LightBackground}
  df.plot(kind="line", y="Gold ETF", x="Date") & \seqsplit{指定横纵坐标绘制折线图} \tn
  % Row Count 3 (+ 3)
  % Row 13
  \SetRowColor{white}
  sns.heatmap(df.loc{[}:, {[}"Gold", "Platinum","Silver"{]}{]}.corr(), annot=True) & \seqsplit{先计算黄金银铂金两两之间的相关关系,并依据结果绘制热力图} \tn
  % Row Count 8 (+ 5)
  % Row 14
  \SetRowColor{LightBackground}
  \seqsplit{sns.barplot(data=df\_platinum},y="Platinum", x ="Top Producers") & \seqsplit{运用seaborn绘图(import} seaborn as sns) \tn
  % Row Count 12 (+ 4)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% ---- Web scraping ----------------------------------------------------------
% NOTE(review): row 6 prints soup.title before row 7 creates `soup`, and
% row 9 reads .text from the find_all result -- confirm the intended order
% and usage.
\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Web scraping}} \tn
  % Row 0
  \SetRowColor{LightBackground}
  import requests & 用于发送 HTTP 请求 \tn
  % Row Count 2 (+ 2)
  % Row 1
  \SetRowColor{white}
  response = requests.get(url) & 获取数据 \tn
  % Row Count 4 (+ 2)
  % Row 2
  \SetRowColor{LightBackground}
  result = response.json() & \seqsplit{加工数据并print} \tn
  % Row Count 6 (+ 2)
  % Row 3
  \SetRowColor{white}
  \mymulticolumn{2}{x{8.4cm}}{Beautiful Soup} \tn
  % Row Count 7 (+ 1)
  % Row 4
  \SetRowColor{LightBackground}
  from bs4 import BeautifulSoup & \seqsplit{解析和处理网页} \tn
  % Row Count 9 (+ 2)
  % Row 5
  \SetRowColor{white}
  r = requests.get(url) & \seqsplit{请求网址,其中url为包含网址的变量} \tn
  % Row Count 12 (+ 3)
  % Row 6
  \SetRowColor{LightBackground}
  print(soup.title) & \seqsplit{获取网页的标题} \tn
  % Row Count 14 (+ 2)
  % Row 7
  \SetRowColor{white}
  soup = \seqsplit{BeautifulSoup(r.content}, 'html.parser') & \seqsplit{解析获取到的内容} \tn
  % Row Count 17 (+ 3)
  % Row 8
  \SetRowColor{LightBackground}
  title= soup.find\_all("h6", "h6 \seqsplit{list-object\_\_heading")} & \seqsplit{运用find\_all查找指定内容第一个变量是tag},第二个变量为class(查找新闻标题) \tn
  % Row Count 23 (+ 6)
  % Row 9
  \SetRowColor{white}
  each\_title = title.text & \seqsplit{通过.text获取标题内容} \tn
  % Row Count 25 (+ 2)
  % Row 10
  \SetRowColor{LightBackground}
  each\_title = each\_title.strip() & \seqsplit{删除标题前后的空格,之后print(each\_title)} \tn
  % Row Count 28 (+ 3)
  % Row 11
  \SetRowColor{white}
  data2 = r.json() & \seqsplit{将请求的结果转换成json字符串} \tn
  % Row Count 30 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}
\vfill
\columnbreak

\begin{tabularx}{8.4cm}{x{4 cm} x{4 cm} }
  \SetRowColor{DarkBackground}
  \mymulticolumn{2}{x{8.4cm}}{\bfseries\textcolor{white}{Web scraping (cont)}} \tn
  % Row 12
  \SetRowColor{LightBackground}
  data2.keys() & \seqsplit{查看json中包含的键} \tn
  % Row Count 2 (+ 2)
  % Row 13
  \SetRowColor{white}
  data2{[}'help'{]} & \seqsplit{help是其中一个键名(keys)} \tn
  % Row Count 4 (+ 2)
  % Row 14
  \SetRowColor{LightBackground}
  data2{[}'result'{]}{[}'records'{]} & \seqsplit{直接通过json中的层级关系查找内容} \tn
  % Row Count 7 (+ 3)
  \hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}

\end{document}