\documentclass[10pt,a4paper]{article} % Packages \usepackage{fancyhdr} % For header and footer \usepackage{multicol} % Allows multicols in tables \usepackage{tabularx} % Intelligent column widths \usepackage{tabulary} % Used in header and footer \usepackage{hhline} % Border under tables \usepackage{graphicx} % For images \usepackage{xcolor} % For hex colours %\usepackage[utf8x]{inputenc} % For unicode character support \usepackage[T1]{fontenc} % Without this we get weird character replacements \usepackage{colortbl} % For coloured tables \usepackage{setspace} % For line height \usepackage{lastpage} % Needed for total page number \usepackage{seqsplit} % Splits long words. %\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely. \usepackage[normalem]{ulem} % For underlining links % Most of the following are not required for the majority % of cheat sheets but are needed for some symbol support. \usepackage{amsmath} % Symbols \usepackage{MnSymbol} % Symbols \usepackage{wasysym} % Symbols %\usepackage[english,german,french,spanish,italian]{babel} % Languages % Document Info \author{Arshdeep} \pdfinfo{ /Title (pandas.pdf) /Creator (Cheatography) /Author (Arshdeep) /Subject (Pandas Cheat Sheet) } % Lengths and widths \addtolength{\textwidth}{6cm} \addtolength{\textheight}{-1cm} \addtolength{\hoffset}{-3cm} \addtolength{\voffset}{-2cm} \setlength{\tabcolsep}{0.2cm} % Space between columns \setlength{\headsep}{-12pt} % Reduce space between header and content \setlength{\headheight}{85pt} % If less, LaTeX automatically increases it \renewcommand{\footrulewidth}{0pt} % Remove footer line \renewcommand{\headrulewidth}{0pt} % Remove header line \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit % This two commands together give roughly % the right line height in the tables \renewcommand{\arraystretch}{1.3} \onehalfspacing % Commands \newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut 
for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{3E8BA3} \definecolor{LightBackground}{HTML}{F2F7F9} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Pandas Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{Arshdeep} via \textcolor{DarkBackground}{\uline{cheatography.com/201979/cs/42963/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}Arshdeep \\ \uline{cheatography.com/arshdeep} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Not Yet Published.\\ Updated 6th April, 2024.\\ Page {\thepage} of \pageref{LastPage}. 
\end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{2} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Introduction to Pandas}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Pandas is a powerful open-source data analysis and manipulation library for Python.} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{It provides data structures and functions to efficiently work with structured data.} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Developed by Wes McKinney in 2008, Pandas is widely used in data science, finance, and research.} \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Key components include Series (1-dimensional labeled array) and DataFrame (2-dimensional labeled data structure).} \tn % Row Count 9 (+ 3) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Pandas simplifies data manipulation tasks such as cleaning, filtering, grouping, and transforming.} \tn % Row Count 11 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{It integrates seamlessly with other libraries like NumPy, Matplotlib, and Scikit-learn.} \tn % Row Count 13 (+ 2) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Pandas is built on top of NumPy, leveraging its fast array processing capabilities.} \tn % Row Count 15 (+ 2) % Row 7 \SetRowColor{white} 
\mymulticolumn{1}{x{8.4cm}}{Offers intuitive and flexible functionalities for data exploration and analysis.} \tn % Row Count 17 (+ 2) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Ideal for tasks ranging from data cleaning and preprocessing to statistical analysis and visualization.} \tn % Row Count 20 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Indexing and Selecting Data}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Use .loc{[}{]} for label-based indexing on rows and columns.} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use .iloc{[}{]} for integer-based indexing on rows and columns.} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Boolean indexing allows selecting data based on conditions.} \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{df{[}column\_name{]} or df.column\_name selects a single column.} \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{df{[}{[}column1, column2{]}{]} selects multiple columns.} \tn % Row Count 9 (+ 1) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{.head(n) returns the first n rows of the DataFrame.} \tn % Row Count 11 (+ 2) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{.tail(n) returns the last n rows of the DataFrame.} \tn % Row Count 12 (+ 1) % Row 7 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{df.at{[}{]} and df.iat{[}{]} for single value selection based on label or integer.} \tn % Row Count 14 (+ 2) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{df.iloc{[}:, {[}0, 1{]}{]} selects all rows and specific columns.} \tn % Row Count 16 (+ 2) % Row 9 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{.query() method for SQL-like queries.} \tn % Row 
Count 17 (+ 1) % Row 10 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{.isin() method for filtering based on multiple values.} \tn % Row Count 19 (+ 2) % Row 11 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Chained indexing should be avoided for assignment (use .loc{[}{]} or .iloc{[}{]} instead).} \tn % Row Count 21 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Dealing with Outliers}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Identify outliers using descriptive statistics (mean, median, standard deviation)} \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Visualize data distribution using box plots, histograms, or scatter plots} \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Use domain knowledge to determine if outliers are valid data points or errors} \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Apply statistical methods like Z-score, IQR (Interquartile Range) to detect outliers} \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Consider different strategies for handling outliers:} \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Removing outliers: Drop outliers from the dataset} \tn % Row Count 11 (+ 1) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Transforming data: Apply mathematical transformations (log, square root) to reduce the impact of outliers} \tn % Row Count 14 (+ 3) % Row 7 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Winsorization: Cap or clamp extreme values to a specified percentile} \tn % Row Count 16 (+ 2) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Evaluate the impact of outlier handling on data analysis and modeling} \tn % Row Count 18 (+ 
2) % Row 9 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Document the rationale behind outlier treatment for reproducibility and transparency} \tn % Row Count 20 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Data Cleaning}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Handling Missing Values:} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{dropna(): Drops rows or columns with missing values.} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{fillna(): Fills missing values with specified values.} \tn % Row Count 5 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{isna() / notna(): Checks for missing or non-missing values.} \tn % Row Count 7 (+ 2) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Removing Duplicates:} \tn % Row Count 8 (+ 1) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{duplicated(): Identifies duplicate rows.} \tn % Row Count 9 (+ 1) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{drop\_duplicates(): Removes duplicate rows.} \tn % Row Count 10 (+ 1) % Row 7 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Data Imputation:} \tn % Row Count 11 (+ 1) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Replace missing values with the mean, median, or mode.} \tn % Row Count 13 (+ 2) % Row 9 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use interpolation methods for time series data.} \tn % Row Count 14 (+ 1) % Row 10 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Data Validation:} \tn % Row Count 15 (+ 1) % Row 11 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Validate data types using dtype.} \tn % Row Count 16 (+ 1) % Row 12 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Use regular expressions to validate string 
data.} \tn % Row Count 17 (+ 1) % Row 13 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Data Standardization:} \tn % Row Count 18 (+ 1) % Row 14 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Convert data to a consistent format (e.g., lowercase).} \tn % Row Count 20 (+ 2) % Row 15 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Normalize numeric data to a common scale.} \tn % Row Count 21 (+ 1) % Row 16 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Data Transformation:} \tn % Row Count 22 (+ 1) % Row 17 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Convert data types using astype().} \tn % Row Count 23 (+ 1) % Row 18 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Apply custom functions using apply().} \tn % Row Count 24 (+ 1) % Row 19 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Outlier Detection:} \tn % Row Count 25 (+ 1) % Row 20 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Visualize data distribution with histograms and box plots.} \tn % Row Count 27 (+ 2) % Row 21 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use statistical methods like z-score or IQR to detect outliers.} \tn % Row Count 29 (+ 2) % Row 22 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Error Correction:} \tn % Row Count 30 (+ 1) \end{tabularx} \par\addvspace{1.3em} \vfill \columnbreak \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Data Cleaning (cont)}} \tn % Row 23 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Handle erroneous values based on domain knowledge.} \tn % Row Count 1 (+ 1) % Row 24 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use external datasets or references for validation.} \tn % Row Count 3 (+ 2) % Row 25 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Handling Inconsistent Data:} \tn % Row Count 4 (+ 1) % Row 26 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Standardize categorical data.} \tn % Row Count 5 (+ 1) % Row 27 
\SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Resolve inconsistencies in naming conventions.} \tn % Row Count 6 (+ 1) % Row 28 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Handling Data Integrity Issues:} \tn % Row Count 7 (+ 1) % Row 29 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Identify and rectify data inconsistencies.} \tn % Row Count 8 (+ 1) % Row 30 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use data profiling tools for anomaly detection.} \tn % Row Count 9 (+ 1) % Row 31 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Error Handling:} \tn % Row Count 10 (+ 1) % Row 32 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use try-except blocks to handle errors during data processing.} \tn % Row Count 12 (+ 2) % Row 33 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Log errors for debugging and tracking purposes.} \tn % Row Count 13 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Grouping and Aggregating Data}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Grouping Data:} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Grouping data based on one or more columns using the groupby() function.} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Example: df.groupby('Column') or df.groupby({[}'Column1', 'Column2'{]}).} \tn % Row Count 5 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Aggregating Data:} \tn % Row Count 6 (+ 1) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Applying aggregate functions like sum, mean, count, etc., to grouped data.} \tn % Row Count 8 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Example: \seqsplit{df.groupby('Column').sum()} or df.groupby('Column').agg(\{'Column2': 'mean', 'Column3': 'sum'\}).} \tn % 
Row Count 11 (+ 3) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Common Aggregate Functions:} \tn % Row Count 12 (+ 1) % Row 7 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{sum(): Calculates the sum of numeric values.} \tn % Row Count 13 (+ 1) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{mean(): Calculates the mean of numeric values.} \tn % Row Count 14 (+ 1) % Row 9 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{count(): Counts non-null values.} \tn % Row Count 15 (+ 1) % Row 10 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{min(), max(): Finds the minimum or maximum value.} \tn % Row Count 16 (+ 1) % Row 11 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{agg(): Allows specifying multiple aggregate functions for different columns.} \tn % Row Count 18 (+ 2) % Row 12 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Custom Aggregation:} \tn % Row Count 19 (+ 1) % Row 13 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Defining custom aggregation functions using agg() or apply().} \tn % Row Count 21 (+ 2) % Row 14 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Example: \seqsplit{df.groupby('Column').agg(custom\_function).}} \tn % Row Count 23 (+ 2) % Row 15 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Grouping with Multiple Functions:} \tn % Row Count 24 (+ 1) % Row 16 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Applying multiple aggregate functions simultaneously.} \tn % Row Count 26 (+ 2) % Row 17 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Example: df.groupby('Column').agg({[}'mean', 'sum'{]}).} \tn % Row Count 28 (+ 2) % Row 18 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Named Aggregation:} \tn % Row Count 29 (+ 1) % Row 19 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Providing custom names for aggregated columns.} \tn % Row Count 30 (+ 1) \end{tabularx} \par\addvspace{1.3em} \vfill \columnbreak \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} 
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Grouping and Aggregating Data (cont)}} \tn % Row 20 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Example: \seqsplit{df.groupby('Column').agg(avg\_salary=('Salary'}, 'mean'), total\_sales=('Sales', 'sum')).} \tn % Row Count 2 (+ 2) % Row 21 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Grouping by Time Periods:} \tn % Row Count 3 (+ 1) % Row 22 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Grouping time series data by specific time periods like months or years.} \tn % Row Count 5 (+ 2) % Row 23 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Example: \seqsplit{df.groupby(pd.Grouper(freq='M')).}} \tn % Row Count 6 (+ 1) % Row 24 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Grouping with Categorical Data:} \tn % Row Count 7 (+ 1) % Row 25 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Grouping based on categorical data types.} \tn % Row Count 8 (+ 1) % Row 26 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Example: \seqsplit{df.groupby('Category').sum().}} \tn % Row Count 9 (+ 1) % Row 27 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Handling Grouped Data:} \tn % Row Count 10 (+ 1) % Row 28 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Accessing grouped data using get\_group() method.} \tn % Row Count 11 (+ 1) % Row 29 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Example: \seqsplit{grouped.get\_group('Group\_Name').}} \tn % Row Count 12 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Working with Excel Files}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Reading Excel Files:} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{pd.read\_excel() function to read Excel files into DataFrame.} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} 
\mymulticolumn{1}{x{8.4cm}}{Specify sheet name, header, index, and column names.} \tn % Row Count 5 (+ 2) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Writing Excel Files:} \tn % Row Count 6 (+ 1) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{DataFrame.to\_excel() method to write DataFrame to an Excel file.} \tn % Row Count 8 (+ 2) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Specify sheet name, index, and column names.} \tn % Row Count 9 (+ 1) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Working with Multiple Sheets:} \tn % Row Count 10 (+ 1) % Row 7 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{pd.ExcelFile() to work with multiple sheets in a single Excel file.} \tn % Row Count 12 (+ 2) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Read specific sheets using parse() or read\_excel().} \tn % Row Count 14 (+ 2) % Row 9 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Handling Excel Formatting:} \tn % Row Count 15 (+ 1) % Row 10 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Preserve formatting while reading with pd.ExcelFile() and xlrd engine.} \tn % Row Count 17 (+ 2) % Row 11 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Formatting may be lost when writing to Excel.} \tn % Row Count 18 (+ 1) % Row 12 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Excel Data Manipulation:} \tn % Row Count 19 (+ 1) % Row 13 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Apply pandas operations (filtering, sorting, grouping) to Excel data after reading.} \tn % Row Count 21 (+ 2) % Row 14 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Convert Excel data into pandas DataFrame for manipulation and analysis.} \tn % Row Count 23 (+ 2) % Row 15 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Exporting DataFrame to Specific Excel Formats:} \tn % Row Count 24 (+ 1) % Row 16 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Specify Excel file format (xls, xlsx) 
while writing.} \tn % Row Count 26 (+ 2) % Row 17 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Use appropriate file extension (.xls or .xlsx) for compatibility.} \tn % Row Count 28 (+ 2) % Row 18 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Handling Large Excel Files:} \tn % Row Count 29 (+ 1) % Row 19 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Utilize the skiprows and nrows parameters when reading large Excel files to load data in manageable chunks; read\_excel() has no chunksize option.} \tn % Row Count 31 (+ 2) \end{tabularx} \par\addvspace{1.3em} \vfill \columnbreak \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Working with Excel Files (cont)}} \tn % Row 20 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Process data incrementally to avoid memory overflow.} \tn % Row Count 2 (+ 2) % Row 21 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Excel File Metadata:} \tn % Row Count 3 (+ 1) % Row 22 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Retrieve Excel file information (sheet names, data types, etc.) 
using pandas metadata functions.} \tn % Row Count 5 (+ 2) % Row 23 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Access metadata through pd.ExcelFile() object or DataFrame attributes.} \tn % Row Count 7 (+ 2) % Row 24 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Excel File Validation:} \tn % Row Count 8 (+ 1) % Row 25 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Validate Excel data integrity using pandas functions (e.g., checking for missing values, data types).} \tn % Row Count 11 (+ 3) % Row 26 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Ensure consistency between Excel data and expected data types for analysis.} \tn % Row Count 13 (+ 2) % Row 27 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Excel File Performance Optimization:} \tn % Row Count 14 (+ 1) % Row 28 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Optimize Excel file reading and writing performance by specifying appropriate options (e.g., engine, dtype).} \tn % Row Count 17 (+ 3) % Row 29 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Utilize parallel processing or asynchronous methods for faster data processing.} \tn % Row Count 19 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{x{3.76 cm} x{4.24 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Reshaping Data}} \tn % Row 0 \SetRowColor{LightBackground} Pivot Tables & Restructuring data using one or more columns as new columns. \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} Melting & Unpivoting data from wide to long format. \tn % Row Count 5 (+ 2) % Row 2 \SetRowColor{LightBackground} Stacking and Unstacking & Manipulating hierarchical indices. \tn % Row Count 7 (+ 2) % Row 3 \SetRowColor{white} Reshaping with Hierarchical Indexing & Restructuring data with MultiIndex. \tn % Row Count 9 (+ 2) % Row 4 \SetRowColor{LightBackground} Transposing Data & Swapping rows and columns. 
\tn % Row Count 11 (+ 2) % Row 5 \SetRowColor{white} Merging and Joining DataFrames & Combining data horizontally based on common columns or indices. \tn % Row Count 14 (+ 3) % Row 6 \SetRowColor{LightBackground} Appending DataFrames & Concatenating data vertically. \tn % Row Count 16 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{x{2.72 cm} x{5.28 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Input/Output}} \tn % Row 0 \SetRowColor{LightBackground} \seqsplit{pd.read\_csv()} & Read CSV files into DataFrame. \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \seqsplit{pd.read\_excel()} & Read Excel files into DataFrame. \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} \seqsplit{pd.read\_sql()} & Read SQL query or database table into DataFrame. \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \seqsplit{pd.read\_json()} & Read JSON files into DataFrame. \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} \seqsplit{pd.read\_html()} & Read HTML tables into DataFrame. \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} \seqsplit{pd.read\_pickle()} & Read pickled (serialized) objects into DataFrame. \tn % Row Count 12 (+ 2) % Row 6 \SetRowColor{LightBackground} \seqsplit{DataFrame.to\_csv()} & Write DataFrame to a CSV file. \tn % Row Count 14 (+ 2) % Row 7 \SetRowColor{white} \seqsplit{DataFrame.to\_excel()} & Write DataFrame to an Excel file. \tn % Row Count 16 (+ 2) % Row 8 \SetRowColor{LightBackground} \seqsplit{DataFrame.to\_sql()} & Write DataFrame to a SQL database. \tn % Row Count 18 (+ 2) % Row 9 \SetRowColor{white} \seqsplit{DataFrame.to\_json()} & Write DataFrame to a JSON file. \tn % Row Count 20 (+ 2) % Row 10 \SetRowColor{LightBackground} \seqsplit{DataFrame.to\_html()} & Write DataFrame to an HTML file. 
\tn % Row Count 22 (+ 2) % Row 11 \SetRowColor{white} \seqsplit{DataFrame.to\_pickle()} & Write DataFrame to a pickled (serialized) object file. \tn % Row Count 25 (+ 3) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{x{3.68 cm} x{4.32 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Performance Optimization}} \tn % Row 0 \SetRowColor{LightBackground} Use Vectorized Operations & Avoid looping through DataFrame rows; instead, utilize Pandas' built-in vectorized operations for faster computations. \tn % Row Count 6 (+ 6) % Row 1 \SetRowColor{white} Optimize Memory Usage & Convert data types to more memory-efficient ones (e.g., using int8 instead of int64 for smaller integers). \tn % Row Count 12 (+ 6) % Row 2 \SetRowColor{LightBackground} Leverage Caching & Utilize caching mechanisms like df.eval() and df.query() for repetitive computations on large datasets to improve performance. \tn % Row Count 18 (+ 6) % Row 3 \SetRowColor{white} Use DataFrame.apply() with caution & It can be slow; explore alternatives like \seqsplit{DataFrame.transform()} or vectorized operations whenever possible. \tn % Row Count 24 (+ 6) % Row 4 \SetRowColor{LightBackground} Pandas Built-in Methods & Utilize built-in Pandas methods that are optimized for performance (e.g., df.groupby().agg() instead of custom aggregation functions). \tn % Row Count 31 (+ 7) \end{tabularx} \par\addvspace{1.3em} \vfill \columnbreak \begin{tabularx}{8.4cm}{x{3.68 cm} x{4.32 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Performance Optimization (cont)}} \tn % Row 5 \SetRowColor{LightBackground} Chunking & When working with large datasets, process data in smaller, manageable chunks to avoid memory errors and improve performance. 
\tn % Row Count 6 (+ 6) % Row 6 \SetRowColor{white} Parallelization & Use libraries like Dask or Modin to parallelize Pandas operations across multiple cores for faster execution. \tn % Row Count 12 (+ 6) % Row 7 \SetRowColor{LightBackground} Profile and Benchmark & Identify bottlenecks in your code using tools like pandas\_profiling or Python's built-in cProfile module, and optimize accordingly. \tn % Row Count 19 (+ 7) % Row 8 \SetRowColor{white} Avoid Method Chaining & While method chaining can make code concise, it can also hinder performance; consider breaking chains into separate statements for better performance. \tn % Row Count 27 (+ 8) % Row 9 \SetRowColor{LightBackground} Pandas Built-in I/O & Use Pandas' optimized file I/O methods (e.g., pd.read\_csv() with appropriate parameters) to efficiently read and write data from various sources. \tn % Row Count 34 (+ 7) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{X} \SetRowColor{DarkBackground} \mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Advanced Indexing}} \tn % Row 0 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{MultiIndexing} \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Creating hierarchical indexes with multiple levels.} \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Accessing and manipulating data with MultiIndexes.} \tn % Row Count 4 (+ 1) % Row 3 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Hierarchical Indexing:} \tn % Row Count 5 (+ 1) % Row 4 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Understanding hierarchical indexes.} \tn % Row Count 6 (+ 1) % Row 5 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Using hierarchical indexes for advanced data organization and analysis.} \tn % Row Count 8 (+ 2) % Row 6 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Indexing with Boolean Masks:} \tn % Row Count 9 (+ 1) % Row 7 
\SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Using boolean arrays to filter data.} \tn % Row Count 10 (+ 1) % Row 8 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Applying boolean masks for advanced data selection.} \tn % Row Count 12 (+ 2) % Row 9 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Indexing with .loc and .iloc:} \tn % Row Count 13 (+ 1) % Row 10 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Utilizing .loc for label-based indexing.} \tn % Row Count 14 (+ 1) % Row 11 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Utilizing .iloc for integer-based indexing.} \tn % Row Count 15 (+ 1) % Row 12 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Setting and Resetting Index:} \tn % Row Count 16 (+ 1) % Row 13 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Setting new indexes for DataFrames.} \tn % Row Count 17 (+ 1) % Row 14 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Resetting indexes to default integer index.} \tn % Row Count 18 (+ 1) % Row 15 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Indexing Performance Optimization:} \tn % Row Count 19 (+ 1) % Row 16 \SetRowColor{LightBackground} \mymulticolumn{1}{x{8.4cm}}{Techniques for optimizing indexing performance.} \tn % Row Count 20 (+ 1) % Row 17 \SetRowColor{white} \mymulticolumn{1}{x{8.4cm}}{Avoiding common pitfalls for efficient indexing.} \tn % Row Count 21 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}-} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{8.4cm}{x{3.68 cm} x{4.32 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Tips and Tricks for Efficient Pandas Usage}} \tn % Row 0 \SetRowColor{LightBackground} Use Vectorized Operations & Utilize built-in functions and operations for faster computation \tn % Row Count 4 (+ 4) % Row 1 \SetRowColor{white} Avoid Iteration over Rows & Use apply() with vectorized functions instead of looping through rows. 
% Ends the row begun on the previous line, then Tips and Tricks rows 2-9.
% One statement per line so `%` comments end where they should.
\tn
% Row Count 8 (+ 4)
% Row 2
\SetRowColor{LightBackground}
Use Method Chaining & Combine multiple operations in a single statement for cleaner code. \tn
% Row Count 12 (+ 4)
% Row 3
\SetRowColor{white}
Optimize Memory Usage & Convert data types to appropriate ones (int64 to int32, etc.) to reduce memory usage. \tn
% Row Count 17 (+ 5)
% Row 4
\SetRowColor{LightBackground}
Utilize Pandas Built-in Functions & Explore and leverage the extensive set of built-in functions for common tasks. \tn
% Row Count 21 (+ 4)
% Row 5
\SetRowColor{white}
Explore Pandas Documentation & Refer to the official documentation for detailed explanations and examples. \tn
% Row Count 25 (+ 4)
% Row 6
\SetRowColor{LightBackground}
Profile Code & Use profiling tools like cProfile to identify bottlenecks and optimize performance. \tn
% Row Count 29 (+ 4)
% Row 7
\SetRowColor{white}
Leverage Cython and Numba & For computationally intensive tasks, consider using Cython or Numba to speed up operations. \tn
% Row Count 34 (+ 5)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Tips and Tricks table after the column break.
\begin{tabularx}{8.4cm}{x{3.68 cm} x{4.32 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Tips and Tricks for Efficient Pandas Usage (cont)}} \tn
% Row 8
\SetRowColor{LightBackground}
Parallelize Operations & Utilize parallel processing with libraries like Dask or Modin for large datasets. \tn
% Row Count 4 (+ 4)
% Row 9
\SetRowColor{white}
Keep Code Readable & Prioritize readability and maintainability while optimizing performance.
% Ends the last row and closes the table continued from the previous line.
% One statement per line so `%` comments end where they should.
\tn
% Row Count 8 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Working with JSON and XML Data: single-column table; rows ending in a
% colon are topic headings. The XML rows name pd.read_xml() / to_xml(),
% which pandas has provided since version 1.3.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Working with JSON and XML Data}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Reading JSON Data:} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{pd.read\_json() to read JSON files into a DataFrame.} \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Specify orient parameter for different JSON structures ('records', 'split', 'index', 'columns').} \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Writing JSON Data:} \tn
% Row Count 6 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{to\_json() method to convert DataFrame to JSON format.} \tn
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Specify orient parameter for desired JSON structure.} \tn
% Row Count 10 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Reading XML Data:} \tn
% Row Count 11 (+ 1)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{pd.read\_xml() (pandas 1.3+) to read XML files into a DataFrame.} \tn
% Row Count 13 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Parse irregular XML with ElementTree or lxml.} \tn
% Row Count 14 (+ 1)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Writing XML Data:} \tn
% Row Count 15 (+ 1)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{to\_xml() (pandas 1.3+) writes a DataFrame as XML.} \tn
% Row Count 16 (+ 1)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{For custom layouts, build the XML with xml.etree.ElementTree or lxml.} \tn
% Row Count 18 (+ 2)
% Row 12
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Handling Nested JSON/XML:} \tn
% Row Count 19 (+ 1)
% Row 13
\SetRowColor{white}
% Working with JSON and XML Data, rows 13-20 (the row colour for row 13 is
% set on the previous line). One statement per line so `%` comments end
% where they should.
\mymulticolumn{1}{x{8.4cm}}{Use normalization techniques like pd.json\_normalize() to handle nested JSON structures.} \tn
% Row Count 21 (+ 2)
% Row 14
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{For XML, flatten the hierarchical structure manually or use appropriate libraries.} \tn
% Row Count 23 (+ 2)
% Row 15
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Working with APIs:} \tn
% Row Count 24 (+ 1)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Retrieve JSON data from APIs using libraries like requests.} \tn
% Row Count 26 (+ 2)
% Row 17
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Convert JSON responses to DataFrame for analysis.} \tn
% Row Count 27 (+ 1)
% Row 18
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Performance Considerations:} \tn
% Row Count 28 (+ 1)
% Row 19
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{JSON and XML parsing can be slower compared to other formats like CSV.} \tn
% Row Count 30 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the JSON/XML table after the column break.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Working with JSON and XML Data (cont)}} \tn
% Row 20
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Optimize parsing methods for large datasets to improve performance.} \tn
% Row Count 2 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Working with Text Data: single-column table of string-handling notes.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Working with Text Data}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Pandas provides powerful tools for working with text data within Series and DataFrame objects.} \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{str accessor allows accessing string methods for Series containing strings.} \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Common string methods
% Remainder of the cell text begun on the previous line, then Working with
% Text Data rows 3-11. One statement per line so `%` comments end where
% they should.
include lower(), upper(), strip(), split(), replace(), etc.} \tn
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{contains() method checks if a pattern or substring exists in each element of a Series.} \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{extract() method extracts substrings using regular expressions.} \tn
% Row Count 10 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{split() method splits strings into lists of substrings based on a delimiter.} \tn
% Row Count 12 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{join() method joins lists of strings into a single string with a specified delimiter.} \tn
% Row Count 14 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{get\_dummies() method creates dummy variables for categorical text data.} \tn
% Row Count 16 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{replace() method replaces values based on a mapping or regular expression.} \tn
% Row Count 18 (+ 2)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{find() method finds the first occurrence of a substring in each element of a Series.} \tn
% Row Count 20 (+ 2)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{count() method counts occurrences of a substring in each element of a Series.} \tn
% Row Count 22 (+ 2)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{startswith() and endswith() methods check if each element in a Series starts or ends with a specified substring.} \tn
% Row Count 25 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Handling Categorical Data: single-column table.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Handling Categorical Data}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Convert categorical data to numerical representation using pd.factorize() or pd.get\_dummies()} \tn
% Row Count
% 2 (+ 2)  -- tail of the "Row Count" comment that wraps from the previous
% line; kept as a comment so it is not typeset as cell text.
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Utilize astype('category') to convert columns to categorical dtype} \tn
% Row Count 4 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Handle ordinal data using Categorical dtype with specified order} \tn
% Row Count 6 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Use pd.cut() for binning numerical data into discrete intervals} \tn
% Row Count 8 (+ 2)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Employ pd.qcut() for quantile-based discretization} \tn
% Row Count 9 (+ 1)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Encode categorical variables using LabelEncoder or OneHotEncoder from sklearn.preprocessing} \tn
% Row Count 11 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Handle high cardinality categorical data using techniques like frequency encoding or target encoding} \tn
% Row Count 13 (+ 2)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Use pd.Categorical() to create categorical data with custom categories and ordering} \tn
% Row Count 15 (+ 2)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Visualization with Pandas: two-column table (topic | description).
\begin{tabularx}{8.4cm}{x{2.72 cm} x{5.28 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Visualization with Pandas}} \tn
% Row 0
\SetRowColor{LightBackground}
Plotting Functions: & Pandas provides easy-to-use plotting functions that leverage Matplotlib under the hood. Use .plot() method on Series or DataFrame to create various types of plots like line, bar, histogram, scatter, etc. \tn
% Row Count 8 (+ 8)
% Row 1
\SetRowColor{white}
\seqsplit{Customization:} & You can customize plots by passing parameters to the plotting functions such as title, labels, colors, styles, etc. Additionally, you can directly use Matplotlib functions to fine-tune your plots further.
% Ends the row begun on the previous line, then Visualization rows 2-5.
% One statement per line so `%` comments end where they should.
\tn
% Row Count 16 (+ 8)
% Row 2
\SetRowColor{LightBackground}
Subplots: & Pandas supports creating subplots from DataFrame or Series. Simply call .plot() on different columns or subsets of data to create multiple plots in the same figure. \tn
% Row Count 23 (+ 7)
% Row 3
\SetRowColor{white}
% A third-party plotting backend is only used once it has been selected;
% installing the library alone does not switch pandas away from Matplotlib.
Interactive Plots: & Pandas supports integration with libraries like Plotly and Bokeh for creating interactive plots. Install the library and set the plotting.backend option (e.g., to 'plotly') to generate interactive visualizations. \tn
% Row Count 31 (+ 8)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Visualization table after the column break.
\begin{tabularx}{8.4cm}{x{2.72 cm} x{5.28 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Visualization with Pandas (cont)}} \tn
% Row 4
\SetRowColor{LightBackground}
Time Series Plotting: & Pandas makes it easy to plot time series data with intelligent date formatting and labeling. Use .plot() with time-indexed data to create informative time series plots. \tn
% Row Count 7 (+ 7)
% Row 5
\SetRowColor{white}
Seaborn Integration: & Seaborn, a statistical data visualization library, integrates seamlessly with Pandas. You can use Seaborn functions directly on Pandas objects to create more complex and visually appealing plots.
% Ends the last row and closes the table continued from the previous line.
% One statement per line so `%` comments end where they should.
\tn
% Row Count 15 (+ 8)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Time Series Data: single-column table; rows ending in a colon are topic
% headings for the rows beneath them.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Time Series Data}} \tn
% Row 0
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Introduction:} \tn
% Row Count 1 (+ 1)
% Row 1
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Time series data is sequential data indexed by timestamps.} \tn
% Row Count 3 (+ 2)
% Row 2
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Pandas provides robust tools for working with time series data efficiently.} \tn
% Row Count 5 (+ 2)
% Row 3
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Date-Time Index:} \tn
% Row Count 6 (+ 1)
% Row 4
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Pandas offers specialized data structures like DatetimeIndex to handle time series indexing.} \tn
% Row Count 8 (+ 2)
% Row 5
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Convert date strings to DatetimeIndex using pd.to\_datetime().} \tn
% Row Count 10 (+ 2)
% Row 6
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Resampling and Frequency Conversion:} \tn
% Row Count 11 (+ 1)
% Row 7
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Adjust time series data to different frequencies using resample().} \tn
% Row Count 13 (+ 2)
% Row 8
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Aggregating or downsampling time series data to a lower frequency or upsampling to a higher frequency.} \tn
% Row Count 16 (+ 3)
% Row 9
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Time Shifting:} \tn
% Row Count 17 (+ 1)
% Row 10
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Shift index by a specified number of periods with shift().} \tn
% Row Count 19 (+ 2)
% Row 11
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Useful for calculating differences over time or shifting data for alignment.} \tn
% Row Count 21 (+ 2)
% Row 12
\SetRowColor{LightBackground}
% Time Series Data, rows 12-24 (the row colour for row 12 is set on the
% previous line). One statement per line so `%` comments end where they
% should.
\mymulticolumn{1}{x{8.4cm}}{Rolling and Expanding Windows:} \tn
% Row Count 22 (+ 1)
% Row 13
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Compute rolling statistics (mean, sum, etc.) over a specified window with rolling().} \tn
% Row Count 24 (+ 2)
% Row 14
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Calculate expanding statistics over the entire history of a time series with expanding().} \tn
% Row Count 26 (+ 2)
% Row 15
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Time Zone Handling:} \tn
% Row Count 27 (+ 1)
% Row 16
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Localize timestamps to a specific time zone using tz\_localize().} \tn
% Row Count 29 (+ 2)
% Row 17
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Convert timestamps between time zones with tz\_convert().} \tn
% Row Count 31 (+ 2)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Time Series Data table after the column break.
\begin{tabularx}{8.4cm}{X}
\SetRowColor{DarkBackground}
\mymulticolumn{1}{x{8.4cm}}{\bf\textcolor{white}{Time Series Data (cont)}} \tn
% Row 18
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Offset Aliases:} \tn
% Row Count 1 (+ 1)
% Row 19
\SetRowColor{white}
% pandas 2.2 deprecates the month/year aliases 'M' and 'Y' in favour of
% 'ME' and 'YE'; both spellings are mentioned for readers on older releases.
\mymulticolumn{1}{x{8.4cm}}{Use offset aliases like 'D' for day, 'ME' for month end, 'YE' for year end ('M' and 'Y' before pandas 2.2) to perform frequency conversions easily.} \tn
% Row Count 4 (+ 3)
% Row 20
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Time Series Plotting:} \tn
% Row Count 5 (+ 1)
% Row 21
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Pandas provides convenient methods for plotting time series data directly from DataFrames.} \tn
% Row Count 7 (+ 2)
% Row 22
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Use plot() function with a datetime index for quick visualization.} \tn
% Row Count 9 (+ 2)
% Row 23
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Date Range Generation:} \tn
% Row Count 10 (+ 1)
% Row 24
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Generate date ranges using date\_range() for easy
% Remainder of the cell text begun on the previous line, then Time Series
% Data rows 25-27. One statement per line so `%` comments end where they
% should.
creation of time series indices.} \tn
% Row Count 12 (+ 2)
% Row 25
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Specify start date, end date, frequency, and time zone parameters.} \tn
% Row Count 14 (+ 2)
% Row 26
\SetRowColor{LightBackground}
\mymulticolumn{1}{x{8.4cm}}{Time Series Analysis:} \tn
% Row Count 15 (+ 1)
% Row 27
\SetRowColor{white}
\mymulticolumn{1}{x{8.4cm}}{Perform time series analysis including trend analysis, seasonality detection, and forecasting using Pandas in conjunction with other libraries like Statsmodels.} \tn
% Row Count 19 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}-}
\end{tabularx}
\par\addvspace{1.3em}

% Merging and Joining DataFrames: two-column table (operation | description).
\begin{tabularx}{8.4cm}{x{3.6 cm} x{4.4 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Merging and Joining DataFrames}} \tn
% Row 0
\SetRowColor{LightBackground}
Concatenation & Combining DataFrames along rows or columns. \tn
% Row Count 2 (+ 2)
% Row 1
\SetRowColor{white}
Merge & Combining DataFrames based on common columns using SQL-like joins such as inner, outer, left, and right joins. \tn
% Row Count 7 (+ 5)
% Row 2
\SetRowColor{LightBackground}
Join & Convenient method for merging DataFrames based on index labels. \tn
% Row Count 10 (+ 3)
% Row 3
\SetRowColor{white}
Handling Duplicate Columns & Dealing with duplicate column names when merging DataFrames. \tn
% Row Count 13 (+ 3)
% Row 4
\SetRowColor{LightBackground}
Suffixes & Specifying suffixes for overlapping column names in the merged DataFrame. \tn
% Row Count 17 (+ 4)
% Row 5
\SetRowColor{white}
Merging on Index & Merging DataFrames based on their index values. \tn
% Row Count 20 (+ 3)
% Row 6
\SetRowColor{LightBackground}
Joining on Index & Joining DataFrames based on their index labels. \tn
% Row Count 23 (+ 3)
% Row 7
\SetRowColor{white}
Concatenating DataFrames & Combining multiple DataFrames along rows or columns using the pd.concat() function.
% Ends the row begun on the previous line, then Merging and Joining rows
% 8-16. One statement per line so `%` comments end where they should.
\tn
% Row Count 27 (+ 4)
% Row 8
\SetRowColor{LightBackground}
Merging with Different Join Types & Utilizing different types of joins (inner, outer, left, right) to merge DataFrames using the pd.merge() function. \tn
% Row Count 33 (+ 6)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Merging and Joining table after the column break.
\begin{tabularx}{8.4cm}{x{3.6 cm} x{4.4 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Merging and Joining DataFrames (cont)}} \tn
% Row 9
\SetRowColor{LightBackground}
Joining on Index & Merging DataFrames based on their index labels using the .join() method. \tn
% Row Count 4 (+ 4)
% Row 10
\SetRowColor{white}
Handling Overlapping Column Names & Managing duplicate or overlapping column names during merging. \tn
% Row Count 7 (+ 3)
% Row 11
\SetRowColor{LightBackground}
Merging on Multiple Columns & Performing merges based on multiple columns in the DataFrames. \tn
% Row Count 10 (+ 3)
% Row 12
\SetRowColor{white}
Suffixes & Specifying suffixes for overlapping column names to distinguish them in the merged DataFrame. \tn
% Row Count 15 (+ 5)
% Row 13
\SetRowColor{LightBackground}
Merging on Index & Merging DataFrames based on their index values using the .merge() method with the 'left\_index' and 'right\_index' parameters. \tn
% Row Count 21 (+ 6)
% NOTE(review): rows 14-16 below repeat rows 9-11 almost verbatim; they are
% kept so no content is lost -- confirm whether the duplication is intended.
% Row 14
\SetRowColor{white}
Joining on Index & Joining DataFrames based on their index labels using the .join() method. \tn
% Row Count 25 (+ 4)
% Row 15
\SetRowColor{LightBackground}
Handling Overlapping Column Names & Managing duplicate or overlapping column names during merging. \tn
% Row Count 28 (+ 3)
% Row 16
\SetRowColor{white}
Merging on Multiple Columns & Performing merges based on multiple columns in the DataFrames.
% Ends the last row and closes the table continued from the previous line.
% One statement per line so `%` comments end where they should.
\tn
% Row Count 31 (+ 3)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Data Transformation: two-column table (operation | description).
\begin{tabularx}{8.4cm}{x{3.6 cm} x{4.4 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Data Transformation}} \tn
% Row 0
\SetRowColor{LightBackground}
Applying Functions & Use .apply() to apply a function along an axis of the DataFrame or Series. \tn
% Row Count 4 (+ 4)
% Row 1
\SetRowColor{white}
Mapping & Transform values in a Series or DataFrame using a mapping or a function. \tn
% Row Count 8 (+ 4)
% Row 2
\SetRowColor{LightBackground}
Replacing Values & Replace specific values in a DataFrame or Series with other values. \tn
% Row Count 12 (+ 4)
% Row 3
\SetRowColor{white}
Dropping Columns or Rows & Use .drop() to remove specified rows or columns from a DataFrame. \tn
% Row Count 15 (+ 3)
% Row 4
\SetRowColor{LightBackground}
Adding/Removing Columns & Add or remove columns from a DataFrame using assignment or the .drop() method. \tn
% Row Count 19 (+ 4)
% Row 5
\SetRowColor{white}
Renaming Columns & Rename columns in a DataFrame using the .rename() method. \tn
% Row Count 22 (+ 3)
% Row 6
\SetRowColor{LightBackground}
Duplicating Data & Create copies of data using the .copy() method. \tn
% Row Count 25 (+ 3)
% Row 7
\SetRowColor{white}
Changing Data Types & Convert data types of columns using the .astype() method. \tn
% Row Count 28 (+ 3)
% Row 8
\SetRowColor{LightBackground}
% cut is a top-level pandas function (pd.cut), not a DataFrame/Series method.
Discretization and Binning & Convert continuous data into discrete intervals using the pd.cut() function. \tn
% Row Count 32 (+ 4)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Data Transformation table after the column break.
\begin{tabularx}{8.4cm}{x{3.6 cm} x{4.4 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Data Transformation (cont)}} \tn
% Row 9
\SetRowColor{LightBackground}
Encoding Categorical Variables & Convert categorical variables into numerical representations using techniques like one-hot encoding or label encoding.
% Ends the row begun on the previous line, then Data Transformation rows
% 10-11. One statement per line so `%` comments end where they should.
\tn
% Row Count 6 (+ 6)
% Row 10
\SetRowColor{white}
Normalization and Standardization & Scale numeric data to a standard range or distribution. \tn
% Row Count 9 (+ 3)
% Row 11
\SetRowColor{LightBackground}
\seqsplit{Merging/Concatenating} DataFrames & Combine multiple DataFrames either by concatenating or merging based on common columns or indices. \tn
% Row Count 14 (+ 5)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Basic Operations: two-column table (operation | description).
\begin{tabularx}{8.4cm}{x{3.12 cm} x{4.88 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Basic Operations}} \tn
% Row 0
\SetRowColor{LightBackground}
Slicing & Selecting subsets of data using row and column labels or positions. \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
Filtering & Applying conditions to extract specific rows or columns from a DataFrame. \tn
% Row Count 7 (+ 4)
% Row 2
\SetRowColor{LightBackground}
Sorting & Arranging data in ascending or descending order based on one or more columns. \tn
% Row Count 11 (+ 4)
% Row 3
\SetRowColor{white}
Applying Functions & Applying functions element-wise to data, either built-in or custom functions. \tn
% Row Count 15 (+ 4)
% Row 4
\SetRowColor{LightBackground}
Descriptive Statistics & Calculating basic statistical measures like mean, median, mode, etc., for data exploration. \tn
% Row Count 19 (+ 4)
% Row 5
\SetRowColor{white}
Data Alignment & Automatically aligning data based on row and column labels when performing operations between different DataFrames or Series. \tn
% Row Count 25 (+ 6)
% Row 6
\SetRowColor{LightBackground}
Element-wise Operations & Performing operations like addition, subtraction, multiplication, and division on individual elements of a DataFrame or Series.
% Ends the row begun on the previous line and closes the first part of the
% Basic Operations table. One statement per line so `%` comments end where
% they should.
\tn
% Row Count 31 (+ 6)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Basic Operations table after the column break.
\begin{tabularx}{8.4cm}{x{3.12 cm} x{4.88 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Basic Operations (cont)}} \tn
% Row 7
\SetRowColor{LightBackground}
Aggregating Data & Computing summary statistics like sum, mean, count, etc., over specified axes of the data. \tn
% Row Count 4 (+ 4)
% Row 8
\SetRowColor{white}
Filling Missing Values & Handling missing or NaN values by filling them with a specified value or using methods like forward-fill or backward-fill. \tn
% Row Count 10 (+ 6)
% Row 9
\SetRowColor{LightBackground}
Applying Conditional Logic & Using conditions to assign values or modify data based on certain criteria. \tn
% Row Count 14 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% Data Structures: two-column table (concept | description).
\begin{tabularx}{8.4cm}{x{3.44 cm} x{4.56 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Data Structures}} \tn
% Row 0
\SetRowColor{LightBackground}
Series & One-dimensional labeled array that can hold any data type. \tn
% Row Count 3 (+ 3)
% Row 1
\SetRowColor{white}
DataFrame & Two-dimensional labeled data structure with columns of potentially different types, akin to a spreadsheet or SQL table. \tn
% Row Count 9 (+ 6)
% Row 2
\SetRowColor{LightBackground}
Indexing and Selecting Data & Techniques for accessing specific elements, rows, or columns within Series or DataFrame. \tn
% Row Count 13 (+ 4)
% Row 3
\SetRowColor{white}
Basic Operations & Fundamental operations such as slicing, filtering, and sorting data for effective manipulation. \tn
% Row Count 18 (+ 5)
% Row 4
\SetRowColor{LightBackground}
Data Cleaning & Strategies for handling missing values, duplicates, and other inconsistencies within the data. \tn
% Row Count 23 (+ 5)
% Row 5
\SetRowColor{white}
Data Transformation & Methods for applying functions, mapping values, and transforming data for analysis.
% Ends the row begun on the previous line, then Data Structures rows 6-9
% and the end of the document. One statement per line so `%` comments end
% where they should -- in particular the closing comment must not hide
% \end{multicols*} and \end{document}.
\tn
% Row Count 27 (+ 4)
% Row 6
\SetRowColor{LightBackground}
Grouping and Aggregating Data & Techniques for grouping data based on specified criteria and performing aggregations like sum, mean, count, etc. \tn
% Row Count 33 (+ 6)
\end{tabularx}
\par\addvspace{1.3em}

\vfill
\columnbreak
% Continuation of the Data Structures table after the column break.
\begin{tabularx}{8.4cm}{x{3.44 cm} x{4.56 cm} }
\SetRowColor{DarkBackground}
\mymulticolumn{2}{x{8.4cm}}{\bf\textcolor{white}{Data Structures (cont)}} \tn
% Row 7
\SetRowColor{LightBackground}
Merging and Joining DataFrames & Methods for combining multiple DataFrames based on common columns or indices. \tn
% Row Count 4 (+ 4)
% Row 8
\SetRowColor{white}
Reshaping Data & Tools for reshaping data using pivot tables, melting, and other techniques to suit analytical needs. \tn
% Row Count 9 (+ 5)
% Row 9
\SetRowColor{LightBackground}
Time Series Data & Handling and analyzing time-based data using pandas' specialized functionalities. \tn
% Row Count 13 (+ 4)
\hhline{>{\arrayrulecolor{DarkBackground}}--}
\end{tabularx}
\par\addvspace{1.3em}

% That's all folks
\end{multicols*}
\end{document}