\documentclass[10pt,a4paper]{article} % Packages \usepackage{fancyhdr} % For header and footer \usepackage{multicol} % Allows multicols in tables \usepackage{tabularx} % Intelligent column widths \usepackage{tabulary} % Used in header and footer \usepackage{hhline} % Border under tables \usepackage{graphicx} % For images \usepackage{xcolor} % For hex colours %\usepackage[utf8x]{inputenc} % For unicode character support \usepackage[T1]{fontenc} % Without this we get weird character replacements \usepackage{colortbl} % For coloured tables \usepackage{setspace} % For line height \usepackage{lastpage} % Needed for total page number \usepackage{seqsplit} % Splits long words. %\usepackage{opensans} % Can't make this work so far. Shame. Would be lovely. \usepackage[normalem]{ulem} % For underlining links % Most of the following are not required for the majority % of cheat sheets but are needed for some symbol support. \usepackage{amsmath} % Symbols \usepackage{MnSymbol} % Symbols \usepackage{wasysym} % Symbols %\usepackage[english,german,french,spanish,italian]{babel} % Languages % Document Info \author{issambd} \pdfinfo{ /Title (importing-data-in-python-i.pdf) /Creator (Cheatography) /Author (issambd) /Subject (Importing Data in Python I Cheat Sheet) } % Lengths and widths \addtolength{\textwidth}{6cm} \addtolength{\textheight}{-1cm} \addtolength{\hoffset}{-3cm} \addtolength{\voffset}{-2cm} \setlength{\tabcolsep}{0.2cm} % Space between columns \setlength{\headsep}{-12pt} % Reduce space between header and content \setlength{\headheight}{85pt} % If less, LaTeX automatically increases it \renewcommand{\footrulewidth}{0pt} % Remove footer line \renewcommand{\headrulewidth}{0pt} % Remove header line \renewcommand{\seqinsert}{\ifmmode\allowbreak\else\-\fi} % Hyphens in seqsplit % This two commands together give roughly % the right line height in the tables \renewcommand{\arraystretch}{1.3} \onehalfspacing % Commands \newcommand{\SetRowColor}[1]{\noalign{\gdef\RowColorName{#1}}\rowcolor{\RowColorName}} % Shortcut for row colour \newcommand{\mymulticolumn}[3]{\multicolumn{#1}{>{\columncolor{\RowColorName}}#2}{#3}} % For coloured multi-cols \newcolumntype{x}[1]{>{\raggedright}p{#1}} % New column types for ragged-right paragraph columns \newcommand{\tn}{\tabularnewline} % Required as custom column type in use % Font and Colours \definecolor{HeadBackground}{HTML}{333333} \definecolor{FootBackground}{HTML}{666666} \definecolor{TextColor}{HTML}{333333} \definecolor{DarkBackground}{HTML}{595959} \definecolor{LightBackground}{HTML}{F4F4F4} \renewcommand{\familydefault}{\sfdefault} \color{TextColor} % Header and Footer \pagestyle{fancy} \fancyhead{} % Set header to blank \fancyfoot{} % Set footer to blank \fancyhead[L]{ \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{C} \SetRowColor{DarkBackground} \vspace{-7pt} {\parbox{\dimexpr\textwidth-2\fboxsep\relax}{\noindent \hspace*{-6pt}\includegraphics[width=5.8cm]{/web/www.cheatography.com/public/images/cheatography_logo.pdf}} } \end{tabulary} \columnbreak \begin{tabulary}{11cm}{L} \vspace{-2pt}\large{\bf{\textcolor{DarkBackground}{\textrm{Importing Data in Python I Cheat Sheet}}}} \\ \normalsize{by \textcolor{DarkBackground}{issambd} via \textcolor{DarkBackground}{\uline{cheatography.com/88527/cs/20287/}}} \end{tabulary} \end{multicols}} \fancyfoot[L]{ \footnotesize \noindent \begin{multicols}{3} \begin{tabulary}{5.8cm}{LL} \SetRowColor{FootBackground} \mymulticolumn{2}{p{5.377cm}}{\bf\textcolor{white}{Cheatographer}} \\ \vspace{-2pt}issambd \\ \uline{cheatography.com/issambd} \\ \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Cheat Sheet}} \\ \vspace{-2pt}Published 16th August, 2019.\\ Updated 16th August, 2019.\\ Page {\thepage} of \pageref{LastPage}. \end{tabulary} \vfill \columnbreak \begin{tabulary}{5.8cm}{L} \SetRowColor{FootBackground} \mymulticolumn{1}{p{5.377cm}}{\bf\textcolor{white}{Sponsor}} \\ \SetRowColor{white} \vspace{-5pt} %\includegraphics[width=48px,height=48px]{dave.jpeg} Measure your website readability!\\ www.readability-score.com \end{tabulary} \end{multicols}} \begin{document} \raggedright \raggedcolumns % Set font size to small. Switch to any value % from this page to resize cheat sheet text: % www.emerson.emory.edu/services/latex/latex_169.html \footnotesize % Small font. \begin{multicols*}{3} \begin{tabularx}{5.377cm}{x{2.23965 cm} x{2.73735 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Text Files I}} \tn % Row 0 \SetRowColor{LightBackground} `open(file\_name, 'r')` & open the file \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `file.read()` & read the file \tn % Row Count 3 (+ 1) % Row 2 \SetRowColor{LightBackground} `file.close()` & close the file \tn % Row Count 4 (+ 1) % Row 3 \SetRowColor{white} `file.closed()` & check if the file is closed \tn % Row Count 6 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{It is a good practice to close the file after reading it when using 'open'} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.23505 cm} x{1.74195 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Text Files II}} \tn % Row 0 \SetRowColor{LightBackground} `with open(file\_name) as file :` & open the file \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `file.read()` & read the file \tn % Row Count 3 (+ 1) % Row 2 \SetRowColor{LightBackground} file.readline() & read line by line \tn % Row Count 5 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{When using the 'with' statement there is no need to close the file} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.53827 cm} x{2.43873 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Flat Files with Numpy I}} \tn % Row 0 \SetRowColor{LightBackground} `import numpy as np` & import numpy \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} \seqsplit{`np.loadtxt(file\_name}, delimiter= ' ')` & importing the file \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} `skiprows=1` & argument to skip a specific row \tn % Row Count 5 (+ 2) % Row 3 \SetRowColor{white} `usecols={[}0, 2{]}` & argument to only show specific columns \tn % Row Count 7 (+ 2) % Row 4 \SetRowColor{LightBackground} `dtype = str' & argument to import the data as string \tn % Row Count 9 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{loadtxt only works with numeric data} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.73275 cm} x{1.24425 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Flat Files with Numpy II}} \tn % Row 0 \SetRowColor{LightBackground} `import numpy as np` & import numpy \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `np.recfromcsv(file, delimiter=",", names=True, dtype=None)` & open the file \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} `np.genfromtxt(file, delimiter=',', names=True, dtype=None)` & open the file \tn % Row Count 6 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{with the functions recfromcsv() and genfromtxt() we are able to import data with different types} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.03597 cm} x{1.94103 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Stata Files}} \tn % Row 0 \SetRowColor{LightBackground} `import pandas as pd` & importing pandas \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `df = \seqsplit{pd.read\_stata('disarea.dta')`} & reading the stata file \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{1.69218 cm} x{3.28482 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Flat Files With Pandas}} \tn % Row 0 \SetRowColor{LightBackground} `import pandas as pd` & import pandas \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \seqsplit{`pd.read\_csv(file)`} & open csv file \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} `nrows=5` & argument for the number of rows to load \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \seqsplit{`header=None`} & argument for no header \tn % Row Count 7 (+ 1) % Row 4 \SetRowColor{LightBackground} `sep='\textbackslash{}t'` & argument to set delimiter \tn % Row Count 8 (+ 1) % Row 5 \SetRowColor{white} \seqsplit{`comment='\#'`} & argument takes characters that comments occur after in the file \tn % Row Count 11 (+ 3) % Row 6 \SetRowColor{LightBackground} \seqsplit{`na\_values='Nothing'`} & argument to recognize a string as a NaN Value \tn % Row Count 13 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.38436 cm} x{1.59264 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Import pickled files}} \tn % Row 0 \SetRowColor{LightBackground} `import pickle` & import the library \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `with open(file\_name, 'rb') as file :` & open file \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} `pickle.load(file)` & read file \tn % Row Count 5 (+ 1) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.38896 cm} x{2.58804 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing Spreadsheet Files}} \tn % Row 0 \SetRowColor{LightBackground} `import pandas as pd` & importing pandas \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \seqsplit{`pd.ExcelFile(file)`} & opening the file \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} `xl.sheet\_names` & exporting the sheet names \tn % Row Count 6 (+ 2) % Row 3 \SetRowColor{white} \seqsplit{`xl.parse(sheet\_name/index)`} & loading a sheet to a dataframe \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} `skiprows={[}index{]}` & skipping a specific row \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} `names={[}List of Names{]}` & naming the sheet's columns \tn % Row Count 12 (+ 2) % Row 6 \SetRowColor{LightBackground} `usecols={[}0,{]}` & parse spesific columns \tn % Row Count 14 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{skiprows, names and useclos are all arguments of the function parse()} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.78712 cm} x{2.18988 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing SAS Files}} \tn % Row 0 \SetRowColor{LightBackground} `from sas7bdat import SAS7BDAT` & importing sas7bdat library \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `import pandas as pd` & importing pandas \tn % Row Count 3 (+ 1) % Row 2 \SetRowColor{LightBackground} `with \seqsplit{SAS7BDAT('file\_name')} as file:` & opening the file \tn % Row Count 5 (+ 2) % Row 3 \SetRowColor{white} \seqsplit{`file.to\_data\_frame()`} & loading the file as dataframe \tn % Row Count 7 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.28942 cm} x{2.68758 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing HDF5 files}} \tn % Row 0 \SetRowColor{LightBackground} `import numpy as np` & import numpy \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} `import h5py` & importing the h5py library \tn % Row Count 4 (+ 2) % Row 2 \SetRowColor{LightBackground} `h5py.File(file, 'r')` & reading the file \tn % Row Count 6 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.08574 cm} x{1.89126 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Importing MATLAB files}} \tn % Row 0 \SetRowColor{LightBackground} `import scipy.io` & importing scipy.io \tn % Row Count 2 (+ 2) % Row 1 \SetRowColor{white} \seqsplit{`cipy.io.loadmat('file\_name')`} & reading the file \tn % Row Count 4 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.78712 cm} x{2.18988 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Relational databases I}} \tn % Row 0 \SetRowColor{LightBackground} `import pandas as pd` & importing pandas \tn % Row Count 1 (+ 1) % Row 1 \SetRowColor{white} `from sqlalchemy import create\_engine` & importing the necessary library \tn % Row Count 3 (+ 2) % Row 2 \SetRowColor{LightBackground} `engine = \seqsplit{create\_engine('databasetype:///name}.databasetype')` & creating an engine \tn % Row Count 6 (+ 3) % Row 3 \SetRowColor{white} `con = engine.connect()` & connecting to the engine \tn % Row Count 8 (+ 2) % Row 4 \SetRowColor{LightBackground} `rs = con.execute('SELECT * FROM Album')` & performe query \tn % Row Count 10 (+ 2) % Row 5 \SetRowColor{white} `df = \seqsplit{pd.DataFrame(rs.fetchall())`} & save as a dataframe \tn % Row Count 12 (+ 2) % Row 6 \SetRowColor{LightBackground} `df.columns = rs.keys` & set columns names \tn % Row Count 13 (+ 1) % Row 7 \SetRowColor{white} `con.close()` & close the connection \tn % Row Count 15 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{The best practice is to close the connection} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{2.63781 cm} x{2.33919 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Relational databases II}} \tn % Row 0 \SetRowColor{LightBackground} `engine = \seqsplit{create\_engine('databasetype:///name}.databasetype')` & creating an engine \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} `with engine.connect() as con:` & connecting to the engine \tn % Row Count 5 (+ 2) % Row 2 \SetRowColor{LightBackground} `rs = con.execute('sql code')` & performe query \tn % Row Count 7 (+ 2) % Row 3 \SetRowColor{white} `df = \seqsplit{pd.DataFrame(rs.fetchmany(size=3))`} & load a number of rows as a dataframe \tn % Row Count 9 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{With 'open' you don't have to close the connection at the end} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} \begin{tabularx}{5.377cm}{x{3.43413 cm} x{1.54287 cm} } \SetRowColor{DarkBackground} \mymulticolumn{2}{x{5.377cm}}{\bf\textcolor{white}{Relational databases III}} \tn % Row 0 \SetRowColor{LightBackground} `engine = \seqsplit{create\_engine('databasetype:///name}.databasetype')` & creating an engine \tn % Row Count 3 (+ 3) % Row 1 \SetRowColor{white} `df = pd.read\_sql\_query('SQL code', engine)` & performe query \tn % Row Count 5 (+ 2) \hhline{>{\arrayrulecolor{DarkBackground}}--} \SetRowColor{LightBackground} \mymulticolumn{2}{x{5.377cm}}{Fastest way to connect to a database and perform query} \tn \hhline{>{\arrayrulecolor{DarkBackground}}--} \end{tabularx} \par\addvspace{1.3em} % That's all folks \end{multicols*} \end{document}