from pyspark.sql.types import *
# Explicit schema for the store-metadata CSV. Declaring the schema up front
# avoids a costly inference pass and pins each column's type; every field is
# nullable=True so blank cells load as null instead of failing the read.
# NOTE(fix): the original used typographic quotes (‘…’), which are a
# SyntaxError in Python — replaced with ASCII quotes.
Schema = StructType([
    StructField('Store', StringType(), nullable=True),
    StructField('StoreType', StringType(), nullable=True),
    StructField('Assortment', StringType(), nullable=True),
    StructField('CompetitionDistance', FloatType(), nullable=True),
    StructField('CompetitionOpenSinceMonth', IntegerType(), nullable=True),
    StructField('CompetitionOpenSinceYear', IntegerType(), nullable=True),
    StructField('Promo2', IntegerType(), nullable=True),
    StructField('Promo2SinceWeek', IntegerType(), nullable=True),
    StructField('Promo2SinceYear', IntegerType(), nullable=True),
    StructField('PromoInterval', StringType(), nullable=True),
])
# Load store.csv with the explicit schema; "header" tells the reader to treat
# the first row as column names rather than data.
# NOTE(fix): the original used typographic quotes (“…”/‘…’), which are a
# SyntaxError in Python — replaced with ASCII quotes.
df = spark.read.option("header", True).schema(Schema).csv('store.csv')

# Alternative: drop rows that fail to parse while reading by setting the
# read mode to "DROPMALFORMED" (here with schema inference, no explicit schema).
df_1 = (
    spark.read
    .option("header", True)
    .option("mode", "DROPMALFORMED")
    .csv('store.csv')
)

# Print the first rows of the schema-typed DataFrame to stdout.
df.show()
|
Created By
Metadata
Comments
No comments yet. Add yours below!
Add a Comment
Related Cheat Sheets
More Cheat Sheets by datamansam