Posts

PySpark Practice Question | Return companies with high profits

  data = [( 1 , 'Alpha Corp' ), ( 2 , 'Beta LLC' ), ( 3 , 'Gamma Inc' ), ( 4 , 'Delta Ltd' ), ( 5 , 'Epsilon Plc' ), ( 6 , 'Zeta GmbH' ), ( 7 , 'Eta SA' ), ( 8 , 'Theta Srl' ), ( 9 , 'Iota Partners' ), ( 10 , 'Kappa Co' )] schema = [ 'id' , 'name' ] company_df = spark.createDataFrame(data,schema) data1 =[( 1 , 5000 , 8000 ), ( 2 , 3000 , 4500 ), ( 3 , 7000 , 15000 ), ( 4 , 2000 , 3000 ), ( 5 , 8000 , 12000 ), ( 6 , 2500 , 4000 ), ( 7 , 4500 , 6500 ), ( 8 , 1000 , 1500 ), ( 9 , 6000 , 9000 ), ( 10 , 5500 , 8500 )] schema1 =[ 'company_id' , 'expenses' , 'revenue' ] sales_df = spark.createDataFrame(data1,schema1)

PySpark Practice Question | Map Adult and Child based on Age

  from pyspark.sql.types import * data =[( 'A1' , 'Adult' , 55 ), ( 'A2' , 'Adult' , 56 ), ( 'A3' , 'Adult' , 57 ), ( 'A4' , 'Adult' , 58 ), ( 'A5' , 'Adult' , 59 ), ( 'C1' , 'Child' , 11 ), ( 'C2' , 'Child' , 12 ), ( 'C3' , 'Child' , 13 ), ( 'C4' , 'Child' , 14 )] schema =[ "Name" , "Category" , "Age" ] df1=spark.createDataFrame(data,schema) display(df1)

PySpark Interview Question | Retrieve orders that contain multiple products

 --determine the set of unique orders that contain at least one quantity of product IDs 100 and 200. from pyspark.sql.types import * data = [( 1 , 1 , 100 , 1 ),         ( 1 , 2 , 200 , 1 ),         ( 1 , 3 , 300 , 1 ),         ( 2 , 1 , 200 , 1 ),         ( 2 , 2 , 500 , 1 ),         ( 3 , 1 , 100 , 1 ),         ( 3 , 2 , 300 , 1 ),         ( 3 , 3 , 400 , 1 ),         ( 4 , 1 , 100 , 1 ),         ( 4 , 2 , 200 , 1 ),         ( 4 , 3 , 300 , 1 ),         ( 5 , 1 , 100 , 1 ),         ( 5 , 2 , 400 , 1 ),         ( 6 , 1 , 200 , 1 ),         ( 6 , 2 , 200 , 1 ),         ( 6 , 3 , 500 , 1 ), ( 7 , 1 , 200 , 1 ), ( 7 , 2 , 300 , 1 ), ( 7 , 3 , 100 , 1 ), ( 7 , 4 , 100 , 1 )] schema =[ "or...