PySpark Interview Question | Retrieve orders that sold multiple products
-- Determine the set of unique orders that contain at least one unit of both product ID 100 and product ID 200.
from pyspark.sql.types import *
# Sample order line items used as input for the exercise.
# Each tuple is (order_id, line_item_id, product_id, quantity),
# matching the column names listed in `schema` below.
data = [
    # order 1
    (1, 1, 100, 1),
    (1, 2, 200, 1),
    (1, 3, 300, 1),
    # order 2
    (2, 1, 200, 1),
    (2, 2, 500, 1),
    # order 3
    (3, 1, 100, 1),
    (3, 2, 300, 1),
    (3, 3, 400, 1),
    # order 4
    (4, 1, 100, 1),
    (4, 2, 200, 1),
    (4, 3, 300, 1),
    # order 5
    (5, 1, 100, 1),
    (5, 2, 400, 1),
    # order 6 (product 200 appears on two separate line items)
    (6, 1, 200, 1),
    (6, 2, 200, 1),
    (6, 3, 500, 1),
    # order 7 (product 100 appears on two separate line items)
    (7, 1, 200, 1),
    (7, 2, 300, 1),
    (7, 3, 100, 1),
    (7, 4, 100, 1),
]

# Column names only; no explicit types are given here.
schema = [
    "order_id",
    "line_item_id",
    "product_id",
    "quantity",
]

# NOTE(review): `spark` and `display` are not defined in this snippet;
# presumably they are supplied by the notebook runtime -- confirm.
df1 = spark.createDataFrame(data, schema)
display(df1)
Comments
Post a Comment