PySpark Interview Question | Retrieve orders that contain multiple products

-- Determine the set of unique orders that contain at least one unit each of product IDs 100 and 200.

from pyspark.sql.types import *
# Sample order line items, one tuple per line item, in the column order
# (order_id, line_item_id, product_id, quantity).
data = [
    # order 1
    (1, 1, 100, 1),
    (1, 2, 200, 1),
    (1, 3, 300, 1),
    # order 2
    (2, 1, 200, 1),
    (2, 2, 500, 1),
    # order 3
    (3, 1, 100, 1),
    (3, 2, 300, 1),
    (3, 3, 400, 1),
    # order 4
    (4, 1, 100, 1),
    (4, 2, 200, 1),
    (4, 3, 300, 1),
    # order 5
    (5, 1, 100, 1),
    (5, 2, 400, 1),
    # order 6 -- product 200 appears on two separate line items
    (6, 1, 200, 1),
    (6, 2, 200, 1),
    (6, 3, 500, 1),
    # order 7 -- product 100 appears on two separate line items
    (7, 1, 200, 1),
    (7, 2, 300, 1),
    (7, 3, 100, 1),
    (7, 4, 100, 1),
]
# Column names, positionally matching the fields of each tuple in `data`.
schema = [
    "order_id",
    "line_item_id",
    "product_id",
    "quantity",
]

# NOTE(review): `spark` is not defined in this snippet -- presumably the
# SparkSession provided by the notebook environment; confirm before running
# this as a standalone script.
df1 = spark.createDataFrame(data, schema=schema)

# NOTE(review): `display` is likewise not defined here -- presumably a
# notebook-provided rendering helper; confirm availability.
display(df1)

Comments

Popular posts from this blog

PySpark Practice Question | Return companies with high profits

PySpark Practice Question | Map Adult and Child based on Age