Skip to content

Commit df35580

Browse files
author
bitoollearner
committed
LeetCode Pyspark Question Solution
1 parent a3fa145 commit df35580

File tree

599 files changed

+5382
-961
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

599 files changed

+5382
-961
lines changed

.databricks/commit_outputs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
# .databricks/commit_outputs file is used to control whether a notebook's output should be included when committing changes to Git.
3+
# Outputs are included/excluded at a per notebook level, committing select outputs within a specific notebook is not feasible.
4+
#
5+
# Reference:
6+
# each line in .databricks/commit_outputs represents a glob pattern
7+
# line starting with # represents a comment
8+
# a notebook that matches a glob pattern will have its output included
9+
# line starting with ! represents an exclusion pattern (e.g. !folder_a/* will exclude output for all notebooks in folder_a)
10+
# if a notebook path matches multiple glob patterns, the last matching glob pattern will be used
11+
# note that glob patterns are case sensitive
12+
# more examples can be found here: https://docs.databricks.com/repos/repos-setup.html#patterns-for-a-repo-config-file
13+
14+
# The following pattern is active and includes output for all notebooks; comment it out to stop including outputs
15+
**
16+

Solved/1045. Customers Who Bought All Products (Medium)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,16 @@
130130
"title": ""
131131
}
132132
},
133-
"outputs": [],
133+
"outputs": [
134+
{
135+
"output_type": "stream",
136+
"name": "stdout",
137+
"output_type": "stream",
138+
"text": [
139+
"+-----------+-----------+\n|customer_id|product_key|\n+-----------+-----------+\n| 1| 5|\n| 2| 6|\n| 3| 5|\n| 3| 6|\n| 1| 6|\n+-----------+-----------+\n\n+-----------+\n|product_key|\n+-----------+\n| 5|\n| 6|\n+-----------+\n\n"
140+
]
141+
}
142+
],
134143
"source": [
135144
"customer_data_1045 = [\n",
136145
" (1, 5),\n",
@@ -191,7 +200,16 @@
191200
"title": ""
192201
}
193202
},
194-
"outputs": [],
203+
"outputs": [
204+
{
205+
"output_type": "stream",
206+
"name": "stdout",
207+
"output_type": "stream",
208+
"text": [
209+
"+-----------+\n|customer_id|\n+-----------+\n| 1|\n| 3|\n+-----------+\n\n"
210+
]
211+
}
212+
],
195213
"source": [
196214
"customer_df_1045\\\n",
197215
" .groupBy(\"customer_id\").agg(\n",

Solved/1050. Actors and Directors Who Cooperated At Least Three Times (Easy)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,16 @@
115115
"title": ""
116116
}
117117
},
118-
"outputs": [],
118+
"outputs": [
119+
{
120+
"output_type": "stream",
121+
"name": "stdout",
122+
"output_type": "stream",
123+
"text": [
124+
"+--------+-----------+---------+\n|actor_id|director_id|timestamp|\n+--------+-----------+---------+\n| 1| 1| 0|\n| 1| 1| 1|\n| 1| 1| 2|\n| 1| 2| 3|\n| 1| 2| 4|\n| 2| 1| 5|\n| 2| 1| 6|\n+--------+-----------+---------+\n\n"
125+
]
126+
}
127+
],
119128
"source": [
120129
"actor_director_data_1050 = [\n",
121130
" (1, 1, 0),\n",
@@ -148,7 +157,16 @@
148157
"title": ""
149158
}
150159
},
151-
"outputs": [],
160+
"outputs": [
161+
{
162+
"output_type": "stream",
163+
"name": "stdout",
164+
"output_type": "stream",
165+
"text": [
166+
"+--------+-----------+\n|actor_id|director_id|\n+--------+-----------+\n| 1| 1|\n+--------+-----------+\n\n"
167+
]
168+
}
169+
],
152170
"source": [
153171
"actor_director_df_1050\\\n",
154172
" .groupBy(\"actor_id\", \"director_id\")\\\n",

Solved/1068. Product Sales Analysis I (Easy)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,16 @@
139139
"title": ""
140140
}
141141
},
142-
"outputs": [],
142+
"outputs": [
143+
{
144+
"output_type": "stream",
145+
"name": "stdout",
146+
"output_type": "stream",
147+
"text": [
148+
"+-------+----------+----+--------+-----+\n|sale_id|product_id|year|quantity|price|\n+-------+----------+----+--------+-----+\n| 1| 100|2008| 10| 5000|\n| 2| 100|2009| 12| 5000|\n| 7| 200|2011| 15| 9000|\n+-------+----------+----+--------+-----+\n\n+----------+------------+\n|product_id|product_name|\n+----------+------------+\n| 100| Nokia|\n| 200| Apple|\n| 300| Samsung|\n+----------+------------+\n\n"
149+
]
150+
}
151+
],
143152
"source": [
144153
"sales_data_1068 = [\n",
145154
" (1, 100, 2008, 10, 5000),\n",
@@ -178,7 +187,16 @@
178187
"title": ""
179188
}
180189
},
181-
"outputs": [],
190+
"outputs": [
191+
{
192+
"output_type": "stream",
193+
"name": "stdout",
194+
"output_type": "stream",
195+
"text": [
196+
"+------------+----+-----+\n|product_name|year|price|\n+------------+----+-----+\n| Nokia|2009| 5000|\n| Nokia|2008| 5000|\n| Apple|2011| 9000|\n+------------+----+-----+\n\n"
197+
]
198+
}
199+
],
182200
"source": [
183201
"sales_df_1068\\\n",
184202
" .join(product_df_1068, on=\"product_id\", how=\"inner\")\\\n",

Solved/1069. Product Sales Analysis II (Easy)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,16 @@
133133
"title": ""
134134
}
135135
},
136-
"outputs": [],
136+
"outputs": [
137+
{
138+
"output_type": "stream",
139+
"name": "stdout",
140+
"output_type": "stream",
141+
"text": [
142+
"+-------+----------+----+--------+-----+\n|sale_id|product_id|year|quantity|price|\n+-------+----------+----+--------+-----+\n| 1| 100|2008| 10| 5000|\n| 2| 100|2009| 12| 5000|\n| 7| 200|2011| 15| 9000|\n+-------+----------+----+--------+-----+\n\n+----------+------------+\n|product_id|product_name|\n+----------+------------+\n| 100| Nokia|\n| 200| Apple|\n| 300| Samsung|\n+----------+------------+\n\n"
143+
]
144+
}
145+
],
137146
"source": [
138147
"sales_data_1069 = [\n",
139148
" (1, 100, 2008, 10, 5000),\n",
@@ -172,7 +181,16 @@
172181
"title": ""
173182
}
174183
},
175-
"outputs": [],
184+
"outputs": [
185+
{
186+
"output_type": "stream",
187+
"name": "stdout",
188+
"output_type": "stream",
189+
"text": [
190+
"+----------+--------------+\n|product_id|total_quantity|\n+----------+--------------+\n| 100| 22|\n| 200| 15|\n+----------+--------------+\n\n"
191+
]
192+
}
193+
],
176194
"source": [
177195
"sales_df_1069\\\n",
178196
" .groupBy(\"product_id\").agg(sum(\"quantity\").alias(\"total_quantity\")).show()"

Solved/1070. Product Sales Analysis III (Medium)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,16 @@
131131
"title": ""
132132
}
133133
},
134-
"outputs": [],
134+
"outputs": [
135+
{
136+
"output_type": "stream",
137+
"name": "stdout",
138+
"output_type": "stream",
139+
"text": [
140+
"+-------+----------+----+--------+-----+\n|sale_id|product_id|year|quantity|price|\n+-------+----------+----+--------+-----+\n| 1| 100|2008| 10| 5000|\n| 2| 100|2009| 12| 5000|\n| 7| 200|2011| 15| 9000|\n+-------+----------+----+--------+-----+\n\n+----------+------------+\n|product_id|product_name|\n+----------+------------+\n| 100| Nokia|\n| 200| Apple|\n| 300| Samsung|\n+----------+------------+\n\n"
141+
]
142+
}
143+
],
135144
"source": [
136145
"sales_data_1070 = [\n",
137146
" (1, 100, 2008, 10, 5000),\n",
@@ -193,7 +202,16 @@
193202
"title": ""
194203
}
195204
},
196-
"outputs": [],
205+
"outputs": [
206+
{
207+
"output_type": "stream",
208+
"name": "stdout",
209+
"output_type": "stream",
210+
"text": [
211+
"+------------+----------+--------+-----+\n|product_name|first_year|quantity|price|\n+------------+----------+--------+-----+\n| Nokia| 2008| 10| 5000|\n| Apple| 2011| 15| 9000|\n+------------+----------+--------+-----+\n\n"
212+
]
213+
}
214+
],
197215
"source": [
198216
"sales_with_rank_df_1070\\\n",
199217
" .join(product_df_1070, on=\"product_id\", how=\"inner\")\\\n",

Solved/1075. Project Employees I (Easy)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,16 @@
134134
"title": ""
135135
}
136136
},
137-
"outputs": [],
137+
"outputs": [
138+
{
139+
"output_type": "stream",
140+
"name": "stdout",
141+
"output_type": "stream",
142+
"text": [
143+
"+----------+-----------+\n|project_id|employee_id|\n+----------+-----------+\n| 1| 1|\n| 1| 2|\n| 1| 3|\n| 2| 1|\n| 2| 4|\n+----------+-----------+\n\n+-----------+------+----------------+\n|employee_id| name|experience_years|\n+-----------+------+----------------+\n| 1|Khaled| 3|\n| 2| Ali| 2|\n| 3| John| 1|\n| 4| Doe| 2|\n+-----------+------+----------------+\n\n"
144+
]
145+
}
146+
],
138147
"source": [
139148
"project_data_1075 = [\n",
140149
" (1, 1),\n",
@@ -176,7 +185,16 @@
176185
"title": ""
177186
}
178187
},
179-
"outputs": [],
188+
"outputs": [
189+
{
190+
"output_type": "stream",
191+
"name": "stdout",
192+
"output_type": "stream",
193+
"text": [
194+
"+----------+-------------+\n|project_id|average_years|\n+----------+-------------+\n| 1| 2.0|\n| 2| 2.5|\n+----------+-------------+\n\n"
195+
]
196+
}
197+
],
180198
"source": [
181199
"project_df_1075\\\n",
182200
" .join(employee_df_1075, on=\"employee_id\", how=\"inner\")\\\n",

Solved/1076. Project Employees II (Easy)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,16 @@
134134
"title": ""
135135
}
136136
},
137-
"outputs": [],
137+
"outputs": [
138+
{
139+
"output_type": "stream",
140+
"name": "stdout",
141+
"output_type": "stream",
142+
"text": [
143+
"+----------+-----------+\n|project_id|employee_id|\n+----------+-----------+\n| 1| 1|\n| 1| 2|\n| 1| 3|\n| 2| 1|\n| 2| 4|\n+----------+-----------+\n\n+-----------+------+----------------+\n|employee_id| name|experience_years|\n+-----------+------+----------------+\n| 1|Khaled| 3|\n| 2| Ali| 2|\n| 3| John| 1|\n| 4| Doe| 2|\n+-----------+------+----------------+\n\n"
144+
]
145+
}
146+
],
138147
"source": [
139148
"project_data_1076 = [\n",
140149
" (1, 1),\n",
@@ -218,7 +227,16 @@
218227
"title": ""
219228
}
220229
},
221-
"outputs": [],
230+
"outputs": [
231+
{
232+
"output_type": "stream",
233+
"name": "stdout",
234+
"output_type": "stream",
235+
"text": [
236+
"+----------+\n|project_id|\n+----------+\n| 1|\n+----------+\n\n"
237+
]
238+
}
239+
],
222240
"source": [
223241
"project_counts_1076.\\\n",
224242
" filter(project_counts_1076[\"employee_count\"] == max_count_1076) \\\n",

Solved/1077. Project Employees III (Medium)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,16 @@
136136
"title": ""
137137
}
138138
},
139-
"outputs": [],
139+
"outputs": [
140+
{
141+
"output_type": "stream",
142+
"name": "stdout",
143+
"output_type": "stream",
144+
"text": [
145+
"+----------+-----------+\n|project_id|employee_id|\n+----------+-----------+\n| 1| 1|\n| 1| 2|\n| 1| 3|\n| 2| 1|\n| 2| 4|\n+----------+-----------+\n\n+-----------+------+----------------+\n|employee_id| name|experience_years|\n+-----------+------+----------------+\n| 1|Khaled| 3|\n| 2| Ali| 2|\n| 3| John| 3|\n| 4| Doe| 2|\n+-----------+------+----------------+\n\n"
146+
]
147+
}
148+
],
140149
"source": [
141150
"project_data_1077 = [\n",
142151
" (1, 1),\n",
@@ -219,7 +228,16 @@
219228
"title": ""
220229
}
221230
},
222-
"outputs": [],
231+
"outputs": [
232+
{
233+
"output_type": "stream",
234+
"name": "stdout",
235+
"output_type": "stream",
236+
"text": [
237+
"+----------+-----------+\n|project_id|employee_id|\n+----------+-----------+\n| 1| 1|\n| 1| 3|\n| 2| 1|\n+----------+-----------+\n\n"
238+
]
239+
}
240+
],
223241
"source": [
224242
"joined_df_1077\\\n",
225243
" .withColumn(\"rnk\", rank().over(windowSpec))\\\n",

Solved/1082. Sales Analysis I (Easy)-(Solved).ipynb

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,16 @@
137137
"title": ""
138138
}
139139
},
140-
"outputs": [],
140+
"outputs": [
141+
{
142+
"output_type": "stream",
143+
"name": "stdout",
144+
"output_type": "stream",
145+
"text": [
146+
"+---------+----------+--------+----------+--------+-----+\n|seller_id|product_id|buyer_id| sale_date|quantity|price|\n+---------+----------+--------+----------+--------+-----+\n| 1| 1| 1|2019-01-21| 2| 2000|\n| 1| 2| 2|2019-02-17| 1| 800|\n| 2| 2| 3|2019-06-02| 1| 800|\n| 3| 3| 4|2019-05-13| 2| 2800|\n+---------+----------+--------+----------+--------+-----+\n\n"
147+
]
148+
}
149+
],
141150
"source": [
142151
"sales_data_1082 = [\n",
143152
" (1, 1, 1, \"2019-01-21\", 2, 2000),\n",
@@ -210,7 +219,16 @@
210219
"title": ""
211220
}
212221
},
213-
"outputs": [],
222+
"outputs": [
223+
{
224+
"output_type": "stream",
225+
"name": "stdout",
226+
"output_type": "stream",
227+
"text": [
228+
"+---------+\n|seller_id|\n+---------+\n| 1|\n| 3|\n+---------+\n\n"
229+
]
230+
}
231+
],
214232
"source": [
215233
"seller_sales_1082.filter(seller_sales_1082[\"total_sales\"] == max_total_sales_1082) \\\n",
216234
" .select(\"seller_id\").show()"

0 commit comments

Comments
 (0)