Last active
January 23, 2022 18:12
-
-
Save novica/e01d404228d1e8b4ce8f36d2e0336167 to your computer and use it in GitHub Desktop.
dezoomcamp exercises
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* There are two question before these that don not require much code, | |
* so only the SQL related answers are written below. | |
*/ | |
/* See the data */ | |
SELECT | |
* | |
FROM | |
yellow_taxi_data | |
LIMIT | |
20; | |
/* | |
* Question 3. Count records | |
* How many taxi trips were there on January 15? | |
* Consider only trips that started on January 15. | |
*/ | |
SELECT | |
COUNT(*) | |
FROM | |
yellow_taxi_data | |
WHERE | |
DATE_TRUNC('day', tpep_pickup_datetime) = '2021-01-15'; | |
/* | |
* Question 4. Average Find the largest tip for each day. | |
* On which day it was the largest tip in January? | |
* Use the pick up time for your calculations. (note: it's not a typo, it's "tip", not "trip") | |
*/ | |
/* Largest tip for each day */ | |
SELECT | |
MAX(tip_amount) AS max_tip, DATE_TRUNC('day', tpep_pickup_datetime) AS tpep_pickpup_day | |
FROM | |
yellow_taxi_data | |
GROUP BY | |
tpep_pickpup_day; | |
/* On which day it was the largest tip in January? */ | |
SELECT | |
MAX(tip_amount) AS max_tip, DATE_TRUNC('day', tpep_pickup_datetime) AS tpep_pickpup_day | |
FROM | |
yellow_taxi_data | |
WHERE | |
DATE_TRUNC('month', tpep_pickup_datetime) = '2021-01-01' | |
GROUP BY | |
tpep_pickpup_day | |
ORDER BY | |
max_tip DESC | |
LIMIT | |
1; | |
/* | |
* Question 5. Most popular destination | |
* What was the most popular destination for passengers picked up | |
* in central park on January 14? | |
* Use the pick up time for your calculations. | |
* Enter the zone name (not id). If the zone name is unknown (missing), write "Unknown" | |
*/ | |
SELECT | |
DATE_TRUNC('day', taxi.tpep_pickup_datetime) as pick_up_day, | |
pick_up."Zone" AS "pick_up_loc", | |
drop_off."Zone" AS "drop_off_loc", | |
count(pick_up."Zone") AS "pick_up_count", | |
count(drop_off."Zone") AS "drop_off_count" | |
FROM | |
yellow_taxi_data taxi, | |
zones pick_up, | |
zones drop_off | |
WHERE | |
taxi."PULocationID" = pick_up."LocationID" AND | |
taxi."DOLocationID" = drop_off."LocationID" AND | |
DATE_TRUNC('day', taxi.tpep_pickup_datetime) = '2021-01-14' AND | |
pick_up."Zone" = 'Central Park' | |
GROUP BY | |
pick_up_day, pick_up."Zone", drop_off."Zone" | |
ORDER BY | |
drop_off_count DESC | |
LIMIT | |
1; | |
/* | |
* Question 6 | |
* What's the pickup-dropoff pair with the largest | |
* average price for a ride (calculated based on `total_amount`)? | |
* Enter two zone names separated by a slash. For example: | |
* "Jamaica Bay / Clinton East" | |
* If any of the zone names are unknown (missing), write "Unknown". For example, "Unknown / Clinton East". | |
*/ | |
SELECT | |
AVG(total_amount) as avg_price, | |
CASE | |
WHEN | |
(pick_up."Zone" IS NOT NULL AND drop_off."Zone" IS NULL) | |
THEN | |
CONCAT( pick_up."Zone", '/', 'Unknown') | |
WHEN | |
(pick_up."Zone" IS NULL AND drop_off."Zone" IS NOT NULL) | |
THEN | |
CONCAT( 'Unknown', '/', drop_off."Zone") | |
WHEN | |
(pick_up."Zone" IS NULL AND drop_off."Zone" IS NULL) | |
THEN | |
CONCAT( 'Unknown', '/', 'Unknown') | |
ELSE | |
CONCAT( pick_up."Zone", '/', drop_off."Zone") | |
END AS "loc_pair" | |
FROM | |
yellow_taxi_data taxi, | |
zones pick_up, | |
zones drop_off | |
WHERE | |
taxi."PULocationID" = pick_up."LocationID" AND | |
taxi."DOLocationID" = drop_off."LocationID" | |
GROUP BY | |
loc_pair | |
ORDER BY | |
avg_price DESC | |
LIMIT | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment