Created
September 10, 2018 16:18
-
-
Save areeves87/249e280fbbd98254fd7280f3da5cea78 to your computer and use it in GitHub Desktop.
Counts the popular Reddit comments (score above the subreddit's 90th-percentile score) in active subreddits (more than 1000 comments), using the June 2018 Reddit comment dataset hosted on Google BigQuery
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Per-subreddit comment statistics for the `2018_06` Reddit comments table
-- (BigQuery Standard SQL).
--
-- For every subreddit with more than 1000 comments in the table, reports:
--   comments                   total comments
--   ups / downs                comments with score > 1 / score < 1
--   removeds                   comments whose body is exactly '[removed]'
--   removed_ups / removed_downs  removed comments with score > 1 / score < 1
--   thresh_90_percentile       the subreddit's 90th-percentile score
--   ups_90_percentile          comments scoring strictly above that threshold
--   removed_ups_90_percentile  removed comments scoring strictly above it
WITH active_subreddits AS (
    -- Subreddits with more than 1000 comments in the month.
    SELECT
        subreddit,
        COUNT(subreddit) AS comments
    FROM `fh-bigquery.reddit_comments.2018_06`
    GROUP BY subreddit
    HAVING comments > 1000
),

p90_threshold AS (
    -- One row per comment in an active subreddit, tagged with a removed flag
    -- and the subreddit-wide 90th-percentile score.
    -- The active-subreddit filter is applied here, before the window function
    -- is evaluated, so the percentile is only computed for subreddits that are
    -- kept. The window is partitioned by subreddit, so each subreddit's
    -- threshold is the same as it would be without the filter.
    SELECT
        subreddit,
        score,
        CASE WHEN body = '[removed]' THEN 1 ELSE 0 END AS removed,
        PERCENTILE_DISC(score, 0.9) OVER (PARTITION BY subreddit) AS percentile_90
    FROM `fh-bigquery.reddit_comments.2018_06`
    WHERE subreddit IN (SELECT subreddit FROM active_subreddits)
),

events AS (
    -- 0/1 indicator columns per comment, summed in the final SELECT.
    -- A score of exactly 1 counts as neither "up" nor "down"
    -- (presumably because 1 is a comment's starting score -- confirm).
    SELECT
        subreddit,
        removed,
        percentile_90,
        CASE WHEN score > 1 THEN 1 ELSE 0 END AS up,
        CASE WHEN score < 1 THEN 1 ELSE 0 END AS down,
        CASE WHEN score > 1 AND removed = 1 THEN 1 ELSE 0 END AS removed_up,
        CASE WHEN score < 1 AND removed = 1 THEN 1 ELSE 0 END AS removed_down,
        -- Strictly greater than: comments sitting exactly on the threshold
        -- are not counted.
        CASE WHEN score > percentile_90 THEN 1 ELSE 0 END AS count_percentile_90,
        CASE
            WHEN score > percentile_90 AND removed = 1 THEN 1
            ELSE 0
        END AS removed_up_percentile_90
    FROM p90_threshold
)

SELECT
    subreddit,
    COUNT(subreddit) AS comments,
    SUM(up) AS ups,
    SUM(down) AS downs,
    SUM(removed) AS removeds,
    SUM(removed_up) AS removed_ups,
    SUM(removed_down) AS removed_downs,
    -- percentile_90 is constant within a subreddit; MAX just collapses it.
    MAX(percentile_90) AS thresh_90_percentile,
    SUM(count_percentile_90) AS ups_90_percentile,
    SUM(removed_up_percentile_90) AS removed_ups_90_percentile
FROM events
GROUP BY subreddit
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment