Skip to content

Instantly share code, notes, and snippets.

@anorth
Created July 31, 2025 23:23
Show Gist options
  • Save anorth/ff3b113505b3cff96e675b564a802cac to your computer and use it in GitHub Desktop.
Save anorth/ff3b113505b3cff96e675b564a802cac to your computer and use it in GitHub Desktop.
Google BigQuery query to rank GitHub projects by watch events per contributor
-- Ranks GitHub repositories by the number of watch events per distinct
-- contributor, reading GitHub Archive events from the `month.2025*` wildcard
-- tables and the `year.2024` table. Returns the top 1000 repositories that
-- have at least 3 distinct push actors and 3 distinct pull-request actors.
WITH
-- Only the three fields the ranking needs. UNION ALL matches columns by
-- position, so listing them explicitly keeps the two branches aligned without
-- relying on both sources sharing an identical full column layout, and it
-- avoids reading columns this query never uses.
events AS (
  SELECT
    type,
    repo.name AS repository_name,
    actor.login AS actor_login
  FROM `githubarchive.month.2025*`
  UNION ALL
  SELECT
    type,
    repo.name AS repository_name,
    actor.login AS actor_login
  FROM `githubarchive.year.2024`
),
-- One row per repository: raw event counts plus distinct-actor counts.
repo_stats AS (
  SELECT
    repository_name,
    -- Proxies for impact: watches and forks
    COUNTIF(type = 'WatchEvent') AS watch_count,
    COUNTIF(type = 'ForkEvent') AS fork_count,
    -- Proxies for contribution: pr, push, review.
    -- IF(...) yields NULL for non-matching rows and COUNT ignores NULLs, so
    -- each *_actors column counts only logins that produced that event type.
    COUNTIF(type = 'PullRequestEvent') AS pr_count,
    COUNT(DISTINCT IF(type = 'PullRequestEvent', actor_login, NULL)) AS pr_actors,
    COUNTIF(type = 'PushEvent') AS push_event_count,
    COUNT(DISTINCT IF(type = 'PushEvent', actor_login, NULL)) AS push_actors,
    COUNTIF(type = 'PullRequestReviewCommentEvent') AS review_event_count,
    COUNT(DISTINCT IF(type = 'PullRequestReviewCommentEvent', actor_login, NULL)) AS review_actors
  FROM events
  WHERE
    type IN (
      'WatchEvent',
      'PushEvent',
      'ForkEvent',
      'PullRequestEvent',
      'PullRequestReviewCommentEvent'
    )
  GROUP BY repository_name
)
SELECT
  repository_name,
  watch_count,
  fork_count,
  pr_count,
  push_event_count,
  review_event_count,
  pr_actors,
  push_actors,
  review_actors,
  -- SAFE_DIVIDE returns NULL instead of raising on a zero denominator;
  -- review_actors is not filtered below, so its ratio can be NULL.
  FLOOR(SAFE_DIVIDE(watch_count, push_actors)) AS watch_to_push_actor_ratio,
  FLOOR(SAFE_DIVIDE(watch_count, pr_actors)) AS watch_to_pr_actor_ratio,
  FLOOR(SAFE_DIVIDE(watch_count, review_actors)) AS watch_to_review_actor_ratio
FROM repo_stats
WHERE
  -- Require a minimum number of distinct contributors on both signals.
  push_actors >= 3
  AND pr_actors >= 3
ORDER BY
  watch_to_pr_actor_ratio DESC,
  -- Tie-breakers: the FLOORed ratio has many ties, and without a total order
  -- the LIMIT below would return an arbitrary subset of the tied rows.
  watch_count DESC,
  repository_name
LIMIT 1000;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment