Last active
February 1, 2025 14:26
-
-
Save joebowbeer/adf4200273e929050414fa0670209c0a to your computer and use it in GitHub Desktop.
Twitch Gamers data in DuckDB with SQL/PGQ extensions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Source: https://motherduck.com/blog/duckdb-puppygraph-graph-model-on-motherduck/
--
-- Load the Twitch Gamers CSV files directly out of the remote zip archive.
-- The zip:// prefix on the paths below is why the zipfs community extension
-- is loaded; the nested https:// URL is why httpfs is loaded.
INSTALL zipfs FROM community;
LOAD zipfs;
INSTALL httpfs; -- no-op when the extension is already installed
LOAD httpfs; -- TODO: rely on autoloading instead of an explicit LOAD

-- Account features. Column types are auto-detected except for the three
-- flag columns, which are forced to BOOLEAN.
CREATE TABLE account AS
SELECT *
FROM read_csv(
    'zip://https://snap.stanford.edu/data/twitch_gamers.zip/large_twitch_features.csv',
    types = {'mature': 'BOOLEAN', 'dead_account': 'BOOLEAN', 'affiliate': 'BOOLEAN'}
);

-- Follow edges. The two CSV columns are renamed to follower / followee.
CREATE TABLE follows AS
SELECT *
FROM read_csv(
    'zip://https://snap.stanford.edu/data/twitch_gamers.zip/large_twitch_edges.csv',
    names = ['follower', 'followee']
);
-- Plain SQL sanity checks on the loaded tables.
SELECT count(*) FROM follows;
SELECT count(*) FROM account;
SELECT count(*) FROM account WHERE dead_account;

-- Five least recently updated accounts. numeric_id is a tie-breaker so the
-- result is stable when several accounts share the same updated_at.
SELECT *
FROM account
ORDER BY updated_at, numeric_id
LIMIT 5;
-- Property graph.
INSTALL duckpgq FROM community;
LOAD duckpgq;

-- Graph "gamers": account rows are the vertices and follows rows are the
-- edges. Each edge runs from follows.follower (source) to follows.followee
-- (destination); both columns reference account.numeric_id.
CREATE PROPERTY GRAPH gamers
VERTEX TABLES (
    account
)
EDGE TABLES (
    follows
        SOURCE KEY (follower) REFERENCES account (numeric_id)
        DESTINATION KEY (followee) REFERENCES account (numeric_id)
);
-- Property graph queries.

-- Graph counterpart of the "least recently updated accounts" SQL query.
-- numeric_id is a tie-breaker so the five rows are stable across runs.
FROM GRAPH_TABLE (gamers MATCH (a:account))
ORDER BY updated_at, numeric_id
LIMIT 5;

-- One arbitrary directed follows edge (there is no ORDER BY, so which edge is
-- returned is unspecified). The two endpoints are aliased so the output does
-- not contain two columns that are both named numeric_id.
FROM GRAPH_TABLE (gamers
    MATCH (a:account)-[f:follows]->(b:account)
    COLUMNS (a.numeric_id AS follower_id, b.numeric_id AS followee_id)
)
LIMIT 1;
-- List the five most-viewed distinct accounts that are two follows-hops away
-- from the least recently updated account.
--
-- Both edge patterns are undirected (-[..]-), so edge direction is ignored.
-- Because of that, the same account can be reached through several
-- intermediate accounts, and the start account can be reached by walking out
-- and back over a single edge. DISTINCT and the a <> c predicate keep such
-- repeats and the start account itself out of the top five.
-- numeric_id is used as a tie-breaker in both ORDER BY clauses so the result
-- is deterministic.
SELECT DISTINCT
    numeric_id,
    views
FROM GRAPH_TABLE (gamers
    MATCH (a:account)-[f:follows]-(b:account)-[g:follows]-(c:account)
    WHERE c.numeric_id = (
        SELECT first(numeric_id ORDER BY updated_at, numeric_id) FROM account
    )
    AND a.numeric_id <> c.numeric_id
    COLUMNS (a.numeric_id, a.views)
)
ORDER BY views DESC, numeric_id
LIMIT 5;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment