Skip to content

Instantly share code, notes, and snippets.

@joebowbeer
Last active February 1, 2025 14:26
Show Gist options
  • Save joebowbeer/adf4200273e929050414fa0670209c0a to your computer and use it in GitHub Desktop.
Save joebowbeer/adf4200273e929050414fa0670209c0a to your computer and use it in GitHub Desktop.
Twitch Gamers data in DuckDB with SQL/PGQ extensions
-- From https://motherduck.com/blog/duckdb-puppygraph-graph-model-on-motherduck/
-- Extensions used to read CSV members directly out of a remote zip archive
-- (the zip:// + https:// paths below).
INSTALL zipfs FROM community;
LOAD zipfs;
-- httpfs is loaded explicitly (TODO: rely on autoload). INSTALL is issued
-- first so the script also runs where httpfs has never been installed.
INSTALL httpfs;
LOAD httpfs;

-- Vertex data: one row per account. The three flag columns are forced to
-- BOOLEAN; every other column type is left to the CSV sniffer.
CREATE TABLE account AS
SELECT *
FROM read_csv(
    'zip://https://snap.stanford.edu/data/twitch_gamers.zip/large_twitch_features.csv',
    types = {'mature': 'BOOLEAN', 'dead_account': 'BOOLEAN', 'affiliate': 'BOOLEAN'}
);

-- Edge data: one row per follow relationship, with the two columns renamed
-- to follower / followee.
CREATE TABLE follows AS
SELECT *
FROM read_csv(
    'zip://https://snap.stanford.edu/data/twitch_gamers.zip/large_twitch_edges.csv',
    names = ['follower', 'followee']
);
-- SQL queries

-- Row counts for the edge and vertex tables.
SELECT count(*) FROM follows;
SELECT count(*) FROM account;

-- Number of accounts flagged as dead.
SELECT count(*) FROM account WHERE dead_account;

-- The five least recently updated accounts. numeric_id breaks ties on
-- updated_at so the same five rows come back on every run.
SELECT *
FROM account
ORDER BY updated_at, numeric_id
LIMIT 5;
-- Property Graph
-- duckpgq is the community extension providing the SQL/PGQ statements
-- (CREATE PROPERTY GRAPH, GRAPH_TABLE) used from here on.
INSTALL duckpgq FROM community;
LOAD duckpgq;

-- Graph "gamers": account rows are the vertices, follows rows are the edges.
-- Each edge points from follows.follower to follows.followee, and both ends
-- are resolved against account.numeric_id.
CREATE PROPERTY GRAPH gamers
    VERTEX TABLES (
        account
    )
    EDGE TABLES (
        follows
            SOURCE KEY (follower) REFERENCES account (numeric_id)
            DESTINATION KEY (followee) REFERENCES account (numeric_id)
    );
-- Property Graph Queries

-- Graph equivalent of the "five least recently updated accounts" query.
-- numeric_id breaks ties on updated_at so the result is deterministic.
FROM GRAPH_TABLE (gamers
    MATCH (a:account)
)
ORDER BY updated_at, numeric_id
LIMIT 5;

-- One sample follows edge. The two endpoint ids are aliased so the output
-- does not contain two columns that are both called numeric_id.
FROM GRAPH_TABLE (gamers
    MATCH (a:account)-[f:follows]->(b:account)
    COLUMNS (a.numeric_id AS follower_id, b.numeric_id AS followee_id)
)
LIMIT 1;
-- List the top-5 viewed accounts exactly two follows-hops away from the
-- least recently updated account.
--
-- * Both edge patterns are undirected (-[..]-), so a hop matches a follows
--   row in either direction.
-- * The anchor account (c) is picked with a numeric_id tie-breaker so the
--   same account is chosen on every run when updated_at values tie.
-- * a <> c keeps the anchor from matching itself by walking an edge out and
--   straight back.
-- * DISTINCT collapses accounts that are reachable over several 2-hop paths;
--   without it one account could fill more than one of the five slots.
SELECT DISTINCT
    numeric_id,
    views
FROM GRAPH_TABLE (gamers
    MATCH (a:account)-[f:follows]-(b:account)-[g:follows]-(c:account)
    WHERE c.numeric_id = (
            SELECT first(numeric_id ORDER BY updated_at, numeric_id) FROM account
        )
        AND a.numeric_id <> c.numeric_id
    COLUMNS (a.numeric_id, a.views)
)
ORDER BY views DESC, numeric_id
LIMIT 5;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment