Last active
August 4, 2022 02:13
-
-
Save yuzutas0/41475569a3405596c910e78affc9f62c to your computer and use it in GitHub Desktop.
https://speakerdeck.com/yuzutas0/20190905?slide=30 のサンプルSQLです。BigQueryのどのテーブルがどのくらい参照されているかUU・PVを計算するクエリです。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Daily read counts for every table of one dataset:
--   PV = number of read-authorization entries, UU = number of distinct users.
-- Tables without any matching audit-log row are still returned, with
-- PV = UU = 0 and a NULL day.
-- The {placeholders} must be substituted before the query is run.
WITH
  -- Tables that exist in the dataset, minus temporary/load tables.
  tables AS (
    SELECT
      table_id
    FROM
      `{project_id}.{dataset_name}`.__TABLES__
    WHERE
      -- `_` is a single-character wildcard in LIKE; escape it so only names
      -- that really start with `LOAD_TEMP_` / `TMP_` are excluded.
      table_id NOT LIKE r'LOAD\_TEMP\_%'
      AND table_id NOT LIKE r'TMP\_%'
  ),
  -- One row per authorization entry that granted `bigquery.tables.getData`.
  log AS (
    SELECT
      -- Literal REPLACE: the interpolated project/dataset names must not be
      -- interpreted as a regular expression.
      REPLACE(data.resource, 'projects/{project_id}/datasets/{dataset_name}/tables/', '') AS table,
      protopayload_auditlog.authenticationInfo.principalEmail AS user,
      DATE(timestamp) AS day
    FROM
      `{project_id}.{source__cloudaudit__bigquery}.cloudaudit_googleapis_com_data_access_*`,
      UNNEST(protopayload_auditlog.authorizationInfo) AS data
    WHERE
      data.permission = 'bigquery.tables.getData'
  ),
  -- Aggregate reads per table and day.
  calc AS (
    SELECT
      table,
      day,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      log
    WHERE
      table NOT LIKE r'LOAD\_TEMP\_%'
      AND table != '__TABLES__'
      AND table NOT LIKE r'TMP\_%'
    GROUP BY
      table,
      day
  )
SELECT
  tables.table_id,
  -- Tables with no reads have no row in `calc`; report them as zero.
  COALESCE(calc.PV, 0) AS PV,
  COALESCE(calc.UU, 0) AS UU,
  -- Day rendered as YYYYMMDD.
  FORMAT_DATE('%Y%m%d', calc.day) AS day
FROM
  tables
LEFT JOIN
  calc
ON
  tables.table_id = calc.table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Daily read counts (PV / UU) per table across all datasets of the project,
-- derived from the audit logs alone (tables without reads do not appear).
-- The {placeholders} must be substituted before the query is run.
WITH
  -- One row per authorization entry that granted `bigquery.tables.getData`.
  log AS (
    SELECT
      -- Only the project prefix is stripped, so `table` keeps the form
      -- `<dataset>/tables/<table>`. Literal REPLACE avoids treating the
      -- interpolated project name as a regular expression.
      REPLACE(data.resource, 'projects/{project_id}/datasets/', '') AS table,
      protopayload_auditlog.authenticationInfo.principalEmail AS user,
      DATE(timestamp) AS day
    FROM
      `{project_id}.{source__cloudaudit__bigquery}.cloudaudit_googleapis_com_data_access_*`,
      UNNEST(protopayload_auditlog.authorizationInfo) AS data
    WHERE
      data.permission = 'bigquery.tables.getData'
  ),
  -- Aggregate reads per table and day, skipping temporary and metadata tables.
  calc AS (
    SELECT
      table,
      day,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      log
    WHERE
      -- `_` is a single-character wildcard in LIKE; escape it to match a
      -- literal underscore.
      table NOT LIKE r'%LOAD\_TEMP\_%'
      -- `table` still contains the dataset segment, so a plain equality
      -- test against '__TABLES__' can never match; compare the path tail.
      -- NOTE(review): assumes __TABLES__ reads are logged as
      -- `.../tables/__TABLES__` - confirm against real log rows.
      AND NOT ENDS_WITH(table, '/tables/__TABLES__')
      AND table NOT LIKE r'%TMP\_%'
    GROUP BY
      table,
      day
  )
SELECT
  calc.table,
  calc.PV,
  calc.UU,
  -- Day rendered as YYYYMMDD.
  FORMAT_DATE('%Y%m%d', calc.day) AS day
FROM
  calc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Daily read counts (PV / UU) for every table of a project, zero-filled:
-- each table gets one row for every logged day on or after its creation
-- date, with PV = UU = 0 on days without reads.
WITH
  -- Full resource path and creation date of every non-temporary table.
  tables AS (
    SELECT DISTINCT
      CONCAT('projects/', table_catalog, '/datasets/', table_schema, '/tables/', table_name) AS table,
      DATE(creation_time) AS creation_date
    FROM
      `region-us`.INFORMATION_SCHEMA.TABLES -- TODO: change region if you use another one
    WHERE
      table_catalog = 'xxxxx' -- TODO: set project name
      -- `_` is a single-character wildcard in LIKE; escape it to match a
      -- literal underscore.
      AND table_name NOT LIKE r'%LOAD\_TEMP\_%'
      AND table_name != '__TABLES__'
      AND table_name NOT LIKE r'%TMP\_%'
  ),
  -- One row per authorization entry that granted `bigquery.tables.getData`.
  logs AS (
    SELECT
      data.resource AS table,
      protopayload_auditlog.authenticationInfo.principalEmail AS user,
      DATE(timestamp) AS day
    FROM
      `xxx.xxx.cloudaudit_googleapis_com_data_access_*`, -- TODO: project & dataset name
      UNNEST(protopayload_auditlog.authorizationInfo) AS data
    WHERE
      data.permission = 'bigquery.tables.getData'
  ),
  -- Every day that has at least one read in the logs.
  days AS (
    SELECT DISTINCT
      day
    FROM
      logs
  ),
  -- Table x day grid, limited to days on which the table already existed.
  base AS (
    SELECT
      tables.table,
      days.day
    FROM
      tables
    CROSS JOIN
      days
    WHERE
      tables.creation_date <= days.day
  ),
  -- Reads per resource path and day. No name filter here: `table` is the
  -- full resource path, so name patterns would also hit project/dataset
  -- names; the LEFT JOIN from `base` already restricts the output to the
  -- tables selected in `tables`.
  calc AS (
    SELECT
      table,
      day,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      logs
    GROUP BY
      table,
      day
  )
SELECT
  table,
  -- Day rendered as YYYYMMDD.
  FORMAT_DATE('%Y%m%d', day) AS day,
  COALESCE(calc.PV, 0) AS PV,
  COALESCE(calc.UU, 0) AS UU
FROM
  base
LEFT JOIN
  calc
USING
  (table, day)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Yearly read counts (PV / UU) for every table of a project, zero-filled:
-- each table gets one row for every logged year from its creation year
-- onwards, with PV = UU = 0 in years without reads.
WITH
  -- Full resource path and creation year of every non-temporary table.
  tables AS (
    SELECT DISTINCT
      CONCAT('projects/', table_catalog, '/datasets/', table_schema, '/tables/', table_name) AS table,
      EXTRACT(YEAR FROM creation_time) AS year
    FROM
      `region-us`.INFORMATION_SCHEMA.TABLES -- TODO: change region if you use another one
    WHERE
      table_catalog = 'xxxxx' -- TODO: set project name
      -- `_` is a single-character wildcard in LIKE; escape it to match a
      -- literal underscore.
      AND table_name NOT LIKE r'%LOAD\_TEMP\_%'
      AND table_name != '__TABLES__'
      AND table_name NOT LIKE r'%TMP\_%'
  ),
  -- One row per authorization entry that granted `bigquery.tables.getData`.
  logs AS (
    SELECT
      data.resource AS table,
      protopayload_auditlog.authenticationInfo.principalEmail AS user,
      EXTRACT(YEAR FROM timestamp) AS year
    FROM
      `xxx.xxx.cloudaudit_googleapis_com_data_access_*`, -- TODO: project & dataset name
      UNNEST(protopayload_auditlog.authorizationInfo) AS data
    WHERE
      data.permission = 'bigquery.tables.getData'
  ),
  -- Every year that has at least one read in the logs.
  years AS (
    SELECT DISTINCT
      year
    FROM
      logs
  ),
  -- Table x year grid, limited to years in which the table already existed.
  base AS (
    SELECT
      tables.table,
      years.year
    FROM
      tables
    CROSS JOIN
      years
    WHERE
      tables.year <= years.year
  ),
  -- Reads per resource path and year. No name filter here: `table` is the
  -- full resource path, so name patterns would also hit project/dataset
  -- names; the LEFT JOIN from `base` already restricts the output to the
  -- tables selected in `tables`.
  calc AS (
    SELECT
      table,
      year,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      logs
    GROUP BY
      table,
      year
  )
SELECT
  table,
  year,
  COALESCE(calc.PV, 0) AS PV,
  COALESCE(calc.UU, 0) AS UU
FROM
  base
LEFT JOIN
  calc
USING
  (table, year)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Per year, counts how many `jobservice.getqueryresults` audit-log entries
-- exist for each query text issued by one principal (the BI tool's service
-- account, per the TODO below).
-- The audit-log row is counted directly: joining the authorizationInfo
-- array here would multiply the count by the array length and drop entries
-- whose array is empty, and none of its fields are needed.
SELECT
  protopayload_auditlog.servicedata_v1_bigquery.jobGetQueryResultsResponse.job.jobConfiguration.query.query AS query,
  EXTRACT(YEAR FROM timestamp) AS year,
  COUNT(*) AS count
FROM
  `xxx.xxx.cloudaudit_googleapis_com_data_access_*` -- TODO: project & dataset name
WHERE
  protopayload_auditlog.methodName = 'jobservice.getqueryresults'
  AND protopayload_auditlog.authenticationInfo.principalEmail = '[email protected]' -- TODO: service account for BI tool
  AND protopayload_auditlog.servicedata_v1_bigquery.jobGetQueryResultsResponse.job.jobConfiguration.query.query IS NOT NULL
GROUP BY
  query,
  year
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment