Skip to content

Instantly share code, notes, and snippets.

@yuzutas0
Last active August 4, 2022 02:13
Show Gist options
  • Save yuzutas0/41475569a3405596c910e78affc9f62c to your computer and use it in GitHub Desktop.
Save yuzutas0/41475569a3405596c910e78affc9f62c to your computer and use it in GitHub Desktop.
https://speakerdeck.com/yuzutas0/20190905?slide=30 のサンプルSQLです。BigQueryのどのテーブルがどのくらい参照されているかUU・PVを計算するクエリです。
-- Per-table, per-day read statistics for a single dataset.
--   PV = number of 'bigquery.tables.getData' authorization entries
--   UU = number of distinct principal e-mails behind those entries
-- Every table listed in the dataset's __TABLES__ is returned; a table with no
-- matching audit-log rows comes back once with NULL PV, UU and day.
-- {project_id}, {dataset_name} and {source__cloudaudit__bigquery} are template
-- placeholders that must be substituted before the query is run.
WITH
  -- One row per getData authorization entry in the exported audit logs.
  log AS (
    SELECT
      -- Strip the resource prefix so the value is comparable to __TABLES__.table_id.
      REGEXP_REPLACE(auth.resource, 'projects/{project_id}/datasets/{dataset_name}/tables/', '') AS table,
      audit.protopayload_auditlog.authenticationInfo.principalEmail AS user,
      DATE(audit.timestamp) AS day
    FROM
      `{project_id}.{source__cloudaudit__bigquery}.cloudaudit_googleapis_com_data_access_*` AS audit
    CROSS JOIN
      UNNEST(audit.protopayload_auditlog.authorizationInfo) AS auth
    WHERE
      auth.permission = 'bigquery.tables.getData'
  ),
  -- Daily aggregation of the reads, skipping temporary / metadata tables.
  daily_usage AS (
    SELECT
      table,
      day,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      log
    WHERE
      NOT (
        table LIKE 'LOAD_TEMP_%'
        OR table = '__TABLES__'
        OR table LIKE 'TMP_%'
      )
    GROUP BY
      table,
      day
  ),
  -- The tables that currently exist in the dataset (temporary ones excluded).
  dataset_tables AS (
    SELECT
      table_id
    FROM
      `{project_id}.{dataset_name}`.__TABLES__
    WHERE
      NOT (
        table_id LIKE 'LOAD_TEMP_%'
        OR table_id LIKE 'TMP_%'
      )
  )
SELECT
  dataset_tables.table_id,
  daily_usage.PV,
  daily_usage.UU,
  -- YYYY-MM-DD -> YYYYMMDD
  REPLACE(CAST(daily_usage.day AS STRING), '-', '') AS day
FROM
  dataset_tables
LEFT JOIN
  daily_usage
ON
  daily_usage.table = dataset_tables.table_id
-- Per-table, per-day read statistics across every dataset of a project.
--   PV = number of 'bigquery.tables.getData' authorization entries
--   UU = number of distinct principal e-mails behind those entries
-- {project_id} and {source__cloudaudit__bigquery} are template placeholders that
-- must be substituted before the query is run.
WITH
  -- One row per getData authorization entry in the exported audit logs.
  log AS (
    SELECT
      -- Only the project/dataset prefix is stripped, so for resources shaped like
      -- projects/<project>/datasets/<dataset>/tables/<table> the value that
      -- remains is '<dataset>/tables/<table>'.
      REGEXP_REPLACE(data.resource, 'projects/{project_id}/datasets/', '') AS table,
      protopayload_auditlog.authenticationInfo.principalEmail AS user,
      DATE(timestamp) AS day
    FROM
      `{project_id}.{source__cloudaudit__bigquery}.cloudaudit_googleapis_com_data_access_*`,
      UNNEST(protopayload_auditlog.authorizationInfo) AS data
    WHERE
      data.permission = 'bigquery.tables.getData'
  ),
  -- Daily aggregation of the reads, skipping temporary / metadata tables.
  calc AS (
    SELECT
      table,
      day,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      log
    WHERE
      table NOT LIKE '%LOAD_TEMP_%'
      -- `table` still carries the '<dataset>/tables/' prefix here, so a plain
      -- equality test against '__TABLES__' can never match. Compare the last
      -- path segment instead (a bare '__TABLES__' value is still excluded).
      AND NOT REGEXP_CONTAINS(table, r'(^|/)__TABLES__$')
      AND table NOT LIKE '%TMP_%'
    GROUP BY
      table,
      day
  )
SELECT
  calc.table,
  calc.PV,
  calc.UU,
  -- YYYY-MM-DD -> YYYYMMDD
  REGEXP_REPLACE(CAST(calc.day AS STRING), '-', '') AS day
FROM
  calc
-- Daily PV / UU for every table of a project, zero-filled.
-- Each table is paired with every day that has at least one getData log entry
-- on or after the table's creation date; days without reads of that table are
-- reported with PV = 0 and UU = 0.
WITH
  -- One row per getData authorization entry in the exported audit logs.
  read_events AS (
    SELECT
      auth.resource AS table,
      audit.protopayload_auditlog.authenticationInfo.principalEmail AS user,
      DATE(audit.timestamp) AS day
    FROM
      `xxx.xxx.cloudaudit_googleapis_com_data_access_*` AS audit -- TODO: project & dataset name
    CROSS JOIN
      UNNEST(audit.protopayload_auditlog.authorizationInfo) AS auth
    WHERE
      auth.permission = 'bigquery.tables.getData'
  ),
  -- Tables known to INFORMATION_SCHEMA, keyed by the same resource path format
  -- that is compared against the audit-log resource in the final join.
  catalog_tables AS (
    SELECT DISTINCT
      CONCAT('projects/', table_catalog, '/datasets/', table_schema, '/tables/', table_name) AS table,
      DATE(creation_time) AS creation_date
    FROM
      `region-us`.INFORMATION_SCHEMA.TABLES -- TODO: change region if you use another one
    WHERE
      table_catalog = 'xxxxx' -- TODO: set project name
      AND NOT (
        table_name LIKE '%LOAD_TEMP_%'
        OR table_name = '__TABLES__'
        OR table_name LIKE '%TMP_%'
      )
  ),
  -- Every calendar day that appears in the read events.
  active_days AS (
    SELECT DISTINCT
      day
    FROM
      read_events
  ),
  -- Table x day grid, limited to days on or after the table was created.
  table_days AS (
    SELECT
      catalog_tables.table,
      active_days.day
    FROM
      catalog_tables
    CROSS JOIN
      active_days
    WHERE
      active_days.day >= catalog_tables.creation_date
  ),
  -- Daily aggregation of the reads.
  daily_usage AS (
    SELECT
      table,
      day,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      read_events
    WHERE
      NOT (
        table LIKE '%LOAD_TEMP_%'
        OR table = '__TABLES__'
        OR table LIKE '%TMP_%'
      )
    GROUP BY
      table,
      day
  )
SELECT
  table,
  -- YYYY-MM-DD -> YYYYMMDD
  REPLACE(CAST(day AS STRING), '-', '') AS day,
  COALESCE(daily_usage.PV, 0) AS PV,
  COALESCE(daily_usage.UU, 0) AS UU
FROM
  table_days
LEFT JOIN
  daily_usage
USING
  (table, day)
-- Yearly PV / UU for every table of a project, zero-filled.
-- Each table is paired with every year that has at least one getData log entry
-- and is not earlier than the table's creation year; years without reads of
-- that table are reported with PV = 0 and UU = 0.
WITH
  -- One row per getData authorization entry in the exported audit logs.
  read_events AS (
    SELECT
      auth.resource AS table,
      audit.protopayload_auditlog.authenticationInfo.principalEmail AS user,
      EXTRACT(year FROM audit.timestamp) AS year
    FROM
      `xxx.xxx.cloudaudit_googleapis_com_data_access_*` AS audit -- TODO: project & dataset name
    CROSS JOIN
      UNNEST(audit.protopayload_auditlog.authorizationInfo) AS auth
    WHERE
      auth.permission = 'bigquery.tables.getData'
  ),
  -- Tables known to INFORMATION_SCHEMA, keyed by the same resource path format
  -- that is compared against the audit-log resource in the final join.
  catalog_tables AS (
    SELECT DISTINCT
      CONCAT('projects/', table_catalog, '/datasets/', table_schema, '/tables/', table_name) AS table,
      EXTRACT(year FROM creation_time) AS year
    FROM
      `region-us`.INFORMATION_SCHEMA.TABLES -- TODO: change region if you use another one
    WHERE
      table_catalog = 'xxxxx' -- TODO: set project name
      AND NOT (
        table_name LIKE '%LOAD_TEMP_%'
        OR table_name = '__TABLES__'
        OR table_name LIKE '%TMP_%'
      )
  ),
  -- Every calendar year that appears in the read events.
  active_years AS (
    SELECT DISTINCT
      year
    FROM
      read_events
  ),
  -- Table x year grid, limited to years not before the table's creation year.
  table_years AS (
    SELECT
      catalog_tables.table,
      active_years.year
    FROM
      catalog_tables
    CROSS JOIN
      active_years
    WHERE
      active_years.year >= catalog_tables.year
  ),
  -- Yearly aggregation of the reads.
  yearly_usage AS (
    SELECT
      table,
      year,
      COUNT(*) AS PV,
      COUNT(DISTINCT user) AS UU
    FROM
      read_events
    WHERE
      NOT (
        table LIKE '%LOAD_TEMP_%'
        OR table = '__TABLES__'
        OR table LIKE '%TMP_%'
      )
    GROUP BY
      table,
      year
  )
SELECT
  table,
  year,
  COALESCE(yearly_usage.PV, 0) AS PV,
  COALESCE(yearly_usage.UU, 0) AS UU
FROM
  table_years
LEFT JOIN
  yearly_usage
USING
  (table, year)
-- Counts, per query text and calendar year, how many 'jobservice.getqueryresults'
-- audit-log rows were recorded for one principal (intended: the service account
-- used by a BI tool).
--
-- The audit-log table is read directly, one input row per log entry. An earlier
-- form also cross-joined UNNEST(protopayload_auditlog.authorizationInfo) without
-- using any of its fields, which multiplied `count` by the number of
-- authorization entries on each log row and dropped rows whose array was empty.
SELECT
  protopayload_auditlog.servicedata_v1_bigquery.jobGetQueryResultsResponse.job.jobConfiguration.query.query AS query,
  EXTRACT(year FROM timestamp) AS year,
  COUNT(*) AS count
FROM
  `xxx.xxx.cloudaudit_googleapis_com_data_access_*` -- TODO: project & dataset name
WHERE
  protopayload_auditlog.methodName = 'jobservice.getqueryresults'
  AND protopayload_auditlog.authenticationInfo.principalEmail = '[email protected]' -- TODO: service account for BI tool
  -- Skip log rows that carry no query text.
  AND protopayload_auditlog.servicedata_v1_bigquery.jobGetQueryResultsResponse.job.jobConfiguration.query.query IS NOT NULL
GROUP BY
  -- Positional: 1 = query text, 2 = year.
  1, 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment