Last active
July 19, 2024 15:40
-
-
Save chrispruitt/c0333f2fed69225398b849c63a580557 to your computer and use it in GitHub Desktop.
Idempotent script for managing an AWS ALB access log Athena table.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Manages the Athena database and table used to query AWS ALB access logs.
#
# Dependencies:
#   - athena-cli - https://github.com/justmiles/athena-cli
#   - aws cli
# Prerequisites:
#   - AWS ALB Access log s3 bucket
#   - AWS ALB Access logs created
#   - Update the variables below to match your environment
#   - Ensure the create table query is up to date with the latest spec:
#     https://docs.aws.amazon.com/athena/latest/ug/application-load-balancer-logs.html

# -e: stop on the first failing command; -u: treat unset variables as errors;
# pipefail: a pipeline fails when any of its stages fails.
set -euo pipefail

# Fail early with a readable message instead of a bare "command not found"
# halfway through the run.
for required_tool in aws athena; do
  if ! command -v "${required_tool}" >/dev/null 2>&1; then
    printf 'error: required command not found: %s\n' "${required_tool}" >&2
    exit 1
  fi
done

# Update these to match your environment.
ACCOUNT_ID="$(aws sts get-caller-identity --query Account --output text)"
if [[ -z "${ACCOUNT_ID}" ]]; then
  # An empty account id would silently produce wrong bucket names below.
  printf 'error: could not determine the AWS account id\n' >&2
  exit 1
fi
DATABASE="albaccesslogs"
TABLE_NAME="${DATABASE}.alb_access_logs"
S3_ACCESS_LOG_LOCATION="s3://alb-access-logs-${ACCOUNT_ID}/qa/"
# Create the database when it is missing; IF NOT EXISTS lets the statement
# succeed when the database is already there.
printf '%s\n' "Creating database if not exists."
create_database_sql="CREATE DATABASE IF NOT EXISTS ${DATABASE};"
athena query --statistics --sql "${create_database_sql}"
# Drop any existing table definition before the table is created again;
# IF EXISTS lets the statement succeed on a first run.
printf 'Dropping table %s\n' "${TABLE_NAME}"
drop_table_sql="DROP TABLE IF EXISTS ${TABLE_NAME};"
athena query --statistics --sql "${drop_table_sql}"
#######################################
# Prints the CREATE EXTERNAL TABLE statement for the ALB access log table.
# Globals:
#   TABLE_NAME             (read) fully qualified table name
#   ACCOUNT_ID             (read) AWS account id used in the log path
#   S3_ACCESS_LOG_LOCATION (read) S3 prefix under which the logs are stored
#   ALB_REGION             (read, optional) region segment of the log path;
#                          defaults to us-east-1
# Outputs:
#   The DDL statement on stdout.
#######################################
create_table_sql() {
  # Single-quoted on purpose: the shell must hand every backslash to Athena
  # untouched. Written directly inside the unquoted here-document below,
  # "\\s" would be collapsed to "\s" by the shell and the statement would no
  # longer carry the regex as it is written here.
  local input_regex='([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \"([^ ]*) (.*) (- |[^ ]*)\" \"([^\"]*)\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^\"]*)\" ([-.0-9]*) ([^ ]*) \"([^\"]*)\" \"([^\"]*)\" \"([^ ]*)\" \"([^\\s]+?)\" \"([^\\s]+)\" \"([^ ]*)\" \"([^ ]*)\" ?([^ ]*)?( .*)?'
  local region="${ALB_REGION:-us-east-1}"
  # Normalise to exactly one trailing slash so the path segments appended
  # below join cleanly whether or not the configured value ends with "/".
  local log_root="${S3_ACCESS_LOG_LOCATION%/}/"

  # Unquoted here-document: shell variables are expanded, while \` and \$
  # keep the backticks and the \${lb_name} / \${day} placeholders literal.
  # Only the partition columns declared in PARTITIONED BY (lb_name, day)
  # get projection.* properties.
  cat <<EOF
CREATE EXTERNAL TABLE IF NOT EXISTS ${TABLE_NAME} (
  \`type\` string,
  \`time\` string,
  elb string,
  client_ip string,
  client_port int,
  target_ip string,
  target_port int,
  request_processing_time double,
  target_processing_time double,
  response_processing_time double,
  elb_status_code int,
  target_status_code string,
  received_bytes bigint,
  sent_bytes bigint,
  request_verb string,
  request_url string,
  request_proto string,
  user_agent string,
  ssl_cipher string,
  ssl_protocol string,
  target_group_arn string,
  trace_id string,
  domain_name string,
  chosen_cert_arn string,
  matched_rule_priority string,
  request_creation_time string,
  actions_executed string,
  redirect_url string,
  lambda_error_reason string,
  target_port_list string,
  target_status_code_list string,
  classification string,
  classification_reason string,
  conn_trace_id string
)
PARTITIONED BY (
  \`lb_name\` string COMMENT '',
  \`day\` string COMMENT 'yyyy/MM/dd format'
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
  'serialization.format' = '1',
  'input.regex' = '${input_regex}'
)
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '${log_root}'
TBLPROPERTIES (
  'projection.enabled' = 'true',
  'projection.day.type' = 'date',
  'projection.day.format' = 'yyyy/MM/dd',
  'projection.day.interval' = '1',
  'projection.day.interval.unit' = 'DAYS',
  'projection.day.range' = 'NOW-90DAYS,NOW',
  'projection.lb_name.type' = 'injected',
  'storage.location.template' = '${log_root}\${lb_name}/AWSLogs/${ACCOUNT_ID}/elasticloadbalancing/${region}/\${day}/'
);
EOF
}

echo "Creating ${TABLE_NAME}"
athena query --statistics --sql "$(create_table_sql)"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment