Last active
September 11, 2023 14:59
-
-
Save zcapper/adc97dee73ffa2b53e79b085a6f246ff to your computer and use it in GitHub Desktop.
This is a CloudFormation template that creates AWS Glue tables for your AWS logs, so you can easily query your ELB access logs and CloudTrail logs in Amazon Athena.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CloudFormation template: registers an AWS Glue database plus two external
# Glue tables (CloudTrail logs and Elastic Load Balancing access logs) over
# the standard "AWSLogs/<account-id>/..." prefixes of a single S3 bucket.
#
# The format version is quoted so YAML parsers treat it as a string rather
# than a date.
AWSTemplateFormatVersion: "2010-09-09"
Description: >-
  Creates AWS Glue tables for your AWS logs so that ELB access logs and
  CloudTrail logs can be queried with Amazon Athena.

Parameters:
  LoggingBucket:
    Description: The name of the S3 bucket that contains your AWS logs
    Type: String

Resources:
  # Database that holds both log tables. Its name is the bucket name itself.
  # NOTE(review): bucket names commonly contain '-' or '.'; such a database
  # name presumably has to be quoted in Athena queries -- confirm.
  GlueDatabase:
    Type: AWS::Glue::Database
    Properties:
      DatabaseInput:
        Name:
          Ref: LoggingBucket
        Description:
          Fn::Sub: Infrastructure and audit log data in the ${LoggingBucket} S3 bucket
      CatalogId:
        Ref: AWS::AccountId

  # External table over s3://<bucket>/AWSLogs/<account-id>/CloudTrail/,
  # read with the CloudTrail input format and SerDe.
  CloudTrails:
    Type: AWS::Glue::Table
    Properties:
      CatalogId:
        Ref: AWS::AccountId
      DatabaseName:
        Ref: GlueDatabase
      TableInput:
        Name: cloudtrails
        Description: AWS CloudTrail data
        TableType: EXTERNAL_TABLE
        Parameters:
          EXTERNAL: "TRUE"
        StorageDescriptor:
          Columns:
            - Name: eventversion
              Type: string
            - Name: useridentity
              Type: struct<type:string,principalid:string,arn:string,accountid:string,invokedby:string,accesskeyid:string,username:string,sessioncontext:struct<attributes:struct<mfaauthenticated:string,creationdate:string>,sessionissuer:struct<type:string,principalid:string,arn:string,accountid:string,username:string>>>
            - Name: eventtime
              Type: string
            - Name: eventsource
              Type: string
            - Name: eventname
              Type: string
            - Name: awsregion
              Type: string
            - Name: sourceipaddress
              Type: string
            - Name: useragent
              Type: string
            - Name: errorcode
              Type: string
            - Name: errormessage
              Type: string
            - Name: requestparameters
              Type: string
            - Name: responseelements
              Type: string
            - Name: additionaleventdata
              Type: string
            - Name: requestid
              Type: string
            - Name: eventid
              Type: string
            - Name: resources
              Type: array<struct<arn:string,accountid:string,type:string>>
            - Name: eventtype
              Type: string
            - Name: apiversion
              Type: string
            - Name: readonly
              Type: string
            - Name: recipientaccountid
              Type: string
            - Name: serviceeventdetails
              Type: string
            - Name: sharedeventid
              Type: string
            - Name: vpcendpointid
              Type: string
            # NOTE(review): purpose of the "_" column is not evident from this
            # template; kept as-is.
            - Name: _
              Type: string
          Location:
            Fn::Sub: s3://${LoggingBucket}/AWSLogs/${AWS::AccountId}/CloudTrail/
          InputFormat: com.amazon.emr.cloudtrail.CloudTrailInputFormat
          OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
          Compressed: false
          StoredAsSubDirectories: false
          SerdeInfo:
            SerializationLibrary: com.amazon.emr.hive.serde.CloudTrailSerde
            Parameters:
              "serialization.format": "1"
          Parameters: {}
        Retention: 0

  # External table over s3://<bucket>/AWSLogs/<account-id>/elasticloadbalancing/.
  # Each log line is split into columns by the RegexSerDe pattern below.
  ELBAccessLogs:
    Type: AWS::Glue::Table
    Properties:
      CatalogId:
        Ref: AWS::AccountId
      DatabaseName:
        Ref: GlueDatabase
      TableInput:
        # NOTE(review): the hyphen means this table name presumably must be
        # quoted in Athena queries; not renamed here because that would
        # break existing queries -- confirm before changing.
        Name: elb-logs
        Description: Elastic Load Balancing access log data
        TableType: EXTERNAL_TABLE
        Parameters:
          EXTERNAL: "TRUE"
        StorageDescriptor:
          # Column order must line up with the capture groups of input.regex
          # (32 columns, 32 capture groups).
          Columns:
            - Name: type
              Type: string
            - Name: time
              Type: string
            - Name: elb
              Type: string
            - Name: client_ip
              Type: string
            - Name: client_port
              Type: int
            - Name: target_ip
              Type: string
            - Name: target_port
              Type: int
            - Name: request_processing_time
              Type: double
            - Name: target_processing_time
              Type: double
            - Name: response_processing_time
              Type: double
            - Name: elb_status_code
              Type: string
            - Name: target_status_code
              Type: string
            - Name: received_bytes
              Type: bigint
            - Name: sent_bytes
              Type: bigint
            - Name: request_verb
              Type: string
            - Name: request_scheme
              Type: string
            - Name: request_host
              Type: string
            - Name: request_path
              Type: string
            - Name: request_query
              Type: string
            - Name: request_proto
              Type: string
            - Name: user_agent
              Type: string
            - Name: ssl_cipher
              Type: string
            - Name: ssl_protocol
              Type: string
            - Name: target_group_arn
              Type: string
            - Name: trace_id
              Type: string
            - Name: domain_name
              Type: string
            - Name: chosen_cert_arn
              Type: string
            - Name: matched_rule_priority
              Type: string
            - Name: request_creation_time
              Type: string
            - Name: actions_executed
              Type: string
            - Name: redirect_url
              Type: string
            - Name: lambda_error_reason
              Type: string
          Location:
            Fn::Sub: s3://${LoggingBucket}/AWSLogs/${AWS::AccountId}/elasticloadbalancing/
          InputFormat: org.apache.hadoop.mapred.TextInputFormat
          OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
          Compressed: false
          StoredAsSubDirectories: false
          SerdeInfo:
            SerializationLibrary: org.apache.hadoop.hive.serde2.RegexSerDe
            Parameters:
              "serialization.format": "1"
              # Single-quoted so YAML takes the pattern literally (backslashes
              # and double quotes included) and a later edit cannot turn part
              # of it into a comment or a mapping key.
              "input.regex": '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) "([^ ]*) ([\w]+):\/\/([^\s\/?#]+)-?(?:\/([^\s?#]*))?(\?[^\s#]+)? (- |[^ ]*)" "([^"]*)" ([A-Z0-9-]+) ([A-Za-z0-9.-]*) ([^ ]*) "([^"]*)" "([^"]*)" "([^"]*)" ([-.0-9]*) ([^ ]*) "([^"]*)" "([^"]*)"($| "[^ ]*").*'
          Parameters: {}
        Retention: 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment