Created
January 31, 2022 06:35
-
-
Save dannysauer/94e8df84d25d78806aef313b58ffae46 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use warnings; | |
use strict; | |
use Carp; | |
use DateTime; | |
use Date::Parse qw( str2time ); | |
use JSON qw( decode_json ); | |
# legacy ingress log examples:
# 198.53.182.251 - - [02/Dec/2021:01:15:03 +0000] "GET /insomnia-ubuntu/dists/default/Release.gpg HTTP/1.1" 404 14 "-" "Debian APT-HTTP/1.3 (2.0.6)" 127 0.054 [pulp-pulp-content-24816] [] 10.17.31.6:24816 14 0.052 404 eba226d18193c0907e4f76746410a7a7
# 183.3.220.130 - - [01/Dec/2021:09:25:40 +0000] "GET /favicon.ico HTTP/2.0" 404 14 "https://download.konghq.com/" "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36" 130 0.006 [pulp-pulp-content-24816] [] 10.17.13.2:24816 14 0.004 404 7260c85a9fb316187c5fa1ef0a8428c7
# 54.169.106.92 - - [25/Nov/2021:05:46:15 +0000] "GET / HTTP/1.1" 200 1168 "() { ignored; }; echo Content-Type: text/html; echo ; /bin/cat /etc/passwd" "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36" 494 0.009 [pulp-pulp-content-24816] [] 10.17.31.6:24816 12555 0.008 200 c9b6ff27f6ed78f88a8ce5b68b0dc6f1
#
#
# JSON ingress log example:
# root@danny-ubuntu:/tmp# zcat ingress.gz | head -n1 | jq
#{
# "ts": "2022-01-31T04:10:37+00:00",
# "tsNs": "1643602237.917",
# "level": "info",
# "msg": "302 -> GET download.konghq.com/gateway-0.x-centos-7/Packages/k/kong-0.14.1.el7.noarch.rpm",
# "service": "nginx-ingress",
# "duration": "0.368 s",
# "totalBytes": "10",
# "traceID": "",
# "token": "no_token",
# "clientIP": "74.11.33.194",
# "request": {
# "method": "GET",
# "requestSize": "416",
# "responseSize": "10",
# "userAgent": "Chef Client/14.5.33 (ruby-2.5.1-p57; ohai-14.5.4; x86_64-linux; +https://chef.io)",
# "remoteIp": "74.11.33.194",
# "referer": "",
# "protocol": "HTTP/1.1"
# },
# "status": "302",
# "uri": "download.konghq.com/gateway-0.x-centos-7/Packages/k/kong-0.14.1.el7.noarch.rpm",
# "extra": {
# "proxyUpstreamName": "pulp-pulp-content-24816",
# "proxyAlternativeUpstreamName": "",
# "upstreamStatus": "302",
# "upstreamAddr": "10.17.21.33:24816"
# }
#}
# emit_log( $timestamp, $ip, $status, $useragent, $method, $file )
#
# Print one simulated S3-format access-log record on STDOUT for a single
# ingress request.
#
#   $timestamp  request time, in a format str2time() can parse
#   $ip         client address, copied into the record as-is
#   $status     HTTP status, copied into the record as-is
#   $useragent  User-Agent string, copied into the record as-is
#   $method     HTTP method, used as the verb of the simulated request line
#   $file       request path (presumably always starting with "/" -- the
#               callers pass the path portion of the request)
#
# Returns nothing.  Requests for "/" produce no record.  A record whose
# path is missing or whose timestamp cannot be parsed is reported with
# carp and skipped, so one bad entry does not abort the whole run.
sub emit_log {
    my (
        $timestamp,
        $ip,
        $status,
        $useragent,
        $method,
        $file,
    ) = @_;

    if ( !defined $file ) {
        carp 'skipping record without a request path';
        return;
    }
    return if $file eq '/';

    # str2time() returns undef for input it cannot parse; handing that to
    # DateTime->from_epoch would throw, so check it here first.
    my $epoch = defined $timestamp ? str2time($timestamp) : undef;
    if ( !defined $epoch ) {
        carp "skipping record with unparseable timestamp '"
            . ( defined $timestamp ? $timestamp : '' ) . "'";
        return;
    }

    # Render the time in UTC using the bracketed access-log layout.
    my $dt = DateTime->from_epoch( epoch => $epoch );
    $dt->set_time_zone('UTC');
    my $ts = $dt->strftime("[%d/%b/%Y:%H:%M:%S %z]");

    # The whole path with every "/" turned into "__" ...
    my $simpath = $file;
    $simpath =~ s|/|__|g;

    # ... and whatever follows the last "/" as the file name.
    my $filename = substr( $file, 1 + rindex( $file, '/' ) );

    my $reqURI = "$method /pulp3-media/simulated/someobject?response-content-disposition=attachment;x-pulp-artifact-path=${simpath};filename=${filename}&X-fake-parameters=yessir HTTP/1.1";

    # the S3 format
    print qq{owner kong-cloud-01-prod-us-east-2-kong-packages-origin $ts $ip - requestor REST.GET.OBJECT key "$reqURI" $status "-" "-" 1 1 1 "referer" "$useragent" "-" hostid SigV2 - - host.header - arn \n};
    return;
}
# Main loop: read ingress log lines from the files named on the command
# line (or STDIN) and hand each request to emit_log().  Two input formats
# are recognised: the legacy text access-log line and the one-object-per-
# line JSON log.  Lines that fit neither are reported with carp and skipped.
LINE:
while (<>) {
    next LINE unless /\S/;    # blank lines carry no request

    # Legacy text format.
    if (
        m/
            (?P<ip>(?:\d{1,3}\.?){4})\s+
            (?P<identity>\S+)\s+
            (?P<userid>\S+)\s+
            \[(?P<time>.*?)\]\s+
            "(?P<request>
                (?P<request_method>\S+)\s+
                (?P<request_path>\S+)\s*
                .*?
            )"\s+
            (?P<status>\d+)\s+
            (?P<size>\d+|"-")\s+
            "(?P<referer>.*?)"\s+
            "(?P<useragent>.*?)"\s+
            (?P<body_bytes>\d+)\s+
            (?P<response_time>\d+(?:\.\d+)?)\s+
            (?P<rest_of_line>.*)\s*
            $
        /x
      )
    {
        emit_log( @+{'time', 'ip', 'status', 'useragent', 'request_method', 'request_path'} );
        next LINE;
    }

    # JSON format.  decode_json() throws on malformed input, so trap the
    # exception; otherwise a single bad line would end the whole run and
    # the warn-and-skip fallback below could never be reached.
    my $decoded = eval { decode_json($_) };
    if ( ref $decoded ne 'HASH' ) {
        carp $_;
        next LINE;
    }

    my $request = $decoded->{request};
    $request = {} unless ref $request eq 'HASH';

    # JSON logs use "hostname/path" in URI: keep everything from the first
    # "/" onward.  index() returns -1 when there is no "/", and substr()
    # with offset -1 would yield only the last character of the host name,
    # so a host-only URI is treated as a request for "/".
    my $uri   = defined $decoded->{uri} ? $decoded->{uri} : '';
    my $slash = index( $uri, '/' );
    my $path  = $slash >= 0 ? substr( $uri, $slash ) : '/';

    emit_log(
        @{$decoded}{'ts', 'clientIP', 'status'},
        $request->{userAgent},
        $request->{method},
        $path,
    );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment