Skip to content

Instantly share code, notes, and snippets.

@mcanaves
Forked from gane5h/datadog-nginx
Last active July 11, 2017 15:48
Show Gist options
  • Save mcanaves/3539c8aff31985bebd24305678fccfd6 to your computer and use it in GitHub Desktop.
Save mcanaves/3539c8aff31985bebd24305678fccfd6 to your computer and use it in GitHub Desktop.
Nginx json log parsing with datadog
"""
Thanks to gane5h for the original script.
Custom parser for JSON nginx log suitable for use by Datadog 'dogstreams'.
To use, add to datadog.conf as follows:
dogstreams: [path to nginx log (e.g. "/var/log/nginx/access.log")]:[path to this python script (e.g.
"/usr/share/datadog/agent/dogstream/nginx.py")]:[name of parsing method of this file ("parse")]
so, an example line would be:
dogstreams: /var/log/nginx/access.log:/usr/share/datadog/agent/dogstream/nginx.py:parse
The nginx log format should be defined as follows:
log_format access_json '{ "time": "$time_local", '
'"client": "$remote_addr", '
'"method": "$request_method", '
'"request": "$request", '
'"request_length": "$request_length", '
'"status": "$status", '
'"response_length": "$body_bytes_sent", '
'"referrer": "$http_referer", '
'"user_agent": "$http_user_agent", '
'"request_time": "$request_time", '
'"upstream_time": "$upstream_response_time", '
'"is_pipelined": "$pipe" }';
when starting dd-agent, you can find the collector.log and check if the dogstream initialized successfully
"""
from datetime import datetime
import time
import re
# Datadog metric names emitted by this parser, keyed by internal label.
METRIC_TYPES = {
    'AVERAGE_RESPONSE': 'nginx.net.avg_response',
    'FIVE_HUNDRED_STATUS': 'nginx.net.5xx_status',
    'FOUR_HUNDRED_STATUS': 'nginx.net.4xx_status',
    'THREE_HUNDRED_STATUS': 'nginx.net.3xx_status',
    'TWO_HUNDRED_STATUS': 'nginx.net.2xx_status',
    'ONE_HUNDRED_STATUS': 'nginx.net.1xx_status',
    'HTTP_1_REQUESTS': "nginx.net.http_1_requests",
    'HTTP_2_REQUESTS': "nginx.net.http_2_requests",
}
# Patterns matching fields of the JSON-formatted nginx access log described in
# the module docstring. Raw strings (r'...') avoid the invalid-escape-sequence
# SyntaxWarning that plain strings with \d / \. produce on modern Python.
TIME_REGEX = r'"time": "\d{2}/[a-zA-Z]{3}/\d{4}:\d{2}:\d{2}:\d{2}'
TIME_REGEX_SPLIT = re.compile(r'"time": "')
REQUEST_TIME_REGEX = r'"request_time": "[-+]?[0-9]*\.?[0-9]+'
REQUEST_REGEX_SPLIT = re.compile(r'"request_time": "')
STATUS_REGEX_5xx = r'"status": "5[0-9]{2}"'
STATUS_REGEX_4xx = r'"status": "4[0-9]{2}"'
STATUS_REGEX_3xx = r'"status": "3[0-9]{2}"'
STATUS_REGEX_2xx = r'"status": "2[0-9]{2}"'
STATUS_REGEX_1xx = r'"status": "1[0-9]{2}"'
# The dot after the major version is escaped: the original 'HTTP\/1.' matched
# ANY character after the "1", not a literal dot.
REQUEST_REGEX_HTTP_1 = r'HTTP/1\.'
REQUEST_REGEX_HTTP_2 = r'HTTP/2\.'
def parse(log_obj, log_line):
    """Parse one nginx JSON access-log line into Datadog metric tuples.

    Each returned tuple is (metric_name, timestamp, value, attributes).
    Status-class and HTTP-version hits are emitted as counters with value 1;
    the request time (when present) is emitted as a gauge. Returns None for
    an empty line, otherwise a (possibly empty) list of tuples.
    """
    if not log_line:
        log_obj.info("Skipping empty line")
        return None

    ts = get_timestamp(log_line)
    metrics = []

    # One counter per matching predicate, table-driven.
    counters = (
        ('FIVE_HUNDRED_STATUS', is_http_response5xx),
        ('FOUR_HUNDRED_STATUS', is_http_response_4xx),
        ('THREE_HUNDRED_STATUS', is_http_response_3xx),
        ('TWO_HUNDRED_STATUS', is_http_response_2xx),
        ('ONE_HUNDRED_STATUS', is_http_response_1xx),
        ('HTTP_1_REQUESTS', is_request_http1),
        ('HTTP_2_REQUESTS', is_request_http2),
    )
    for label, predicate in counters:
        if predicate(log_line):
            metrics.append((METRIC_TYPES[label], ts, 1, {'metric_type': 'counter'}))

    response_time = parse_avg_time(log_line)
    if response_time is not None:
        metrics.append((METRIC_TYPES['AVERAGE_RESPONSE'], ts, response_time, {'metric_type': 'gauge'}))
    return metrics
def get_timestamp(log_line):
    """Extract the "time" field from *log_line* and return it as a Unix epoch.

    Interprets the timestamp as local time (time.mktime). Raises
    AttributeError if the line has no "time" field, as the original did.
    """
    matched = re.search(TIME_REGEX, log_line).group(0)
    raw_time = TIME_REGEX_SPLIT.split(matched)[1]
    parsed = datetime.strptime(raw_time, "%d/%b/%Y:%H:%M:%S")
    return time.mktime(parsed.timetuple())
def parse_avg_time(log_line):
    """Return the "request_time" field of *log_line* as a float, or None if absent."""
    matched = re.search(REQUEST_TIME_REGEX, log_line)
    if matched is None:
        return None
    parts = REQUEST_REGEX_SPLIT.split(matched.group(0))
    return float(parts[1]) if len(parts) == 2 else None
def is_http_response5xx(log_line):
    """Return True if *log_line* records a 5xx status code."""
    return re.search(STATUS_REGEX_5xx, log_line) is not None
def is_http_response_4xx(log_line):
    """Return True if *log_line* records a 4xx status code."""
    return re.search(STATUS_REGEX_4xx, log_line) is not None
def is_http_response_3xx(log_line):
    """Return True if *log_line* records a 3xx status code."""
    return re.search(STATUS_REGEX_3xx, log_line) is not None
def is_http_response_2xx(log_line):
    """Return True if *log_line* records a 2xx status code."""
    return re.search(STATUS_REGEX_2xx, log_line) is not None
def is_http_response_1xx(log_line):
    """Return True if *log_line* records a 1xx status code."""
    return re.search(STATUS_REGEX_1xx, log_line) is not None
def is_request_http1(log_line):
    """Return True if *log_line* records an HTTP/1.x request."""
    return re.search(REQUEST_REGEX_HTTP_1, log_line) is not None
def is_request_http2(log_line):
    """Return True if *log_line* records an HTTP/2.x request."""
    return re.search(REQUEST_REGEX_HTTP_2, log_line) is not None
if __name__ == "__main__":
import sys
import pprint
import logging
logging.basicConfig()
log = logging.getLogger()
lines = open(sys.argv[1]).readlines()
pprint.pprint([parse(log, line) for line in lines])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment