-
-
Save rempargo/ddf6cca77ec9ad7281b54e82d82e1bc4 to your computer and use it in GitHub Desktop.
Nginx Logger Parser for Ruby
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'pry' | |
require 'json' | |
require 'uri' | |
module Application
  # Streams an nginx access log line by line, turning each line into a Hash
  # keyed by REQUEST_FORMAT, and tracks reading progress.
  class NginxLogParser
    # Matches the default nginx "combined"-style line with a literal "-"
    # referer. Capture groups line up, in order, with REQUEST_FORMAT.
    DEFAULT_FORMAT_REGEXP = /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s?\-\s?-\s?\[(\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\]\s?\\?"?(GET|POST|PUT|HEAD|DELETE|OPTIONS)\s?(.*?)\s(HTTP\/\d\.\d)\\?"?\s?(\d{3})\s?(\d+)\s?\\?\"\-\\?\"\s?\\?\"(.*?)\"/i

    # Field names assigned, positionally, to the regexp's capture groups.
    REQUEST_FORMAT = [
      :ip_address,
      :date,
      :symbol,
      :request_method,
      :request_path,
      :http_version,
      :response_status,
      :body_size,
      :user_agent
    ].freeze

    attr_reader :log_file, :current_line, :percent_read, :total_lines

    # The reader used to be declared under this misspelled name; keep it
    # callable so existing callers do not break.
    alias_method :log_gile, :log_file

    # @param log_file [String] path of the access log to read
    # @param regexp [Regexp, nil] custom line format; its capture groups are
    #   mapped positionally onto REQUEST_FORMAT. Defaults to
    #   DEFAULT_FORMAT_REGEXP.
    def initialize(log_file, regexp = nil)
      @log_file = File.open(log_file)
      # Count lines by streaming; File.foreach closes its own handle, so no
      # second descriptor is leaked and the file is not loaded into memory.
      @total_lines = File.foreach(log_file).count
      @current_line = 0
      @percent_read = 0
      @regexp = regexp || DEFAULT_FORMAT_REGEXP
    end

    # Reads every remaining line and yields its parsed Hash.
    #
    # @yieldparam parsed_line [Hash{Symbol => String, nil}] all values are
    #   nil when the line does not match the regexp
    # @return [nil]
    def parse
      while (line_data = readline)
        parsed_line = parse_line_to_object(line_data)
        yield(parsed_line) if block_given?
      end
    end

    # Parses the log and yields only the lines whose +field+ matches +regexp+.
    #
    # @param regexp [Regexp] pattern applied to the chosen field
    # @param field [Symbol] one of REQUEST_FORMAT
    # @yieldparam parsed_line [Hash{Symbol => String, nil}]
    # @yieldparam matches [MatchData] result of matching the field
    # @return [nil]
    def parse_matching(regexp, field = :request_path)
      parse do |parsed_line|
        field_value = parsed_line[field]
        next unless field_value

        matches = field_value.match(regexp)
        yield(parsed_line, matches) if matches && block_given?
      end
    end

    private

    # Returns the next raw line, or nil once the file is exhausted. Updates
    # current_line / percent_read to describe the line just returned.
    def readline
      return nil if @log_file.closed?

      if @log_file.eof?
        # Release the descriptor as soon as there is nothing left to read.
        @log_file.close
        return nil
      end

      line = @log_file.readline
      # Per-handle counter, read AFTER the line is consumed. The global `$.`
      # reflects whichever IO was read last, so it cannot be trusted here.
      @current_line = @log_file.lineno
      @percent_read = total_lines.zero? ? 100 : (@current_line * 100) / total_lines
      line
    end

    # Maps the regexp captures of +line+ onto REQUEST_FORMAT. Missing
    # captures (or a non-matching line) leave the corresponding values nil.
    def parse_line_to_object(line)
      matches = line.match(@regexp)
      data = matches ? matches.captures : []
      REQUEST_FORMAT.zip(data).to_h
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative 'application_nginx_log_parser' | |
# Wall-clock start, used at the end of the script to report the run time.
start_time = Time.now

# Intermediate JSON file holding one entry per matching log line.
RESULT_FILE = 'nginx.search.results.json'.freeze

# Matches search requests; the LAST capture group is the raw (still
# URL-encoded) value of the `q` parameter, which must be followed by `&`.
SEARCH_REGEXP = /search\/questions\?(.*?)((q\=(.*?)\&))/i
# Scans 'nginx.access.log' for search requests and writes every hit to
# RESULT_FILE as a single JSON array.
#
# Each array element is the parsed log line plus a :search key holding the
# last capture group of SEARCH_REGEXP. Progress is printed to stdout while
# reading.
#
# @return [Integer] number of bytes written to RESULT_FILE
def parse_file
  match_results = []

  # Without a second argument the default nginx log format is assumed.
  parser = Application::NginxLogParser.new('nginx.access.log')

  # It is possible to use a custom regexp to read line by line:
  # parser = Application::NginxLogParser.new('nginx.access.log', /(.*)/)

  # Parse and keep only the lines matching the search regexp.
  parser.parse_matching(SEARCH_REGEXP) do |parsed_line, matches|
    print "\r#{parser.current_line}/#{parser.total_lines} = #{parser.percent_read}%"
    match_results << { search: matches[-1] }.merge(parsed_line)
  end

  # Binary mode, as before, so the bytes are written unmodified.
  File.binwrite(RESULT_FILE, JSON.generate(match_results))
end
# Tells whether the command line asks to skip the log-parsing pass.
#
# @return [Boolean] true when ARGV contains `-s` or `--skip-log-parse`
def skip_log_file_parse?
  ARGV.include?('-s') || ARGV.include?('--skip-log-parse')
end
# Re-parse the access log unless the caller asked to reuse RESULT_FILE.
parse_file unless skip_log_file_parse?

# Decodes one raw query-string value. URI.decode was removed in Ruby 3.0;
# URI.decode_www_form_component is the form-encoding-aware replacement: it
# turns `+` into a space and `%2B` into a literal plus, so no separate
# plus-to-space pass is needed afterwards.
decode_term = lambda do |raw|
  begin
    URI.decode_www_form_component(raw)
  rescue ArgumentError
    # Malformed %-escape in the log: keep the text instead of aborting the
    # whole report, still showing `+` as a space.
    raw.tr('+', ' ')
  end
end

# Group data by decoded search term.
data = JSON.parse(File.read(RESULT_FILE))
grouped_data = data.group_by { |entry| decode_term.call(entry['search']) }
total_by_term = grouped_data.map { |term, entries| { search: term, total: entries.size } }
sorted = total_by_term.sort_by { |row| row[:total] }

# Well-formatted report of searches per term, most frequent first.
File.open('total_searches.json', 'wb') do |f|
  total_searches = sorted.sum { |row| row[:total] }
  json = JSON.pretty_generate({ total_searches: total_searches, report: sorted.reverse })
  f.write(json)
end

end_time = Time.now
runtime = (end_time - start_time)
puts "\nExecuted in %s seconds" % runtime
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment