Last active
November 17, 2019 08:45
-
-
Save fidelisrafael/db05790cd71db962b2da44e1d68a2981 to your computer and use it in GitHub Desktop.
Nginx Logger Parser for Ruby
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'pry' | |
require 'json' | |
require 'uri' | |
module Application
  # Reads an Nginx access log one line at a time and converts each line into
  # a Hash keyed by REQUEST_FORMAT. Progress through the file is exposed via
  # #current_line, #total_lines and #percent_read.
  class NginxLogParser
    # Default line pattern. Its capture groups line up, in order, with
    # REQUEST_FORMAT (the nested sign group inside the date is :symbol).
    DEFAULT_FORMAT_REGEXP = /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s?\-\s?-\s?\[(\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\]\s?\\?"?(GET|POST|PUT|HEAD|DELETE|OPTIONS)\s?(.*?)\s(HTTP\/\d\.\d)\\?"?\s?(\d{3})\s?(\d+)\s?\\?\"\-\\?\"\s?\\?\"(.*?)\"/i.freeze

    # Keys assigned, in order, to the capture groups of the line regexp.
    REQUEST_FORMAT = %i[
      ip_address
      date
      symbol
      request_method
      request_path
      http_version
      response_status
      body_size
      user_agent
    ].freeze

    attr_reader :log_file, :current_line, :percent_read, :total_lines

    # The reader was originally declared under the misspelled name `log_gile`;
    # the alias keeps any caller that used that name working.
    alias log_gile log_file

    # @param log_file [String] path of the access log to read
    # @param regexp [Regexp, nil] custom line pattern; DEFAULT_FORMAT_REGEXP
    #   is used when nil
    def initialize(log_file, regexp = nil)
      # File.foreach closes its handle when the enumeration ends and never
      # holds more than one line in memory.
      @total_lines = File.foreach(log_file).count
      @log_file = File.open(log_file)
      @current_line = 0
      @percent_read = 0
      @regexp = regexp || DEFAULT_FORMAT_REGEXP
    end

    # Reads the remaining lines of the log and yields one Hash per line.
    # Lines that do not match the regexp yield a Hash whose values are all nil.
    #
    # @yieldparam parsed_line [Hash{Symbol => String, nil}]
    # @return [nil]
    def parse
      while (line_data = read_next_line)
        parsed_line = parse_line_to_object(line_data)
        yield(parsed_line) if block_given?
      end
    end

    # Parses the log and yields only the lines whose +field+ matches +regexp+.
    #
    # @param regexp [Regexp] pattern applied to the chosen field
    # @param field [Symbol] one of REQUEST_FORMAT
    # @yieldparam parsed_line [Hash{Symbol => String, nil}]
    # @yieldparam matches [MatchData] result of matching the field
    # @return [nil]
    def parse_matching(regexp, field = :request_path)
      parse do |parsed_line|
        field_value = parsed_line[field]
        next unless field_value

        matches = field_value.match(regexp)
        yield(parsed_line, matches) if matches && block_given?
      end
    end

    # Releases the underlying file handle. Safe to call more than once.
    #
    # @return [nil]
    def close
      @log_file.close unless @log_file.closed?
      nil
    end

    private

    # Returns the next raw line, or nil once the log is exhausted. The handle
    # is closed as soon as the end of the file is reached.
    def read_next_line
      return nil if @log_file.closed?

      if @log_file.eof?
        @log_file.close
        return nil
      end

      line = @log_file.readline
      # Use this handle's own counter, taken after the read: the global `$.`
      # belongs to whichever IO was read last and lags one line behind when
      # sampled before the read.
      @current_line = @log_file.lineno
      @percent_read = total_lines.zero? ? 100 : (@current_line * 100) / total_lines
      line
    end

    # Maps the capture groups of one line onto REQUEST_FORMAT.
    def parse_line_to_object(line)
      # scrub replaces bytes that are invalid in the line's encoding, so a
      # malformed request line cannot make the regexp match raise.
      matches = line.scrub.match(@regexp)
      data = matches ? matches.captures : []
      REQUEST_FORMAT.zip(data).to_h
    end
  end
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require_relative 'application_nginx_log_parser' | |
# Wall-clock start, used at the end of the script to report the run time.
start_time = Time.now

# JSON file that holds one entry per matching log line.
RESULT_FILE = 'nginx.search.results.json'

# Matches a questions-search request path and captures the raw (still
# URL-encoded) value of its `q` parameter as the LAST capture group.
# `q` may be the first parameter or follow others after an `&`, and its value
# ends at the next `&` or at the end of the path. Parameters whose name merely
# ends in "q" (e.g. `faq=`) are not mistaken for `q`.
SEARCH_REGEXP = %r{search/questions\?(?:.*?&)?q=([^&]*)}i
# Scans an Nginx access log for search requests and stores every hit as JSON.
#
# Each stored entry is the parsed log line plus a :search key holding the last
# capture group of SEARCH_REGEXP. A progress line is printed for every hit.
#
# @param log_path [String] access log to read
# @param result_path [String] JSON file to (over)write with the hits
# @return [Integer] number of bytes written to +result_path+
def parse_file(log_path = 'nginx.access.log', result_path = RESULT_FILE)
  match_results = []

  # Without a second argument the default Nginx log format is assumed.
  # A custom regexp can be given to read each line differently, e.g.:
  #   Application::NginxLogParser.new(log_path, /(.*)/)
  parser = Application::NginxLogParser.new(log_path)

  # Parse the log and keep only the lines whose request path matches.
  parser.parse_matching(SEARCH_REGEXP) do |parsed_line, matches|
    print "\r#{parser.current_line}/#{parser.total_lines} = #{parser.percent_read}%"
    match_results << { search: matches[-1] }.merge(parsed_line)
  end

  # Store all hits as one compact JSON array.
  File.write(result_path, JSON.generate(match_results), mode: 'wb')
end
# Tells whether the log-parsing step should be skipped, so the report is
# rebuilt from the result file that already exists.
#
# @param argv [Array<String>] command-line arguments to inspect
# @return [Boolean] true when `-s` or `--skip-log-parse` is present
def skip_log_file_parse?(argv = ARGV)
  argv.include?('-s') || argv.include?('--skip-log-parse')
end
# Turns one raw `q=` value from the log into readable text.
#
# URI.decode_www_form_component handles both percent-escapes and `+` (space),
# so "a+b" and "a%20b" become the same term while an escaped plus ("%2B")
# stays a literal "+". URI.decode, used previously, no longer exists in
# Ruby 3.
#
# @param raw_term [String, nil] value exactly as it appeared in the request
# @return [String] decoded term; on a malformed percent-escape the raw text
#   with `+` replaced by spaces
def decode_search_term(raw_term)
  # scrub keeps undecodable byte sequences from breaking JSON generation.
  URI.decode_www_form_component(raw_term.to_s).scrub
rescue ArgumentError
  raw_term.to_s.tr('+', ' ')
end

parse_file unless skip_log_file_parse?

# Count how many times each decoded term was searched.
search_entries = JSON.parse(File.read(RESULT_FILE))
totals = search_entries.each_with_object(Hash.new(0)) do |entry, counts|
  counts[decode_search_term(entry['search'])] += 1
end

total_by_term = totals.map { |term, total| { search: term, total: total } }
sorted = total_by_term.sort_by { |row| row[:total] }

# Pretty-printed report, most searched term first.
File.open('total_searches.json', 'wb') do |f|
  total_searches = sorted.sum { |row| row[:total] }
  json = JSON.pretty_generate({ total_searches: total_searches, report: sorted.reverse })
  f.write(json)
end

end_time = Time.now
runtime = (end_time - start_time)
puts "\nExecuted in %s seconds" % runtime
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment