LogFilter Viya Py
#!/usr/bin/env python
#
# Script to scan dedicated time frames from sas/viya config folder log files.
# USAGE :
# python logfilter.py -i /opt/sas/viya/config/var/log -l /home/cloud-user/logfiltered.log -o /home/cloud-user/logO.log -v -b "2018-03-01 12:20:00" -e "2018-03-01 13:00:00"
#
# Given a begin and end time it scans all "*.log" files below the given sas config folder.
# Requirement is that the logfile lines start with one of the default formats like
# - 2017-10-04 12:55:53,380
# - 2017-10-04T11:00:00,600
# This is due to the limitation of dateutil.parser.parse, which requires the
# datetime part of the string to be extracted from the rest of the line.
# If the first 24 characters of a line contain a datetime, everything between
# begin and end time will be dumped to the output file.
#
# Author: Thomas Rocks / gertro
# Version 0.1 October 4th, 2017: First test release
# Version 0.2 December 5th, 2017: Some bug fixes
# Version 0.3 January 31st, 2018: added support for Viya
# Version 0.4 February 23rd, 2018: minor fixes for file modification and creation conditions
# Version 0.5 March 2nd, 2018: IOError instead of FileNotFoundError exception for Python 2.x compatibility
# Version 0.6 April 27th: some extensions for omitting file date checks
# Version 0.7 May 18th: Allow filtering

import argparse
import datetime
import dateutil
import dateutil.parser
import logging
import os
import sys
from glob import glob


# Try to guess a datetime from a string: the explicit strptime formats first,
# dateutil.parser.parse as a fallback. Returns None if nothing parses.
def getDT(str, fmts):
    dt = None
    if str is not None:
        for fmt, strlen in fmts:
            try:
                if len(str) > strlen:
                    sub = str[0:strlen-1]
                else:
                    sub = str
                dt = datetime.datetime.strptime(sub, fmt)
                logging.debug("in: %s" % sub)
                logging.debug("dt[%s]: %r" % (fmt, dt))
                logging.debug("dt.date: %r" % dt.date())
                logging.debug("dt.time: %r" % dt.time())
                break
            except ValueError:
                pass
        if dt is None:
            try:
                if len(str) > 24:
                    sub = str[0:23]
                else:
                    sub = str
                dt = dateutil.parser.parse(sub)
                logging.debug("in: %s" % sub)
                logging.debug("dt[parsed]: %r" % dt)
                logging.debug("dt.date: %r" % dt.date())
                logging.debug("dt.time: %r" % dt.time())
                dt = dt.replace(tzinfo=None)
                logging.debug("dt[parsed w/o tz]: %r" % dt)
                logging.debug("dt.date w/o tz: %r" % dt.date())
                logging.debug("dt.time w/o tz: %r" % dt.time())
            except (ValueError, OverflowError) as e:
                dt = None
                logging.debug("No valid time identified: %r" % str)
    return dt


# Future: scanning saslog-files
#def saslogs(folder, dtBegin, dtEnd, fmts, exclude_list):
#    matches = [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.saslog"))]


# Collect all log files below the given folder that may overlap the requested time frame.
def filelist(folder, all, dtBegin, dtEnd, fmts, exclude_list):
    matches = [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.log"))]
    matches = matches + [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.log.?"))]
    matches = matches + [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.err"))]
    matches = matches + [y for x in os.walk(folder) for y in glob(os.path.join(x[0], "*.watch-log"))]
    filtered = []
    for file in matches:
        logging.debug("Checking valid time frame %r" % file)
        mod = datetime.datetime.fromtimestamp(os.path.getmtime(file))
        creat = datetime.datetime.fromtimestamp(os.path.getctime(file))
        logging.debug("file: %r ctime: %r mtime: %r" % (file, creat, mod))
        # Some files seem to get a new creation date when changed;
        # in this case we try to get the creation date from the first line.
        if all or ((creat >= dtEnd) and (creat >= mod)):
            logging.debug("file: %r ctime: %r == mtime: %r" % (file, creat, mod))
            size = os.path.getsize(file)
            if size > 24:
                f = open(file)
                for i in range(0, 10):
                    try:
                        chkline = f.readline()
                        logging.debug("file: %r firstline: %r" % (file, chkline))
                        chkstr = chkline[0:23]
                        creat2 = getDT(chkstr, fmts)
                        if creat2 is not None and creat2 < creat:
                            logging.debug("file reassign: %r ctime: %r new ctime: %r" % (file, creat, creat2))
                            creat = creat2
                        break
                    except UnicodeDecodeError:
                        pass
                f.close()
        logging.debug("basename: %r" % os.path.basename(file))
        if all or ((mod >= dtBegin) and (creat <= dtEnd)) and os.path.basename(file) not in exclude_list:
            logging.debug("Adding %r to filtered list [creat: %r mod: %r]" % (file, creat, mod))
            filtered.append(file)
    logging.debug("filtered after: %r" % filtered)
    return sorted(filtered, key=os.path.getmtime)


def main(argv):
    parser = argparse.ArgumentParser(description='Search Logfiles recursively and scan for dedicated time frame (by default last hour)')
    parser.add_argument('-i', '--input', help='input folder', required=True)
    parser.add_argument('-o', '--output', help='output file', default=sys.argv[0].rsplit(".", 1)[0] + ".txt")
    parser.add_argument('-x', '--exclude', help='exclude list', default=sys.argv[0].rsplit(".", 1)[0] + ".exclude")
    parser.add_argument('-a', '--all', help='Scan all available files, ignore file dates', action='store_true', default=False)
    parser.add_argument('-f', '--filter', help='List of keywords to search for (case sensitive and separated by white space)', nargs='*', default=[])
    parser.add_argument('-b', '--begintime', help='Begin of extraction (%%Y-%%m-%%d %%H:%%M:%%S)', default=(datetime.datetime.now() - datetime.timedelta(hours=1)).strftime("%Y-%m-%d %H:%M:%S"))
    parser.add_argument('-e', '--endtime', help='End of extraction (%%Y-%%m-%%d %%H:%%M:%%S)', default=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    parser.add_argument('-l', '--logfile', help='path of logfile', default=sys.argv[0].rsplit(".", 1)[0] + ".log")
    parser.add_argument('-v', '--verbose', help='Use Log-Level DEBUG instead of INFO', action='store_true', default=False)
    args = parser.parse_args()
    if args.verbose:
        logging.basicConfig(filename=args.logfile, filemode='w', level=logging.DEBUG, format='%(asctime)s %(message)s')
    else:
        logging.basicConfig(filename=args.logfile, filemode='w', level=logging.INFO, format='%(asctime)s %(message)s')
    # Known timestamp formats as (strptime format, expected length + 1) pairs.
    fmts = [("%Y-%m-%d %H:%M:%S", 20),
            ("%d-%m-%Y %H:%M:%S", 20),
            ("%Y-%m-%d %H:%M:%S,%f", 24),
            ("%Y-%m-%dT%H:%M:%S,%f", 24),
            ("%d-%m-%Y %H:%M:%S,%f", 24),
            ("%Y-%m-%d %H:%M:%S,%f", 27),
            ("%Y-%m-%dT%H:%M:%S,%f", 27),
            ("%d-%m-%Y %H:%M:%S,%f", 27),
            ("%Y/%m/%d %H:%M:%S", 20)]
    dtBegin = getDT(args.begintime, fmts)
    dtEnd = getDT(args.endtime, fmts)
    exclude_list = []
    if os.path.exists(args.exclude):
        with open(args.exclude) as exclude:
            exclude_list = exclude.read().splitlines()
    logging.debug("exclude_list: %r" % exclude_list)
    file_list = filelist(args.input, args.all, dtBegin, dtEnd, fmts, exclude_list)
    logging.debug("file_list valid for timeframe: %r" % file_list)
    # Surround the keywords with blanks so that only whole words match.
    filterlist = []
    for val in args.filter:
        filterlist.append(' ' + val + ' ')
    outlist = []
    for filename in file_list:
        try:
            between = False
            lineNumber = 0
            datesfound = 0
            with open(filename) as infile:
                try:
                    for line in infile:
                        lineNumber += 1
                        logging.debug("filename: %r line: %r" % (filename, line))
                        dtLine = getDT(line, fmts)
                        if dtLine is not None:
                            datesfound += 1
                            if dtLine >= dtBegin:
                                logging.debug("%r[%d] dtLine %r dtBegin %r begins" % (filename, lineNumber, dtLine, dtBegin))
                                between = True
                            if dtLine > dtEnd:
                                logging.debug("%r[%d] dtLine %r dtEnd %r ends" % (filename, lineNumber, dtLine, dtEnd))
                                between = False
                                break
                        if between:
                            if args.filter:
                                for val in filterlist:
                                    if val in line:
                                        outlist.append("%s[line %d]:%s" % (filename, lineNumber, line))
                                        break
                            else:
                                outlist.append("%s[line %d]:%s" % (filename, lineNumber, line))
                except UnicodeDecodeError:
                    pass
            if datesfound == 0:
                logging.error("%r doesn't contain any parsable timestamp" % (filename))
        # except FileNotFoundError:
        except IOError:
            logging.error("%r couldn't be opened!" % (filename))
            pass
    with open(args.output, "w") as outfile:
        for line in outlist:
            outfile.writelines(line)


if __name__ == "__main__":
    main(sys.argv[1:])
How-To for logfilter.py
Requirements: python-dateutil library installed (works with system python2 and python3)
Installation for system python2: "yum install python-dateutil"
Installation for other python versions: "python -m pip install python-dateutil"
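To confirm the library is importable for the interpreter you intend to use, here is a minimal check (an illustrative snippet, not part of the tool):

import dateutil.parser
print(dateutil.parser.parse("2017-10-04 12:55:53"))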
The tool is parameterized via command line options.
'-i','--input' : Root folder for recursive log file scan (required)
'-o','--output' : Output file with extracted log lines (default: logfilter.txt)
'-x','--exclude' : File containing a list of files to be excluded (saves time; by default "logfilter.exclude" is used if it exists; see the example after this list)
'-a','--all' : Scan all available files, ignore file dates
'-f','--filter' : List of keywords to search for (case sensitive and separated by white space)
'-b','--begintime' : Begin timestamp, format %Y-%m-%d %H:%M:%S (default: current time - 1 hour; warning: the extracted output can become huge)
'-e','--endtime' : End timestamp, format %Y-%m-%d %H:%M:%S (default: current time)
'-l','--logfile' : If you suspect an error while using the tool, check this logfile (default: logfilter.log)
'-v','--verbose' : If you suspect an error, be very chatty in the logfile (warning: it will then contain nearly all scanned log contents)
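The exclude file is read one file name per line and matched against the basename of each candidate log file. A hypothetical logfilter.exclude (the names below are placeholders, not recommendations):

audit.log
maintenance_repository.log
backup.log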
The most complex issue in identifying timestamps in logs is the variety of date formats in use; there is no all-purpose, fast parser that recognizes every datetime format regardless of where it is positioned in a string, which is why the script only inspects the beginning of each line.
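The script therefore tries a list of explicit strptime formats first and only then falls back to dateutil. Reduced to a minimal sketch (the function name and the short format list below are illustrative, not the script's full fmts table):

import datetime
import dateutil.parser

FMTS = (("%Y-%m-%d %H:%M:%S,%f", 23), ("%Y-%m-%dT%H:%M:%S,%f", 23))

def guess_datetime(text, fmts=FMTS):
    # Try the known explicit formats first; they are fast and unambiguous.
    for fmt, length in fmts:
        try:
            return datetime.datetime.strptime(text[:length], fmt)
        except ValueError:
            pass
    # Fall back to dateutil, which is slower but more tolerant; only the
    # first 23 characters of the line are considered.
    try:
        return dateutil.parser.parse(text[:23]).replace(tzinfo=None)
    except (ValueError, OverflowError):
        return None

print(guess_datetime("2017-10-04 12:55:53,380 INFO Server started"))

Trying the explicit formats first keeps the common case fast; the dateutil fallback catches the occasional deviating format at the cost of speed.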
Here is an example of using it on Windows to cut two small timeframes into two files (the customer had sent the log archive for analysis):
python C:\Users\gertro\Documents\SAS\WORK\saspython\logfilter.py -i log -o logextract_0850AM.txt -l logfilter.log -v -b "2018-02-22 08:49:00" -e "2018-02-22 08:53:00"
python C:\Users\gertro\Documents\SAS\WORK\saspython\logfilter.py -i log -o logextract_0830AM.txt -l logfilter.log -v -b "2018-02-22 08:29:00" -e "2018-02-22 08:31:00"
Here is an execution live on the system:
sudo python /sas/bin/logfilter.py -i /opt/sas/viya/config/var/log -l /home/sas/backup/logfilter.log -o /home/sas/backup/viyalogs2.log -v -b "2018-02-12 04:19:00" -e "2018-02-12 04:21:00"
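And a hypothetical run that combines keyword filtering (-f) with an exclude file (-x); the keywords and the exclude path are placeholders:

sudo python /sas/bin/logfilter.py -i /opt/sas/viya/config/var/log -l /home/sas/backup/logfilter.log -o /home/sas/backup/errorlines.txt -x /sas/bin/logfilter.exclude -f ERROR WARN -b "2018-02-12 04:00:00" -e "2018-02-12 05:00:00"

Matching lines land in the output file in the form filename[line N]:original line.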
Caution: to read all logs you need root permissions.