Created
February 26, 2015 21:04
-
-
Save ajdavis/812fd2ad8fa4ca33ff78 to your computer and use it in GitHub Desktop.
Has an "-o" option to show only the matching portion of a line. Adapted from Batchelder: http://nedbatchelder.com/code/utilities/pygrep.py
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
""" | |
Usage: grep [OPTION]... PATTERN [FILE] ... | |
Search for PATTERN in each FILE or standard input. | |
Example: grep -i 'hello world' menu.h main.c | |
Regexp selection and interpretation: | |
-E, --extended-regexp PATTERN is an extended regular expression | |
-F, --fixed-strings PATTERN is a set of newline-separated strings | |
-G, --basic-regexp PATTERN is a basic regular expression | |
-e, --regexp=PATTERN use PATTERN as a regular expression | |
-f, --file=FILE obtain PATTERN from FILE | |
-w, --word-regexp force PATTERN to match only whole words | |
-x, --line-regexp force PATTERN to match only whole lines | |
-z, --null-data a data line ends in 0 byte, not newline | |
Miscellaneous: | |
-s, --no-messages suppress error messages | |
-v, --invert-match select non-matching lines | |
--mmap use memory-mapped input if possible | |
Output control: | |
-b, --byte-offset print the byte offset with output lines | |
-H, --with-filename print the filename for each match | |
-h, --no-filename suppress the prefixing filename on output | |
-q, --quiet, --silent suppress all normal output | |
--binary-files=TYPE assume that binary files are TYPE | |
TYPE is 'binary', 'text', or 'without-match'. | |
-a, --text equivalent to --binary-files=text | |
-I equivalent to --binary-files=without-match | |
-d, --directories=ACTION how to handle directories | |
ACTION is 'read', 'recurse', or 'skip'. | |
-o, --only-matching prints only the matching part of the lines. | |
-r, --recursive equivalent to --directories=recurse. | |
-L, --files-without-match only print FILE names containing no match | |
-l, --files-with-matches only print FILE names containing matches | |
-c, --count only print a count of matching lines per FILE | |
-Z, --null print 0 byte after FILE name | |
Context control: | |
-NUM same as --context=NUM | |
-U, --binary do not strip CR characters at EOL (MSDOS) | |
-u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS) | |
`egrep' means `grep -E'. `fgrep' means `grep -F'. | |
With no FILE, or when FILE is -, read standard input. If less than | |
two FILEs given, assume -h. Exit status is 0 if match, 1 if no match, | |
and 2 if trouble. | |
Report bugs to <[email protected]>.""" | |
import sys, re, getopt | |
class Queue: | |
def __init__(self, size): | |
self.size = size | |
self.values = [] | |
def append(self, value): | |
if self.size: | |
self.values.append(value) | |
while len(self.values) > self.size: | |
self.values.pop(0) | |
def pop(self): | |
if self.values: | |
return self.values.pop(0) | |
return None | |
def __len__(self): | |
return len(self.values) | |
def __getitem__(self, n): | |
return self.values[n] | |
def grep(pattern, f, options): | |
if options.whole_word: | |
pattern = r"\b" + pattern + r"\b" | |
if options.ignore_case: | |
p = re.compile(pattern, re.I) | |
else: | |
p = re.compile(pattern) | |
queue = Queue(options.before_context) | |
line_nb = 0 | |
from_line = -1 | |
to_line = -1 | |
last_line = 0 # Last printed line. | |
total = 0 | |
while 1: | |
l = f.readline() | |
line_nb += 1 | |
if not l: | |
break | |
match = 0 | |
if re.search(p, l): | |
from_line = line_nb - options.before_context | |
to_line = line_nb + options.after_context | |
match = 1 | |
total += 1 | |
if line_nb <= to_line: | |
if options.before_context or options.after_context: | |
if last_line and from_line > last_line + 1: | |
sys.stdout.write("--\n") | |
from_line = max(from_line, last_line + 1, -len(queue)) | |
for i in range(from_line - line_nb, 0): | |
if options.line_number: | |
sys.stdout.write("%d-" % (i + line_nb)) | |
sys.stdout.write(queue[i]) | |
if options.line_number: | |
if match: | |
sys.stdout.write("%d:" % line_nb) | |
else: | |
sys.stdout.write("%d-" % line_nb) | |
elif options.file_and_line: | |
sys.stdout.write("%s(%d): " % (f.name, line_nb)) | |
if options.only_matching: | |
sys.stdout.write(p.search(l).group(0)) | |
sys.stdout.write('\n') | |
else: | |
sys.stdout.write(l) | |
last_line = line_nb | |
queue.append(l) | |
return total | |
def printUsage(): | |
"""Print the help string that should printed by grep.py -h""" | |
print "usage: grep.py [options] pattern [file]" | |
print """ | |
-i, --ignore-case ignore case distinctions | |
-B, --before-context=NUM print NUM lines of leading context | |
-A, --after-context=NUM print NUM lines of trailing context | |
-C, --context[=NUM] print NUM (default 2) lines of output context | |
unless overridden by -A or -B | |
-n, --line-number print line number with output lines | |
-o, --only-matching prints only the matching part of the lines. | |
-V, --version print version information and exit | |
-w, --word-regexp force PATTERN to match only whole words | |
--help display this help and exit | |
See http://www.vdesmedt.com/~vds2212/grep.html for informations and updates. | |
Send an email to [email protected] for comments and bug reports.""" | |
def printVersion(): | |
print "pygrep.py version 0.6.0" | |
class Options: | |
def __init__(self): | |
self.ignore_case = 0 | |
self.before_context = 0 | |
self.after_context = 0 | |
self.line_number = 0 | |
self.only_matching = 0 | |
self.file_and_line = 0 | |
self.whole_word = 0 | |
def main(argv): | |
options = Options() | |
opts, args = getopt.getopt(argv, "ViA:B:C:onw", | |
["help", "version", "ignore-case", | |
"before-context=", "after-context=", "context=", | |
"only-matching", | |
"line-number", "word-regexp"]) | |
for o, v in opts: | |
if o in ["-i", "--ignore-case"]: | |
options.ignore_case = 1 | |
if o in ["-A", "--after-context"]: | |
options.after_context = int(v) | |
if o in ["-B", "--before-context"]: | |
options.before_context = int(v) | |
if o in ["-C", "--context"]: | |
if not v: | |
v = 2 | |
options.after_context = int(v) | |
options.before_context = int(v) | |
if o in ["-n", "--line-number"]: | |
options.line_number = 1 | |
if o in ["-o", "--only-matching"]: | |
options.only_matching = 1 | |
if o in ["-w", "--word-regexp"]: | |
options.whole_word = 1 | |
elif o in ["-V", "--version"]: | |
printVersion() | |
return 0 | |
elif o in ["--help"]: | |
printUsage() | |
return 0 | |
if len(args) <= 0: | |
printUsage() | |
return 1 | |
pattern = args[0] | |
if len(args) == 1: | |
return grep(pattern, sys.stdin, options) | |
if args[1][0] == '@': | |
options.file_and_line = 1 | |
total = 0 | |
ffile = open(args[1][1:]) | |
for fname in ffile.readlines(): | |
f = None | |
try: | |
f = open(fname.strip()) | |
except: | |
pass | |
if f: | |
total += grep(pattern, f, options) | |
sys.stdout.write("total found: %d\n\n\n" % total) | |
else: | |
f = open(args[1]) | |
return grep(pattern, f, options) | |
f.close() | |
if __name__ == "__main__": | |
sys.exit(main(sys.argv[1:])) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment