Created
August 19, 2016 15:36
-
-
Save asottile/a07a28accd3ccdc3e99ee47359379cb3 to your computer and use it in GitHub Desktop.
Search RTF files in a directory for a term. (At the time of writing, this requires Python 2 because of the pyth dependency.)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import os | |
from pyth.plugins.rtf15.reader import Rtf15Reader | |
from pyth.plugins.plaintext.writer import PlaintextWriter | |
def get_text(filename):
    """Return the plain-text rendering of the RTF file at ``filename``.

    The file is parsed with ``Rtf15Reader`` and the resulting document is
    written out through ``PlaintextWriter``; the writer's buffer contents
    are returned.

    Exceptions raised while opening or parsing the file are not caught
    here and propagate to the caller.
    """
    # RTF is a byte-oriented format, so read it in binary mode: text mode
    # would apply newline translation (and Ctrl-Z end-of-file handling on
    # Windows), which can corrupt or truncate what the parser sees.
    with open(filename, 'rb') as f:
        doc = Rtf15Reader.read(f)
    return PlaintextWriter.write(doc).getvalue()
def main():
    """Print the name of each RTF file under a directory that contains a term.

    Command-line arguments (parsed by argparse from ``sys.argv``):
        search_term: text to look for; the comparison is case-insensitive.
        directory: root of the tree that is walked recursively.

    Files whose names do not end in ``.rtf`` (any case), or whose names
    start with ``~``, are skipped.  A file whose parsing raises
    ``ValueError`` is reported with a "Could not parse" line and skipped.

    Returns:
        None, which is treated as a successful exit status by the caller.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('search_term')
    parser.add_argument('directory')
    args = parser.parse_args()

    # Lower-case once; each file's text is lower-cased before comparing.
    term = args.search_term.lower()

    for root, _, filenames in os.walk(args.directory):
        for filename in filenames:
            # NOTE(review): the '~' prefix check presumably excludes editor
            # temp/lock files -- confirm.
            if not filename.lower().endswith('.rtf') or filename.startswith('~'):
                continue
            try:
                text = get_text(os.path.join(root, filename))
            except ValueError:
                print('Could not parse {}'.format(filename))
                # Without this, the check below would run against an
                # unbound ``text`` (NameError) or against the previous
                # file's text, reporting a bogus match for this file.
                continue
            if term in text.lower():
                print(filename)
if __name__ == '__main__':
    # Raise SystemExit directly instead of calling the ``exit`` helper:
    # that helper is installed by the ``site`` module for interactive use
    # and is absent when the interpreter is started without ``site``.
    raise SystemExit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment