Last active
November 29, 2016 15:13
-
-
Save priintpar/8a54443e57255d5814cbc1bdf177fcdf to your computer and use it in GitHub Desktop.
Modified version from https://gist.github.com/Azlirn/a7107212401f1865106a5e1476303f3e
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
# In its current configuration, this script should be able to process roughly
# 200,000 records per second (rps).
# GLOBAL VARIABLE | |
def rmDomain(oldfile, newfile, cropfile):
    """Copy ``oldfile`` to ``newfile``, skipping records that contain a crop word.

    Every line of ``oldfile`` is lower-cased and checked against the entries
    of ``cropfile`` (one entry per line); a line is written to ``newfile``
    only if none of the entries occurs in it as a substring.

    Args:
        oldfile: Path of the text file to read records from.
        newfile: Path of the file to write the surviving records to
            (truncated if it already exists).
        cropfile: Path of the file listing the words/domains to drop.

    Raises:
        SystemExit: Always raised after the summary has been printed; the
            function terminates the script instead of returning.

    If any of the three files cannot be opened, an error is printed and the
    interactive prompt is restarted through ``main()``.
    """
    import sys  # function-scope import: the module header only imports os and time

    start_time = time.time()
    hitcounter = 0  # records written to newfile
    pcounter = 0    # records read from oldfile
    try:
        with open(cropfile) as cFile:
            # Normalise entries the same way records are normalised (lower
            # case) and drop blank ones: '' is a substring of every string,
            # so a single empty line would otherwise discard every record.
            emailDomains = [
                word
                for word in (raw.strip().lower() for raw in cFile)
                if word
            ]
        with open(oldfile) as oFile, open(newfile, 'w') as nFile:
            try:
                for line in oFile:
                    pcounter += 1
                    lowerLine = line.lower()
                    if not any(domain in lowerLine for domain in emailDomains):
                        nFile.write(line)
                        hitcounter += 1
                    # '\r' rewinds to the start of the line so the counter
                    # updates in place instead of scrolling.
                    sys.stdout.write('\r[*] - {%s} records processed...' % pcounter)
            except Exception as e:
                # Best effort: report the failure and still print the summary
                # for whatever was processed so far. Call main() here instead
                # if the script should restart after a processing error.
                print('[!] Error Occurred: %s' % e)
    except (IOError, OSError) as e:
        # Name the file that actually failed; fall back to oldfile when the
        # exception does not carry a filename.
        badfile = getattr(e, 'filename', None) or oldfile
        print("[!] Error opening %s: %s" % (badfile, e))
        print("[!] Ensure the file %s exists in your current directory." % badfile)
        print("[*] Also, check your spelling and be sure you add the extension to your file name!")
        time.sleep(5)
        return main()

    elapsed = time.time() - start_time
    print('\n[*] === COMPLETE === [*]')
    print('[*] %s was saved' % newfile)
    print('[*] There are %s records in your saved file.' % hitcounter)
    print('[*] You processed %s total records.\n' % pcounter)
    print("[*] === Completed in %s seconds === [*]" % elapsed)
    time.sleep(5)
    sys.exit()
def main():
    """Run the interactive front end of the domain stripper.

    Clears the terminal, prints the banner, asks for the input file, the
    output file and the crop-word file, waits for confirmation and hands the
    three names to ``rmDomain``. An empty answer to any of the three
    questions prints a warning, pauses for two seconds and restarts the
    prompt from the top.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    os.system('cls' if os.name == 'nt' else 'clear')
    print('''
    ,--.                o         ,---.|         o
    |   |,---.,-.-.,---..,---.    `---.|--- ,---..,---.,---.,---.,---.
    |   ||   || | |,---|||   |        ||    |    ||   ||   ||---'|
    `--' `---'` ' '`---^``   '    `---'`---'`    `|---'|---'`---'`
                                                  |    |
''')
    print('\n[!] Currently this script only supports stripping text files.\n')
    # TODO: Add flag to allow csv files to be processed
    print('[*] Thanks to leakedsource.com for providing an awesome list of domains to ignore.\n')

    prompts = (
        '{*} Enter the file (with extension) you would like to strip domains from: ',
        '{*} Enter the name of the file (with extension) you would like me to save: ',
        '{*} Enter the name of the file (with extension) including the cropwords: ',
    )
    answers = []
    for prompt in prompts:
        answer = ask(prompt)
        if answer == '':
            print('[!] You must define a filename')
            time.sleep(2)
            # Start over from the banner; returning here keeps this
            # invocation from continuing with an empty filename.
            return main()
        answers.append(answer)

    oldfile, newfile, cropfile = answers
    ask("\n[!] Press any key to start...\n")
    rmDomain(oldfile, newfile, cropfile)
# Entry point: launch the interactive prompt whenever this module is executed.
# NOTE: there is no __main__ guard, so importing the module starts it as well.
main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment