-
-
Save ngocnguyenth/082517167c62abb243045ec20af63e14 to your computer and use it in GitHub Desktop.
Simple Python script to remove duplicate lines from a file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Compares all lines in a given file and removes duplicates, keeping the
first occurrence of each line.
'''
import sys | |
import time as t | |
def get_time():
    """Return the current local time as an ``HH:MM:SS`` string."""
    # strftime() with no explicit time tuple formats the current local time.
    return t.strftime('%H:%M:%S')
def remove_duplicate_lines(path):
    """Rewrite *path* in place, keeping only the first occurrence of each line.

    Lines are compared exactly as ``readlines()`` returns them, so a final
    line without a trailing newline is treated as different from the same
    text followed by a newline. The relative order of the kept lines is
    preserved.

    Returns the number of duplicate lines that were dropped.
    """
    with open(path, 'r') as f:
        lines = f.readlines()

    # A set gives O(1) membership tests; the list preserves first-seen order.
    seen = set()
    unique_lines = []
    for line in lines:
        if line not in seen:
            seen.add(line)
            unique_lines.append(line)

    # The context manager closes the file even if a write fails part-way.
    with open(path, 'w') as f:
        f.writelines(unique_lines)

    return len(lines) - len(unique_lines)


def main():
    """Deduplicate the file named by the first command-line argument.

    Prints a usage message and exits with status 1 when no argument is given;
    otherwise prints a timestamped summary of how many lines were removed.
    """
    try:
        file_name = sys.argv[1]
    except IndexError:
        # Only a missing argument is expected here; anything else should surface.
        print('You didn\'t supply a valid filename.')
        sys.exit(1)

    removed = remove_duplicate_lines(file_name)
    print('[{0}]: {1} duplicate lines removed from {2}.'.format(
        get_time(), removed, file_name))


# Guard the entry point so importing this module never rewrites a file.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment