Last active
November 1, 2024 01:31
-
-
Save rivermont/10482553bd64f9fc82b7270a32647339 to your computer and use it in GitHub Desktop.
Simple Python script to remove duplicate lines from a file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Compares all lines in a given file and removes duplicates, keeping the first occurrence of each line.
'''
import sys
import time as t
def get_time():
    """Return the current local time formatted as ``HH:MM:SS``.

    The script uses this to timestamp the status message it prints after
    rewriting the file.
    """
    # strftime() with no explicit time tuple formats the current local time.
    return t.strftime('%H:%M:%S')
def _unique_lines(lines):
    """Return ``(unique, duplicate_count)`` for an iterable of text lines.

    ``unique`` keeps the first occurrence of every line in its original
    order. Lines are compared without their trailing newline so that a final
    line lacking a terminator still matches an identical earlier line.
    """
    seen = set()  # O(1) membership instead of scanning a list per line
    unique = []
    duplicate_count = 0
    for line in lines:
        key = line.rstrip('\n')
        if key in seen:
            duplicate_count += 1
        else:
            seen.add(key)
            unique.append(line)
    return unique, duplicate_count


def remove_duplicate_lines(path):
    """Rewrite the file at *path* in place without duplicate lines.

    The first occurrence of each line is kept; later repeats are dropped.

    Returns:
        The number of duplicate lines that were removed.

    Raises:
        OSError: if the file cannot be opened for reading or writing.
    """
    with open(path, 'r') as source:
        lines = source.readlines()

    unique, duplicate_count = _unique_lines(lines)

    # The whole file was read above, so truncating it for writing is safe.
    with open(path, 'w') as target:
        target.writelines(unique)

    return duplicate_count


def main(argv=None):
    """Command-line entry point: de-duplicate the file named in ``argv[1]``.

    *argv* defaults to ``sys.argv``. If no filename was supplied, a message
    is printed and the process exits with status 1.
    """
    args = sys.argv if argv is None else argv
    try:
        file_name = args[1]
    except IndexError:
        print('You didn\'t supply a valid filename.')
        # sys.exit is always available (the bare exit() helper comes from
        # the site module); a non-zero status signals the usage error.
        sys.exit(1)

    removed = remove_duplicate_lines(file_name)
    print('[{0}]: {1} duplicate lines removed from {2}.'.format(get_time(), removed, file_name))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, thanks!