Skip to content

Instantly share code, notes, and snippets.

@ericoporto
Created October 22, 2024 02:14
Show Gist options
  • Save ericoporto/0fdd629501e197f360c75e5ff7d567bd to your computer and use it in GitHub Desktop.
Save ericoporto/0fdd629501e197f360c75e5ff7d567bd to your computer and use it in GitHub Desktop.
Python script to clean trailing whitespace, preserving individual line endings
import os
import argparse
import chardet
def is_text_file(filename, chunk_size=1024):
    """Check if the file is likely a text file by reading a small chunk.

    The first ``chunk_size`` bytes are inspected. A chunk containing a NUL
    byte is rejected outright: such content is either binary or a multi-byte
    encoding (UTF-16/32), and the byte-oriented whitespace stripping done by
    this script is not safe for either. Otherwise the decision is delegated
    to ``chardet``: the file counts as text when an encoding is detected.

    Args:
        filename: Path of the file to inspect.
        chunk_size: Number of leading bytes to sample.

    Returns:
        True if the sample looks like single-byte-compatible text, False
        otherwise (including when the file cannot be read).
    """
    try:
        with open(filename, 'rb') as f:
            chunk = f.read(chunk_size)
        # NUL bytes never occur in ASCII/UTF-8/Latin-style text; bail out
        # before asking chardet, which may still offer an encoding guess.
        if b'\x00' in chunk:
            return False
        # Use chardet to detect encoding and ensure it's text-like
        result = chardet.detect(chunk)
        return result.get('encoding') is not None
    except Exception as e:
        # Best-effort probe: report the problem and treat the file as non-text.
        print(f"Error reading file {filename}: {e}")
        return False
def remove_trailing_whitespace(file_path):
    r"""Strip trailing spaces and tabs from every line of a file, in place.

    The file is handled as raw bytes so that each line keeps its own
    end-of-line sequence (``\r\n``, ``\n``, or nothing for an unterminated
    final line). The file is rewritten only when at least one line actually
    changed, so already-clean files are left untouched.

    Args:
        file_path: Path of the file to clean.

    Raises:
        OSError: If the file cannot be opened, read, or written.
    """
    with open(file_path, 'rb') as f:
        lines = f.readlines()

    new_lines = []
    for line in lines:
        # Detect the end-of-line (EOL) sequence (\r\n, \n, or no EOL).
        if line.endswith(b'\r\n'):
            eol = b'\r\n'
        elif line.endswith(b'\n'):
            eol = b'\n'
        else:
            # No EOL, meaning this could be the last line in the file
            eol = b''
        # Strip blanks together with the EOL bytes, then restore the
        # original EOL so mixed line endings survive unchanged.
        new_lines.append(line.rstrip(b' \t' + eol) + eol)

    # Write the cleaned lines back to the file only if there were changes
    if new_lines != lines:
        with open(file_path, 'wb') as f:
            f.writelines(new_lines)
def main():
    """Command-line entry point: clean every file named on the command line.

    Each path is checked in turn. Paths that are not regular files, or that
    do not pass the text-file probe, are reported and skipped. Errors raised
    while cleaning one file are reported and do not stop the remaining files
    from being processed.
    """
    cli = argparse.ArgumentParser(
        description="Remove trailing whitespace from text files."
    )
    cli.add_argument(
        'files',
        metavar='file',
        type=str,
        nargs='+',
        help='Text files to clean',
    )
    targets = cli.parse_args().files

    for target in targets:
        if not os.path.isfile(target):
            print(f"Skipping {target}: not a file.")
        elif not is_text_file(target):
            print(f"Skipping {target}: not a text file.")
        else:
            print(f"Cleaning {target}...")
            try:
                remove_trailing_whitespace(target)
                print(f"Finished cleaning {target}.")
            except Exception as exc:
                # Keep going: one bad file should not abort the whole batch.
                print(f"Error cleaning {target}: {exc}")


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment