Created
October 22, 2024 02:14
-
-
Save ericoporto/0fdd629501e197f360c75e5ff7d567bd to your computer and use it in GitHub Desktop.
Python script to clean trailing whitespace, preserving individual line endings
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import argparse | |
import chardet | |
def is_text_file(filename, chunk_size=1024):
    """Return True when chardet can name an encoding for the file's first bytes.

    Only the leading ``chunk_size`` bytes are sampled. Any exception raised
    while opening, reading, or running detection is reported on stdout and
    the file is treated as non-text (False is returned).
    """
    try:
        with open(filename, 'rb') as handle:
            sample = handle.read(chunk_size)
        # chardet reports its guess under the 'encoding' key; a missing or
        # None value is taken to mean the sample does not look like text.
        detection = chardet.detect(sample)
        return detection.get('encoding') is not None
    except Exception as e:
        print(f"Error reading file {filename}: {e}")
        return False
def remove_trailing_whitespace(file_path):
    """Strip trailing spaces and tabs from every line of ``file_path`` in place.

    The file is handled as raw bytes so that each line keeps its own
    end-of-line sequence: CRLF, LF, or nothing for an unterminated final
    line. Because matching is done on single bytes, the content is assumed
    to be in an ASCII-compatible encoding.

    The file is rewritten only when at least one line actually changed, so
    an already-clean file is left untouched (including its modification
    time).

    Args:
        file_path: Path of the file to clean.

    Raises:
        OSError: If the file cannot be read or written.
    """
    with open(file_path, 'rb') as f:
        lines = f.readlines()

    new_lines = []
    for line in lines:
        # Detect the end-of-line (EOL) sequence (\r\n, \n, or no EOL).
        if line.endswith(b'\r\n'):
            eol = b'\r\n'
        elif line.endswith(b'\n'):
            eol = b'\n'
        else:
            # No EOL: binary readlines() splits on \n, so this can only be
            # the last line of the file.
            eol = b''
        # Strip blanks together with the EOL bytes, then restore the EOL.
        new_lines.append(line.rstrip(b' \t' + eol) + eol)

    # Nothing changed: skip the write instead of truncating and rewriting
    # identical content.
    if new_lines == lines:
        return

    with open(file_path, 'wb') as f:
        f.writelines(new_lines)
def main():
    """Command-line entry point: clean each path given on the command line.

    Paths that are not regular files, or that ``is_text_file`` rejects, are
    skipped with a message. A failure while cleaning one file is reported
    and does not stop the remaining files from being processed.
    """
    parser = argparse.ArgumentParser(description="Remove trailing whitespace from text files.")
    parser.add_argument('files', metavar='file', type=str, nargs='+', help='Text files to clean')

    for file_path in parser.parse_args().files:
        if not os.path.isfile(file_path):
            print(f"Skipping {file_path}: not a file.")
        elif not is_text_file(file_path):
            print(f"Skipping {file_path}: not a text file.")
        else:
            print(f"Cleaning {file_path}...")
            try:
                remove_trailing_whitespace(file_path)
                print(f"Finished cleaning {file_path}.")
            except Exception as e:
                print(f"Error cleaning {file_path}: {e}")
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment