Created
July 18, 2025 22:10
-
-
Save Techcable/5eed6850c1b390469d1456540305829b to your computer and use it in GitHub Desktop.
Strip line comments from a file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Strip line comments from a file | |
By default, this requires whitespace to surround a comment for it to be valid. | |
This means that `a#a` is not a valid comment. | |
""" | |
from __future__ import annotations | |
import argparse | |
import re | |
import sys | |
from collections.abc import Iterator | |
_VALID_LINE_COMMENTS = ("#", "//") | |
def strip_line_comments[S: (str, bytes)](source: Iterator[S], *, comment: str, require_whitespace: bool) -> Iterator[S]: | |
"""Strip line comments from an iterator over lines.""" | |
if comment not in _VALID_LINE_COMMENTS: | |
raise ValueError(f"Invalid comment: {comment!r}") | |
escaped_comment = re.escape(comment) | |
pattern_str: str | |
if require_whitespace: | |
pattern_str = rf"(\s{escaped_comment}.*)|({escaped_comment}\s.*)" | |
else: | |
pattern_str = f"{escaped_comment}.*" | |
pattern: re.Pattern[S] | None = None # lazily compiled on first iteration | |
typed_empty: S | None = None | |
for line in source: | |
if pattern is None: | |
pattern = re.compile(pattern_str if isinstance(line, str) else pattern_str.encode("ascii")) | |
if typed_empty is None: | |
typed_empty = "" if isinstance(line, str) else b"" | |
if not line: | |
# no possible comment | |
yield line | |
continue | |
(stripped_line, number_of_subs) = pattern.subn(typed_empty, line) | |
assert number_of_subs <= 1, number_of_subs | |
if not stripped_line: | |
# if new line is empty, the line was entirely a comment. | |
# This is because we already checked for empty lines above | |
assert number_of_subs == 1, number_of_subs | |
continue | |
else: | |
yield stripped_line | |
def main() -> None:
    """Command-line entry point.

    Reads the input file as bytes, removes line comments with
    `strip_line_comments`, and writes the remaining lines to standard output,
    each terminated by a single ``\\n``.
    """
    parser = argparse.ArgumentParser(description="Strips line comments from a file")
    parser.add_argument("--comment", "-c", choices=_VALID_LINE_COMMENTS, default="#")
    parser.add_argument("input", type=argparse.FileType("rb"), help="The file to read from")
    parser.add_argument(
        "--no-require-whitespace",
        "-w",
        dest="require_whitespace",
        action="store_false",
        help="Do not require whitespace before/after a comment, allowing `a#a` to behave as a comment",
    )
    args = parser.parse_args()
    out = sys.stdout.buffer
    # Close the input handle when done, and stream it line by line instead of
    # loading the whole file into memory with readlines().
    with args.input as input_file:
        # Drop the line terminator up front so it cannot be mistaken for the
        # whitespace that follows a comment marker.
        bare_lines = (raw.rstrip(b"\r\n") for raw in input_file)
        for line in strip_line_comments(
            bare_lines,
            comment=args.comment,
            require_whitespace=args.require_whitespace,
        ):
            out.write(line)
            out.write(b"\n")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment