Created
September 12, 2022 19:24
-
-
Save rldotai/31fd8fa76f8bdea1537f0b2612b1e322 to your computer and use it in GitHub Desktop.
Get comments in the supplied Python code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io, tokenize | |
def extract_comments(code: str | io.TextIOBase) -> str: | |
""" | |
Extract comments from a piece of Python code, returning a string of | |
*just* the comments. | |
Example: | |
>>> extract_comments(r''' | |
... # A comment | |
... def identity(x): | |
... "This is a docstring, not a comment." | |
... # Here's a comment inside a function | |
... return x # and an inline comment | |
... | |
... ''') | |
"# A comment\n# Here's a comment inside a function\n# and an inline comment\n" | |
A modified version of: https://stackoverflow.com/a/34512388 | |
set to use Python 3. | |
""" | |
res = [] | |
last = None | |
if isinstance(code, str): | |
buffer = io.StringIO(code) | |
else: | |
buffer = code | |
# pass in stringio.readline to generate_tokens | |
for toktype, tokval, begin, end, line in tokenize.generate_tokens(buffer.readline): | |
if toktype == tokenize.COMMENT: | |
res.append((toktype, tokval)) | |
elif toktype in (tokenize.NEWLINE, tokenize.NL) and last == tokenize.COMMENT: | |
res.append((toktype, tokval)) | |
else: | |
pass | |
# Record the token type (for preserving newlines) | |
last = toktype | |
return tokenize.untokenize(res) | |
if __name__ == "__main__":
    # Command-line entry point: print the comments found in the given file.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "input", type=argparse.FileType("r"), help="Input to extract comments from"
    )
    args = parser.parse_args()
    # argparse.FileType opens the file while parsing the arguments but never
    # closes it, so read it inside a context manager to release the handle.
    with args.input as source:
        text = source.read()
    print(extract_comments(text))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment