Created
July 22, 2025 08:18
-
-
Save harryf/e80dab4d05ae35ef8ae9646498175339 to your computer and use it in GitHub Desktop.
Extract a selected history of a Python function from a file, given a list of commit hashes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import sys | |
import re | |
import subprocess | |
# Usage:
# echo "sha1..." | ./extract_function_history.py fin_toolkit/cli.py search
# or
# ./extract_function_history.py fin_toolkit/cli.py search sha1_file.txt
# Command-line handling: <path-to-file> <function-name> [hash-file].
# Validate the argument count first so a bare invocation prints the usage
# text instead of dying with an IndexError on sys.argv[1].
if len(sys.argv) < 3:
    print("Usage: extract_function_history.py <path-to-file> <function-name> [hash-file]\n"
          "or pipe commit hashes via STDIN", file=sys.stderr)
    sys.exit(1)

file_path = sys.argv[1]
func_name = sys.argv[2]

# Determine where to read hashes from. An explicitly named hash file wins
# over STDIN: when the script runs without a terminal (CI, cron, another
# process) STDIN is never a TTY, and checking it first would silently ignore
# the file the user asked for.
if len(sys.argv) > 3:
    with open(sys.argv[3]) as f:
        commit_hashes = [line.strip() for line in f if line.strip()]
elif not sys.stdin.isatty():
    commit_hashes = [line.strip() for line in sys.stdin if line.strip()]
else:
    # Usage goes to stderr so it never mixes with the extracted history
    # that the script writes to stdout.
    print("Usage: extract_function_history.py <path-to-file> <function-name> [hash-file]\n"
          "or pipe commit hashes via STDIN", file=sys.stderr)
    sys.exit(1)

# Line-level patterns: a decorator line, and the `def` line of the target
# function (the name is escaped so it is matched literally).
decorator_pat = re.compile(r'^\s*@')
def_pat = re.compile(rf'^\s*def {re.escape(func_name)}\s*\(')
def extract_function(code, func_name):
    """Return the source of the first function named *func_name* in *code*.

    The search is line-based and does not parse the code, so it also works
    on source that the running interpreter could not compile.

    The result starts at the first of any single-line decorator lines that
    sit directly above the ``def`` (or ``async def``) line. It then covers
    the whole signature, even when it spans several lines, and every
    following line indented deeper than the ``def``. Blank lines inside the
    body are kept; blank lines trailing the last body line are dropped so
    that two versions differing only in the gap after the function compare
    equal.

    Known limitation: a decorator whose arguments span several lines is not
    recognised, because only lines starting with ``@`` are treated as
    decorators.

    Args:
        code: Full text of a Python source file.
        func_name: Name of the function (or method) to extract; matched
            literally.

    Returns:
        The function's source as a single string joined with ``"\\n"``, or
        ``None`` when no matching definition is found.
    """
    # Built from the argument so the function does not depend on
    # module-level state.
    header_re = re.compile(
        rf'^\s*(?:async\s+)?def\s+{re.escape(func_name)}\s*\('
    )
    decorator_re = re.compile(r'^\s*@')
    # String literals and comments are removed before counting brackets so
    # that a default such as "(" cannot unbalance the count.
    noise_re = re.compile(r'''"(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*'|#.*''')

    lines = code.splitlines()
    for def_idx, def_line in enumerate(lines):
        if not header_re.match(def_line):
            continue

        # Look for decorators above.
        start = def_idx
        while start > 0 and decorator_re.match(lines[start - 1]):
            start -= 1

        # Find the line on which the signature's brackets balance. Without
        # this, a closing "):" at the def's own indentation would be taken
        # for the end of the function. If the brackets never balance, fall
        # back to treating the def line as the whole header.
        header_end = def_idx
        depth = 0
        for j in range(def_idx, len(lines)):
            bare = noise_re.sub('', lines[j])
            depth += sum(bare.count(ch) for ch in '([{')
            depth -= sum(bare.count(ch) for ch in ')]}')
            if depth <= 0:
                header_end = j
                break

        indent = len(def_line) - len(def_line.lstrip())
        last_code = header_end  # index of the last non-blank line kept
        pos = header_end + 1
        while pos < len(lines):
            line = lines[pos]
            if line.strip():
                if len(line) - len(line.lstrip()) <= indent:
                    break  # back at (or left of) the def's indentation
                last_code = pos
            pos += 1
        return "\n".join(lines[start:last_code + 1])
    return None
# Walk the commits in the order given and print each distinct version of the
# function once, preceded by the commit hash and commit message.
seen = set()  # function bodies already printed
for commit in commit_hashes:
    # Get file at that commit.
    try:
        code = subprocess.check_output(
            ['git', 'show', f'{commit}:{file_path}'],
            text=True,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        continue  # file may not exist in some commits, skip

    func_code = extract_function(code, func_name)
    if not func_code or func_code in seen:
        continue  # function absent, or this exact version was already shown

    # Get commit message. Fetched only now, so no git process is spawned
    # for commits whose version of the function will not be printed.
    try:
        commit_msg = subprocess.check_output(
            ['git', 'show', '--no-patch', '--format=%B', commit],
            text=True,
        ).strip()
    except subprocess.CalledProcessError:
        continue

    print(f"\n--- Commit: {commit} ---\n")
    print(f"Commit message:\n{commit_msg}\n")
    print(func_code)
    seen.add(func_code)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If we have a Python file like foo/bar.py that contains a Python function "do_something", I might do this (skipping any commits described as a fix or refactoring):
$ git log --oneline -- foo/bar.py | grep -viE 'fix|refactor' | awk '{print $1}' | ./extract_function_history.py foo/bar.py do_something
This might give output like this which can be used to give an LLM useful context about the function...