Skip to content

Instantly share code, notes, and snippets.

@harryf
Created July 22, 2025 08:18
Show Gist options
  • Save harryf/e80dab4d05ae35ef8ae9646498175339 to your computer and use it in GitHub Desktop.
Save harryf/e80dab4d05ae35ef8ae9646498175339 to your computer and use it in GitHub Desktop.
Extract a selected history of a Python function from a file, given a list of commit hashes
#!/usr/bin/env python3
import sys
import re
import subprocess
# Usage:
#   echo "sha1..." | ./extract_function_history.py fin_toolkit/cli.py search
# or
#   ./extract_function_history.py fin_toolkit/cli.py search sha1_file.txt
_USAGE = (
    "Usage: extract_function_history.py <path-to-file> <function-name> [hash-file]\n"
    "or pipe commit hashes via STDIN"
)

# Validate the argument count before indexing sys.argv, so that a missing
# argument produces the usage text rather than an IndexError traceback.
# With no hash file and an interactive stdin there is nothing to read from.
if len(sys.argv) < 3 or (len(sys.argv) == 3 and sys.stdin.isatty()):
    print(_USAGE)
    sys.exit(1)

file_path = sys.argv[1]
func_name = sys.argv[2]

# Determine where to read hashes from.
# An explicit hash file takes priority over stdin: when the script is launched
# from a non-interactive context (CI, cron, another program) stdin is not a
# TTY even though nothing is piped in, and preferring stdin there would
# silently ignore the file that was asked for.
if len(sys.argv) > 3:
    with open(sys.argv[3]) as f:
        commit_hashes = [line.strip() for line in f if line.strip()]
else:
    commit_hashes = [line.strip() for line in sys.stdin if line.strip()]

# Line-level patterns: a decorator line, and the "def <func_name>(" header.
decorator_pat = re.compile(r'^\s*@')
def_pat = re.compile(rf'^\s*def {re.escape(func_name)}\s*\(')
def _bracket_delta(line):
    """Return opened-minus-closed bracket count on *line*, skipping quoted text and comments."""
    delta = 0
    quote = None
    escaped = False
    for ch in line:
        if quote:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == quote:
                quote = None
        elif ch in '"\'':
            quote = ch
        elif ch == '#':
            break  # rest of the line is a comment
        elif ch in '([{':
            delta += 1
        elif ch in ')]}':
            delta -= 1
    return delta


def extract_function(code, func_name):
    """Return the source text of the first function named *func_name* in *code*.

    The search is purely textual (no parsing), so it also works on revisions
    that are not valid for the running interpreter. The result includes any
    decorator lines directly above the ``def`` and every following line that
    is blank or indented deeper than the ``def`` itself; trailing blank lines
    are dropped so that spacing changes after the function do not make two
    otherwise identical versions compare as different.

    Args:
        code: Full source text of a Python file.
        func_name: Name of the function (or method) to extract. Both ``def``
            and ``async def`` headers are recognised.

    Returns:
        The extracted lines joined with ``"\\n"``, or ``None`` when no
        matching header is found.

    Note:
        Being indentation-based, extraction stops at the first non-blank line
        indented at or below the ``def`` (for example a comment or the content
        of a multi-line string written at column 0).
    """
    # Build the header pattern from the argument instead of relying on a
    # module-level pattern, so the function honours its own parameter.
    header_re = re.compile(rf'^\s*(?:async\s+)?def\s+{re.escape(func_name)}\s*\(')
    decorator_re = re.compile(r'^\s*@')

    lines = code.splitlines()
    for idx, header in enumerate(lines):
        if not header_re.match(header):
            continue

        # Walk upwards over decorator lines stacked directly above the def.
        start = idx
        while start > 0 and decorator_re.match(lines[start - 1]):
            start -= 1

        indent = len(header) - len(header.lstrip())

        # A signature may span several lines and close with "):" at the same
        # indentation as the def; consume lines until its brackets balance so
        # that closing line is not mistaken for the end of the function.
        end = idx + 1
        depth = _bracket_delta(header)
        while depth > 0 and end < len(lines):
            depth += _bracket_delta(lines[end])
            end += 1
        if depth > 0:
            # Brackets never balanced (malformed source): fall back to the
            # plain indentation rule starting right after the header line.
            end = idx + 1

        # Body: blank lines and anything indented deeper than the def.
        while end < len(lines):
            candidate = lines[end]
            if candidate.strip() and len(candidate) - len(candidate.lstrip()) <= indent:
                break
            end += 1

        # Drop blank lines that merely separate this function from what follows.
        while end > idx + 1 and not lines[end - 1].strip():
            end -= 1

        return "\n".join(lines[start:end])
    return None
# Versions of the function that have already been printed; a commit whose
# extracted text matches one of them is skipped.
seen = set()
for commit in commit_hashes:
    # Get file at that commit. Undecodable bytes are replaced rather than
    # raising, so one oddly-encoded historical revision cannot abort the run.
    try:
        code = subprocess.check_output(
            ['git', 'show', f'{commit}:{file_path}'],
            text=True,
            errors='replace',
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        continue  # file may not exist in some commits, skip

    func_code = extract_function(code, func_name)
    if not func_code or func_code in seen:
        continue

    # Get commit message. This is fetched only once the commit is known to
    # contribute a new version, which avoids a git call for every commit
    # where the function is absent or unchanged.
    try:
        commit_msg = subprocess.check_output(
            ['git', 'show', '--no-patch', '--format=%B', commit],
            text=True,
            errors='replace',
        ).strip()
    except subprocess.CalledProcessError:
        continue

    seen.add(func_code)
    print(f"\n--- Commit: {commit} ---\n")
    print(f"Commit message:\n{commit_msg}\n")
    print(func_code)
@harryf
Copy link
Author

harryf commented Jul 22, 2025

If we have a Python file such as foo/bar.py that contains a function "do_something", I might run the following (skipping any commits described as a fix or a refactoring):

$ git log --oneline -- foo/bar.py | grep -viE 'fix|refactor' | awk '{print $1}' | ./extract_function_history.py foo/bar.py do_something

This might produce output like the following, which can be used to give an LLM useful context about the function:

--- Commit: 91f34e2 ---

Commit message:
✨ feat(bar): add do_something with initial logic

def do_something(a, b):
    """Returns the sum of a and b."""
    return a + b

--- Commit: 105caa1 ---

Commit message:
✨ feat(bar): add support for logging in do_something

import logging

def do_something(a, b):
    """Returns the sum of a and b, and logs the operation."""
    result = a + b
    logging.info(f"do_something called with a={a}, b={b}, result={result}")
    return result

--- Commit: b99d610 ---

Commit message:
✨ feat(bar): allow do_something to handle lists

import logging

def do_something(a, b):
    """
    Returns the sum of a and b, and logs the operation.
    If either argument is a list, sums the elements.
    """
    if isinstance(a, list):
        a = sum(a)
    if isinstance(b, list):
        b = sum(b)
    result = a + b
    logging.info(f"do_something called with a={a}, b={b}, result={result}")
    return result

--- Commit: 123abc7 ---

Commit message:
✨ feat(bar): add input validation to do_something

import logging

def do_something(a, b):
    """
    Returns the sum of a and b, logs the operation.
    Handles lists and checks for numeric types.
    """
    if not (isinstance(a, (int, float, list)) and isinstance(b, (int, float, list))):
        raise ValueError("Arguments must be int, float, or list")
    if isinstance(a, list):
        a = sum(a)
    if isinstance(b, list):
        b = sum(b)
    result = a + b
    logging.info(f"do_something called with a={a}, b={b}, result={result}")
    return result


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment