kebman · May 20, 2026 11:50 · kebman · May 14, 2026
diff --git a/smartpatch.py b/smartpatch.py
 #!/usr/bin/env python3
 """
 smartpatch.py — conservative fuzzy patch applier for AI-made unified diffs.

 Design goal:
  Treat AI diffs as intent packets, not as mechanically exact Git patches.
  Ignore unreliable hunk line numbers. Prefer exact content/context matches.
  Apply only when confidence is high enough. Report everything else.

 Safe default:
  This script does a dry run unless --write is passed.

 Usage:
  python3 ~/bin/smartpatch.py apply ai.patch
  python3 ~/bin/smartpatch.py apply ai.patch --write --backup
  python3 ~/bin/smartpatch.py apply ai.patch --report
  python3 ~/bin/smartpatch.py apply ai.patch --report custom-report.md
 """

 from __future__ import annotations

 import argparse
 import difflib
 import json
 import re
 import shutil
 import subprocess
 import sys
 from dataclasses import dataclass, field, asdict
 from datetime import datetime
 from pathlib import Path
 from typing import Literal

 # Classification of a single parsed hunk line.
 DiffKind = Literal["context", "remove", "add"]

 # File suffixes (compared lower-cased) that get Markdown-specific handling.
 MARKDOWN_EXTENSIONS = {".md", ".mdx", ".markdown"}
 # NOTE(review): not referenced in this section; presumably formats that need
 # stricter matching — confirm against the code that consumes it.
 STRICT_EXTENSIONS = {".py", ".yaml", ".yml", ".json", ".toml"}
 # Stripped line contents that is_placeholder() treats as elision markers.
 PLACEHOLDER_LINES = {"...", "[… ]", "[...]", "<...>"}


 @dataclass
 class DiffLine:
     """One parsed line of a hunk."""

     # One of "context", "remove" or "add".
     kind: DiffKind
     # Line content after the diff marker has been handled by the parser.
     text: str
     # The line as it was stored when the DiffLine was created (normally the
     # unmodified patch line).
     raw: str


 @dataclass
 class Hunk:
     """A group of diff lines introduced by one ``@@`` header."""

     # The ``@@ ...`` header line; split hunks carry a suffixed copy, and the
     # type allows None.
     header: str | None
     # Parsed lines of the hunk, in patch order.
     lines: list[DiffLine] = field(default_factory=list)
     # Human-readable notes collected while parsing/repairing this hunk.
     parse_warnings: list[str] = field(default_factory=list)


 @dataclass
 class PatchFile:
     """All hunks of a patch that belong to one file.

     ``old_path`` / ``new_path`` are the cleaned header paths; either may be
     ``None`` (for example when the header named ``/dev/null``).
     """

     old_path: str | None = None
     new_path: str | None = None
     hunks: list[Hunk] = field(default_factory=list)
     parse_warnings: list[str] = field(default_factory=list)

     @property
     def target_path(self) -> str | None:
         """Return the new path when it is set, otherwise the old path."""
         if self.new_path:
             return self.new_path
         return self.old_path


 @dataclass
 class MatchResult:
     """Result record for locating a block of lines in a target file.

     NOTE(review): producers and consumers live outside this section; the
     per-field notes below are inferred from names and defaults — confirm.
     """

     found: bool
     # Presumably a half-open line-index range of the match — TODO confirm.
     start: int | None = None
     end: int | None = None
     confidence: float = 0.0
     # Name of the matching strategy; defaults to "not_found".
     method: str = "not_found"
     warnings: list[str] = field(default_factory=list)
     missing_lines: list[str] = field(default_factory=list)
     candidate_count: int = 0


 @dataclass
 class HunkReport:
     """Report entry describing what happened to a single hunk.

     NOTE(review): instances are created outside this section; field meanings
     are inferred from their names — confirm against the reporting code.
     """

     file: str
     hunk_index: int
     # Outcome of the hunk; restricted to the four literal values below.
     action: Literal["applied", "would_apply", "already_applied", "skipped"]
     confidence: float
     method: str
     start_line: int | None
     end_line: int | None
     warnings: list[str] = field(default_factory=list)
     missing_lines: list[str] = field(default_factory=list)
     details: str = ""


 @dataclass
 class FileReport:
     """Per-file report: counters plus the individual hunk reports.

     NOTE(review): populated outside this section; the counters presumably
     mirror the ``HunkReport.action`` values — confirm.
     """

     file: str
     exists: bool
     hunks_total: int
     applied: int = 0
     would_apply: int = 0
     already_applied: int = 0
     skipped: int = 0
     warnings: list[str] = field(default_factory=list)
     hunk_reports: list[HunkReport] = field(default_factory=list)


 @dataclass
 class RunReport:
     """Top-level report for one run: settings, totals and per-file reports.

     NOTE(review): populated outside this section; field meanings are inferred
     from their names — confirm against the code that builds the report.
     """

     patch: str
     root: str
     dry_run: bool
     min_confidence: float
     files_total: int
     hunks_total: int
     applied: int = 0
     would_apply: int = 0
     already_applied: int = 0
     skipped: int = 0
     warnings: list[str] = field(default_factory=list)
     file_reports: list[FileReport] = field(default_factory=list)


 def clean_path(token: str) -> str | None:
     """Normalise a path taken from a diff header.

     Steps, in order: trim whitespace, drop anything after the first tab
     (timestamps and similar metadata), remove one pair of matching surrounding
     quotes, map ``/dev/null`` to ``None``, and strip a leading ``a/`` or ``b/``.

     Ordinary spaces inside the path are preserved on purpose, because repo
     paths such as `docs/ux/Organizer Views/Bundling/Bundling View.md` contain
     them. Returns ``None`` when nothing is left.
     """
     cleaned = token.strip()
     if cleaned == "":
         return None

     # Only a tab separates the path from optional header metadata.
     cleaned, _, _ = cleaned.partition("\t")
     cleaned = cleaned.strip()

     is_quoted = (
         len(cleaned) >= 2
         and cleaned[0] in ("'", '"')
         and cleaned[-1] == cleaned[0]
     )
     if is_quoted:
         cleaned = cleaned[1:-1]

     if cleaned == "/dev/null":
         return None

     if cleaned.startswith(("a/", "b/")):
         cleaned = cleaned[2:]

     return cleaned if cleaned else None


 def parse_diff_git_paths(line: str) -> tuple[str | None, str | None]:
     """Parse a ``diff --git OLD NEW`` header into ``(old_path, new_path)``.

     Forms handled, in order:
       1. Both paths wrapped in matching quotes, so quoted paths that contain
          spaces stay intact.
       2. Unquoted ``a/... b/...`` where the paths may contain spaces. Every
          `` b/`` occurrence is tried and the split whose two sides name the
          same path wins (the usual non-rename case, even when the path itself
          contains `` b/``); otherwise the first `` b/`` is used.
       3. Two whitespace-separated tokens.

     Each side is passed through ``clean_path``. Returns ``(None, None)`` when
     no form matches. C-style escapes inside quoted paths are not decoded.
     """
     rest = line[len("diff --git ") :].strip()

     # Quoted pair: without this, a quoted path containing spaces would fall
     # through to the plain whitespace split below and be torn apart.
     quoted = re.match(
         r"""^(?P<q1>["'])(?P<old>.+?)(?P=q1)\s+(?P<q2>["'])(?P<new>.+?)(?P=q2)$""",
         rest,
     )
     if quoted:
         return clean_path(quoted.group("old")), clean_path(quoted.group("new"))

     if rest.startswith("a/") and " b/" in rest:
         first_split = rest.find(" b/")
         split_at = first_split
         while split_at != -1:
             # rest[2:split_at] is the old path without "a/";
             # rest[split_at + 3:] is the new path without " b/".
             if rest[2:split_at] == rest[split_at + 3 :]:
                 break
             split_at = rest.find(" b/", split_at + 1)
         if split_at == -1:
             # No symmetric split (e.g. a rename): fall back to the first marker.
             split_at = first_split
         return clean_path(rest[:split_at]), clean_path(rest[split_at + 1 :])

     parts = rest.split(maxsplit=1)
     if len(parts) == 2:
         return clean_path(parts[0]), clean_path(parts[1])
     return None, None


 def normalize_line(text: str) -> str:
     """Trim ``text`` and collapse every internal whitespace run to one space."""
     trimmed = text.strip()
     collapsed = re.sub(r"\s+", " ", trimmed)
     return collapsed


 def normalize_block(lines: list[str]) -> list[str]:
     """Return a new list with ``normalize_line`` applied to each entry."""
     normalized: list[str] = []
     for entry in lines:
         normalized.append(normalize_line(entry))
     return normalized


 def load_target_lines(root: Path, rel: str | None) -> list[str]:
     """Best-effort read of a patch target as a list of lines.

     Args:
         root: Directory the relative path is resolved against (resolution is
             delegated to ``safe_target_path``).
         rel: Target path from the patch; falsy values yield ``[]``.

     Returns:
         The file's lines without line endings, or ``[]`` when the path is
         rejected, is not a regular file, cannot be read, or is not UTF-8.
         This function never raises for those cases: callers only use the
         lines as optional repair hints.
     """
     if not rel:
         return []
     try:
         path = safe_target_path(root, rel)
     except Exception:
         # Deliberately broad: any rejection of the path means "no hints".
         return []
     # is_file() also filters out directories, which would otherwise make
     # read_text() raise.
     if not path.is_file():
         return []
     try:
         return path.read_text(encoding="utf-8").splitlines()
     except (OSError, UnicodeDecodeError):
         # Unreadable (permissions, race with deletion) or non-UTF-8 content.
         return []


 def load_target_line_set(root: Path, rel: str | None) -> set[str]:
     """Return the distinct lines of the target file (see ``load_target_lines``)."""
     lines = load_target_lines(root, rel)
     return set(lines)


 def unique_target_line_match(target_lines: list[str], raw_line: str) -> str | None:
     """Find the single target line that corresponds to ``raw_line``.

     Three comparisons are tried from strictest to loosest: identical text,
     equal after ``strip()``, equal after ``normalize_line``. The first
     comparison that matches exactly one target line returns that line as it
     appears in the target. Blank input, or no comparison with exactly one
     match, returns ``None``.
     """
     if raw_line.strip() == "":
         return None

     def sole_match(key):
         # Compare every target line with raw_line under the same key function.
         probe = key(raw_line)
         hits = [candidate for candidate in target_lines if key(candidate) == probe]
         return hits[0] if len(hits) == 1 else None

     comparisons = (
         lambda s: s,
         str.strip,
         lambda s: normalize_line(s),
     )
     for key in comparisons:
         hit = sole_match(key)
         if hit is not None:
             return hit

     return None


 def looks_like_markdown_bullet(line: str) -> bool:
     """Tell whether ``line`` (ignoring indentation) starts a Markdown list item.

     Accepts ``-``, ``*`` or ``+`` markers and numbered markers such as ``1.``
     or ``1)``, each followed by whitespace.
     """
     return re.match(r"^([-*+]\s+|\d+[.)]\s+)", line.lstrip()) is not None


 def looks_like_markdown_horizontal_rule(line: str) -> bool:
     """Tell whether the stripped line is exactly ``---``, ``***`` or ``___``."""
     rule = line.strip()
     return rule in ("---", "***", "___")


 def markdown_bullet_match(line: str) -> re.Match[str] | None:
     """Match ``line`` as a Markdown list item.

     On success the match exposes the named groups ``indent``, ``marker``,
     ``body`` and ``trailing`` (trailing whitespace). Returns ``None`` for
     non-list lines.
     """
     bullet_pattern = r"^(?P<indent>\s*)(?P<marker>[-*+]|\d+[.)])\s+(?P<body>.*?)(?P<trailing>\s*)$"
     return re.match(bullet_pattern, line)


 def markdown_bullet_body(line: str) -> str | None:
     """Return the whitespace-normalised text of a list item, or ``None``.

     ``None`` means ``line`` is not a Markdown list item.
     """
     found = markdown_bullet_match(line)
     return normalize_line(found.group("body")) if found else None


 def markdown_bullet_marker(line: str) -> str | None:
     """Return the list marker of ``line`` (e.g. ``-`` or ``2.``), or ``None``."""
     found = markdown_bullet_match(line)
     return found.group("marker") if found else None


 def rewrite_markdown_bullet_marker(line: str, marker: str) -> str:
     """Replace the list marker of ``line`` with ``marker``.

     Indentation, body and trailing whitespace are kept; the gap between the
     marker and the body becomes a single space. Non-list lines are returned
     unchanged.
     """
     found = markdown_bullet_match(line)
     if found is None:
         return line
     indent, body, trailing = found.group("indent", "body", "trailing")
     return "".join((indent, marker, " ", body, trailing))


 def markdown_equiv_key(line: str) -> tuple[str, str]:
     """Build the comparison key used for Markdown-tolerant line matching.

     List items compare by normalised body only (marker and indentation are
     ignored); every other line compares by its normalised text.
     """
     bullet_text = markdown_bullet_body(line)
     if bullet_text is None:
         return ("line", normalize_line(line))
     return ("md_bullet", bullet_text)


 def markdown_lines_equiv(a: str, b: str) -> bool:
     """Tell whether two lines share the same ``markdown_equiv_key``."""
     key_a = markdown_equiv_key(a)
     key_b = markdown_equiv_key(b)
     return key_a == key_b


 def unique_markdown_bullet_target_match(target_lines: list[str], raw_line: str) -> str | None:
     """Find the only target list item whose body equals that of ``raw_line``.

     Marker style and indentation are ignored. Returns the target line as it
     appears in the file, or ``None`` when ``raw_line`` is not a list item or
     the number of matching target lines is not exactly one.
     """
     wanted = markdown_bullet_body(raw_line)
     if wanted is None:
         return None

     sole: str | None = None
     for candidate in target_lines:
         if markdown_bullet_body(candidate) != wanted:
             continue
         if sole is not None:
             # A second hit makes the match ambiguous.
             return None
         sole = candidate

     return sole


 def find_markdown_equiv_block(file_lines: list[str], block: list[str]) -> list[tuple[int, int]]:
     """Locate every window of ``file_lines`` that is Markdown-equivalent to ``block``.

     Lines are compared pairwise with ``markdown_lines_equiv``. Each hit is a
     half-open ``(start, end)`` index pair; an empty ``block`` yields ``[]``.
     """
     if not block:
         return []

     width = len(block)
     hits: list[tuple[int, int]] = []

     for begin in range(len(file_lines) - width + 1):
         finish = begin + width
         pairs = zip(file_lines[begin:finish], block)
         if all(markdown_lines_equiv(file_line, patch_line) for file_line, patch_line in pairs):
             hits.append((begin, finish))

     return hits


 def replacement_preserving_matched_context(
     hunk: Hunk,
     matched_old_lines: list[str],
     path: Path,
 ) -> list[str]:
     """Build the replacement lines for a hunk, reusing the file's own context.

     The hunk is walked in order alongside ``matched_old_lines``:
       * context lines emit the corresponding matched file line (falling back
         to the patch text once the matched lines are exhausted);
       * removed lines emit nothing but consume one matched line;
       * added lines are emitted from the patch.

     For Markdown targets, added ``-``/``*``/``+`` bullets are rewritten to use
     the bullet marker seen in the matched lines: initially the first such
     marker in ``matched_old_lines``, then the marker of the most recently
     removed bullet.
     """
     bullet_markers = ("-", "*", "+")
     is_markdown = path.suffix.lower() in MARKDOWN_EXTENSIONS

     preferred: str | None = None
     if is_markdown:
         preferred = next(
             (
                 found
                 for found in map(markdown_bullet_marker, matched_old_lines)
                 if found in bullet_markers
             ),
             None,
         )

     result: list[str] = []
     cursor = 0
     available = len(matched_old_lines)

     for dl in hunk.lines:
         has_old = cursor < available

         if dl.kind == "context":
             result.append(matched_old_lines[cursor] if has_old else dl.text)
             if has_old:
                 cursor += 1

         elif dl.kind == "remove":
             if has_old:
                 seen = markdown_bullet_marker(matched_old_lines[cursor])
                 if seen in bullet_markers:
                     preferred = seen
                 cursor += 1

         elif dl.kind == "add":
             new_line = dl.text
             if (
                 is_markdown
                 and preferred in bullet_markers
                 and markdown_bullet_marker(new_line) in bullet_markers
             ):
                 new_line = rewrite_markdown_bullet_marker(new_line, preferred)
             result.append(new_line)

     return result


 def hunk_line_has_diff_marker(line: str) -> bool:
     """Tell whether ``line`` begins with a space, ``+``, ``-`` or a backslash."""
     first = line[:1]
     return first in (" ", "+", "-", "\\")


 def looks_like_hunk_header(line: str) -> bool:
     """Tell whether ``line`` is a (possibly indented) ``@@ -N`` hunk header."""
     return re.match(r"^\s*@@\s+-\d", line) is not None


 def looks_like_outer_code_fence(line: str) -> bool:
     """Recognise the wrapper fence an AI reply puts around a whole diff.

     Only fences of four or more backticks count, optionally tagged ``diff`` or
     ``patch``. Ordinary three-backtick fences are left alone because Markdown
     target files legitimately contain them inside hunks.
     """
     fence = line.strip()
     return re.match(r"^`{4,}(?:diff|patch)?\s*$", fence) is not None


 def split_attached_code_fence_context(line: str) -> list[str] | None:
     """Repair naked context like ```textOrganizer into two context lines.

     The language tag is taken to be a lowercase alphanumeric run, and the
     attached text must start with a character that cannot continue such a tag
     (for example an uppercase letter or ``*``). This keeps the split
     unambiguous:

       * ```textOrganizer -> [" ```text", " Organizer"]
       * ```python        -> None (a normal fence with a language tag)
       * ```c++           -> None

     A greedy "any identifier" tag would swallow the attached text and leave
     only its last character, and would also tear apart legitimate fences.

     Returns the two context lines (each prefixed with a single space), or
     ``None`` when the line is not an attached fence.
     """
     m = re.match(
         r"^```(?P<lang>[a-z][a-z0-9]*)(?P<rest>[^a-z0-9_+#.\-\s].*)$",
         line,
     )
     if not m:
         return None
     return [f" ```{m.group('lang')}", f" {m.group('rest')}"]


 def strip_ai_text_fence_prefix(text: str) -> str:
     """Remove one leading broken fence prefix from hunk content.

     The prefixes are tried in this order and only the first hit is removed:
     "```text", "`text", "```". Text without any of them is returned as is.
     """
     matched = next(
         (prefix for prefix in ("```text", "`text", "```") if text.startswith(prefix)),
         "",
     )
     return text[len(matched):]


 def repair_inline_text_replacement(line: str) -> list[str] | None:
     """Split a removal and an addition that were collapsed onto one line.

     Example input:

     -`textOld question? +`textNew question?

     Result:

     -Old question?
     +New question?

     Returns ``None`` when the line does not have that shape.
     """
     found = re.match(r"^-\s*`{1,3}text(?P<old>.+?)\s+\+\s*`{1,3}text(?P<new>.+)$", line)
     if found is None:
         return None
     old_text = found.group("old").strip()
     new_text = found.group("new").strip()
     return ["-" + old_text, "+" + new_text]


 def repair_attached_fence_change_pair(
     current: str,
     next_line: str | None,
 ) -> tuple[list[str] | None, bool]:
     """Repair a removal/addition pair that both carry an attached fence.

     Example input lines:

     -```textOld
     +```textNew

     Result:

     -Old
     +New

     Returns ``(repaired_lines, consumed_next)``; ``(None, False)`` when the
     two lines are not such a pair or there is no next line.
     """
     no_repair: tuple[list[str] | None, bool] = (None, False)
     if next_line is None:
         return no_repair

     removal = re.match(r"^-\s*```text(?P<old>.+)$", current)
     if not removal:
         return no_repair

     addition = re.match(r"^\+\s*```text(?P<new>.+)$", next_line)
     if not addition:
         return no_repair

     old_text = removal.group("old").strip()
     new_text = addition.group("new").strip()
     return [f"-{old_text}", f"+{new_text}"], True


 def repair_single_attached_fence_change(line: str) -> str | None:
     """Strip an attached fence from one change line.

     Turns -```textOld into -Old and +```textNew into +New. Returns ``None``
     when the line does not have that shape.
     """
     found = re.match(r"^(?P<marker>[+-])\s*```text(?P<body>.+)$", line)
     if found is None:
         return None
     return found.group("marker") + found.group("body").strip()


 def repair_compact_markdown_bullet_change(
     line: str,
     target_lines: list[str],
 ) -> str | None:
     """Repair accidentally indented diff-marker lines.

     Examples:
       '    -* Offer list'   -> '-* Offer list'
       '    +* Offer list'   -> '+* Offer list'
       '    -foo'            -> '-foo'
       '    +foo'            -> '+foo'

     Deliberately does not touch valid column-1 diff lines.

     A line whose stripped content already exists in ``target_lines`` is left
     alone as well: it is genuine indented content that merely starts with
     ``+`` or ``-`` (for example '  --verbose' or '    -1,'), and rewriting it
     would turn a context line into a bogus removal or addition.

     Args:
         line: Raw hunk line from the patch.
         target_lines: Lines of the target file; may be empty, in which case
             only the shape of ``line`` decides.

     Returns:
         The line with its leading indentation removed, or ``None`` when no
         repair applies.
     """
     m = re.match(r"^\s+(?P<marker>[+-])(?P<body>\S.*)$", line)
     if not m:
         return None

     content = line.strip()
     if any(existing.strip() == content for existing in target_lines):
         return None

     return f"{m.group('marker')}{m.group('body')}"


 def repair_probable_markdown_context_bullet(
     line: str,
     current_path: str | None,
     target_lines: list[str],
 ) -> str | None:
     """Turn a naked Markdown list line into a context line when the file has it.

     Applies only to Markdown targets and to lines shaped like list items, e.g.

       '* public links'
       '- public links'
       '+ public links'

     When that item exists exactly once in the target (first by line text,
     then by bullet body so the marker style may differ), the target's own
     line is returned with a single leading space. Otherwise ``None``.
     """
     if not current_path:
         return None
     if Path(current_path).suffix.lower() not in MARKDOWN_EXTENSIONS:
         return None
     if not looks_like_markdown_bullet(line):
         return None

     by_text = unique_target_line_match(target_lines, line)
     if by_text and looks_like_markdown_bullet(by_text):
         return f" {by_text}"

     by_body = unique_markdown_bullet_target_match(target_lines, line)
     return f" {by_body}" if by_body else None


 def repair_parsed_markdown_context_removals(
     hunk: Hunk,
     file_lines: list[str],
     path: Path,
 ) -> tuple[Hunk, list[str]]:
     """Reclassify parsed removals that are really Markdown list context.

     Malformed AI diffs sometimes write ordinary list context as

       - browse-first customer navigation

     where valid context form would be

        - browse-first customer navigation

     so the parser sees a removal. For Markdown targets, a removed line that
     looks like a list item and exists exactly once in ``file_lines`` becomes
     a context line carrying the file's own text. That lets add-only insertion
     logic run instead of falling through to fuzzy_window.

     Returns ``(hunk, warnings)``. The input hunk is returned untouched with no
     warnings when nothing changed; otherwise a new ``Hunk`` is returned with
     one warning per reclassified line.
     """
     if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
         return hunk, []

     notes: list[str] = []
     rebuilt: list[DiffLine] = []

     for dl in hunk.lines:
         candidate = None
         if dl.kind == "remove" and looks_like_markdown_bullet(dl.text):
             candidate = unique_target_line_match(
                 file_lines, dl.text
             ) or unique_markdown_bullet_target_match(file_lines, dl.text)

         if candidate and looks_like_markdown_bullet(candidate):
             rebuilt.append(DiffLine("context", candidate, " " + candidate))
             notes.append(
                 f"Parsed Markdown bullet removal reclassified as context: {dl.text}"
             )
         else:
             rebuilt.append(dl)

     if not notes:
         return hunk, []

     repaired = Hunk(
         header=hunk.header,
         lines=rebuilt,
         parse_warnings=list(hunk.parse_warnings),
     )
     return repaired, notes


 def repair_ai_hunk_lines(
     hunk_lines: list[tuple[int, str]],
     current_path: str | None,
     target_lines: list[str],
     warnings: list[str],
 ) -> list[str]:
     """Repair one hunk before parse_patch sees it.

     Each line goes through an ordered cascade of repairs; the first repair
     that applies wins and the line is not examined further. The order is
     significant.

     Args:
         hunk_lines: ``(patch line number, text)`` pairs for the hunk body.
         current_path: Target path of the file this hunk belongs to, if known.
         target_lines: Lines of the target file, used by the Markdown repairs.
         warnings: Output list; one message is appended per repair made.

     Returns:
         The repaired hunk lines (may be shorter or longer than the input).
     """
     is_markdown = bool(
         current_path and Path(current_path).suffix.lower() in MARKDOWN_EXTENSIONS
     )

     repaired: list[str] = []
     i = 0

     while i < len(hunk_lines):
         idx, line = hunk_lines[i]
         # Lookahead is needed by the two-line fence-pair repair below.
         next_line = hunk_lines[i + 1][1] if i + 1 < len(hunk_lines) else None

         # ChatGPT wrapping artifact: a line containing only spaces inside a hunk.
         # NOTE(review): a standard unified diff encodes a blank context line as
         # a single space, which this branch also drops, while a completely
         # empty line is kept as blank context further down — confirm intended.
         if line and line.strip() == "":
             warnings.append(f"Line {idx}: dropped whitespace-only hunk artifact line")
             i += 1
             continue

         # Bare Markdown horizontal rule. Must be context, not a diff removal.
         if is_markdown and looks_like_markdown_horizontal_rule(line):
             repaired.append(" " + line.strip())
             warnings.append(f"Line {idx}: repaired Markdown horizontal rule as context")
             i += 1
             continue

         # A removal and an addition collapsed onto a single line.
         inline = repair_inline_text_replacement(line)
         if inline:
             repaired.extend(inline)
             warnings.append(f"Line {idx}: repaired collapsed inline text replacement")
             i += 1
             continue

         # Removal/addition pair on two lines, each with an attached fence.
         pair, consumed_next = repair_attached_fence_change_pair(line, next_line)
         if pair:
             repaired.extend(pair)
             warnings.append(f"Line {idx}: repaired attached ```text replacement pair")
             i += 2 if consumed_next else 1
             continue

         # A lone change line with an attached fence.
         single_fence = repair_single_attached_fence_change(line)
         if single_fence:
             repaired.append(single_fence)
             warnings.append(f"Line {idx}: repaired attached ```text change line")
             i += 1
             continue

         # Change line whose +/- marker was indented away from column 1.
         compact_bullet = repair_compact_markdown_bullet_change(line, target_lines)
         if compact_bullet:
             repaired.append(compact_bullet)
             warnings.append(f"Line {idx}: repaired indented diff marker")
             i += 1
             continue

         # Markdown list line that exists uniquely in the target: context.
         context_bullet = repair_probable_markdown_context_bullet(line, current_path, target_lines)
         if context_bullet:
             repaired.append(context_bullet)
             warnings.append(
                 f"Line {idx}: repaired probable Markdown bullet context line in {current_path}"
             )
             i += 1
             continue

         # Anything left without a diff marker becomes context (this includes
         # completely empty lines).
         if not hunk_line_has_diff_marker(line):
             split_context = split_attached_code_fence_context(line)
             if split_context:
                 repaired.extend(split_context)
                 warnings.append(f"Line {idx}: repaired attached Markdown code-fence context line")
             else:
                 repaired.append(" " + line)
                 if line:
                     warnings.append(f"Line {idx}: repaired naked hunk line as context")
                 else:
                     warnings.append(f"Line {idx}: repaired naked blank hunk line as context")
             i += 1
             continue

         # Already a well-formed hunk line: pass through unchanged.
         repaired.append(line)
         i += 1

     return repaired


 def repair_ai_patch_text(patch_text: str, root: Path) -> tuple[str, list[str]]:
     """Repair common AI diff formatting mistakes before parsing.

     The patch is scanned line by line. Structural lines (outer fences, hunk
     headers, ``diff --git``, ``---``, ``+++``) are handled directly; lines
     inside a hunk are buffered and repaired as a group by
     ``repair_ai_hunk_lines`` once the hunk ends.

     Args:
         patch_text: Raw patch text.
         root: Directory used to load each target file's lines, which the
             hunk-level repairs consult.

     Returns:
         ``(repaired_text, warnings)``. The repaired text always ends with a
         newline; warnings carry 1-based line numbers of the input patch.
     """
     out: list[str] = []
     warnings: list[str] = []

     in_hunk = False
     current_path: str | None = None
     target_lines: list[str] = []
     hunk_buffer: list[tuple[int, str]] = []

     def flush_hunk() -> None:
         # Repair and emit the buffered hunk body, then reset the buffer.
         # Reads current_path/target_lines as they are at the time of the call.
         nonlocal hunk_buffer
         if hunk_buffer:
             out.extend(
                 repair_ai_hunk_lines(
                     hunk_lines=hunk_buffer,
                     current_path=current_path,
                     target_lines=target_lines,
                     warnings=warnings,
                 )
             )
             hunk_buffer = []

     for idx, line in enumerate(patch_text.splitlines(), start=1):
         # Ignore outer Markdown fences used to wrap the diff. Four or more
         # backticks are wrapper fences; normal triple-backtick fences may be
         # real Markdown content and must stay.
         if looks_like_outer_code_fence(line):
             flush_hunk()
             warnings.append(f"Line {idx}: ignored outer Markdown code fence")
             continue

         # AI sometimes indents hunk headers. Promote them back to real headers.
         if looks_like_hunk_header(line):
             flush_hunk()
             stripped = line.strip()
             if stripped != line:
                 warnings.append(f"Line {idx}: repaired indented hunk header")
             in_hunk = True
             out.append(stripped)
             continue

         if line.startswith("diff --git "):
             flush_hunk()
             old_path, new_path = parse_diff_git_paths(line)
             current_path = new_path or old_path
             target_lines = load_target_lines(root, current_path)
             in_hunk = False
             out.append(line)
             continue

         # NOTE(review): any line starting with "--- " or "+++ " ends the
         # current hunk, including a removed/added line whose content itself
         # starts with "-- " / "++ " — confirm this is acceptable.
         if line.startswith("--- "):
             flush_hunk()
             old_path = clean_path(line[4:])
             # The old path is only adopted when no path is known yet; an
             # earlier file's path otherwise stays until "+++" replaces it.
             if current_path is None:
                 current_path = old_path
                 target_lines = load_target_lines(root, current_path)
             in_hunk = False
             out.append(line)
             continue

         if line.startswith("+++ "):
             flush_hunk()
             new_path = clean_path(line[4:])
             # A None new path (e.g. /dev/null) keeps the previous path.
             current_path = new_path or current_path
             target_lines = load_target_lines(root, current_path)
             in_hunk = False
             out.append(line)
             continue

         # Hunk headers that did not match the stricter pattern above.
         if line.startswith("@@"):
             flush_hunk()
             in_hunk = True
             out.append(line)
             continue

         if in_hunk:
             hunk_buffer.append((idx, line))
         else:
             out.append(line)

     # Emit a hunk that runs to the end of the patch.
     flush_hunk()

     return "\n".join(out) + "\n", warnings


 def strip_one_diff_marker(line: str) -> tuple[str, str] | None:
     """
     Split a hunk line into ``(kind, text)`` by removing exactly one diff marker.

     ``kind`` is "add", "remove" or "context" for the markers ``+``, ``-`` and
     space. Empty lines and lines with any other first character give ``None``.

     Space handling after the marker:
       * ``+``/``-`` followed by exactly one space: that space is dropped, so
         the AI readability form '- old line' yields 'old line' and
         '- - bullet' yields '- bullet'. Two or more spaces are kept in full
         so indented code survives.
       * context marker: one extra space is dropped only in front of a
         Markdown bullet, so '  - bullet' yields '- bullet'.

     Only meant to be called while inside a parsed hunk.
     """
     if not line:
         return None

     marker, text = line[0], line[1:]
     kind = {"+": "add", "-": "remove", " ": "context"}.get(marker)
     if kind is None:
         return None

     if marker == " ":
         if text.startswith((" - ", " * ", " + ")):
             text = text[1:]
     elif text.startswith(" ") and not text.startswith("  "):
         # Covers both the bullet forms and the general single-space form.
         text = text[1:]

     return kind, text


 def parse_patch(patch_text: str) -> tuple[list[PatchFile], list[str]]:
     """Parse patch text into per-file hunks.

     Recognises ``diff --git`` headers, ``---``/``+++`` file headers,
     ``*** Update|Add|Delete File:`` markers and ``@@`` hunk headers. Lines
     inside a hunk become ``DiffLine`` entries; lines outside hunks are ignored.

     Returns:
         ``(files, warnings)`` where ``files`` holds only the file entries that
         have at least one hunk (after ``split_large_ai_hunks``), and
         ``warnings`` holds patch-level messages. File- and hunk-level messages
         are stored on the respective objects.
     """
     warnings: list[str] = []
     files: list[PatchFile] = []
     current: PatchFile | None = None
     current_hunk: Hunk | None = None
     in_hunk = False

     def finish_empty_file_if_needed() -> None:
         # Register the current file entry once it has hunks or a path.
         # Membership uses ==, i.e. dataclass field equality, not identity.
         nonlocal current
         if current and (current.hunks or current.old_path or current.new_path):
             if current not in files:
                 files.append(current)

     def start_file(old_path: str | None = None, new_path: str | None = None) -> PatchFile:
         # Close the previous file entry and begin a new one outside any hunk.
         nonlocal current, current_hunk, in_hunk
         finish_empty_file_if_needed()
         current = PatchFile(old_path=old_path, new_path=new_path)
         current_hunk = None
         in_hunk = False
         return current

     lines = patch_text.splitlines()
     for idx, line in enumerate(lines, start=1):
         stripped = line.strip()

         # Ignore outer fenced-code boundaries if someone saved a markdown reply as a patch.
         # Do not ignore them inside hunks: Markdown files often contain real ``` fences.
         if not in_hunk and stripped.startswith("```"):
             continue
         if not in_hunk and stripped in {"*** Begin Patch", "*** End Patch"}:
             continue

         if line.startswith("diff --git "):
             old_path, new_path = parse_diff_git_paths(line)
             start_file(old_path=old_path, new_path=new_path)
             continue

         # Support common AI/apply_patch style file markers enough to recover the path.
         m = re.match(r"^\*\*\*\s+(Update|Add|Delete) File:\s+(.+)$", line)
         if m:
             path = clean_path(m.group(2))
             if m.group(1) == "Add":
                 start_file(old_path=None, new_path=path)
             elif m.group(1) == "Delete":
                 start_file(old_path=path, new_path=None)
             else:
                 start_file(old_path=path, new_path=path)
             continue

         # NOTE(review): "--- " / "+++ " are treated as file headers even while
         # inside a hunk, so a changed line whose content starts with "-- " or
         # "++ " ends the hunk — confirm this is acceptable.
         if line.startswith("--- "):
             path = clean_path(line[4:])
             # A "---" after hunks (or with no file yet) starts a new file;
             # directly after a file header it only fills in the old path.
             if current is None or current.hunks:
                 current = start_file(old_path=path)
             else:
                 current.old_path = path
             in_hunk = False
             current_hunk = None
             continue

         if line.startswith("+++ "):
             path = clean_path(line[4:])
             if current is None:
                 current = start_file(new_path=path)
             else:
                 current.new_path = path
             in_hunk = False
             current_hunk = None
             continue

         if line.startswith("@@"):
             if current is None:
                 current = start_file()
                 current.parse_warnings.append(f"Line {idx}: hunk found before file path")
             current_hunk = Hunk(header=line)
             current.hunks.append(current_hunk)
             in_hunk = True
             continue

         if line.startswith("\\ No newline at end of file"):
             continue

         if in_hunk and current_hunk is not None:
             # This check cannot trigger as written: both prefixes are consumed
             # by the file-header branches above before control gets here.
             if line.startswith("+++ ") or line.startswith("--- "):
                 # Defensive; file headers should already have reset hunk state.
                 # Bare `---` is a valid Markdown horizontal rule and must remain hunk content.
                 current_hunk.parse_warnings.append(f"Line {idx}: header-looking line inside hunk: {line}")
                 in_hunk = False
                 current_hunk = None
                 continue

             parsed = strip_one_diff_marker(line)
             if parsed is None:
                 # AI often emits unmarked hunk context lines. Treat as context, but warn.
                 current_hunk.lines.append(DiffLine("context", line, line))
                 if line:
                     current_hunk.parse_warnings.append(
                         f"Line {idx}: unmarked line inside hunk treated as context"
                     )
                 else:
                     current_hunk.parse_warnings.append(
                         f"Line {idx}: blank unmarked line inside hunk treated as context"
                     )
             else:
                 kind, text = parsed
                 current_hunk.lines.append(DiffLine(kind, text, line))
             continue

         # Outside hunks, do nothing. Important: '-' outside a hunk is never deletion.

     # Register the last file entry of the patch.
     finish_empty_file_if_needed()

     # Drop file entries with no hunks; they are not actionable for this v1.
     actionable = [pf for pf in files if pf.hunks]
     for pf in actionable:
         if not pf.target_path:
             pf.parse_warnings.append("No target path found for file diff")
     if not actionable:
         warnings.append("No actionable hunks found. Is this a unified diff or AI patch?")

     # Break multi-edit hunks into separate change groups.
     for pf in actionable:
         pf.hunks = split_large_ai_hunks(pf.hunks)

     return actionable, warnings


 def old_new_blocks(hunk: Hunk) -> tuple[list[str], list[str], list[str], list[str], list[str]]:
     """
     Derive the line blocks a hunk describes, all in hunk order.

     Returns a 5-tuple:
       old_block: context + removed lines (text expected in the file before)
       new_block: context + added lines (text expected in the file after)
       removed_only
       added_only
       context_only

     Lines of any other kind are ignored.
     """

     def texts_of(*kinds: str) -> list[str]:
         return [dl.text for dl in hunk.lines if dl.kind in kinds]

     return (
         texts_of("context", "remove"),
         texts_of("context", "add"),
         texts_of("remove"),
         texts_of("add"),
         texts_of("context"),
     )


 def split_large_ai_hunk(hunk: Hunk, max_hunk_lines: int = 24, context_radius: int = 1) -> list[Hunk]:
     """Break a hunk with several separated edits into one hunk per edit group.

     AI diffs often bundle unrelated small edits into a single hunk, so one
     stale line would otherwise poison every edit in it. Changed lines that
     sit within ``context_radius + 1`` positions of each other form one group;
     each group keeps ``context_radius`` surrounding lines, and groups whose
     windows touch or overlap are merged back together.

     Splitting applies to any hunk with several separated groups, regardless
     of size; ``max_hunk_lines`` is accepted but not consulted.

     Returns ``[hunk]`` (the same object) when there is nothing to split;
     otherwise new ``Hunk`` objects whose header carries a
     ``[smartpatch split i/n]`` suffix and whose warnings record the split.
     """
     change_kinds = ("add", "remove")
     total = len(hunk.lines)
     positions = [pos for pos, dl in enumerate(hunk.lines) if dl.kind in change_kinds]

     if len(positions) < 2:
         return [hunk]

     # Group changed positions into runs: adjacent remove/add lines stay
     # together, meaningful context between edits starts a new run.
     runs: list[list[int]] = [[positions[0], positions[0]]]
     for pos in positions[1:]:
         if pos - runs[-1][1] > context_radius + 1:
             runs.append([pos, pos])
         else:
             runs[-1][1] = pos

     # Widen every run by the context radius, clamped to the hunk.
     windows = [
         (max(0, first - context_radius), min(total, last + context_radius + 1))
         for first, last in runs
     ]

     merged: list[tuple[int, int]] = []
     for lo, hi in windows:
         if merged and lo <= merged[-1][1]:
             prev_lo, prev_hi = merged[-1]
             merged[-1] = (prev_lo, max(prev_hi, hi))
         else:
             merged.append((lo, hi))

     group_count = len(merged)
     if group_count < 2:
         return [hunk]

     base_header = hunk.header or "@@"
     parts: list[Hunk] = []
     for number, (lo, hi) in enumerate(merged, start=1):
         chunk = hunk.lines[lo:hi]
         if all(dl.kind not in change_kinds for dl in chunk):
             continue
         parts.append(
             Hunk(
                 header=f"{base_header} [smartpatch split {number}/{group_count}]",
                 lines=chunk,
                 parse_warnings=list(hunk.parse_warnings)
                 + [f"AI hunk split into {group_count} smaller change group(s)."],
             )
         )

     return parts if parts else [hunk]

 def split_large_ai_hunks(hunks: list[Hunk]) -> list[Hunk]:
     """Apply ``split_large_ai_hunk`` to each hunk and flatten the results in order."""
     return [part for hunk in hunks for part in split_large_ai_hunk(hunk)]


 def find_exact_block(file_lines: list[str], block: list[str]) -> list[tuple[int, int]]:
     """Locate every position where ``block`` occurs verbatim in ``file_lines``.

     Each hit is a half-open ``(start, end)`` index pair; overlapping hits are
     all reported. An empty ``block`` yields ``[]``.
     """
     if not block:
         return []
     width = len(block)
     return [
         (begin, begin + width)
         for begin in range(len(file_lines) - width + 1)
         if file_lines[begin : begin + width] == block
     ]


 def find_normalized_block(file_lines: list[str], block: list[str]) -> list[tuple[int, int]]:
     """Locate ``block`` in ``file_lines`` while ignoring whitespace differences.

     Both sides go through ``normalize_block`` before comparison. Each hit is
     a half-open ``(start, end)`` index pair; an empty ``block`` yields ``[]``.
     """
     if not block:
         return []
     haystack = normalize_block(file_lines)
     needle = normalize_block(block)
     width = len(needle)
     return [
         (begin, begin + width)
         for begin in range(len(haystack) - width + 1)
         if haystack[begin : begin + width] == needle
     ]


 def is_placeholder(line: str) -> bool:
     """Tell whether the stripped line is an elision marker.

     Covers the entries of ``PLACEHOLDER_LINES`` plus the comment-style forms
     "// ...", "# ..." and "<!-- ... -->".
     """
     core = line.strip()
     if core in PLACEHOLDER_LINES:
         return True
     return core in {"// ...", "# ...", "<!-- ... -->"}


 def markdown_bullet_variants(line: str) -> list[str]:
    """Return *line* plus bullet-prefixed guesses for AI-sloppy Markdown diffs.

    When the text after the leading whitespace does not already begin with a
    ``-``/``*``/``+`` bullet or a numbered-list marker, two extra forms are
    offered that insert ``- `` and ``* `` right after the original
    indentation.  The unchanged line always comes first.
    """
    body = line.lstrip()
    indent = line[: len(line) - len(body)]

    if re.match(r"^([-*+]\s+|\d+[.)]\s+)", body):
        candidates = [line]
    else:
        candidates = [line, f"{indent}- {body}", f"{indent}* {body}"]

    # Order-preserving de-duplication.
    deduped: list[str] = []
    for candidate in candidates:
        if candidate not in deduped:
            deduped.append(candidate)
    return deduped


 def expand_markdown_old_block_variants(block: list[str], path: Path, enable: bool) -> list[list[str]]:
    """Enumerate bullet-marker variants of a short Markdown old block.

    The block is returned unchanged (as the single element of the result)
    when *enable* is false, when *path* does not have a Markdown extension,
    when the block is empty, or when it has more than three lines.
    Otherwise every combination of per-line ``markdown_bullet_variants`` is
    produced, first line varying slowest, with duplicates removed.
    """
    eligible = enable and path.suffix.lower() in MARKDOWN_EXTENSIONS and bool(block)

    # Keep this conservative: blocks longer than three lines are left alone,
    # because larger combinatorial variant sets are risky and noisy.
    if not eligible or len(block) > 3:
        return [block]

    combos: list[list[str]] = [[]]
    for line in block:
        options = markdown_bullet_variants(line)
        grown: list[list[str]] = []
        for partial in combos:
            for option in options:
                grown.append([*partial, option])
        combos = grown

    # dict keys keep first-seen order, so this removes duplicates stably.
    distinct = dict.fromkeys(tuple(combo) for combo in combos)
    return [list(key) for key in distinct]


 def tail_anchor(line: str, words: int = 8) -> str | None:
    """Return the last *words* whitespace-separated tokens of *line*.

    The tokens are joined with single spaces.  Lines with fewer than four
    tokens yield ``None``.
    """
    tokens = re.findall(r"\S+", line.strip())
    return " ".join(tokens[-words:]) if len(tokens) >= 4 else None


 def fuzzy_window_candidates(file_lines: list[str], old_block: list[str]) -> list[tuple[int, int, float]]:
    """Score file windows against *old_block* and return the best fuzzy matches.

    Every window of *file_lines* whose height is within two lines of
    ``len(old_block)`` (at least one line, at most the file length) is
    normalized, joined with newlines and compared to the normalized old
    block with ``difflib.SequenceMatcher``.  Windows scoring at least 0.82
    are kept.

    Returns at most ten ``(start, end, score)`` triples, best score first.
    Empty blocks and blocks longer than 30 lines return an empty list.
    """
    if not old_block:
        return []
    block_len = len(old_block)
    if block_len > 30:
        return []

    wanted = "\n".join(normalize_block(old_block))
    total = len(file_lines)

    # Allow a small window-size wiggle for AI context drift.
    smallest = max(1, block_len - 2)
    largest = min(total, block_len + 2)

    hits: list[tuple[int, int, float]] = []
    for width in range(smallest, largest + 1):
        for begin in range(total - width + 1):
            stop = begin + width
            text = "\n".join(normalize_block(file_lines[begin:stop]))
            ratio = difflib.SequenceMatcher(None, wanted, text).ratio()
            if ratio >= 0.82:
                hits.append((begin, stop, ratio))

    return sorted(hits, key=lambda hit: hit[2], reverse=True)[:10]



 def parse_hunk_old_start(header: str | None) -> int | None:
    """Extract the 0-based old-file start line from a unified diff hunk header.

    Example:
      @@ -34,14 +40,43 @@
      -> 33

    Returns ``None`` when *header* is empty or carries no ``@@ -N`` part.
    The result is never negative.

    Hunk line numbers are not trusted as primary evidence, but they are
    useful as a tie-breaker when fuzzy candidates are otherwise equally
    strong.
    """
    found = re.search(r"@@\s+-(?P<start>\d+)", header) if header else None
    if found is None:
        return None

    zero_based = int(found.group("start")) - 1
    return zero_based if zero_based > 0 else 0


 def hunk_header_context_text(header: str | None) -> str | None:
    """Return trailing context text from a unified diff hunk header.

    Example:
      @@ -210,7 +284,7 @@ The customer should not need...
      -> "The customer should not need..."

    A trailing ``[smartpatch split i/n]`` marker is removed.  If nothing but
    that marker follows the second ``@@``, the header has no context text
    and ``None`` is returned.

    This is weak evidence, but useful as a bounded anchor for malformed AI
    tail hunks that have no explicit context lines left after repair/splitting.

    Args:
        header: Raw hunk header line, or ``None``.

    Returns:
        The stripped context text, or ``None`` when the header is missing,
        has fewer than two ``@@`` markers, or has no text after them.
    """
    if not header:
        return None

    parts = header.split("@@", 2)
    if len(parts) < 3:
        return None

    text = parts[2].strip()
    if not text:
        return None

    # Remove smartpatch split suffix if present.  The suffix may be the whole
    # remaining text (no whitespace in front of it once stripped), so accept
    # start-of-string as well as leading whitespace.
    text = re.sub(r"(?:^|\s+)\[smartpatch split \d+/\d+\]\s*$", "", text).strip()
    return text or None


 def header_guided_fuzzy_candidate(
    candidates: list[tuple[int, int, float]],
    hunk: Hunk,
    score_floor: float = 0.94,
    max_distance: int = 120,
 ) -> tuple[int, int, float] | None:
    """Pick the fuzzy candidate closest to the hunk header's old-start line.

    Used when fuzzy candidates are strong but not unique: the header line
    number acts as a rough locality hint once content matching has narrowed
    the field.

    Selection rules:
      - the best score must reach *score_floor*
      - the header must yield an old-start line
      - only candidates within 0.03 of the best score compete
      - the nearest competitor must lie within *max_distance* lines
      - a nearest competitor scoring >= 0.99 wins outright
      - otherwise it wins only if no other competitor is equally near

    Returns the chosen ``(start, end, score)`` triple, or ``None``.
    """
    if not candidates:
        return None

    header_line = parse_hunk_old_start(hunk.header)
    if header_line is None:
        return None

    top_score = max(entry[2] for entry in candidates)
    if top_score < score_floor:
        return None

    def offset(entry: tuple[int, int, float]) -> int:
        return abs(entry[0] - header_line)

    contenders = sorted(
        (entry for entry in candidates if top_score - entry[2] < 0.03),
        key=offset,
    )
    if not contenders:
        return None

    nearest = contenders[0]
    if offset(nearest) > max_distance:
        return None

    # Near-perfect duplicate fuzzy windows are usually repeated Markdown
    # fragments; for those the header locality alone decides.
    if nearest[2] >= 0.99:
        return nearest

    # Merely good scores additionally need a uniquely nearest candidate.
    tied = len(contenders) > 1 and offset(contenders[1]) == offset(nearest)
    return None if tied else nearest


 def line_presence_missing(file_lines: list[str], expected_lines: list[str], path: Path, markdown_recovery: bool) -> list[str]:
    """List the expected lines that cannot be found anywhere in the file.

    Comparison is done on normalized text.  Placeholder lines are ignored.
    With *markdown_recovery* on a Markdown path, a line also counts as
    present when one of its ``markdown_bullet_variants`` is in the file.

    Returns the missing lines in their original order and spelling.
    """
    known = set(normalize_block(file_lines))

    def spellings(line: str) -> list[str]:
        if markdown_recovery and path.suffix.lower() in MARKDOWN_EXTENSIONS:
            return markdown_bullet_variants(line)
        return [line]

    return [
        line
        for line in expected_lines
        if not is_placeholder(line)
        and all(normalize_line(form) not in known for form in spellings(line))
    ]


 def markdown_or_normal_similarity(a: str, b: str, path: Path) -> float:
    """Return a ``SequenceMatcher`` ratio for two single lines.

    For Markdown paths, when the module defines ``markdown_bullet_body`` and
    it returns a non-``None`` value for both lines, those two values are
    compared.  In every other case the normalized lines are compared.
    """
    pair = None

    if path.suffix.lower() in MARKDOWN_EXTENSIONS and "markdown_bullet_body" in globals():
        bodies = (markdown_bullet_body(a), markdown_bullet_body(b))
        if bodies[0] is not None and bodies[1] is not None:
            pair = bodies

    if pair is None:
        pair = (normalize_line(a), normalize_line(b))

    return difflib.SequenceMatcher(None, pair[0], pair[1]).ratio()


 def find_unique_fuzzy_line(
    file_lines: list[str],
    target_line: str,
    path: Path,
    min_score: float = 0.86,
 ) -> tuple[int, float] | None:
    """Find the single file line that closely resembles a stale old line.

    Every line is scored with ``markdown_or_normal_similarity``; lines below
    *min_score* are discarded.  The best-scoring line is returned as
    ``(index, score)`` only if no other surviving line scores within 0.03 of
    it.  Returns ``None`` when nothing qualifies or the best is not unique.
    """
    scored = (
        (pos, markdown_or_normal_similarity(text, target_line, path))
        for pos, text in enumerate(file_lines)
    )
    hits = sorted(
        (pair for pair in scored if pair[1] >= min_score),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not hits:
        return None

    leader = hits[0]
    near_leader = [pair for pair in hits if leader[1] - pair[1] < 0.03]
    return leader if len(near_leader) == 1 else None


 def replacement_preserving_single_line_style(
    added_lines: list[str],
    matched_old_line: str,
    path: Path,
 ) -> list[str]:
    """Carry the matched line's bullet marker over to the added lines.

    Applies only to Markdown paths and only when the module defines
    ``markdown_bullet_marker``.  If the matched old line uses ``-``, ``*``
    or ``+``, every added line that itself has one of those markers is
    rewritten to use the old line's marker; other lines pass through.
    In all other situations *added_lines* is returned as given.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS or "markdown_bullet_marker" not in globals():
        return added_lines

    bullets = {"-", "*", "+"}
    target_marker = markdown_bullet_marker(matched_old_line)
    if target_marker not in bullets:
        return added_lines

    return [
        rewrite_markdown_bullet_marker(text, target_marker)
        if markdown_bullet_marker(text) in bullets
        else text
        for text in added_lines
    ]


 def find_unique_context_anchor(
    file_lines: list[str],
    anchor: str,
    path: Path,
 ) -> tuple[int, str] | None:
    """Resolve *anchor* to exactly one file line, trying ever looser matching.

    Strategies, in order; the first that yields exactly one line wins:
      1. ``"exact"``          - identical text
      2. ``"normalized"``     - equal after ``normalize_line``
      3. ``"markdown_equiv"`` - ``markdown_lines_equiv`` (Markdown paths
         only, and only if that helper exists in the module)
      4. ``"fuzzy"``          - ``find_unique_fuzzy_line`` with a 0.90 floor

    Returns ``(line_index, strategy_name)`` or ``None``.
    """

    def sole_match(accepts) -> int | None:
        hits = [pos for pos, text in enumerate(file_lines) if accepts(text)]
        return hits[0] if len(hits) == 1 else None

    pos = sole_match(lambda text: text == anchor)
    if pos is not None:
        return pos, "exact"

    wanted = normalize_line(anchor)
    pos = sole_match(lambda text: normalize_line(text) == wanted)
    if pos is not None:
        return pos, "normalized"

    if path.suffix.lower() in MARKDOWN_EXTENSIONS and "markdown_lines_equiv" in globals():
        pos = sole_match(lambda text: markdown_lines_equiv(text, anchor))
        if pos is not None:
            return pos, "markdown_equiv"

    near = find_unique_fuzzy_line(file_lines, anchor, path, min_score=0.90)
    return (near[0], "fuzzy") if near else None


 def block_already_matches(
    current: list[str],
    expected: list[str],
    path: Path,
 ) -> bool:
    """Decide whether *current* already equals *expected* for patching purposes.

    True when the two blocks are identical, identical after
    ``normalize_block``, or - for Markdown paths where the module defines
    ``markdown_lines_equiv`` - of equal length with every line pair
    equivalent under that helper.
    """
    if current == expected or normalize_block(current) == normalize_block(expected):
        return True

    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return False
    if "markdown_lines_equiv" not in globals():
        return False
    if len(current) != len(expected):
        return False

    return all(markdown_lines_equiv(have, want) for have, want in zip(current, expected))


 def infer_local_bullet_marker(lines: list[str], path: Path) -> str | None:
    """Return the bullet marker of the first bulleted line in *lines*.

    Only ``-``, ``*`` and ``+`` count.  Returns ``None`` for non-Markdown
    paths, when ``markdown_bullet_marker`` is not defined in the module, or
    when no line carries one of those markers.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS or "markdown_bullet_marker" not in globals():
        return None

    markers = (markdown_bullet_marker(text) for text in lines)
    return next((mark for mark in markers if mark in {"-", "*", "+"}), None)


 def rewrite_added_bullets_to_local_style(
    lines: list[str],
    marker: str | None,
    path: Path,
 ) -> list[str]:
    """Rewrite bullet markers in *lines* to the given local *marker*.

    Lines whose own marker is ``-``, ``*`` or ``+`` are rewritten with
    ``rewrite_markdown_bullet_marker``; everything else is kept verbatim.
    *lines* is returned as given when *marker* is falsy, the path is not
    Markdown, or ``markdown_bullet_marker`` is not defined in the module.
    """
    applicable = bool(marker) and path.suffix.lower() in MARKDOWN_EXTENSIONS
    if not applicable or "markdown_bullet_marker" not in globals():
        return lines

    bullets = {"-", "*", "+"}
    return [
        rewrite_markdown_bullet_marker(text, marker)
        if markdown_bullet_marker(text) in bullets
        else text
        for text in lines
    ]


 def hunk_change_bounds(hunk: Hunk) -> tuple[int, int] | None:
    """Return the indexes of the first and last changed line in *hunk*.

    A changed line is one whose kind is ``"add"`` or ``"remove"``.  Both
    indexes are inclusive positions in ``hunk.lines``.  Returns ``None``
    when the hunk contains no changed line.
    """
    first: int | None = None
    last: int | None = None

    for pos, entry in enumerate(hunk.lines):
        if entry.kind in {"add", "remove"}:
            if first is None:
                first = pos
            last = pos

    if first is None:
        return None
    return first, last


 def nearest_context_before(hunk: Hunk, index: int) -> tuple[int, str] | None:
    """Find the closest non-blank context line above position *index*.

    Scans ``hunk.lines`` upward from ``index - 1`` to the start.  Returns
    ``(position, text)`` of the first context line whose text is not just
    whitespace, or ``None`` if there is none.
    """
    for pos in reversed(range(index)):
        entry = hunk.lines[pos]
        if entry.kind != "context":
            continue
        if entry.text.strip():
            return pos, entry.text
    return None


 def nearest_context_after(hunk: Hunk, index: int) -> tuple[int, str] | None:
    """Find the closest non-blank context line below position *index*.

    Scans ``hunk.lines`` downward from ``index + 1`` to the end.  Returns
    ``(position, text)`` of the first context line whose text is not just
    whitespace, or ``None`` if there is none.
    """
    pos = index + 1
    total = len(hunk.lines)
    while pos < total:
        entry = hunk.lines[pos]
        if entry.kind == "context" and entry.text.strip():
            return pos, entry.text
        pos += 1
    return None


 def hunk_new_region_lines(
    hunk: Hunk,
    start: int,
    end: int,
    path: Path,
    preferred_marker: str | None,
 ) -> list[str]:
    """Build the post-patch text for ``hunk.lines[start:end + 1]``.

    Context and added lines are kept in order; removed lines are dropped.
    The result is then passed through ``rewrite_added_bullets_to_local_style``
    with *preferred_marker* and *path*.
    """
    region = hunk.lines[start : end + 1]
    kept = [entry.text for entry in region if entry.kind in {"context", "add"}]
    return rewrite_added_bullets_to_local_style(kept, preferred_marker, path)


 def locate_context_bracket_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Replace the hunk region bracketed by two unique context anchors.

    The nearest non-blank context line before the first change and the
    nearest non-blank context line after the last change are each resolved
    in the file with ``find_unique_context_anchor``.  Everything strictly
    between the two anchors in the file is the span to replace; the
    replacement is the new-side text of everything strictly between the same
    two anchors in the hunk.

    The hunk slice is taken anchor-to-anchor rather than first-change to
    last-change.  Blank context lines between an anchor and the changed
    lines belong to the replaced file span, so they must also be part of the
    replacement or they would be silently deleted.

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: The hunk to place.
        path: Target path; its suffix selects Markdown-specific handling.
        warnings: Warnings collected so far.  Size-limit warnings are
            appended to this list in place; the returned result carries a
            copy extended with a strategy note.

    Returns:
        ``None`` when the strategy does not apply (no changes, a missing or
        non-unique anchor, anchors out of order, or a size limit exceeded).
        Otherwise ``(match, replacement)``.  ``match.found`` is false with an
        ``already_applied_...`` method when the file span already matches
        the replacement; else it is true with the half-open span to replace.
    """
    bounds = hunk_change_bounds(hunk)
    if bounds is None:
        return None

    first_change, last_change = bounds

    prev_ctx = nearest_context_before(hunk, first_change)
    next_ctx = nearest_context_after(hunk, last_change)

    if not prev_ctx or not next_ctx:
        return None

    prev_index, prev_text = prev_ctx
    next_index, next_text = next_ctx

    found_prev = find_unique_context_anchor(file_lines, prev_text, path)
    found_next = find_unique_context_anchor(file_lines, next_text, path)

    if not found_prev or not found_next:
        return None

    prev_line, prev_method = found_prev
    next_line, next_method = found_next

    if prev_line >= next_line:
        return None

    # The file span strictly between the two anchors.
    replace_start = prev_line + 1
    replace_end = next_line
    existing_span = replace_end - replace_start

    if existing_span > 80:
        warnings.append(
            f"Context-bracket span too large ({existing_span} lines); skipped atom strategy."
        )
        return None

    # Sample a few lines around the bracket to pick up the file's bullet style.
    local_marker = infer_local_bullet_marker(
        file_lines[max(0, prev_line - 3) : min(len(file_lines), next_line + 4)],
        path,
    )

    # Mirror the file span: take the hunk's new-side lines strictly between
    # the anchors.  Lines between an anchor and the nearest change can only
    # be blank context (anything non-blank would itself be the anchor), so
    # this adds exactly the separator lines that must be preserved.
    replacement = hunk_new_region_lines(
        hunk=hunk,
        start=prev_index + 1,
        end=next_index - 1,
        path=path,
        preferred_marker=local_marker,
    )

    if len(replacement) > 140:
        warnings.append(
            f"Context-bracket replacement too large ({len(replacement)} lines); skipped atom strategy."
        )
        return None

    current_inner = file_lines[replace_start:replace_end]

    if block_already_matches(current_inner, replacement, path):
        return MatchResult(
            found=False,
            confidence=0.96,
            method="already_applied_context_bracket_atom",
            warnings=warnings + [
                f"Context-bracket atom already matches between unique anchors ({prev_method}/{next_method})."
            ],
            missing_lines=[],
            candidate_count=1,
        ), replacement

    return MatchResult(
        True,
        replace_start,
        replace_end,
        0.93,
        f"context_bracket_atom_{prev_method}_{next_method}",
        warnings + [
            f"Context-bracket atom used between unique anchors ({prev_method}/{next_method})."
        ],
        [],
        1,
    ), replacement


 def find_best_fuzzy_line_in_range(
    file_lines: list[str],
    target_line: str,
    path: Path,
    start: int,
    end: int,
    min_score: float = 0.74,
 ) -> tuple[int, float] | None:
    """Find the single best fuzzy match for *target_line* within a line range.

    Only ``file_lines[start:end]`` is examined, with both bounds clamped to
    the file.  Lines scoring below *min_score* are discarded.  The best
    line is returned as ``(index, score)`` only if no other surviving line
    scores within 0.03 of it; otherwise the result is ``None``.
    """
    lo = max(0, start)
    hi = min(len(file_lines), end)

    scored = (
        (pos, markdown_or_normal_similarity(file_lines[pos], target_line, path))
        for pos in range(lo, hi)
    )
    hits = sorted(
        (pair for pair in scored if pair[1] >= min_score),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not hits:
        return None

    leader = hits[0]
    near_leader = [pair for pair in hits if leader[1] - pair[1] < 0.03]
    return leader if len(near_leader) == 1 else None


 def is_numbered_markdown_item(line: str) -> bool:
    """Report whether *line* is a numbered list item such as ``1. x`` or ``2) x``.

    Surrounding whitespace is stripped first, so a bare marker with nothing
    after it does not count.
    """
    trimmed = line.strip()
    return re.match(r"^\s*\d+[.)]\s+", trimmed) is not None





 def text_token_set(lines: list[str]) -> set[str]:
    """Collect the distinct content words of *lines* for similarity checks.

    The lines are joined and lower-cased.  A token starts with a letter and
    continues with at least two letters, digits, ``_`` or ``-``.  Tokens in
    the stop list below are dropped.
    """
    stop_words = frozenset((
        "the", "and", "for", "that", "this", "with", "from", "into", "they",
        "must", "not", "may", "still", "should", "one", "item", "entry",
        "link", "links", "code", "codes",
    ))
    lowered = " ".join(lines).lower()
    found = re.findall(r"[a-z][a-z0-9_-]{2,}", lowered)
    return {token for token in found if token not in stop_words}


 def joined_similarity(a: list[str], b: list[str]) -> float:
    """Return the ``SequenceMatcher`` ratio of two blocks, each treated as one line.

    Each block is joined with single spaces and passed through
    ``normalize_line`` before comparison.
    """
    left = normalize_line(" ".join(a))
    right = normalize_line(" ".join(b))
    matcher = difflib.SequenceMatcher(None, left, right)
    return matcher.ratio()




 def strip_outer_blank_lines(lines: list[str]) -> list[str]:
    """Return a copy of *lines* without leading and trailing blank lines.

    A line is blank when it is empty or whitespace only.  Blank lines in the
    interior are kept, and the input list is not modified.
    """
    first = 0
    last = len(lines)

    while first < last and not lines[first].strip():
        first += 1
    while last > first and not lines[last - 1].strip():
        last -= 1

    return list(lines[first:last])


 def locate_collapsed_stale_paragraph_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Replace a stale multi-line paragraph currently collapsed into fewer lines.

    Handles Markdown docs where an AI patch expects:

        - old line 1
        - old line 2
        - old line 3
        - old line 4

    but the target has the same semantic paragraph collapsed into one long line.

    Conservative limits:
      - Markdown only
      - removed + added paragraph hunk (2-8 removed lines, 2-10 added lines)
      - explicit context is blank-only
      - replacement of at most 14 lines after trimming outer blank lines
      - search bounded to 140 lines before / 180 lines after the hunk
        header old-start
      - candidate windows are 1 to min(8, removed + 2) lines tall and must
        reach a joined-text similarity of 0.62 with the removed lines
      - unique best local window, or uniquely nearest to the header line
        among windows within 0.025 of the best score

    Returns:
      ``None`` when the strategy does not apply or no unambiguous window is
      found.  Otherwise ``(match, replacement)``: ``match.found`` is false
      with an ``already_applied_...`` method when the chosen window already
      matches the replacement, else true with the half-open window to
      replace.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    # old_block and new_block are unpacked but not used in this function.
    old_block, new_block, removed_only, added_only, context_only = old_new_blocks(hunk)

    if not removed_only or not added_only:
        return None

    # This is specifically for blank-context paragraph replacements.
    if any(line.strip() for line in context_only):
        return None

    if len(removed_only) < 2 or len(removed_only) > 8:
        return None

    if len(added_only) < 2 or len(added_only) > 10:
        return None

    old_start = parse_hunk_old_start(hunk.header)
    if old_start is None:
        return None

    bounds = hunk_change_bounds(hunk)
    if bounds is None:
        return None

    first_change, last_change = bounds

    # New-side text of the changed region, without bullet restyling and with
    # leading/trailing blank lines trimmed.
    replacement = strip_outer_blank_lines(
        hunk_new_region_lines(
            hunk=hunk,
            start=first_change,
            end=last_change,
            path=path,
            preferred_marker=None,
        )
    )

    if not replacement or len(replacement) > 14:
        return None

    # Search window around the header's old-start line, clamped to the file.
    search_start = max(0, old_start - 140)
    search_end = min(len(file_lines), old_start + 180)

    # Each candidate is (score, start, end, size).
    candidates: list[tuple[float, int, int, int]] = []

    n = len(removed_only)
    for size in range(1, min(8, n + 2) + 1):
        for start in range(search_start, max(search_start, search_end - size + 1)):
            end = start + size
            window = file_lines[start:end]

            # Skip windows that are entirely blank.
            if not any(line.strip() for line in window):
                continue

            # Skip windows touching a heading, a code fence or a list item.
            # NOTE(review): the numbered-item check only covers the literal
            # prefixes "1." to "5."; other numbers are not filtered, and text
            # such as "1.5 ..." is - confirm this is intended.
            stripped_nonblank = [line.strip() for line in window if line.strip()]
            if any(line.startswith("#") for line in stripped_nonblank):
                continue
            if any(line.startswith("```") for line in stripped_nonblank):
                continue
            if any(line.startswith(("- ", "* ", "+ ", "1.", "2.", "3.", "4.", "5.")) for line in stripped_nonblank):
                continue

            sim = joined_similarity(window, removed_only)

            # Prefer compact/collapsed windows when similarity is equal.
            compact_bonus = 0.03 if size == 1 else 0.0
            score = sim + compact_bonus

            # The threshold applies to the raw similarity, not the
            # bonus-adjusted score used for ranking.
            if sim >= 0.62:
                candidates.append((score, start, end, size))

    if not candidates:
        return None

    candidates.sort(key=lambda item: item[0], reverse=True)
    best_score, start, end, size = candidates[0]
    close = [c for c in candidates if best_score - c[0] < 0.025]

    # Require unique best, or uniquely nearest to hunk header.
    if len(close) > 1:
        ranked = sorted(close, key=lambda c: abs(c[1] - old_start))
        best_distance = abs(ranked[0][1] - old_start)
        second_distance = abs(ranked[1][1] - old_start)
        if best_distance == second_distance:
            return None
        # The nearest close candidate replaces the top-scoring one, so
        # best_score below reports the selected window's score.
        best_score, start, end, size = ranked[0]

    current = file_lines[start:end]

    if block_already_matches(current, replacement, path):
        return MatchResult(
            found=False,
            confidence=0.96,
            method="already_applied_collapsed_stale_paragraph_atom",
            warnings=warnings + [
                f"Collapsed stale paragraph already matches; score={best_score:.2f}."
            ],
            missing_lines=[],
            candidate_count=len(candidates),
        ), replacement

    return MatchResult(
        True,
        start,
        end,
        0.91,
        "collapsed_stale_paragraph_atom",
        warnings + [
            f"Collapsed stale paragraph atom used; score={best_score:.2f}, span={size}, candidates={len(candidates)}."
        ],
        [],
        len(candidates),
    ), replacement


 def locate_stale_paragraph_by_header_window_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Replace a small stale paragraph near the hunk header line.

    This is for cases where:
      - the old paragraph text has drifted
      - explicit context anchors are absent or useless
      - the hunk header line number is still close enough
      - the old/new paragraph is small
      - the local target window shares strong domain terms with removed text

    It is intentionally Markdown-only and bounded by hunk locality: windows
    are searched from 90 lines before to 140 lines after the header's
    old-start line.  Windows containing a Markdown heading of any level are
    never candidates, so a heading cannot be overwritten by paragraph text.

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: The hunk to place; it must have removed and added lines and no
            context lines.
        path: Target path; only Markdown suffixes are handled.
        warnings: Warnings collected so far; the returned result carries a
            copy extended with a strategy note.

    Returns:
        ``None`` when the strategy does not apply or no unambiguous window
        is found.  Otherwise ``(match, replacement)``: ``match.found`` is
        false with an ``already_applied_...`` method when the chosen window
        already matches the replacement, else true with the half-open window
        to replace.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    old_block, new_block, removed_only, added_only, context_only = old_new_blocks(hunk)

    if not removed_only or not added_only:
        return None

    # Do not hijack hunks that already have explicit context. Let bracket,
    # side-anchored, normalized, and other safer strategies handle those.
    if context_only:
        return None

    if len(removed_only) < 2 or len(removed_only) > 8:
        return None

    if len(added_only) > 14 or len(new_block) > 18:
        return None

    old_start = parse_hunk_old_start(hunk.header)
    if old_start is None:
        return None

    old_tokens = text_token_set(removed_only)
    if len(old_tokens) < 4:
        return None

    bounds = hunk_change_bounds(hunk)
    if bounds is None:
        return None

    first_change, last_change = bounds

    replacement = hunk_new_region_lines(
        hunk=hunk,
        start=first_change,
        end=last_change,
        path=path,
        preferred_marker=None,
    )

    if not replacement or len(replacement) > 18:
        return None

    n = len(removed_only)
    search_start = max(0, old_start - 90)
    search_end = min(len(file_lines), old_start + 140)

    # ATX heading of any level ("#" through "######"), followed by
    # whitespace or end of line.
    heading_re = re.compile(r"#{1,6}(?:\s|$)")

    # Each candidate is (score, start, end, sim, overlap, shared_term_count).
    candidates: list[tuple[float, int, int, float, float, int]] = []

    for size in range(max(1, n - 2), min(10, n + 3) + 1):
        for start in range(search_start, max(search_start, search_end - size + 1)):
            end = start + size
            current = file_lines[start:end]

            if not current:
                continue

            # Avoid obviously structural windows.
            nonblank = [line for line in current if line.strip()]
            if not nonblank:
                continue
            if any(heading_re.match(line.lstrip()) for line in nonblank):
                continue

            sim = joined_similarity(current, removed_only)

            current_tokens = text_token_set(current)
            shared = old_tokens & current_tokens
            overlap = len(shared) / max(1, min(len(old_tokens), len(current_tokens)))

            # Score balances phrase similarity, token overlap, and locality.
            distance = abs(start - old_start)
            locality = max(0.0, 1.0 - (distance / 140.0))
            score = (sim * 0.50) + (overlap * 0.35) + (locality * 0.15)

            # Need enough evidence: either some phrase similarity, or strong
            # token overlap backed by at least five shared terms.
            if sim < 0.32 and not (overlap >= 0.52 and len(shared) >= 5):
                continue

            candidates.append((score, start, end, sim, overlap, len(shared)))

    if not candidates:
        return None

    candidates.sort(key=lambda item: item[0], reverse=True)
    best = candidates[0]
    close = [c for c in candidates if best[0] - c[0] < 0.04]

    # Require a unique best candidate. If tied, choose only if nearest to hunk
    # header is uniquely nearest.
    if len(close) > 1:
        ranked_by_distance = sorted(close, key=lambda c: abs(c[1] - old_start))
        best_distance = abs(ranked_by_distance[0][1] - old_start)
        second_distance = abs(ranked_by_distance[1][1] - old_start)
        if best_distance == second_distance:
            return None
        best = ranked_by_distance[0]

    score, start, end, sim, overlap, shared_count = best
    current = file_lines[start:end]

    if block_already_matches(current, replacement, path):
        return MatchResult(
            found=False,
            confidence=0.96,
            method="already_applied_stale_paragraph_header_window_atom",
            warnings=warnings + [
                f"Header-window stale paragraph already matches; sim={sim:.2f}, overlap={overlap:.2f}, shared_terms={shared_count}."
            ],
            missing_lines=[],
            candidate_count=len(candidates),
        ), replacement

    return MatchResult(
        True,
        start,
        end,
        0.91,
        "stale_paragraph_header_window_atom",
        warnings + [
            f"Header-window stale paragraph atom used; sim={sim:.2f}, overlap={overlap:.2f}, shared_terms={shared_count}, candidates={len(candidates)}."
        ],
        [],
        len(candidates),
    ), replacement


 def locate_stale_paragraph_after_anchor_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Replace a small stale Markdown paragraph after a unique anchor.

    This is for cases where the old paragraph text has drifted enough that the
    literal removed lines no longer exist, but the hunk still clearly describes
    a bounded paragraph replacement.

    Conservative limits:
      - Markdown only
      - removed + added paragraph-style hunk
      - small old/new regions
      - unique previous context or hunk-header context anchor
      - bounded local replacement span
      - meaningful joined-text or token overlap with the stale old text

    The span starts at the first non-blank line after the anchor.  It ends
    at the hunk's following context line when that line resolves uniquely,
    lies after the anchor and is at most 16 lines past the start; otherwise
    it covers as many lines as the hunk removes.  The last component of the
    reported method name is the following anchor's match method only when
    that anchor actually bounds the span, and ``none`` otherwise.

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: The hunk to place.
        path: Target path; only Markdown suffixes are handled.
        warnings: Warnings collected so far; the returned result carries a
            copy extended with a strategy note.

    Returns:
        ``None`` when the strategy does not apply or no anchor produces an
        acceptable span.  Otherwise ``(match, replacement)``:
        ``match.found`` is false with an ``already_applied_...`` method when
        the span already matches the replacement, else true with the
        half-open span to replace.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    old_block, new_block, removed_only, added_only, context_only = old_new_blocks(hunk)

    if not removed_only or not added_only:
        return None

    if len(removed_only) < 2 or len(removed_only) > 8:
        return None

    if len(added_only) > 12:
        return None

    bounds = hunk_change_bounds(hunk)
    if bounds is None:
        return None

    first_change, last_change = bounds

    prev_ctx = nearest_context_before(hunk, first_change)
    next_ctx = nearest_context_after(hunk, last_change)

    # Anchors are tried in this order: explicit previous context first, then
    # the text trailing the hunk header.
    anchor_candidates: list[tuple[str, str]] = []

    if prev_ctx and prev_ctx[1].strip():
        anchor_candidates.append((prev_ctx[1], "prev_context"))

    header_text = hunk_header_context_text(hunk.header)
    if header_text:
        anchor_candidates.append((header_text, "hunk_header"))

    if not anchor_candidates:
        return None

    replacement = hunk_new_region_lines(
        hunk=hunk,
        start=first_change,
        end=last_change,
        path=path,
        preferred_marker=None,
    )

    if not replacement or len(replacement) > 18:
        return None

    old_tokens = text_token_set(removed_only)
    if not old_tokens:
        return None

    for anchor_text, anchor_source in anchor_candidates:
        found_anchor = find_unique_context_anchor(file_lines, anchor_text, path)

        # Header text gets one extra, looser fuzzy attempt.
        if not found_anchor and anchor_source == "hunk_header":
            fuzzy_anchor = find_unique_fuzzy_line(
                file_lines=file_lines,
                target_line=anchor_text,
                path=path,
                min_score=0.78,
            )
            if fuzzy_anchor:
                found_anchor = (fuzzy_anchor[0], "hunk_header_fuzzy")

        if not found_anchor:
            continue

        anchor_i, anchor_method = found_anchor

        # Skip blank separator lines directly after the anchor.
        start = anchor_i + 1
        while start < len(file_lines) and not file_lines[start].strip():
            start += 1

        if start >= len(file_lines):
            continue

        # Prefer explicit following context if it uniquely resolves after anchor.
        end: int | None = None
        next_method = "none"

        if next_ctx and next_ctx[1].strip():
            found_next = find_unique_context_anchor(file_lines, next_ctx[1], path)
            if found_next:
                next_i, found_next_method = found_next
                if anchor_i < next_i and (next_i - start) <= 16:
                    end = next_i
                    # Record the method only when the anchor is really used,
                    # so the reported strategy name stays truthful.
                    next_method = found_next_method

        # Otherwise replace same-sized stale paragraph region.
        if end is None:
            end = min(len(file_lines), start + len(removed_only))

        if end <= start or (end - start) > 16:
            continue

        current = file_lines[start:end]
        if not current:
            continue

        sim = joined_similarity(current, removed_only)

        current_tokens = text_token_set(current)
        shared = old_tokens & current_tokens
        overlap = len(shared) / max(1, min(len(old_tokens), len(current_tokens)))

        # Bounded but not blind.
        #
        # Require some actual phrase similarity. Token overlap alone can be too
        # permissive for docs that repeat domain terms like Product, Offer,
        # Purchase Entry, Shop QR, etc.
        #
        # Exception: hunk-header anchored matches may pass with slightly lower
        # phrase similarity if token overlap is very strong.
        if sim < 0.34:
            if not (
                anchor_source == "hunk_header"
                and sim >= 0.28
                and overlap >= 0.58
                and len(shared) >= 8
            ):
                continue

        if block_already_matches(current, replacement, path):
            return MatchResult(
                found=False,
                confidence=0.96,
                method="already_applied_stale_paragraph_after_anchor_atom",
                warnings=warnings + [
                    f"Stale paragraph after anchor already matches ({anchor_source}/{anchor_method})."
                ],
                missing_lines=[],
                candidate_count=1,
            ), replacement

        return MatchResult(
            True,
            start,
            end,
            0.91,
            f"stale_paragraph_after_anchor_atom_{anchor_source}_{anchor_method}_{next_method}",
            warnings + [
                f"Stale paragraph atom used after unique anchor; sim={sim:.2f}, overlap={overlap:.2f}, shared_terms={len(shared)}."
            ],
            [],
            1,
        ), replacement

    return None


 def locate_header_anchor_tail_region_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Match a short tail region sitting directly below the hunk-header context.

    Meant for stale or malformed tail hunks whose only usable anchor is the
    trailing context text of the hunk header: the explicit old lines have
    drifted and the hunk carries no context lines of its own.

    All of the following must hold, otherwise ``None`` is returned:
      - the target has a Markdown extension
      - the hunk both removes and adds lines and has no context lines
      - at most 8 removed and 10 added lines
      - the header context resolves to one file line (anchor lookup first,
        then a unique fuzzy line with score >= 0.78)
      - old region <= 12 lines and replacement <= 16 lines
      - the file lines below the anchor resemble the old region at least
        weakly (average >= 0.45, or best single line >= 0.70)

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: Parsed hunk to place.
        path: Target path; its suffix gates the strategy.
        warnings: Warnings gathered so far; copied into the result, never
            mutated here.

    Returns:
        ``(MatchResult, replacement_lines)``. The result carries ``found=True``
        and the ``[start, end)`` span to replace, or ``found=False`` with an
        ``already_applied_...`` method when that span already matches the
        replacement. ``None`` when the strategy does not apply.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    _, _, removed_only, added_only, context_only = old_new_blocks(hunk)

    # A real replacement needs both sides. Hunks that still carry explicit
    # context lines are left to the context-based strategies, so a weak header
    # match cannot hijack an ordinary mid-file edit.
    if not (removed_only and added_only) or context_only:
        return None

    if len(removed_only) > 8 or len(added_only) > 10:
        return None

    change_bounds = hunk_change_bounds(hunk)
    if change_bounds is None:
        return None
    first_change, last_change = change_bounds

    header_text = hunk_header_context_text(hunk.header)
    if not header_text:
        return None

    anchor = find_unique_context_anchor(file_lines, header_text, path)
    if not anchor:
        fuzzy_hit = find_unique_fuzzy_line(
            file_lines=file_lines,
            target_line=header_text,
            path=path,
            min_score=0.78,
        )
        if not fuzzy_hit:
            return None
        anchor = (fuzzy_hit[0], "hunk_header_fuzzy")

    anchor_index, anchor_method = anchor
    total = len(file_lines)

    # The old region is taken from the whole changed slice of the hunk, not
    # only from the removed lines, so blank/context lines between changes
    # count towards its length.
    changed_slice = hunk.lines[first_change : last_change + 1]
    old_region = [dl.text for dl in changed_slice if dl.kind in {"context", "remove"}]

    marker_window = file_lines[max(0, anchor_index - 3) : min(total, anchor_index + 10)]
    replacement = hunk_new_region_lines(
        hunk=hunk,
        start=first_change,
        end=last_change,
        path=path,
        preferred_marker=infer_local_bullet_marker(marker_window, path),
    )

    if not old_region or not replacement:
        return None
    if len(old_region) > 12 or len(replacement) > 16:
        return None

    # The span starts at the first non-blank line below the anchor, which
    # leaves blank separator lines after the anchor untouched.
    start = next(
        (i for i in range(anchor_index + 1, total) if file_lines[i].strip()),
        None,
    )
    if start is None:
        return None

    end = min(total, start + len(old_region))
    span = end - start
    if span <= 0 or span > 12:
        return None

    # Line-by-line resemblance between what is in the file now and the stale
    # old region; zip() stops at the shorter side when the file ends early.
    current = file_lines[start:end]
    scores = [
        markdown_or_normal_similarity(file_line, old_line, path)
        for file_line, old_line in zip(current, old_region)
    ]
    if scores:
        avg_score = sum(scores) / len(scores)
        best_score = max(scores)
    else:
        avg_score = best_score = 0.0

    # Drifted text is tolerated, but when the average AND the best single line
    # are both weak the cut would be essentially random, so give up.
    if avg_score < 0.45 and best_score < 0.70:
        return None

    if block_already_matches(current, replacement, path):
        already_note = f"Header-anchor tail region already matches after unique header context ({anchor_method})."
        return MatchResult(
            found=False,
            confidence=0.96,
            method="already_applied_header_anchor_tail_region_atom",
            warnings=warnings + [already_note],
            missing_lines=[],
            candidate_count=1,
        ), replacement

    applied_note = f"Header-anchor tail region atom used; avg_score={avg_score:.2f}, best_score={best_score:.2f}."
    result = MatchResult(
        True,
        start,
        end,
        0.91,
        f"header_anchor_tail_region_atom_{anchor_method}",
        warnings + [applied_note],
        [],
        1,
    )
    return result, replacement


 def locate_prev_anchor_tail_replacement_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Replace a bounded stale tail region after a unique previous context.

    Handles hunks like:

      context anchor
      -old line 1
      -old line 2
      +new line 1
      +new line 2

    with no following context line.

    This is deliberately limited:
      - must have a unique previous context anchor (the nearest context line
        before the first change, else the hunk-header context text)
      - must have removals and additions (at most 12 removed, 16 added)
      - must have no following context anchor
      - replacement must be at most 24 lines
      - the old lines must fuzzy-match a local span searched from the line
        after the anchor up to ``anchor + 40``

    The span that gets matched and replaced is sized from the old side of the
    change region (removed lines plus any context lines that sit between the
    first and last change), because the replacement is built from that same
    ``[first_change, last_change]`` region. Sizing it from the removed lines
    alone would misalign the comparison by one line per interior context line
    and could leave trailing removed lines behind in the file.

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: Parsed hunk to place.
        path: Target path, passed through to the matching helpers.
        warnings: Warnings gathered so far; copied into the result, never
            mutated here.

    Returns:
        ``(MatchResult, replacement_lines)`` with ``found=True`` and the
        ``[start, end)`` span to replace, or ``None`` when the strategy does
        not apply or the local text does not resemble the old lines.
    """
    _, _, removed_only, added_only, _ = old_new_blocks(hunk)

    if not removed_only or not added_only:
        return None

    if len(removed_only) > 12 or len(added_only) > 16:
        return None

    bounds = hunk_change_bounds(hunk)
    if bounds is None:
        return None

    first_change, last_change = bounds

    prev_ctx = nearest_context_before(hunk, first_change)
    next_ctx = nearest_context_after(hunk, last_change)

    # This strategy is specifically for tail hunks without a following anchor.
    if next_ctx:
        return None

    # Old side of the change region. With no interior context this is exactly
    # removed_only (the common case, unchanged behavior). When context lines
    # sit between the changes, they are part of what the file contains at this
    # location, so they must count towards the span; this mirrors how the
    # header-anchor tail strategy builds its old region.
    # NOTE(review): assumes hunk_new_region_lines() emits those interior
    # context lines in the replacement — confirm against its implementation.
    old_region: list[str] = removed_only
    lead = 0
    old_side = [
        dl
        for dl in hunk.lines[first_change : last_change + 1]
        if dl.kind in {"context", "remove"}
    ]
    first_removed = next(
        (k for k, dl in enumerate(old_side) if dl.kind == "remove"),
        None,
    )
    if first_removed is not None and any(dl.kind == "context" for dl in old_side):
        old_region = [dl.text for dl in old_side]
        # Number of context lines that precede the first removed line inside
        # the region (only non-zero when the region opens with added lines).
        lead = first_removed

    prev_anchor_text: str | None = prev_ctx[1] if prev_ctx else None
    prev_anchor_source = "context"

    if not prev_anchor_text:
        prev_anchor_text = hunk_header_context_text(hunk.header)
        prev_anchor_source = "hunk_header"

    if not prev_anchor_text:
        return None

    found_prev = find_unique_context_anchor(file_lines, prev_anchor_text, path)
    if not found_prev:
        return None

    prev_line, prev_method = found_prev

    search_start = prev_line + 1
    search_end = min(len(file_lines), prev_line + 40)

    if search_start >= search_end:
        return None

    first_found = find_best_fuzzy_line_in_range(
        file_lines=file_lines,
        target_line=removed_only[0],
        path=path,
        start=search_start,
        end=search_end,
        min_score=0.68,
    )

    if first_found is None:
        # Hunk-header tail fallback: stale old lines may no longer fuzzy-match,
        # but the hunk header can still identify the local tail region. Only do
        # this for small tail hunks and only immediately after the anchor.
        if prev_anchor_source != "hunk_header" or len(removed_only) > 6 or len(added_only) > 8:
            return None
        start = search_start
        first_score = 0.70
    else:
        first_hit, first_score = first_found
        # Step back over interior context lines that precede the first removed
        # line so the span lines up with old_region[0].
        start = first_hit - lead
        if start < search_start:
            return None

    end = start + len(old_region)

    if end > search_end or end > len(file_lines):
        return None

    candidate = file_lines[start:end]
    if len(candidate) != len(old_region):
        return None

    scores = [
        markdown_or_normal_similarity(file_line, old_line, path)
        for file_line, old_line in zip(candidate, old_region)
    ]

    # old_region is never empty here (removed_only is non-empty), so the
    # division is safe.
    avg_score = sum(scores) / len(scores)

    if first_score < 0.68 or avg_score < 0.66:
        return None

    local_marker = infer_local_bullet_marker(
        file_lines[max(0, start - 4) : min(len(file_lines), end + 4)],
        path,
    )

    replacement = hunk_new_region_lines(
        hunk=hunk,
        start=first_change,
        end=last_change,
        path=path,
        preferred_marker=local_marker,
    )

    if len(replacement) > 24:
        return None

    return MatchResult(
        True,
        start,
        end,
        0.91,
        f"prev_anchor_tail_replacement_atom_{prev_anchor_source}_{prev_method}",
        warnings + [
            f"Previous-anchor tail replacement atom used from {prev_anchor_source}; first_score={first_score:.2f}, avg_score={avg_score:.2f}."
        ],
        [],
        1,
    ), replacement



 def locate_side_anchored_single_line_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Replace one stale line near one or two unique context anchors.

    This is for AI patches where the old line has drifted, but the surrounding
    context still identifies the location safely.

    The hunk must remove exactly one line and add at least one. The nearest
    context lines before and after the change are resolved with
    find_unique_context_anchor(); at least one of them must resolve. The
    removed line is then fuzzy-searched (min_score=0.74) inside a window:
      - both anchors found: start=prev + 1, end=next
      - only previous anchor: start=prev + 1, end=prev + 8 (clipped to file)
      - only next anchor: start=next - 8 (clipped to 0), end=next
    Windows wider than 20 lines are rejected.

    When the fuzzy search fails, a previous anchor exists and the removed line
    is a numbered Markdown item, a numbered-list fallback replaces a local
    span starting at the first numbered item after the previous anchor.

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: Parsed hunk to place.
        path: Target path, passed through to the matching helpers.
        warnings: Warnings gathered so far. Copied into returned results; one
            guard in the numbered-list fallback appends to it in place.

    Returns:
        ``(MatchResult, replacement_lines)`` with ``found=True`` and the
        ``[start, end)`` span to replace, or ``None`` when nothing matched.
    """
    old_block, new_block, removed_only, added_only, context_only = old_new_blocks(hunk)

    if len(removed_only) != 1 or not added_only:
        return None

    bounds = hunk_change_bounds(hunk)
    if bounds is None:
        return None

    first_change, last_change = bounds
    prev_ctx = nearest_context_before(hunk, first_change)
    next_ctx = nearest_context_after(hunk, last_change)

    # prev_ctx / next_ctx are indexed with [1] to get the context text.
    found_prev = find_unique_context_anchor(file_lines, prev_ctx[1], path) if prev_ctx else None
    found_next = find_unique_context_anchor(file_lines, next_ctx[1], path) if next_ctx else None

    search_start = 0
    search_end = len(file_lines)
    anchor_desc = ""

    if found_prev and found_next:
        prev_line, prev_method = found_prev
        next_line, next_method = found_next
        # Anchors out of order in the file: the hunk does not describe this spot.
        if prev_line >= next_line:
            return None
        search_start = prev_line + 1
        search_end = next_line
        anchor_desc = f"{prev_method}/{next_method}"
    elif found_prev:
        prev_line, prev_method = found_prev
        search_start = prev_line + 1
        search_end = min(len(file_lines), prev_line + 8)
        anchor_desc = f"{prev_method}/none"
    elif found_next:
        next_line, next_method = found_next
        search_start = max(0, next_line - 8)
        search_end = next_line
        anchor_desc = f"none/{next_method}"
    else:
        return None

    # Empty window (adjacent anchors) or anchors too far apart to be "local".
    if search_end <= search_start or (search_end - search_start) > 20:
        return None

    found = find_best_fuzzy_line_in_range(
        file_lines=file_lines,
        target_line=removed_only[0],
        path=path,
        start=search_start,
        end=search_end,
        min_score=0.74,
    )

    # Extra fallback for numbered Markdown question lists.
    #
    # Handles tail/list patches like:
    #   6. existing anchor
    #  -7. old question
    #  +7. new question
    #   8. existing context
    #  +9. new question
    #  +10. new question
    #
    # The old implementation replaced only line 7 with added_only, which would
    # place 9/10 before 8. This version replaces the local numbered span and
    # preserves embedded context order.
    #
    # NOTE(review): this fallback scans up to prev_line + 20 and does not look
    # at found_next, so the span can reach past the next anchor — confirm that
    # is intended. It also applies no similarity check to the chosen item.
    if found is None and found_prev and is_numbered_markdown_item(removed_only[0]):
        prev_line = found_prev[0]
        scan_limit = min(len(file_lines), prev_line + 20)

        # Find the first numbered item after the previous anchor.
        # Blank lines are skipped; any other non-numbered line is passed over
        # as well, since only a numbered item ends the scan.
        candidate: int | None = None
        for i in range(prev_line + 1, scan_limit):
            if not file_lines[i].strip():
                continue
            if is_numbered_markdown_item(file_lines[i]):
                candidate = i
                break

        if candidate is not None:
            local_marker = infer_local_bullet_marker(
                file_lines[max(0, prev_line - 3) : min(len(file_lines), scan_limit)],
                path,
            )

            replacement = hunk_new_region_lines(
                hunk=hunk,
                start=first_change,
                end=last_change,
                path=path,
                preferred_marker=local_marker,
            )

            # Default span: just the numbered item itself.
            replace_end = candidate + 1

            # If the replacement region contains existing context lines after
            # the removed item, include those target-file lines in the span.
            # This keeps order correct for:
            #   replace 7, keep 8, append 9/10
            #
            # search_from only moves forward, so context lines are matched in
            # hunk order and each file line is consumed at most once.
            search_from = candidate + 1
            for dl in hunk.lines[first_change : last_change + 1]:
                if dl.kind != "context" or not dl.text.strip():
                    continue

                for j in range(search_from, scan_limit):
                    if (
                        file_lines[j] == dl.text
                        or normalize_line(file_lines[j]) == normalize_line(dl.text)
                        or (
                            path.suffix.lower() in MARKDOWN_EXTENSIONS
                            and "markdown_lines_equiv" in globals()
                            and markdown_lines_equiv(file_lines[j], dl.text)
                        )
                    ):
                        replace_end = j + 1
                        search_from = j + 1
                        break

            # NOTE(review): looks unreachable — replace_end starts at
            # candidate + 1 and is only ever reassigned to a larger j + 1.
            if replace_end <= candidate:
                return None

            # NOTE(review): also looks unreachable — replace_end <= scan_limit
            # <= prev_line + 20 and candidate >= prev_line + 1, so the span is
            # at most 19 lines. If it ever fired, it would append to the
            # caller's warnings list in place (unlike the `warnings + [...]`
            # copies used for returned results).
            if (replace_end - candidate) > 20:
                warnings.append(
                    f"Numbered-list replacement span too large ({replace_end - candidate} lines); skipped."
                )
                return None

            return MatchResult(
                True,
                candidate,
                replace_end,
                0.91,
                "numbered_list_span_after_prev_anchor_atom",
                warnings + ["Numbered-list span atom used after unique previous numbered anchor."],
                [],
                1,
            ), replacement

    if found is None:
        return None

    line_index, score = found

    replacement = replacement_preserving_single_line_style(
        added_lines=added_only,
        matched_old_line=file_lines[line_index],
        path=path,
    )

    # Confidence is the fuzzy score clamped into [0.90, 0.93].
    return MatchResult(
        True,
        line_index,
        line_index + 1,
        max(0.90, min(0.93, score)),
        f"side_anchored_single_line_atom_{anchor_desc}",
        warnings + [f"Side-anchored stale line atom used; score={score:.2f}."],
        [],
        1,
    ), replacement


 def locate_add_only_by_context_atom(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Find the insertion point for an add-only hunk from its context lines.

    Only hunks with added lines, no removed lines and at most 80 added lines
    are handled. Up to eight non-blank context lines on each side of the added
    block are considered, nearest first.

    Order of attempts:
      1. Pair mode: a previous context line followed, within the next 34 file
         lines, by a following context line. The pair must occur exactly once
         in the file. If the added block (or all of its non-blank lines) is
         already present around that pair, the hunk is reported as already
         applied; otherwise the block is inserted right after the previous
         context line.
      2. The first previous context line that resolves to a unique anchor:
         insert after it.
      3. The first following context line that resolves to a unique anchor:
         insert before it.

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: Parsed hunk to place.
        path: Target path; its suffix enables Markdown-equivalence matching.
        warnings: Warnings gathered so far. Ambiguous pairs append a note to
            this list in place; results receive a copy plus their own note.

    Returns:
        ``(MatchResult, added_lines)`` — ``found=True`` with an empty span
        (``start == end``) marking the insertion point, or ``found=False`` with
        an ``already_applied_...`` method — or ``None`` when no anchor works.
    """
    _, _, removed_only, added_only, _ = old_new_blocks(hunk)

    if removed_only or not added_only:
        return None

    add_positions = [pos for pos, dl in enumerate(hunk.lines) if dl.kind == "add"]
    if not add_positions:
        return None

    # enumerate() yields ascending positions, so the list ends are the first
    # and last added line of the hunk.
    first_add, last_add = add_positions[0], add_positions[-1]

    if len(added_only) > 80:
        return None

    total = len(file_lines)

    def context_match_kind(file_line: str, ctx: str):
        """Name how *file_line* equals *ctx*; None when it does not."""
        if file_line == ctx:
            return "exact"
        if normalize_line(file_line) == normalize_line(ctx):
            return "normalized"
        if (
            path.suffix.lower() in MARKDOWN_EXTENSIONS
            and "markdown_lines_equiv" in globals()
            and markdown_lines_equiv(file_line, ctx)
        ):
            return "markdown_equiv"
        return None

    def pair_occurrences(prev_ctx: str, next_ctx: str) -> list:
        """List (prev_index, next_index, prev_kind, next_kind) for every file
        line matching *prev_ctx* that is followed closely by *next_ctx*."""
        hits = []
        for prev_i, file_line in enumerate(file_lines):
            prev_kind = context_match_kind(file_line, prev_ctx)
            if prev_kind is None:
                continue
            # Only the first following match per previous match counts.
            for next_i in range(prev_i + 1, min(total, prev_i + 35)):
                next_kind = context_match_kind(file_lines[next_i], next_ctx)
                if next_kind is not None:
                    hits.append((prev_i, next_i, prev_kind, next_kind))
                    break
        return hits

    def resolve_unique_pair(prev_i: int, next_i: int, prev_kind: str, next_kind: str):
        """Turn the single occurrence of a context pair into a result."""
        # Added block already sits inside the bracket: nothing to do.
        between = file_lines[prev_i + 1 : next_i]
        if find_exact_block(between, added_only) or find_normalized_block(between, added_only):
            return MatchResult(
                found=False,
                confidence=0.97,
                method="already_applied_add_only_between_context_pair",
                warnings=warnings + ["Added block already exists between unique context pair."],
                missing_lines=[],
                candidate_count=1,
            ), added_only

        # Idempotency fallback for malformed Markdown task/list hunks: after a
        # successful apply, body lines may be reclassified as context, so the
        # following anchor can be one of the freshly added lines and the
        # bracket looks empty. Before inserting again, check whether every
        # non-blank added line already exists close to the pair.
        needles = [
            line
            for line in sanitize_replacement_lines(added_only, path)
            if line.strip()
        ]
        if needles:
            near_start = max(0, prev_i - 5)
            near_end = min(total, next_i + 60)
            if all(
                line_present_in_range(
                    file_lines=file_lines,
                    needle=needle,
                    path=path,
                    start=near_start,
                    end=near_end,
                    min_fuzzy_score=0.92,
                )
                for needle in needles
            ):
                return MatchResult(
                    found=False,
                    confidence=0.96,
                    method="already_applied_add_only_near_context_pair",
                    warnings=warnings + [
                        "Added lines already exist near unique context pair; hunk appears already applied."
                    ],
                    missing_lines=[],
                    candidate_count=1,
                ), added_only

        insert_at = prev_i + 1
        return MatchResult(
            True,
            insert_at,
            insert_at,
            0.95,
            f"insert_between_context_pair_atom_{prev_kind}_{next_kind}",
            warnings + ["Add-only atom inserted between unique previous/following context pair."],
            [],
            1,
        ), added_only

    # Non-blank context before the added block (nearest first) and after it.
    prev_contexts = [
        dl.text
        for dl in reversed(hunk.lines[:first_add])
        if dl.kind == "context" and dl.text.strip()
    ]
    next_contexts = [
        dl.text
        for dl in hunk.lines[last_add + 1:]
        if dl.kind == "context" and dl.text.strip()
    ]

    # 1) Pair mode. A previous+following pair that occurs once is safer than a
    #    single context line that may be duplicated elsewhere in the file.
    for prev_ctx in prev_contexts[:8]:
        for next_ctx in next_contexts[:8]:
            hits = pair_occurrences(prev_ctx, next_ctx)
            if len(hits) == 1:
                return resolve_unique_pair(*hits[0])
            if hits:
                warnings.append(
                    f"Add-only context pair matched {len(hits)} places; trying other anchors."
                )

    # 2) and 3) Single-anchor mode: every previous context is tried before any
    #    following context. `offset` places the insert after (1) or before (0)
    #    the anchor line.
    single_anchor_modes = (
        (
            prev_contexts,
            1,
            "insert_after_any_context_atom",
            "Add-only atom inserted after unique nearby previous context.",
        ),
        (
            next_contexts,
            0,
            "insert_before_any_context_atom",
            "Add-only atom inserted before unique nearby following context.",
        ),
    )
    for contexts, offset, method_prefix, note in single_anchor_modes:
        for ctx in contexts[:8]:
            anchor = find_unique_context_anchor(file_lines, ctx, path)
            if not anchor:
                continue
            idx, how = anchor
            insert_at = idx + offset
            return MatchResult(
                True,
                insert_at,
                insert_at,
                0.94,
                f"{method_prefix}_{how}",
                warnings + [note],
                [],
                1,
            ), added_only

    return None


 def locate_edit_atom_fallback(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    warnings: list[str],
 ) -> tuple[MatchResult, list[str]] | None:
    """Try the "obvious edit atom" strategies in a fixed order.

    The first strategy that yields a result wins:
      1. added block already present exactly once (exact, then
         whitespace-normalized) -> reported as already applied
      2. add-only insertion by context anchors
      3. context-bracket replacement/insert cluster
      4. collapsed stale paragraph
      5. stale paragraph near the hunk-header locality
      6. stale paragraph after a unique context/header anchor
      7. small stale tail after hunk-header context
      8. bounded stale tail after a unique previous context
      9. one stale line next to unique context anchors
      10. one stale line found by a unique file-wide fuzzy match (>= 0.86)
      11. add-only block placed after/before the nearest unique context line

    For Markdown targets the new/added lines are sanitized first (when
    ``sanitize_replacement_lines`` is defined in this module).

    Args:
        file_lines: Current content of the target file, one entry per line.
        hunk: Parsed hunk to place.
        path: Target path.
        warnings: Warnings gathered so far; handed to every strategy and
            copied into results built here.

    Returns:
        ``(MatchResult, replacement_lines)`` from the winning strategy, or
        ``None`` when none applies.
    """
    _, new_block, removed_only, added_only, context_only = old_new_blocks(hunk)

    if path.suffix.lower() in MARKDOWN_EXTENSIONS and "sanitize_replacement_lines" in globals():
        new_block = sanitize_replacement_lines(new_block, path)
        added_only = sanitize_replacement_lines(added_only, path)

    if added_only:
        # Already-applied probes: the added block occurring exactly once in
        # the file is taken as evidence that the hunk was applied before.
        already_applied_probes = (
            (
                find_exact_block,
                0.97,
                "already_applied_added_block_atom",
                "Added atom already exists uniquely; hunk appears already applied.",
            ),
            (
                find_normalized_block,
                0.95,
                "already_applied_added_block_atom_normalized",
                "Whitespace-normalized added atom already exists uniquely; hunk appears already applied.",
            ),
        )
        for probe, confidence, method, note in already_applied_probes:
            if len(probe(file_lines, added_only)) == 1:
                return MatchResult(
                    found=False,
                    confidence=confidence,
                    method=method,
                    warnings=warnings + [note],
                    missing_lines=[],
                    candidate_count=1,
                ), new_block

        add_only = locate_add_only_by_context_atom(file_lines, hunk, path, warnings)
        if add_only is not None:
            return add_only

    # Ordered from the most strongly anchored strategy to the weakest.
    strategies = (
        # Case A: context-bracket replacement/insert cluster.
        locate_context_bracket_atom,
        # Case B-4: stale multi-line paragraph collapsed into fewer target lines.
        locate_collapsed_stale_paragraph_atom,
        # Case B-3: stale paragraph replacement near hunk-header locality.
        locate_stale_paragraph_by_header_window_atom,
        # Case B-2: stale paragraph replacement after unique context/header anchor.
        locate_stale_paragraph_after_anchor_atom,
        # Case B-1: small stale tail replacement after hunk-header context.
        locate_header_anchor_tail_region_atom,
        # Case B0: bounded stale tail replacement after unique previous context.
        locate_prev_anchor_tail_replacement_atom,
        # Case B: one-line stale replacement near unique context anchor.
        locate_side_anchored_single_line_atom,
    )
    for strategy in strategies:
        located = strategy(file_lines, hunk, path, warnings)
        if located is not None:
            return located

    # Case C: one-line stale replacement found by a file-wide unique fuzzy match.
    if len(removed_only) == 1 and added_only:
        fuzzy_hit = find_unique_fuzzy_line(file_lines, removed_only[0], path, min_score=0.86)
        if fuzzy_hit:
            line_index, score = fuzzy_hit
            replacement = replacement_preserving_single_line_style(
                added_lines=added_only,
                matched_old_line=file_lines[line_index],
                path=path,
            )
            # Confidence is the fuzzy score clamped into [0.90, 0.94].
            return MatchResult(
                True,
                line_index,
                line_index + 1,
                max(0.90, min(0.94, score)),
                "fuzzy_single_line_atom",
                warnings + [f"Fuzzy single-line atom match used; score={score:.2f}."],
                [],
                1,
            ), replacement

    # Case D: add-only block, inserted next to the nearest context line.
    # NOTE(review): locate_add_only_by_context_atom above already probes these
    # same nearest anchors with find_unique_context_anchor, so this branch
    # looks redundant — confirm before removing.
    if removed_only or not added_only or not context_only:
        return None

    add_positions = [pos for pos, dl in enumerate(hunk.lines) if dl.kind == "add"]
    if not add_positions:
        return None
    first_add, last_add = add_positions[0], add_positions[-1]

    prev_context = next(
        (
            dl.text
            for dl in reversed(hunk.lines[:first_add])
            if dl.kind == "context" and dl.text.strip()
        ),
        None,
    )
    next_context = next(
        (
            dl.text
            for dl in hunk.lines[last_add + 1:]
            if dl.kind == "context" and dl.text.strip()
        ),
        None,
    )

    # `offset` places the insert after (1) or before (0) the anchor line.
    nearest_anchor_modes = (
        (
            prev_context,
            1,
            "insert_after_context_atom",
            "Add-only atom inserted after unique previous context.",
        ),
        (
            next_context,
            0,
            "insert_before_context_atom",
            "Add-only atom inserted before unique following context.",
        ),
    )
    for context_text, offset, method_prefix, note in nearest_anchor_modes:
        if not context_text:
            continue
        anchor = find_unique_context_anchor(file_lines, context_text, path)
        if not anchor:
            continue
        idx, how = anchor
        insert_at = idx + offset
        return MatchResult(
            True,
            insert_at,
            insert_at,
            0.93,
            f"{method_prefix}_{how}",
            warnings + [note],
            [],
            1,
        ), added_only

    return None





 def find_ordered_line_subsequence_spans(
    file_lines: list[str],
    needles: list[str],
    max_span: int = 90,
    max_gap: int = 25,
 ) -> list[tuple[int, int]]:
    """Locate compact, in-order occurrences of the needle lines in the file.

    Lines are compared after ``normalize_line``; blank needles are ignored.
    Every file line equal to the first needle starts one attempt. Each later
    needle must then appear within ``max_gap`` lines after the previous hit
    (the earliest such line is taken).

    Args:
        file_lines: File content, one entry per line.
        needles: Lines that must occur in this order.
        max_span: Largest allowed distance from first to last hit, inclusive.
        max_gap: Largest allowed distance between consecutive hits.

    Returns:
        ``(start, end)`` half-open index pairs, in file order, one per
        successful attempt. Empty when there are no non-blank needles.
    """
    haystack = [normalize_line(line) for line in file_lines]
    wanted = [normalize_line(line) for line in needles if line.strip()]

    if not wanted:
        return []

    head, rest = wanted[0], wanted[1:]
    total = len(haystack)
    spans: list[tuple[int, int]] = []

    for start, line in enumerate(haystack):
        if line != head:
            continue

        cursor = start
        for needle in rest:
            window_end = min(total, cursor + max_gap + 1)
            try:
                # Earliest occurrence strictly after the previous hit.
                cursor = haystack.index(needle, cursor + 1, window_end)
            except ValueError:
                break
        else:
            # Every needle was found; keep the span only if it is compact.
            if cursor + 1 - start <= max_span:
                spans.append((start, cursor + 1))

    # Dedupe while preserving order.
    return list(dict.fromkeys(spans))



 def find_ordered_fuzzy_line_subsequence_spans(
    file_lines: list[str],
    needles: list[str],
    path: Path,
    max_span: int = 170,
    max_gap: int = 45,
    min_line_score: float = 0.78,
    min_avg_score: float = 0.86,
 ) -> list[tuple[int, int, float, float]]:
    """Locate compact, in-order fuzzy occurrences of the needle lines.

    Intended for already-applied/idempotency detection only, not for choosing
    where to apply. It covers the case where the final Markdown lines are in
    the file in order, but small wording, sanitation or context-repair drift
    defeats exact normalized subsequence matching.

    Every file line scoring at least ``min_line_score`` against the first
    non-blank needle starts one attempt. Each later needle takes the
    best-scoring line (earliest on ties) within ``max_gap`` lines after the
    previous hit.

    Args:
        file_lines: File content, one entry per line.
        needles: Lines that must occur in this order; blank ones are ignored.
        path: Target path, forwarded to the similarity helper.
        max_span: Largest allowed distance from first to last hit, inclusive.
        max_gap: Largest allowed distance between consecutive hits.
        min_line_score: Minimum similarity for any single line.
        min_avg_score: Minimum average similarity over all needles.

    Returns:
        ``(start, end, avg_score, min_score)`` tuples with half-open spans,
        sorted best first by average score, then by minimum score.
    """
    wanted = [line for line in needles if line.strip()]
    if not wanted:
        return []

    head, rest = wanted[0], wanted[1:]
    total = len(file_lines)

    # Plausible starting lines for the first needle, with their scores.
    starts: list[tuple[int, float]] = []
    for index, line in enumerate(file_lines):
        head_score = markdown_or_normal_similarity(line, head, path)
        if head_score >= min_line_score:
            starts.append((index, head_score))

    found: list[tuple[int, int, float, float]] = []

    for start, head_score in starts:
        cursor = start
        line_scores = [head_score]

        for needle in rest:
            window = range(cursor + 1, min(total, cursor + max_gap + 1))
            scored = [
                (j, markdown_or_normal_similarity(file_lines[j], needle, path))
                for j in window
            ]
            eligible = [pair for pair in scored if pair[1] >= min_line_score]
            if not eligible:
                break
            # max() returns the first maximal element, i.e. the earliest line
            # among equally good ones.
            cursor, needle_score = max(eligible, key=lambda pair: pair[1])
            line_scores.append(needle_score)
        else:
            if cursor + 1 - start > max_span:
                continue

            avg_score = sum(line_scores) / len(line_scores)
            min_score = min(line_scores)
            if avg_score >= min_avg_score and min_score >= min_line_score:
                found.append((start, cursor + 1, avg_score, min_score))

    return sorted(found, key=lambda item: (item[2], item[3]), reverse=True)


 def choose_unique_or_header_nearest_fuzzy_span(
    spans: list[tuple[int, int, float, float]],
    hunk: Hunk,
    max_distance: int = 190,
 ) -> tuple[int, int, float, float] | None:
    """Pick a single fuzzy span, or refuse when the choice is ambiguous.

    ``spans`` holds ``(start, end, avg_score, min_score)`` tuples; the first
    entry is treated as the best one (assumes the list is sorted best first —
    the function itself does not sort by score).

    The first span wins outright when no other span's average score is within
    0.025 of it. Otherwise the contenders are ranked by how far their start is
    from the old start line in the hunk header, and the nearest one wins only
    if it is strictly nearer than the runner-up and no further away than
    ``max_distance``.

    Args:
        spans: Candidate spans.
        hunk: Hunk whose header supplies the old start line for tie-breaking.
        max_distance: Largest accepted distance from the header's old start.

    Returns:
        The chosen span, or ``None`` when there are no spans, the header has
        no usable old start, or the tie cannot be broken.
    """
    if not spans:
        return None

    top = spans[0]
    contenders = [span for span in spans if top[2] - span[2] < 0.025]
    if len(contenders) == 1:
        return top

    old_start = parse_hunk_old_start(hunk.header)
    if old_start is None:
        return None

    def distance(span: tuple) -> int:
        """Distance between the span start and the header's old start."""
        return abs(span[0] - old_start)

    by_distance = sorted(contenders, key=distance)
    nearest = by_distance[0]
    nearest_distance = distance(nearest)
    runner_up_distance = distance(by_distance[1])

    # An exact distance tie gives no basis for preferring one span.
    if nearest_distance == runner_up_distance or nearest_distance > max_distance:
        return None

    return nearest



 def line_present_in_range(
    file_lines: list[str],
    needle: str,
    path: Path,
    start: int,
    end: int,
    min_fuzzy_score: float = 0.88,
 ) -> bool:
    """Tell whether *needle* occurs among ``file_lines[start:end]``.

    ``start`` is clipped to 0 and ``end`` to the file length. A line counts as
    a hit when it equals the needle exactly, equals it after
    ``normalize_line``, is Markdown-equivalent to it (Markdown targets only,
    and only when ``markdown_lines_equiv`` is defined in this module), or
    reaches ``min_fuzzy_score`` similarity.

    Args:
        file_lines: File content, one entry per line.
        needle: Line to look for.
        path: Target path; its suffix enables the Markdown-equivalence check.
        start: First index to inspect.
        end: Index just past the last one to inspect.
        min_fuzzy_score: Similarity threshold for the fuzzy check.

    Returns:
        ``True`` on the first hit, ``False`` when no line in the range matches.
    """
    lo = max(0, start)
    hi = min(len(file_lines), end)
    wanted_norm = normalize_line(needle)

    def is_hit(line: str) -> bool:
        """Apply the checks from cheapest to most permissive."""
        if line == needle or normalize_line(line) == wanted_norm:
            return True
        if (
            path.suffix.lower() in MARKDOWN_EXTENSIONS
            and "markdown_lines_equiv" in globals()
            and markdown_lines_equiv(line, needle)
        ):
            return True
        return markdown_or_normal_similarity(line, needle, path) >= min_fuzzy_score

    return any(is_hit(line) for line in file_lines[lo:hi])



 def meaningful_new_block_lines_for_idempotency(
    lines: list[str],
    removed_only: list[str],
    path: Path,
 ) -> list[str]:
    """Pick the lines of a hunk's new block that prove the final state.

    Used for malformed Markdown hunks where parser repair reclassified the
    intended final lines as context, leaving ``added_only`` incomplete.

    After ``sanitize_replacement_lines`` the following are dropped:
      - blank lines
      - code-fence lines (starting with three backticks)
      - lines that equal a removed line after ``normalize_line``
      - the generic labels "Canonical flow:", "Recommended model:" and
        "Operationally, this means:"
      - anything that is neither an arrow-flow line (``->``), a bullet
        (``- ``, ``* ``, ``+ ``) nor at least 35 characters long

    Args:
        lines: The hunk's new-side lines.
        removed_only: The hunk's removed lines.
        path: Target path, forwarded to the sanitizer.

    Returns:
        The surviving lines in original order, with later duplicates (by
        normalized text) removed.
    """
    generic_labels = {"Canonical flow:", "Recommended model:", "Operationally, this means:"}
    flow_or_bullet_prefixes = ("->", "- ", "* ", "+ ")
    removed_keys = {normalize_line(old) for old in removed_only if old.strip()}

    kept: list[str] = []
    seen_keys: set[str] = set()

    for line in sanitize_replacement_lines(lines, path):
        text = line.strip()
        key = normalize_line(line)

        if not text or text.startswith("```"):
            continue
        if key in removed_keys or text in generic_labels:
            continue

        # Keep arrow-flow lines, bullets, and prose long enough to be specific.
        if not (text.startswith(flow_or_bullet_prefixes) or len(text) >= 35):
            continue

        # First occurrence wins; later normalized duplicates are skipped.
        if key in seen_keys:
            continue
        seen_keys.add(key)
        kept.append(line)

    return kept


 def locate_already_applied_new_block_lines_present(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    removed_only: list[str],
    new_block: list[str],
    warnings: list[str],
    markdown_recovery: bool,
 ) -> MatchResult | None:
    """Recognise an already-applied malformed Markdown replacement from new-block evidence.

    Idempotency-only check. It targets hunks where:
      - the old removed lines are gone from the file
      - the final intended lines exist near the hunk location
      - added_only is incomplete because repair reclassified arrow-flow or
        code-fence lines as context

    Limits applied here:
      - Markdown paths only, with non-empty `removed_only` and `new_block`
      - at least two removed lines reported missing (three when there are
        three or more nonblank removed lines)
      - between 2 and 40 evidence lines from the new block
      - a hunk header with a parseable old start line
      - evidence searched from 100 lines before to 220 lines after that line
      - one evidence line may be absent when there are five or more

    Returns a `MatchResult` with `found=False` and method
    `already_applied_new_block_lines_present`, or `None` when any limit fails.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    if not (removed_only and new_block):
        return None

    gone = line_presence_missing(
        file_lines=file_lines,
        expected_lines=removed_only,
        path=path,
        markdown_recovery=markdown_recovery,
    )

    # Strong evidence that the old side is gone is mandatory.
    nonblank_removed = sum(1 for old in removed_only if old.strip())
    if len(gone) < max(2, min(3, nonblank_removed)):
        return None

    evidence = meaningful_new_block_lines_for_idempotency(
        lines=new_block,
        removed_only=removed_only,
        path=path,
    )

    # A single final line is too weak; two can suffice for messy split hunks
    # once several old lines are known to be missing.
    if not 2 <= len(evidence) <= 40:
        return None

    header_line = parse_hunk_old_start(hunk.header)
    if header_line is None:
        return None

    window_start = max(0, header_line - 100)
    window_end = min(len(file_lines), header_line + 220)

    absent = [
        needle
        for needle in evidence
        if not line_present_in_range(
            file_lines=file_lines,
            needle=needle,
            path=path,
            start=window_start,
            end=window_end,
            min_fuzzy_score=0.88,
        )
    ]

    # Tolerate one miss when there is plenty of evidence, e.g. a line that was
    # sanitized or wrapped differently after apply.
    tolerated = 1 if len(evidence) >= 5 else 0
    if len(absent) > tolerated:
        return None

    return MatchResult(
        found=False,
        confidence=0.95,
        method="already_applied_new_block_lines_present",
        warnings=warnings + [
            f"Final new-block evidence already present near hunk location; old removed lines missing={len(gone)}, evidence_lines={len(evidence)}, missing_evidence={len(absent)}."
        ],
        missing_lines=[],
        candidate_count=len(evidence),
    )


 def locate_already_applied_added_lines_present(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    removed_only: list[str],
    added_only: list[str],
    warnings: list[str],
    markdown_recovery: bool,
 ) -> MatchResult | None:
    """Recognise an already-applied malformed Markdown replacement by added-line presence.

    Idempotency-only fallback for cases where:
      - the old removed lines are gone
      - the added lines are present near the hunk's original location
      - full new_block matching fails because parser repair reclassified
        Markdown code-fence / arrow-flow lines as context

    Limits applied here:
      - Markdown paths only, replacement hunks only
      - 3 to 30 nonblank added lines after `sanitize_replacement_lines`
      - at least one removed line missing; up to three are required when the
        hunk removes that many nonblank lines
      - a hunk header with a parseable old start line
      - every added line found from 80 lines before to 180 lines after it

    Returns a `MatchResult` with `found=False` and method
    `already_applied_added_lines_present`, or `None` when any limit fails.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    if not (removed_only and added_only):
        return None

    wanted = [
        added
        for added in sanitize_replacement_lines(added_only, path)
        if added.strip()
    ]

    if not 3 <= len(wanted) <= 30:
        return None

    gone = line_presence_missing(
        file_lines=file_lines,
        expected_lines=removed_only,
        path=path,
        markdown_recovery=markdown_recovery,
    )

    if not gone:
        return None

    # Demand meaningful removal drift, not just one changed whitespace line.
    nonblank_removed = sum(1 for old in removed_only if old.strip())
    if len(gone) < max(1, min(3, nonblank_removed)):
        return None

    header_line = parse_hunk_old_start(hunk.header)
    if header_line is None:
        return None

    window_start = max(0, header_line - 80)
    window_end = min(len(file_lines), header_line + 180)

    every_line_present = all(
        line_present_in_range(
            file_lines=file_lines,
            needle=needle,
            path=path,
            start=window_start,
            end=window_end,
            min_fuzzy_score=0.90,
        )
        for needle in wanted
    )
    if not every_line_present:
        return None

    return MatchResult(
        found=False,
        confidence=0.95,
        method="already_applied_added_lines_present",
        warnings=warnings + [
            f"Added lines already present near hunk location; old removed lines missing={len(gone)}."
        ],
        missing_lines=[],
        candidate_count=len(wanted),
    )


 def locate_already_applied_added_subsequence(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    removed_only: list[str],
    added_only: list[str],
    warnings: list[str],
    markdown_recovery: bool,
 ) -> MatchResult | None:
    """Detect already-applied replacement hunks by their added lines.

    This handles post-apply idempotency when:
      - the old lines are gone
      - the added lines are present in order
      - exact new_block matching fails due to context/sanitation drift

    Conservative limits:
      - Markdown only
      - replacement hunks only
      - require 3 to 40 nonblank added lines
      - require at least one removed line to be missing
      - require one compact ordered added-line span, or a uniquely nearest span
        to the original hunk header line (no more than 160 lines away)

    Returns:
      A `MatchResult` with `found=False` and method
      `already_applied_added_subsequence` when the hunk looks already applied,
      otherwise `None`. The matched span itself is not reported; only the
      number of candidate spans is.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    if not removed_only or not added_only:
        return None

    # Ignore blank-only and very tiny additions.
    needles = [line for line in added_only if line.strip()]
    if len(needles) < 3:
        return None

    if len(needles) > 40:
        return None

    missing_removed = line_presence_missing(
        file_lines=file_lines,
        expected_lines=removed_only,
        path=path,
        markdown_recovery=markdown_recovery,
    )

    if not missing_removed:
        return None

    spans = find_ordered_line_subsequence_spans(
        file_lines=file_lines,
        needles=needles,
        max_span=110,
        max_gap=30,
    )

    if not spans:
        return None

    if len(spans) > 1:
        # Several candidate spans: accept only if one of them is strictly
        # closest to the old start line from the hunk header. Only the
        # distances matter here; the winning span is not used afterwards.
        old_start = parse_hunk_old_start(hunk.header)
        if old_start is None:
            return None

        distances = sorted(abs(span[0] - old_start) for span in spans)
        best_distance, second_distance = distances[0], distances[1]

        if best_distance == second_distance:
            return None

        # Must still be local-ish to the original hunk.
        if best_distance > 160:
            return None

    return MatchResult(
        found=False,
        confidence=0.95,
        method="already_applied_added_subsequence",
        warnings=warnings + [
            f"Added lines already exist as a compact ordered subsequence; old removed lines missing={len(missing_removed)}."
        ],
        missing_lines=[],
        candidate_count=len(spans),
    )



 def locate_already_applied_new_subsequence(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    removed_only: list[str],
    new_block: list[str],
    warnings: list[str],
    markdown_recovery: bool,
 ) -> MatchResult | None:
    """Detect already-applied replacement hunks by final new-block shape.

    This handles post-apply idempotency when:
      - removed lines are gone
      - arrow-flow/list repairs reclassified many final lines as context
      - added_only alone is too small or incomplete
      - the final hunk shape exists in the target file as an ordered compact
        subsequence, even if exact new_block matching failed due to blank/context
        drift.

    Conservative limits:
      - Markdown only
      - replacement hunks only
      - require missing removed lines
      - require 4 to 80 final nonblank lines (after sanitation)
      - require compact ordered span

    Two passes are tried in order:
      1. Exact ordered subsequence. Accepted when there is a single span, or
         when one span is strictly nearest to the hunk header's old start line
         and no more than 180 lines from it.
      2. Fuzzy ordered subsequence, with the span chosen by
         `choose_unique_or_header_nearest_fuzzy_span` (max distance 210).

    Returns:
      A `MatchResult` with `found=False` and method
      `already_applied_new_subsequence` (exact pass) or
      `already_applied_new_fuzzy_subsequence` (fuzzy pass), otherwise `None`.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return None

    if not removed_only or not new_block:
        return None

    missing_removed = line_presence_missing(
        file_lines=file_lines,
        expected_lines=removed_only,
        path=path,
        markdown_recovery=markdown_recovery,
    )

    if not missing_removed:
        return None

    needles = [line for line in sanitize_replacement_lines(new_block, path) if line.strip()]

    # Ignore tiny blocks; those are too easy to match accidentally.
    if len(needles) < 4:
        return None

    if len(needles) > 80:
        return None

    spans = find_ordered_line_subsequence_spans(
        file_lines=file_lines,
        needles=needles,
        max_span=160,
        max_gap=35,
    )

    if spans:
        exact_accepted = len(spans) == 1

        if not exact_accepted:
            # Several exact spans: accept only a strictly nearest, local one.
            # A failed tie-break falls through to the fuzzy pass below rather
            # than rejecting the hunk outright.
            old_start = parse_hunk_old_start(hunk.header)
            if old_start is not None:
                distances = sorted(abs(span[0] - old_start) for span in spans)
                best_distance, second_distance = distances[0], distances[1]
                exact_accepted = best_distance != second_distance and best_distance <= 180

        if exact_accepted:
            return MatchResult(
                found=False,
                confidence=0.96,
                method="already_applied_new_subsequence",
                warnings=warnings + [
                    f"Final new-block lines already exist as compact ordered subsequence; old removed lines missing={len(missing_removed)}."
                ],
                missing_lines=[],
                candidate_count=len(spans),
            )

    fuzzy_spans = find_ordered_fuzzy_line_subsequence_spans(
        file_lines=file_lines,
        needles=needles,
        path=path,
        max_span=190,
        max_gap=50,
        min_line_score=0.76,
        min_avg_score=0.84,
    )

    chosen_fuzzy = choose_unique_or_header_nearest_fuzzy_span(
        fuzzy_spans,
        hunk=hunk,
        max_distance=210,
    )

    if chosen_fuzzy is None:
        return None

    # Only the scores are reported; the span bounds are not needed here.
    _, _, avg_score, min_score = chosen_fuzzy

    return MatchResult(
        found=False,
        confidence=0.94,
        method="already_applied_new_fuzzy_subsequence",
        warnings=warnings + [
            f"Final new-block lines already exist as compact fuzzy ordered subsequence; avg_score={avg_score:.2f}, min_score={min_score:.2f}, old removed lines missing={len(missing_removed)}."
        ],
        missing_lines=[],
        candidate_count=len(fuzzy_spans),
    )



 def looks_like_markdown_task_heading(line: str) -> bool:
    """Return True for a Markdown task-list item such as `- [ ] Foo` or `* [x] Foo`.

    The line is stripped before matching, so surrounding whitespace is ignored
    and some text must follow the checkbox for the line to qualify.
    """
    task_item = re.match(r"^\s*[-*+]\s+\[[ xX]\]\s+", line.strip())
    return task_item is not None


 def target_contains_markdown_equiv_line(file_lines: list[str], line: str, path: Path) -> bool:
    """Report whether `line` already occurs anywhere in `file_lines`.

    A file line counts when it is identical to `line`, equal to it after
    `normalize_line`, or (for Markdown paths, when `markdown_lines_equiv` is
    defined in this module) accepted by `markdown_lines_equiv`.
    """
    wanted_norm = normalize_line(line)

    def equivalent(candidate: str) -> bool:
        if candidate == line or normalize_line(candidate) == wanted_norm:
            return True
        return bool(
            path.suffix.lower() in MARKDOWN_EXTENSIONS
            and "markdown_lines_equiv" in globals()
            and markdown_lines_equiv(candidate, line)
        )

    return any(equivalent(candidate) for candidate in file_lines)


 def normalize_task_body_addition(line: str) -> str:
    """Rewrite a bullet line as a nested `  * ...` task-body bullet.

    Task headings, and lines that `markdown_bullet_match` does not recognise,
    are returned unchanged. Otherwise the bullet body is right-stripped and
    re-emitted behind a two-space indent and a `*` marker.
    """
    if looks_like_markdown_task_heading(line):
        return line

    bullet = markdown_bullet_match(line)
    if bullet:
        text = bullet.group("body").rstrip()
        return f"  * {text}"

    return line


 def repair_malformed_added_task_body_removals(
    hunk: Hunk,
    file_lines: list[str],
    path: Path,
 ) -> tuple[Hunk, list[str]]:
    """Turn bullet "removals" that follow an added task heading into additions.

    Applies to Markdown paths only, and only to hunks that add at least one
    task heading. Once such a heading has been seen, a later `remove` line is
    reclassified as an `add` when it looks like a Markdown bullet and no
    equivalent line exists in `file_lines`. Its text is rewritten by
    `normalize_task_body_addition`.

    The repair is kept only if at least two lines were converted. Otherwise
    the original hunk is returned with no warnings.

    Returns:
      `(hunk, warnings)`: either the untouched input hunk with an empty list,
      or a new `Hunk` with the same header and parse warnings plus one warning
      per converted line.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return hunk, []

    def is_added_task(entry: DiffLine) -> bool:
        return entry.kind == "add" and looks_like_markdown_task_heading(entry.text)

    if not any(is_added_task(entry) for entry in hunk.lines):
        return hunk, []

    notes: list[str] = []
    rebuilt: list[DiffLine] = []
    after_added_task = False

    for entry in hunk.lines:
        if is_added_task(entry):
            after_added_task = True
            rebuilt.append(entry)
        elif (
            after_added_task
            and entry.kind == "remove"
            and looks_like_markdown_bullet(entry.text)
            and not target_contains_markdown_equiv_line(file_lines, entry.text, path)
        ):
            body_line = normalize_task_body_addition(entry.text)
            rebuilt.append(DiffLine("add", body_line, "+" + body_line))
            notes.append(
                f"Malformed added task-body removal reclassified as addition: {entry.text}"
            )
        else:
            rebuilt.append(entry)

    # Exactly one note is recorded per conversion, so the note count is the
    # conversion count; a lone conversion is not trusted.
    if len(notes) < 2:
        return hunk, []

    repaired_hunk = Hunk(
        header=hunk.header,
        lines=rebuilt,
        parse_warnings=list(hunk.parse_warnings),
    )
    return repaired_hunk, notes


 def repair_markdown_arrow_flow_context_removals(
    hunk: Hunk,
    file_lines: list[str],
    path: Path,
 ) -> tuple[Hunk, list[str]]:
    """Restore Markdown arrow-flow lines that were parsed as removals.

    Malformed AI diffs may emit real Markdown context lines like:

        -> evaluate Products/Offers
        -> buy

    at column 1. Unified-diff parsing reads those as remove marker "-"
    plus text "> evaluate...".

    For every `remove` line whose text starts with ">", the target file is
    searched for the literal line with a "-" or "+" put back in front, first
    exactly and then via `normalize_line`. When the hits amount to exactly
    one distinct line text, the diff line becomes context carrying that target
    text. This keeps add-only hunks from falling through to generic
    fuzzy_window matching.

    Only Markdown paths and hunks containing at least one addition are
    touched. Returns `(hunk, warnings)`: the input hunk and an empty list when
    nothing changed, otherwise a new `Hunk` plus one warning per repaired line.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return hunk, []

    if all(entry.kind != "add" for entry in hunk.lines):
        return hunk, []

    def target_lines_matching(candidate: str) -> list[str]:
        # Exact hits win; normalized comparison is only the fallback.
        exact_hits = [line for line in file_lines if line == candidate]
        if exact_hits:
            return exact_hits
        wanted_norm = normalize_line(candidate)
        return [line for line in file_lines if normalize_line(line) == wanted_norm]

    notes: list[str] = []
    rebuilt: list[DiffLine] = []

    for entry in hunk.lines:
        restored: str | None = None

        if entry.kind == "remove" and entry.text.startswith(">"):
            hits = target_lines_matching(f"-{entry.text}") + target_lines_matching(f"+{entry.text}")
            # Collapse repeated texts while preserving order.
            distinct = list(dict.fromkeys(hits))
            if len(distinct) == 1:
                restored = distinct[0]

        if restored is None:
            rebuilt.append(entry)
            continue

        rebuilt.append(DiffLine("context", restored, " " + restored))
        notes.append(
            f"Parsed Markdown arrow-flow removal reclassified as context: {restored}"
        )

    # One note is recorded per repaired line, so no notes means no change.
    if not notes:
        return hunk, []

    repaired_hunk = Hunk(
        header=hunk.header,
        lines=rebuilt,
        parse_warnings=list(hunk.parse_warnings),
    )
    return repaired_hunk, notes

 def locate_hunk(
    file_lines: list[str],
    hunk: Hunk,
    path: Path,
    min_confidence: float,
    markdown_recovery: bool,
 ) -> tuple[MatchResult, list[str]]:
    warnings = list(hunk.parse_warnings)

    hunk, arrow_flow_repair_warnings = repair_markdown_arrow_flow_context_removals(
        hunk=hunk,
        file_lines=file_lines,
        path=path,
    )
    warnings.extend(arrow_flow_repair_warnings)

    hunk, parsed_repair_warnings = repair_parsed_markdown_context_removals(
        hunk=hunk,
        file_lines=file_lines,
        path=path,
    )
    warnings.extend(parsed_repair_warnings)

    hunk, task_body_repair_warnings = repair_malformed_added_task_body_removals(
        hunk=hunk,
        file_lines=file_lines,
        path=path,
    )
    warnings.extend(task_body_repair_warnings)

    old_block, new_block, removed_only, added_only, context_only = old_new_blocks(hunk)

    # Match against the same Markdown sanitation that apply_patch_file writes.
    # Otherwise a hunk can apply cleanly, but fail idempotency afterward because
    # the file contains sanitized output while new_block still contains AI
    # artifacts/trailing whitespace.
    if path.suffix.lower() in MARKDOWN_EXTENSIONS and "sanitize_replacement_lines" in globals():
        new_block = sanitize_replacement_lines(new_block, path)
        added_only = sanitize_replacement_lines(added_only, path)

    # AI diffs sometimes contain a hunk header plus context, but no actual
    # additions/removals. That is a malformed no-op hunk. Do not let it make the
    # patch unsafe.
    if not removed_only and not added_only:
        if not old_block:
            return MatchResult(
                found=False,
                confidence=1.0,
                method="noop_empty_hunk",
                warnings=warnings + ["Empty hunk has no changes; treated as no-op."],
                missing_lines=[],
            ), new_block

        exact = find_exact_block(file_lines, old_block)
        if len(exact) >= 1:
            return MatchResult(
                found=False,
                confidence=1.0,
                method="noop_context_only_hunk",
                warnings=warnings + ["Context-only hunk has no changes; treated as no-op."],
                missing_lines=[],
                candidate_count=len(exact),
            ), new_block

        normalized = find_normalized_block(file_lines, old_block)
        if len(normalized) >= 1:
            return MatchResult(
                found=False,
                confidence=0.98,
                method="noop_context_only_hunk_normalized",
                warnings=warnings + ["Whitespace-normalized context-only hunk has no changes; treated as no-op."],
                missing_lines=[],
                candidate_count=len(normalized),
            ), new_block

        if markdown_recovery and path.suffix.lower() in MARKDOWN_EXTENSIONS and "find_markdown_equiv_block" in globals():
            md = find_markdown_equiv_block(file_lines, old_block)
            if len(md) >= 1:
                return MatchResult(
                    found=False,
                    confidence=0.96,
                    method="noop_context_only_hunk_markdown_equiv",
                    warnings=warnings + ["Markdown-equivalent context-only hunk has no changes; treated as no-op."],
                    missing_lines=[],
                    candidate_count=len(md),
                ), new_block

        # Even if context drifted, there is still no requested edit. Keep this
        # non-dangerous and do not block the whole patch.
        return MatchResult(
            found=False,
            confidence=0.90,
            method="noop_context_only_hunk_unmatched",
            warnings=warnings + ["Context-only hunk has no changes but context was not found; treated as no-op."],
            missing_lines=[],
        ), new_block

    if any(is_placeholder(line) for line in old_block + new_block):
        return MatchResult(
            found=False,
            confidence=0.0,
            method="placeholder_detected",
            warnings=warnings + ["Placeholder line like '...' detected; hunk skipped for safety."],
            missing_lines=[],
        ), new_block

    if not old_block and added_only:
        # No old block means probably an add-file hunk with only additions.
        # If file is empty/new, insert at top. Otherwise skip in v1 unless context exists.
        if not file_lines:
            return MatchResult(found=True, start=0, end=0, confidence=0.99, method="add_to_empty_file", warnings=warnings), new_block
        return MatchResult(
            found=False,
            confidence=0.0,
            method="insert_without_anchor",
            warnings=warnings + ["Insertion has no old/context anchor; skipped for safety."],
            missing_lines=[],
        ), new_block

    # Strategy 0: already-applied detection.
    # Important for insert-only hunks: after applying once, some context-only old blocks can
    # still remain unique and would otherwise look applyable again.
    new_matches = find_exact_block(file_lines, new_block)
    if len(new_matches) >= 1:
        return MatchResult(
            found=False,
            confidence=1.0,
            method="already_applied",
            warnings=warnings + ["New block already exists; hunk appears already applied."],
            missing_lines=[],
            candidate_count=len(new_matches),
        ), new_block

    normalized_new_matches = find_normalized_block(file_lines, new_block)
    if len(normalized_new_matches) >= 1:
        return MatchResult(
            found=False,
            confidence=0.98,
            method="already_applied_normalized",
            warnings=warnings + ["Whitespace-normalized new block already exists; hunk appears already applied."],
            missing_lines=[],
            candidate_count=len(normalized_new_matches),
        ), new_block

    new_subsequence = locate_already_applied_new_subsequence(
        file_lines=file_lines,
        hunk=hunk,
        path=path,
        removed_only=removed_only,
        new_block=new_block,
        warnings=warnings,
        markdown_recovery=markdown_recovery,
    )
    if new_subsequence is not None:
        return new_subsequence, new_block

    new_block_present = locate_already_applied_new_block_lines_present(
        file_lines=file_lines,
        hunk=hunk,
        path=path,
        removed_only=removed_only,
        new_block=new_block,
        warnings=warnings,
        markdown_recovery=markdown_recovery,
    )
    if new_block_present is not None:
        return new_block_present, new_block

    # Strategy 0b: already-applied added block.
    # Useful when an insertion is already present, but surrounding context drifted.
    if added_only:
        added_matches = find_exact_block(file_lines, added_only)
        if len(added_matches) == 1:
            return MatchResult(
                found=False,
                confidence=0.97,
                method="already_applied_added_block",
                warnings=warnings + ["Added block already exists uniquely; hunk appears already applied."],
                missing_lines=[],
                candidate_count=1,
            ), new_block

        normalized_added_matches = find_normalized_block(file_lines, added_only)
        if len(normalized_added_matches) == 1:
            return MatchResult(
                found=False,
                confidence=0.95,
                method="already_applied_added_block_normalized",
                warnings=warnings + ["Whitespace-normalized added block already exists uniquely; hunk appears already applied."],
                missing_lines=[],
                candidate_count=1,
            ), new_block

        if markdown_recovery and path.suffix.lower() in MARKDOWN_EXTENSIONS:
            markdown_added_matches = find_markdown_equiv_block(file_lines, added_only)
            if len(markdown_added_matches) == 1:
                return MatchResult(
                    found=False,
                    confidence=0.94,
                    method="already_applied_added_block_markdown_equiv",
                    warnings=warnings + ["Markdown-equivalent added block already exists uniquely; hunk appears already applied."],
                    missing_lines=[],
                    candidate_count=1,
                ), new_block

        added_present = locate_already_applied_added_lines_present(
            file_lines=file_lines,
            hunk=hunk,
            path=path,
            removed_only=removed_only,
            added_only=added_only,
            warnings=warnings,
            markdown_recovery=markdown_recovery,
        )
        if added_present is not None:
            return added_present, new_block

        added_subsequence = locate_already_applied_added_subsequence(
            file_lines=file_lines,
            hunk=hunk,
            path=path,
            removed_only=removed_only,
            added_only=added_only,
            warnings=warnings,
            markdown_recovery=markdown_recovery,
        )
        if added_subsequence is not None:
            return added_subsequence, new_block

    # Strategy 0c: human-style edit atom fallback.
    #
    # Run this early. AI diffs often contain several small human-obvious edits
    # in one malformed hunk. If we wait until after strict block strategies,
    # stale context or duplicated fuzzy windows can poison the hunk.
    atom = locate_edit_atom_fallback(file_lines, hunk, path, warnings)
    if atom is not None:
        return atom

    # Strategy 1: exact old block.
    matches = find_exact_block(file_lines, old_block)
    if len(matches) == 1:
        start, end = matches[0]
        return MatchResult(True, start, end, 0.98, "exact_old_block", warnings, [], 1), new_block
    if len(matches) > 1:
        return MatchResult(
            False,
            confidence=0.70,
            method="ambiguous_exact_old_block",
            warnings=warnings + [f"Exact old block matched {len(matches)} places; skipped for safety."],
            candidate_count=len(matches),
        ), new_block

    # Strategy 2: exact removed-only block, useful when context has drifted.
    if removed_only:
        matches = find_exact_block(file_lines, removed_only)
        if len(matches) == 1:
            start, end = matches[0]
            local_warnings = warnings[:]
            if context_only:
                local_warnings.append("Matched removed lines without full context; review recommended.")
            return MatchResult(True, start, end, 0.94, "exact_removed_block", local_warnings, [], 1), added_only
        if len(matches) > 1:
            warnings.append(f"Removed block matched {len(matches)} places; checking stronger strategies.")

    # Strategy 3: Markdown bullet recovery variants for old block.
    for variant in expand_markdown_old_block_variants(old_block, path, markdown_recovery)[1:]:
        matches = find_exact_block(file_lines, variant)
        if len(matches) == 1:
            start, end = matches[0]
            return MatchResult(
                True,
                start,
                end,
                0.91,
                "markdown_bullet_recovery_old_block",
                warnings + ["Applied Markdown bullet recovery for old block."],
                [],
                1,
            ), new_block

    # Strategy 4: normalized old block.
    matches = find_normalized_block(file_lines, old_block)
    if len(matches) == 1:
        start, end = matches[0]
        return MatchResult(
            True,
            start,
            end,
            0.90,
            "normalized_old_block",
            warnings + ["Whitespace-normalized match used."],
            [],
            1,
        ), new_block
    if len(matches) > 1:
        warnings.append(f"Normalized old block matched {len(matches)} places; skipped normalized strategy.")

    # Strategy 4a: Markdown-equivalent removed block.
    # Handles small replace atoms where patch says `* item` but target uses `- item`,
    # or vice versa.
    if removed_only and added_only and markdown_recovery and path.suffix.lower() in MARKDOWN_EXTENSIONS:
        matches = find_markdown_equiv_block(file_lines, removed_only)
        if len(matches) == 1:
            start, end = matches[0]
            replacement = replacement_preserving_matched_context(
                hunk=hunk,
                matched_old_lines=file_lines[start:end],
                path=path,
            )
            return MatchResult(
                True,
                start,
                end,
                0.92,
                "markdown_equiv_removed_block",
                warnings + ["Markdown bullet-marker-equivalent removed block match used."],
                [],
                1,
            ), replacement
        if len(matches) > 1:
            warnings.append(f"Markdown-equivalent removed block matched {len(matches)} places; skipped strategy.")

    # Strategy 4b: Markdown-equivalent old block.
    # This handles target files that use '-' bullets while the AI patch uses '*'
    # bullets, or vice versa. Replacement preserves matched target context lines.
    if markdown_recovery and path.suffix.lower() in MARKDOWN_EXTENSIONS:
        matches = find_markdown_equiv_block(file_lines, old_block)
        if len(matches) == 1:
            start, end = matches[0]
            replacement = replacement_preserving_matched_context(
                hunk=hunk,
                matched_old_lines=file_lines[start:end],
                path=path,
            )
            return MatchResult(
                True,
                start,
                end,
                0.91,
                "markdown_equiv_old_block",
                warnings + ["Markdown bullet-marker-equivalent block match used."],
                [],
                1,
            ), replacement
        if len(matches) > 1:
            warnings.append(f"Markdown-equivalent old block matched {len(matches)} places; skipped strategy.")

    # Strategy 5: insertion between exact context lines.
    # Works for hunks with only additions plus context.
    if added_only and not removed_only and len(context_only) >= 1:
        # Use full old_block as context if possible; replacement should insert at its location.
        matches = find_exact_block(file_lines, context_only)
        if len(matches) == 1:
            start, end = matches[0]
            # Replacing context-only with context+additions is safe when context block is unique.
            return MatchResult(
                True,
                start,
                end,
                0.92,
                "exact_context_insert",
                warnings,
                [],
                1,
            ), new_block
        if len(matches) > 1:
            return MatchResult(
                False,
                confidence=0.65,
                method="ambiguous_context_insert",
                warnings=warnings + [f"Insertion context matched {len(matches)} places; skipped."],
                candidate_count=len(matches),
            ), new_block

    # Strategy 6: tail-anchor evidence, report-only for now unless very strong.
    anchors = [a for a in (tail_anchor(line) for line in old_block) if a]
    if anchors:
        joined_file = "\n".join(file_lines)
        found_anchors = [a for a in anchors if a in joined_file]
        if found_anchors and len(found_anchors) == len(anchors):
            warnings.append("Tail anchors were present, but no safe unique block match was found.")

    # Strategy 7: fuzzy window fallback.
    candidates = fuzzy_window_candidates(file_lines, old_block)
    if candidates:
        best = candidates[0]
        close = [c for c in candidates if best[2] - c[2] < 0.03]

        if best[2] >= 0.98 and len(close) == 1 and added_only and not removed_only:
            start, end, score = best
            return MatchResult(
                True,
                start,
                end,
                0.91,
                "near_exact_fuzzy_insert_only_window",
                warnings + [f"Near-exact fuzzy insert-only window used; score={score:.2f}."],
                [],
                len(candidates),
            ), new_block

        old_start_hint_for_top = parse_hunk_old_start(hunk.header)
        if (
            old_start_hint_for_top is not None
            and old_start_hint_for_top <= 2
            and best[2] >= 0.88
            and best[0] <= 3
            and len(new_block) <= 14
            and 0 <= (best[1] - best[0]) <= 14
            and removed_only
            and added_only
        ):
            start, end, score = best
            return MatchResult(
                True,
                start,
                end,
                0.91,
                "top_of_file_fuzzy_replacement",
                warnings + [
                    f"Top-of-file fuzzy replacement promoted; score={score:.2f}."
                ],
                [],
                len(candidates),
            ), new_block

        if best[2] >= 0.94 and len(close) == 1:
            start, end, score = best
            is_bounded_insert_only = (
                added_only
                and not removed_only
                and score >= 0.94
                and len(added_only) <= 12
                and len(new_block) <= 40
                and (end - start) <= 20
            )

            return MatchResult(
                True,
                start,
                end,
                0.91 if is_bounded_insert_only else min(0.89, score),
                "bounded_fuzzy_insert_only_window" if is_bounded_insert_only else "fuzzy_window",
                warnings + [
                    f"Bounded fuzzy insert-only window used; score={score:.2f}."
                    if is_bounded_insert_only
                    else "Fuzzy window match used; review recommended."
                ],
                [],
                len(candidates),
            ), new_block

        guided = header_guided_fuzzy_candidate(
            candidates=candidates,
            hunk=hunk,
            score_floor=0.94,
            max_distance=90,
        )
        if guided and len(new_block) <= 90:
            start, end, score = guided
            if 0 <= (end - start) <= 45:
                return MatchResult(
                    True,
                    start,
                    end,
                    0.91,
                    "header_guided_fuzzy_window",
                    warnings + [
                        f"Header-guided fuzzy window used as tie-breaker; score={score:.2f}."
                    ],
                    [],
                    len(candidates),
                ), new_block

        # Last fuzzy tie-breaker: if the best fuzzy score is effectively exact
        # but duplicated, use the hunk header old-start line as a locality hint.
        #
        # This is safer than lowering fuzzy thresholds generally: it only fires
        # for near-perfect matches, bounded replacements, and a candidate close
        # to the original hunk location.
        old_start_hint = parse_hunk_old_start(hunk.header)
        if old_start_hint is not None and best[2] >= 0.99 and len(new_block) <= 120:
            close_exact = [c for c in candidates if best[2] - c[2] < 0.01]
            ranked = sorted(close_exact, key=lambda c: abs(c[0] - old_start_hint))
            if ranked:
                start, end, score = ranked[0]
                if abs(start - old_start_hint) <= 140 and 0 <= (end - start) <= 60:
                    return MatchResult(
                        True,
                        start,
                        end,
                        0.91,
                        "header_guided_perfect_fuzzy_window",
                        warnings + [
                            f"Header-guided perfect fuzzy window used as tie-breaker; score={score:.2f}."
                        ],
                        [],
                        len(candidates),
                    ), new_block

        warnings.append(
            f"Best fuzzy candidate score {best[2]:.2f}, but not strong/unique enough for automatic apply."
        )

    missing = line_presence_missing(file_lines, removed_only or old_block, path, markdown_recovery)
    return MatchResult(
        False,
        confidence=0.0,
        method="not_found",
        warnings=warnings,
        missing_lines=missing,
        candidate_count=len(candidates),
    ), new_block


 def detect_newline(raw: bytes) -> str:
    """Choose the newline sequence used when the content is written back.

    A single CRLF pair anywhere in *raw* selects CRLF; every other input,
    including empty input, selects LF.
    """
    return "\r\n" if b"\r\n" in raw else "\n"


 def bytes_to_lines(raw: bytes) -> tuple[list[str], str, bool]:
    """Decode file bytes into lines plus what is needed to write them back.

    Returns:
        ``(lines, newline, has_final_newline)`` where *lines* carry no line
        terminators, *newline* is the sequence chosen by ``detect_newline``
        and *has_final_newline* tells whether the text ended with LF.

    Raises:
        UnicodeDecodeError: if *raw* is not valid UTF-8.

    Only CRLF, LF and lone CR end a line here. ``str.splitlines()`` also
    splits on form feed, vertical tab, FS/GS/RS, NEL, U+2028 and U+2029;
    those are ordinary content in source and Markdown files, and splitting
    on them would turn them into newlines when the file is written back.
    """
    newline = detect_newline(raw)
    text = raw.decode("utf-8")
    has_final_newline = text.endswith("\n")
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")
    # A terminator at the very end (or empty input) leaves one empty trailing
    # piece; drop it so the result matches splitlines() for ordinary files.
    if lines[-1] == "":
        lines.pop()
    return lines, newline, has_final_newline


 def lines_to_bytes(lines: list[str], newline: str, has_final_newline: bool) -> bytes:
    """Serialise *lines* to UTF-8 bytes, joined with *newline*.

    Args:
        lines: Line contents without terminators.
        newline: Separator placed between lines (and after the last one
            when *has_final_newline* is true).
        has_final_newline: Whether the output should end with *newline*.

    Returns:
        The encoded content. An empty *lines* list always yields ``b""``:
        a file with no lines has no final newline to preserve, so emitting
        one would leave a stray blank line where an empty file is expected.
    """
    if not lines:
        return b""
    text = newline.join(lines)
    if has_final_newline:
        text += newline
    return text.encode("utf-8")


 def safe_target_path(root: Path, rel: str) -> Path:
    """Resolve *rel* against *root* and insist the result stays inside it.

    Returns:
        The fully resolved target path.

    Raises:
        ValueError: when the resolved path is not located under the
            resolved *root*.
    """
    base = root.resolve()
    resolved = (root / rel).resolve()
    try:
        # relative_to() raises ValueError when `resolved` is not under `base`.
        resolved.relative_to(base)
    except ValueError:
        raise ValueError(f"Refusing path outside root: {rel}")
    return resolved




 def repair_markdown_inline_text_fence_artifact(line: str) -> str:
    """Unwrap a one-line pseudo "text fence" that an AI diff left behind.

    A line whose stripped form is one to three backticks, the word ``text``,
    whitespace and a body (optionally closed by backticks) is reduced to that
    body. One leading and one trailing ``+`` around the body, typically diff
    markers copied into the fake fence, are removed as well.

    Example:
        `text +shopRef + itemRef +`   becomes   shopRef + itemRef

    Any line that does not have this shape is returned unchanged.
    """
    hit = re.match(r"^`{1,3}text\s+(?P<body>.+?)`*$", line.strip())
    if hit is None:
        return line

    payload = hit.group("body").strip()

    # Peel at most one diff-style plus sign from each end of the payload.
    if payload[:1] == "+":
        payload = payload[1:].strip()
    if payload[-1:] == "+":
        payload = payload[:-1].strip()

    return payload


 def sanitize_replacement_lines(lines: list[str], path: Path) -> list[str]:
    """Scrub replacement lines right before leak detection and writing.

    Targets whose suffix is not a Markdown extension get the input list back
    untouched. For Markdown targets each line first has a broken inline
    `text +...+` pseudo-fence repaired and then loses its trailing
    whitespace; a new list is returned.
    """
    is_markdown = path.suffix.lower() in MARKDOWN_EXTENSIONS
    if not is_markdown:
        return lines

    return [repair_markdown_inline_text_fence_artifact(raw).rstrip() for raw in lines]



 def suspicious_markdown_marker_leaks(lines: list[str], path: Path) -> list[str]:
    """Detect likely leaked diff markers in Markdown output.

    These usually mean smartpatch accidentally wrote patch syntax as document text.
    Legit Markdown + bullets use '+ item' with a space, so '+* item', '+```text',
    '++foo', '+A real sentence', etc. are suspicious outside code fences.

    Args:
        lines: Replacement lines about to be written.
        path: Target file; only Markdown extensions are inspected.

    Returns:
        One human-readable message per suspicious line (1-based index within
        *lines*); an empty list for non-Markdown targets or clean input.
    """
    if path.suffix.lower() not in MARKDOWN_EXTENSIONS:
        return []

    leaks: list[str] = []
    in_fence = False

    for idx, line in enumerate(lines, start=1):
        # A literal +``` is suspicious before toggling. It has to be tested on
        # the raw line and ahead of the fence check: once the line is known to
        # start with ``` (after stripping whitespace) it can no longer start
        # with "+", and inside an open fence every other check is skipped.
        if line.startswith("+```"):
            leaks.append(f"replacement line {idx}: suspicious leaked marker `{line}`")
            continue

        if line.strip().startswith("```"):
            in_fence = not in_fence
            continue

        # Content of fenced code blocks may legitimately start with "+".
        if in_fence:
            continue

        if line.startswith(("++", "+*", "+-", "+#")):
            leaks.append(f"replacement line {idx}: suspicious leaked marker `{line}`")
            continue

        if line.startswith("+") and not line.startswith("+ "):
            leaks.append(f"replacement line {idx}: suspicious literal plus `{line}`")
            continue

    return leaks


 def apply_patch_file(
    patch_file: PatchFile,
    root: Path,
    dry_run: bool,
    min_confidence: float,
    markdown_recovery: bool,
    backup: bool,
 ) -> FileReport:
    """Apply every hunk of one parsed patch file to its target under *root*.

    Hunks are located and applied one after another against an in-memory copy
    of the target's lines, so each hunk is matched against the result of the
    hunks accepted before it. The disk is only touched when at least one hunk
    was accepted and *dry_run* is false.

    Args:
        patch_file: Parsed patch entry; its ``target_path`` names the file.
        root: Directory the target path must stay inside.
        dry_run: When true, accepted hunks are counted as ``would_apply`` and
            the file is left untouched.
        min_confidence: Confidence a match needs before it is applied; the
            bounded fuzzy promotions below lift qualifying matches up to it.
        markdown_recovery: Passed through to ``locate_hunk``.
        backup: When true, copy an existing target to a timestamped
            ``<name>.smartpatch-<stamp>.bak`` sibling before overwriting it.

    Returns:
        A ``FileReport`` with per-file counters, warnings and one
        ``HunkReport`` per processed hunk. A missing target path, a path
        outside *root*, or non-UTF-8 content marks all hunks as skipped and
        returns early without hunk reports.
    """
    rel = patch_file.target_path or "<unknown>"
    report = FileReport(file=rel, exists=False, hunks_total=len(patch_file.hunks))
    report.warnings.extend(patch_file.parse_warnings)

    if not patch_file.target_path:
        report.skipped = len(patch_file.hunks)
        report.warnings.append("No target path; skipped file.")
        return report

    try:
        path = safe_target_path(root, patch_file.target_path)
    except ValueError as exc:
        # safe_target_path refuses anything that resolves outside root.
        report.skipped = len(patch_file.hunks)
        report.warnings.append(str(exc))
        return report

    if path.exists():
        report.exists = True
        raw = path.read_bytes()
        try:
            file_lines, newline, has_final_newline = bytes_to_lines(raw)
        except UnicodeDecodeError:
            report.skipped = len(patch_file.hunks)
            report.warnings.append("File is not valid UTF-8; skipped.")
            return report
    else:
        # Target does not exist yet: start from no lines and write the result
        # with LF newlines and a final newline.
        report.exists = False
        file_lines = []
        newline = "\n"
        has_final_newline = True

    changed = False
    # Work on a copy; every accepted hunk rewrites this list.
    current_lines = file_lines[:]

    for index, hunk in enumerate(patch_file.hunks, start=1):
        match, replacement = locate_hunk(current_lines, hunk, path, min_confidence, markdown_recovery)
        replacement = sanitize_replacement_lines(replacement, path)

        # Promote bounded add-only fuzzy windows.
        #
        # locate_hunk() intentionally caps generic fuzzy_window confidence at 0.89.
        # For add-only hunks, this can be too strict when the fuzzy match is already
        # unique enough to return a concrete location. Keep this conservative:
        # - add-only only
        # - no deletions
        # - small replacement
        # - small matched span
        # - concrete location
        if (
            match.method == "fuzzy_window"
            and match.found
            and match.confidence >= 0.89
            and match.start is not None
            and match.end is not None
        ):
            _old_block, _new_block, _removed_only, _added_only, _context_only = old_new_blocks(hunk)
            _span = match.end - match.start
            if _added_only and not _removed_only and len(_added_only) <= 12 and _span <= 20:
                match.confidence = max(match.confidence, min_confidence)
                # Renaming the method also keeps the later "fuzzy_window"
                # gates from firing again for this hunk.
                match.method = "promoted_fuzzy_insert_only_window"
                match.warnings.append(
                    "Fuzzy insert-only window promoted because it is add-only, bounded, and had a concrete unique location."
                )

        # Promote bounded fuzzy windows that only expand a small matched span.
        #
        # This handles add-only malformed AI hunks where the fuzzy matcher found a
        # concrete unique window but capped confidence at 0.89. It stays bounded:
        # - fuzzy_window only
        # - concrete location
        # - small span
        # - replacement expands the span
        # - replacement is not huge
        #
        # Note that this gate does not itself check for removed lines; it only
        # compares the replacement length with the matched span.
        if (
            match.method == "fuzzy_window"
            and match.found
            and match.confidence >= 0.89
            and match.start is not None
            and match.end is not None
        ):
            _span = match.end - match.start
            if 0 <= _span <= 20 and len(replacement) > _span and len(replacement) <= 40:
                match.confidence = max(match.confidence, min_confidence)
                match.method = "promoted_bounded_fuzzy_expanding_window"
                match.warnings.append(
                    "Bounded fuzzy expanding window promoted over threshold."
                )


        # Apply gate: a third, slightly looser (>= 0.88) add-only promotion.
        # It can only fire when neither promotion above renamed the method.
        _old_block_for_gate, _new_block_for_gate, _removed_only_for_gate, _added_only_for_gate, _context_only_for_gate = old_new_blocks(hunk)
        # 999999 is a sentinel span that fails the "<= 20" bound below when the
        # match has no concrete location.
        _span_for_gate = (
            match.end - match.start
            if match.start is not None and match.end is not None
            else 999999
        )
        _smartpatch_allow_fuzzy_window = (
            match.method == "fuzzy_window"
            and match.found
            and match.confidence >= 0.88
            and match.start is not None
            and match.end is not None
            and _added_only_for_gate
            and not _removed_only_for_gate
            and len(_added_only_for_gate) <= 12
            and 0 <= _span_for_gate <= 20
            and len(replacement) <= 40
        )

        if _smartpatch_allow_fuzzy_window:
            match.confidence = max(match.confidence, min_confidence)
            match.method = "promoted_bounded_fuzzy_insert_only_window"
            match.warnings.append(
                "Bounded fuzzy insert-only window promoted over threshold in apply gate."
            )

        # Rejection path: no match, confidence below threshold, or no concrete
        # location. The hunk is reported as skipped / already applied unless
        # the last-chance promotion below accepts it.
        if not _smartpatch_allow_fuzzy_window and (not match.found or match.confidence < min_confidence or match.start is None or match.end is None):
            is_already_applied = match.method.startswith("already_applied")
            is_noop = match.method.startswith("noop_")

            # Last-chance safe promotion for bounded fuzzy insert windows.
            # This handles cases where locate_hunk found a concrete fuzzy_window
            # at ~0.89, but the earlier gate did not fire due to parser drift.
            if (
                match.method == "fuzzy_window"
                and match.found
                and match.confidence >= 0.87
                and match.start is not None
                and match.end is not None
                and 0 <= (match.end - match.start) <= 20
                and len(replacement) <= 40
                and len(replacement) >= (match.end - match.start)
            ):
                marker_leaks = suspicious_markdown_marker_leaks(replacement, path)
                if not marker_leaks:
                    # Apply right here and move on to the next hunk; the
                    # regular apply code further down is not reached.
                    current_lines = current_lines[: match.start] + replacement + current_lines[match.end :]
                    changed = True
                    if dry_run:
                        report.would_apply += 1
                    else:
                        report.applied += 1
                    report.hunk_reports.append(
                        HunkReport(
                            file=rel,
                            hunk_index=index,
                            action="would_apply" if dry_run else "applied",
                            confidence=max(match.confidence, min_confidence),
                            method="last_chance_promoted_bounded_fuzzy_window",
                            start_line=match.start + 1,
                            end_line=match.end,
                            warnings=match.warnings + [
                                "Last-chance bounded fuzzy window promoted; no marker leaks detected."
                            ],
                            missing_lines=[],
                        )
                    )
                    continue

            # No-op hunks are counted together with already-applied ones.
            if is_already_applied or is_noop:
                report.already_applied += 1
            else:
                report.skipped += 1

            details = ""
            if is_already_applied:
                details = "Hunk appears already applied; no action needed."
            elif is_noop:
                details = "Hunk has no requested changes; no action needed."
            elif len(match.missing_lines) >= 3:
                details = "Several expected lines were not found; this may be the wrong file, wrong branch, or stale AI diff."
            elif len(match.missing_lines) >= 1:
                details = "One or more expected lines were not found."

            report.hunk_reports.append(
                HunkReport(
                    file=rel,
                    hunk_index=index,
                    action="already_applied" if (is_already_applied or is_noop) else "skipped",
                    confidence=match.confidence,
                    method=match.method,
                    start_line=None if match.start is None else match.start + 1,
                    end_line=None if match.end is None else match.end,
                    warnings=match.warnings,
                    missing_lines=match.missing_lines,
                    details=details,
                )
            )
            continue

        # Accepted match: refuse to write replacement text that still looks
        # like diff syntax.
        marker_leaks = suspicious_markdown_marker_leaks(replacement, path)
        if marker_leaks:
            report.skipped += 1
            report.hunk_reports.append(
                HunkReport(
                    file=rel,
                    hunk_index=index,
                    action="skipped",
                    confidence=0.0,
                    method="suspicious_marker_leak",
                    start_line=match.start + 1,
                    end_line=match.end,
                    warnings=match.warnings + marker_leaks,
                    missing_lines=[],
                    details="Suspicious literal diff markers would be written; hunk skipped.",
                )
            )
            continue

        # Apply to in-memory lines.
        # match.start / match.end delimit a half-open slice of current_lines, so
        # the reported 1-based inclusive range is start + 1 .. end.
        current_lines = current_lines[: match.start] + replacement + current_lines[match.end :]
        changed = True
        action: Literal["applied", "would_apply"] = "would_apply" if dry_run else "applied"
        if dry_run:
            report.would_apply += 1
        else:
            report.applied += 1
        report.hunk_reports.append(
            HunkReport(
                file=rel,
                hunk_index=index,
                action=action,
                confidence=match.confidence,
                method=match.method,
                start_line=match.start + 1,
                end_line=match.end,
                warnings=match.warnings,
                missing_lines=match.missing_lines,
            )
        )

    # Persist once, after all hunks, and only in write mode.
    if changed and not dry_run:
        if backup and path.exists():
            stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
            backup_path = path.with_name(f"{path.name}.smartpatch-{stamp}.bak")
            shutil.copy2(path, backup_path)
            report.warnings.append(f"Backup written: {backup_path}")
        # Create missing parent directories so new files can be written.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_bytes(lines_to_bytes(current_lines, newline, has_final_newline))

    return report


 def markdown_report(run: RunReport) -> str:
    """Render *run* as a Markdown document.

    Layout: a run summary, an optional "Run Warnings" list, then one section
    per file with a sub-section per hunk. At most 20 missing lines are listed
    per hunk; any remainder is summarised as a count. Sections are separated
    by blank lines and the text is joined with LF.
    """
    out: list[str] = [
        "# Smartpatch Report",
        "",
        "## Summary",
        "",
        f"- Patch: `{run.patch}`",
        f"- Root: `{run.root}`",
        f"- Mode: `{'dry-run' if run.dry_run else 'write'}`",
        f"- Minimum confidence: `{run.min_confidence:.2f}`",
        f"- Files: `{run.files_total}`",
        f"- Hunks: `{run.hunks_total}`",
        f"- Applied: `{run.applied}`",
        f"- Would apply: `{run.would_apply}`",
        f"- Already applied: `{run.already_applied}`",
        f"- Skipped: `{run.skipped}`",
        "",
    ]

    if run.warnings:
        out += ["## Run Warnings", ""]
        out += [f"- {warning}" for warning in run.warnings]
        out.append("")

    for file_report in run.file_reports:
        out += [
            f"## {file_report.file}",
            "",
            f"- Exists: `{file_report.exists}`",
            f"- Hunks: `{file_report.hunks_total}`",
            f"- Applied: `{file_report.applied}`",
            f"- Would apply: `{file_report.would_apply}`",
            f"- Already applied: `{file_report.already_applied}`",
            f"- Skipped: `{file_report.skipped}`",
        ]
        if file_report.warnings:
            out.append("- Warnings:")
            out.extend(f"  - {warning}" for warning in file_report.warnings)
        out.append("")

        for hunk_report in file_report.hunk_reports:
            out += [
                f"### Hunk {hunk_report.hunk_index} — {hunk_report.action}",
                "",
                f"- Confidence: `{hunk_report.confidence:.2f}`",
                f"- Method: `{hunk_report.method}`",
            ]
            if hunk_report.start_line is not None:
                out.append(f"- Location: lines `{hunk_report.start_line}`-`{hunk_report.end_line}`")
            if hunk_report.details:
                out.append(f"- Assessment: {hunk_report.details}")
            if hunk_report.warnings:
                out.append("- Warnings:")
                out.extend(f"  - {warning}" for warning in hunk_report.warnings)

            missing_lines = hunk_report.missing_lines
            if missing_lines:
                out.append("- Missing expected lines:")
                out.extend(f"  - `{entry}`" for entry in missing_lines[:20])
                overflow = len(missing_lines) - 20
                if overflow > 0:
                    out.append(f"  - ...and {overflow} more")
            out.append("")

    return "\n".join(out)


 def safety_verdict(run: RunReport) -> tuple[str, str]:
    """Condense the run counters into a ``(verdict, reason)`` pair.

    Checks are ordered; the first one that holds decides:
      1. no hunks at all          -> UNSAFE
      2. any skipped hunk         -> UNSAFE
      3. dry run with applicable  -> SAFE
      4. write run with applied   -> APPLIED
      5. only already-applied     -> NOOP
      6. anything else            -> UNSAFE
    """
    if run.hunks_total == 0:
        outcome = ("UNSAFE", "no actionable hunks found")
    elif run.skipped > 0:
        outcome = ("UNSAFE", f"{run.skipped} skipped hunk(s)")
    elif run.would_apply > 0 and run.dry_run:
        outcome = (
            "SAFE",
            f"{run.would_apply} hunk(s) would apply, {run.already_applied} already applied",
        )
    elif run.applied > 0 and not run.dry_run:
        outcome = (
            "APPLIED",
            f"{run.applied} hunk(s) applied, {run.already_applied} already applied",
        )
    elif run.already_applied > 0:
        outcome = ("NOOP", f"all {run.already_applied} hunk(s) already applied")
    else:
        outcome = ("UNSAFE", "nothing applyable found")
    return outcome


 def print_console_summary(run: RunReport) -> None:
    """Print the verdict line and the counters line to stdout.

    When the verdict is UNSAFE, up to five skipped hunks are listed as well,
    in report order, each with its method and confidence.
    """
    verdict, reason = safety_verdict(run)

    print(f"{verdict}: {reason}")
    print(
        f"files={run.files_total} hunks={run.hunks_total} "
        f"would_apply={run.would_apply} already_applied={run.already_applied} skipped={run.skipped}"
    )

    if verdict != "UNSAFE":
        return

    skipped_hunks = [
        (file_report.file, hunk_report)
        for file_report in run.file_reports
        for hunk_report in file_report.hunk_reports
        if hunk_report.action == "skipped"
    ]
    # Only the first five are shown to keep the console output short.
    for file_name, hunk_report in skipped_hunks[:5]:
        print(
            f"- {file_name}: hunk {hunk_report.hunk_index} skipped "
            f"({hunk_report.method}, confidence={hunk_report.confidence:.2f})"
        )

 def detect_git_root(start: Path) -> Path | None:
    """Return the top-level directory of the Git work tree containing *start*.

    Runs ``git rev-parse --show-toplevel`` with *start* as the working
    directory.

    Returns:
        The resolved top-level path, or None when git exits with an error,
        prints nothing, or the process cannot be started at all.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            cwd=start,
            check=True,
            capture_output=True,
            text=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git binary (FileNotFoundError) and also a
        # non-executable git or a *start* that is not a usable directory.
        # Detection is best-effort, so all of these mean "no git root".
        return None
    root = result.stdout.strip()
    return Path(root).resolve() if root else None


 def resolve_root(root_arg: str | None) -> Path:
    """Work out the root directory patches are applied against.

    An explicit *root_arg* wins (user-expanded and resolved). Otherwise the
    Git top-level of the current directory is used, falling back to the
    resolved current directory when no Git root is found.
    """
    if not root_arg:
        git_root = detect_git_root(Path.cwd())
        return git_root if git_root else Path.cwd().resolve()
    return Path(root_arg).expanduser().resolve()


 def default_report_path(root: Path, patch_path: Path) -> Path:
    """Return ``<root>/tmp/<patch stem>_patch-report.md``."""
    report_name = f"{patch_path.stem}_patch-report.md"
    return root.joinpath("tmp", report_name)


 def resolve_report_path(report_arg: str | None, root: Path, patch_path: Path) -> Path | None:
    """Translate the ``--report`` value into a path, if a report was requested.

    ``None`` means no report. The literal ``"AUTO"`` selects the default
    location derived from *root* and *patch_path*. Any other value is taken
    as a user-supplied path and only user-expanded.
    """
    if report_arg is None:
        return None
    if report_arg != "AUTO":
        return Path(report_arg).expanduser()
    return default_report_path(root, patch_path)


 def cmd_apply(args: argparse.Namespace) -> int:
    """Run the ``apply`` sub-command: parse the patch, process every file, report.

    Reads the patch named by ``args.patch``, repairs and parses it, applies
    each patch file under the resolved root (dry run unless ``args.write``),
    then emits the optional Markdown/JSON reports and the console summary.

    Returns:
        2 when the patch or root does not exist or the patch cannot be read
        as UTF-8 text, 1 when at least one hunk was skipped, otherwise 0.
    """
    patch_path = Path(args.patch).expanduser().resolve()
    root = resolve_root(args.root)
    dry_run = not args.write

    if not patch_path.exists():
        print(f"Patch not found: {patch_path}", file=sys.stderr)
        return 2
    if not root.exists():
        print(f"Root not found: {root}", file=sys.stderr)
        return 2

    # An unreadable patch is a user-input problem like the two above, so it is
    # reported on stderr with exit code 2 instead of surfacing as a traceback.
    try:
        patch_text = patch_path.read_text(encoding="utf-8")
    except UnicodeDecodeError:
        print(f"Patch is not valid UTF-8: {patch_path}", file=sys.stderr)
        return 2
    except OSError as exc:
        print(f"Cannot read patch: {patch_path} ({exc})", file=sys.stderr)
        return 2

    patch_text, repair_warnings = repair_ai_patch_text(patch_text, root)
    patch_files, parse_warnings = parse_patch(patch_text)
    parse_warnings = repair_warnings + parse_warnings

    run = RunReport(
        patch=str(patch_path),
        root=str(root),
        dry_run=dry_run,
        min_confidence=args.min_confidence,
        files_total=len(patch_files),
        hunks_total=sum(len(pf.hunks) for pf in patch_files),
        warnings=parse_warnings,
    )

    # Process each file and fold its counters into the run totals.
    for pf in patch_files:
        fr = apply_patch_file(
            pf,
            root=root,
            dry_run=dry_run,
            min_confidence=args.min_confidence,
            markdown_recovery=not args.no_markdown_bullet_recovery,
            backup=args.backup,
        )
        run.file_reports.append(fr)
        run.applied += fr.applied
        run.would_apply += fr.would_apply
        run.already_applied += fr.already_applied
        run.skipped += fr.skipped

    report_text = markdown_report(run)

    if args.verbose:
        print(report_text)

    report_path = resolve_report_path(args.report, root, patch_path)
    if report_path:
        # Relative Markdown report paths are interpreted relative to the root.
        if not report_path.is_absolute():
            report_path = root / report_path
        report_path.parent.mkdir(parents=True, exist_ok=True)
        report_path.write_text(report_text, encoding="utf-8")
        print(f"Report written: {report_path}")

    if args.json_report:
        # Unlike the Markdown report, a relative JSON path is used as given.
        json_path = Path(args.json_report).expanduser()
        json_path.parent.mkdir(parents=True, exist_ok=True)
        json_path.write_text(json.dumps(asdict(run), indent=2, ensure_ascii=False), encoding="utf-8")

    print_console_summary(run)
    # NOTE(review): a patch with zero hunks gets an UNSAFE verdict from
    # safety_verdict() but still exits 0 here; confirm that is intended.
    return 1 if run.skipped else 0


 def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line parser with its single ``apply`` sub-command.

    The sub-command is mandatory and stores ``cmd_apply`` as ``func`` on the
    parsed namespace.
    """
    top = argparse.ArgumentParser(
        prog="smartpatch.py",
        description="Conservative fuzzy patch applier for AI-made unified diffs.",
    )
    commands = top.add_subparsers(dest="command", required=True)

    apply_parser = commands.add_parser("apply", help="dry-run or apply an AI-made diff")

    # Positional input and target location.
    apply_parser.add_argument("patch", help="path to .diff/.patch file")
    apply_parser.add_argument(
        "--root",
        default=None,
        help="repo/root directory; default: auto-detected git root, else current directory",
    )

    # Write behaviour.
    apply_parser.add_argument(
        "--write",
        action="store_true",
        help="actually modify files; default is dry-run",
    )
    apply_parser.add_argument(
        "--backup",
        action="store_true",
        help="write .smartpatch timestamp backups before modifying files",
    )
    apply_parser.add_argument(
        "--min-confidence",
        type=float,
        default=0.90,
        help="minimum confidence required to apply; default: 0.90",
    )

    # Reporting. A bare --report stores the "AUTO" sentinel.
    apply_parser.add_argument(
        "--report",
        nargs="?",
        const="AUTO",
        help="also write markdown report; default path: tmp/<diff-filename>_patch-report.md",
    )
    apply_parser.add_argument("--json-report", help="write JSON report to this path")
    apply_parser.add_argument(
        "--verbose",
        action="store_true",
        help="print full markdown report to terminal",
    )

    # Matching behaviour.
    apply_parser.add_argument(
        "--no-markdown-bullet-recovery",
        action="store_true",
        help="disable Markdown fallback matching for AI diffs that omit bullet markers",
    )

    apply_parser.set_defaults(func=cmd_apply)
    return top


 def main(argv: list[str] | None = None) -> int:
    """Parse *argv* and run the selected sub-command.

    *argv* is handed to ``parse_args`` as is. Returns whatever the handler
    stored as ``func`` on the namespace returns.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)


 # Script entry point: run main() and use its return value as the exit status.
 if __name__ == "__main__":
    raise SystemExit(main())
No results found