fentas · May 18, 2026 08:59
diff --git a/zig_extract_tests.sh b/zig_extract_tests.sh
 #!/bin/bash
 # Find the test region in $1 and split into source + sibling _tests.zig.
 # Auto-copies module-level single-line `const` lines and generates
 # re-binds for pub decls so test bodies don't need rewriting.
 #
 # Usage: zig_extract_tests.sh path/to/file.zig

 set -euo pipefail

 # Report a usage line rather than a bare "unbound variable" error.
 src="${1:?usage: $0 path/to/file.zig}"
 dir=$(dirname -- "$src")
 base=$(basename -- "$src" .zig)
 out="$dir/${base}_tests.zig"

 # Never treat a sibling as an extraction target: a later run over
 # foo_tests.zig would otherwise split it again into foo_tests_tests.zig.
 case "$base" in
   *_tests)
     echo "skip $src (name ends in _tests; treated as a sibling)" >&2
     exit 0
     ;;
 esac

 # Skip if already extracted
 if [ -f "$out" ]; then
   echo "skip $src (sibling already exists)" >&2
   exit 0
 fi

 # Line number of the first `test "..."` block that starts at column 0.
 # The bare `test {` form is deliberately not matched: that is the
 # discovery stub this script writes, and matching it would make a re-run
 # on a previously-extracted file extract the stub and self-import.
 start=$(awk 'index($0, "test \"") == 1 { print NR; exit }' "$src")

 [[ -n "$start" ]] || {
   echo "no tests in $src" >&2
   exit 0
 }

 # Find where the test region begins.
 #
 # A header is a `//` comment line above the first test in which
 # `Tests`/`tests` is followed (after optional spaces) by end of line,
 # `—`, `-` or `:` — e.g. `// Tests` or `// Tests — parser`. The LAST
 # such header wins. Blank lines and `// ====` / `// ────` rule lines
 # directly above it are pulled in as well, so a three-line banner moves
 # as one unit. Everything from there onwards belongs to the test region
 # (including any test-only stub structs / consts between the header and
 # the first `test` block). With no header the region starts at the
 # first `test` line.
 # NOTE(review): one-line banners such as `// === Tests ===` do not match
 # the header pattern (`=` follows `Tests`) — confirm that is intended.
 header_start=$(awk -v end="$start" '
   # Blank line, or a comment made only of = or ─ characters. The ─ is
   # grouped so the repeat covers the whole multi-byte character even in
   # awks that match byte by byte.
   function is_filler(s) {
     return s ~ /^[[:space:]]*$/ ||
       s ~ /^[[:space:]]*\/\/[[:space:]]*=+[[:space:]]*$/ ||
       s ~ /^[[:space:]]*\/\/[[:space:]]*(─)+[[:space:]]*$/
   }
   NR >= end { exit }
   /^[[:space:]]*\/\/.*[Tt]ests[[:space:]]*(—|-|:|$)/ {
     # run is the first line of the filler run that ends just above.
     region = run ? run : NR
     run = 0
     next
   }
   {
     if (!is_filler($0)) run = 0
     else if (!run) run = NR
   }
   END { print (region ? region : end) }
 ' "$src")

 head_end=$((header_start - 1))
 # Lines starting with `test ` from the first test onwards.
 test_count=$(awk -v from="$start" '
   NR >= from && /^test / { n++ }
   END { print n + 0 }
 ' "$src")

 # Find the END of the test region. Tests in this codebase are usually
 # at EOF, but a few files (e.g. dialog.zig) put production code AFTER
 # the tests, so the region stops at the closing `}` of the LAST
 # `test "..."` block: braces are counted from that test's header line
 # until the depth returns to zero. Blank lines directly after the closing
 # brace are absorbed too, so they don't look like floating noise in the
 # source. If the braces never balance, the region runs to EOF.
 # Braces inside string literals and comments are counted as well.
 last_test_line=$(awk '/^test "/ { n = NR } END { print n }' "$src")
 # Count with awk rather than `wc -l`: wc counts newline characters, so a
 # final line without a trailing newline would be missed, and BSD wc pads
 # its output with spaces.
 total_lines=$(awk 'END { print NR }' "$src")
 tail_end=$total_lines
 if [ -n "$last_test_line" ]; then
   tail_end=$(awk -v first="$last_test_line" '
     NR < first { next }
     !closed {
       depth += gsub(/\{/, "{") - gsub(/\}/, "}")
       if (depth == 0) { closed = 1; last = NR }
       next
     }
     /^[[:space:]]*$/ { last = NR; next }
     { exit }
     END { print (closed ? last : NR) }
   ' "$src")
 fi

 # Collect single-line `const X = ...;` declarations from the source head
 # so tests reach their dependencies by bare name. Skipped: `std` and
 # `testing` (the sibling declares its own), lines that open a body (end
 # in `{`), and pub decls (those are re-bound as `mod.X` separately).
 # Only lines that END the statement (`;`, optionally followed by a `//`
 # comment) are copied: the first line of a declaration that continues on
 # the next line would be mirrored as a fragment that does not compile.
 imports=$(awk -v end="$head_end" '
   NR > end { exit }
   /^const std = @import\("std"\)/ { next }
   /^const testing = std\.testing/ { next }
   /^const [a-zA-Z_][a-zA-Z0-9_]* = .*\{[[:space:]]*$/ { next }
   /^const [a-zA-Z_][a-zA-Z0-9_]* = .*;[[:space:]]*(\/\/.*)?$/ { print }
 ' "$src")

 # Names of the top-level pub decls (`pub const`, `pub var`, `pub fn`,
 # `pub inline fn`) declared above the test region, one per line, sorted
 # and de-duplicated.
 pub_names=$(awk -v end="$head_end" '
   # Cut a token such as "Foo:" or "bar(x:" back to the bare identifier.
   function ident(tok) { sub(/[:=(].*/, "", tok); return tok }
   NR > end { exit }
   /^pub (const|var) [a-zA-Z_][a-zA-Z0-9_]*[ :=]/ { print ident($3) }
   /^pub fn [a-zA-Z_][a-zA-Z0-9_]*\(/ { print ident($3) }
   /^pub inline fn [a-zA-Z_][a-zA-Z0-9_]*\(/ { print ident($4) }
 ' "$src" | sort -u)

 # Identifiers already declared by the mirrored import block. They are
 # skipped when re-binding so the sibling never declares a name twice
 # (`duplicate struct member name`).
 import_names=$(awk '
   /^(pub )?const [a-zA-Z_][a-zA-Z0-9_]*/ {
     name = ($1 == "pub") ? $3 : $2
     gsub(/:/, "", name)
     print name
   }
 ' <<< "$imports" | sort -u)

 # Write the rewritten source to "$src.new": the head (everything above
 # the test region), a `test {}` stub importing the sibling, then any
 # production code that lives AFTER the tests (rare; dialog.zig does this).
 post_tail_start=$((tail_end + 1))
 # Do not leave the scratch file behind if a later step fails.
 trap 'rm -f -- "$src.new"' EXIT
 {
   # With an empty head (test region starts on line 1) `sed -n "1,0p"`
   # would still print line 1 under GNU sed, so skip the head entirely.
   if [ "$head_end" -ge 1 ]; then
     sed -n "1,${head_end}p" "$src"
   fi
   printf '%s\n' \
     "" \
     "// ===========================================================================" \
     "// Tests — extracted to \`${base}_tests.zig\` for readability." \
     "// ===========================================================================" \
     "" \
     "test {" \
     "    _ = @import(\"${base}_tests.zig\");" \
     "}"
   # Emit a blank separator plus the remainder only when something follows
   # the test region. awk reads to EOF, so a final line without a trailing
   # newline is kept.
   awk -v from="$post_tail_start" '
     NR == from { print "" }
     NR >= from { print }
   ' "$src"
 } > "$src.new"

 # Write the sibling tests file, then swap both files into place.
 # The sibling is built under a scratch name first: were it written
 # straight to "$out" and a step failed midway, the leftover file would
 # make every later run report "sibling already exists" and skip.
 trap 'rm -f -- "$src.new" "$out.new"' EXIT
 {
   printf '%s\n' \
     "//! Tests for \`${src#src/}\`. Lifted into a sibling so the" \
     "//! source file stays a manageable size." \
     "" \
     'const std = @import("std");' \
     'const testing = std.testing;' \
     "const mod = @import(\"${base}.zig\");" \
     ""
   if [ -n "$imports" ]; then
     printf '%s\n' \
       "// Imports mirrored from the source file (tests reference" \
       "// these module-level names by bare identifier)." \
       "$imports" \
       ""
   fi
   if [ -n "$pub_names" ]; then
     echo "// Re-binds of pub decls so test bodies stay short."
     while IFS= read -r n; do
       [ -z "$n" ] && continue
       # Names the import block already declares must not be re-bound.
       # A here-string replaces `echo | grep -q`: grep -q may exit before
       # echo finishes writing, and under pipefail that SIGPIPE would be
       # read as "no match".
       if grep -qFx -- "$n" <<< "$import_names"; then continue; fi
       echo "const $n = mod.$n;"
     done <<< "$pub_names"
     echo ""
   fi
   # Copy the test region, dropping the two declarations the standard
   # header above already provides:
   # `const std = @import("std")` and `const testing = std.testing`.
   awk -v from="$header_start" -v to="$tail_end" '
     NR < from { next }
     NR > to { exit }
     /^const std = @import\("std"\)/ { next }
     /^const testing = std\.testing/ { next }
     { print }
   ' "$src"
 } > "$out.new"

 mv -- "$out.new" "$out"
 # If the source cannot be replaced, remove the sibling again so a later
 # run is not skipped with the source still unsplit.
 if ! mv -- "$src.new" "$src"; then
   rm -f -- "$out"
   exit 1
 fi
 echo "$src ($test_count tests) -> $out"
	#!/bin/bash
	# Find the test region in $1 and split into source + sibling _tests.zig.
	# Auto-copies module-level single-line `const` lines and generates
	# re-binds for pub decls so test bodies don't need rewriting.
	#
	# Usage: zig_extract_tests.sh path/to/file.zig

	set -euo pipefail

	# Report a usage line rather than a bare "unbound variable" error.
	src="${1:?usage: $0 path/to/file.zig}"
	dir=$(dirname -- "$src")
	base=$(basename -- "$src" .zig)
	out="$dir/${base}_tests.zig"

	# Never treat a sibling as an extraction target: a later run over
	# foo_tests.zig would otherwise split it again into foo_tests_tests.zig.
	case "$base" in
	  *_tests)
	    echo "skip $src (name ends in _tests; treated as a sibling)" >&2
	    exit 0
	    ;;
	esac

	# Skip if already extracted
	if [ -f "$out" ]; then
	  echo "skip $src (sibling already exists)" >&2
	  exit 0
	fi

	# Line number of the first `test "..."` block that starts at column 0.
	# The bare `test {` form is deliberately not matched: that is the
	# discovery stub this script writes, and matching it would make a re-run
	# on a previously-extracted file extract the stub and self-import.
	start=$(awk 'index($0, "test \"") == 1 { print NR; exit }' "$src")

	[[ -n "$start" ]] || {
	  echo "no tests in $src" >&2
	  exit 0
	}

	# Find where the test region begins.
	#
	# A header is a `//` comment line above the first test in which
	# `Tests`/`tests` is followed (after optional spaces) by end of line,
	# `—`, `-` or `:` — e.g. `// Tests` or `// Tests — parser`. The LAST
	# such header wins. Blank lines and `// ====` / `// ────` rule lines
	# directly above it are pulled in as well, so a three-line banner moves
	# as one unit. Everything from there onwards belongs to the test region
	# (including any test-only stub structs / consts between the header and
	# the first `test` block). With no header the region starts at the
	# first `test` line.
	# NOTE(review): one-line banners such as `// === Tests ===` do not match
	# the header pattern (`=` follows `Tests`) — confirm that is intended.
	header_start=$(awk -v end="$start" '
	  # Blank line, or a comment made only of = or ─ characters. The ─ is
	  # grouped so the repeat covers the whole multi-byte character even in
	  # awks that match byte by byte.
	  function is_filler(s) {
	    return s ~ /^[[:space:]]*$/ ||
	      s ~ /^[[:space:]]*\/\/[[:space:]]*=+[[:space:]]*$/ ||
	      s ~ /^[[:space:]]*\/\/[[:space:]]*(─)+[[:space:]]*$/
	  }
	  NR >= end { exit }
	  /^[[:space:]]*\/\/.*[Tt]ests[[:space:]]*(—|-|:|$)/ {
	    # run is the first line of the filler run that ends just above.
	    region = run ? run : NR
	    run = 0
	    next
	  }
	  {
	    if (!is_filler($0)) run = 0
	    else if (!run) run = NR
	  }
	  END { print (region ? region : end) }
	' "$src")

	head_end=$((header_start - 1))
	# Lines starting with `test ` from the first test onwards.
	test_count=$(awk -v from="$start" '
	  NR >= from && /^test / { n++ }
	  END { print n + 0 }
	' "$src")

	# Find the END of the test region. Tests in this codebase are usually
	# at EOF, but a few files (e.g. dialog.zig) put production code AFTER
	# the tests, so the region stops at the closing `}` of the LAST
	# `test "..."` block: braces are counted from that test's header line
	# until the depth returns to zero. Blank lines directly after the closing
	# brace are absorbed too, so they don't look like floating noise in the
	# source. If the braces never balance, the region runs to EOF.
	# Braces inside string literals and comments are counted as well.
	last_test_line=$(awk '/^test "/ { n = NR } END { print n }' "$src")
	# Count with awk rather than `wc -l`: wc counts newline characters, so a
	# final line without a trailing newline would be missed, and BSD wc pads
	# its output with spaces.
	total_lines=$(awk 'END { print NR }' "$src")
	tail_end=$total_lines
	if [ -n "$last_test_line" ]; then
	  tail_end=$(awk -v first="$last_test_line" '
	    NR < first { next }
	    !closed {
	      depth += gsub(/\{/, "{") - gsub(/\}/, "}")
	      if (depth == 0) { closed = 1; last = NR }
	      next
	    }
	    /^[[:space:]]*$/ { last = NR; next }
	    { exit }
	    END { print (closed ? last : NR) }
	  ' "$src")
	fi

	# Collect single-line `const X = ...;` declarations from the source head
	# so tests reach their dependencies by bare name. Skipped: `std` and
	# `testing` (the sibling declares its own), lines that open a body (end
	# in `{`), and pub decls (those are re-bound as `mod.X` separately).
	# Only lines that END the statement (`;`, optionally followed by a `//`
	# comment) are copied: the first line of a declaration that continues on
	# the next line would be mirrored as a fragment that does not compile.
	imports=$(awk -v end="$head_end" '
	  NR > end { exit }
	  /^const std = @import\("std"\)/ { next }
	  /^const testing = std\.testing/ { next }
	  /^const [a-zA-Z_][a-zA-Z0-9_]* = .*\{[[:space:]]*$/ { next }
	  /^const [a-zA-Z_][a-zA-Z0-9_]* = .*;[[:space:]]*(\/\/.*)?$/ { print }
	' "$src")

	# Names of the top-level pub decls (`pub const`, `pub var`, `pub fn`,
	# `pub inline fn`) declared above the test region, one per line, sorted
	# and de-duplicated.
	pub_names=$(awk -v end="$head_end" '
	  # Cut a token such as "Foo:" or "bar(x:" back to the bare identifier.
	  function ident(tok) { sub(/[:=(].*/, "", tok); return tok }
	  NR > end { exit }
	  /^pub (const|var) [a-zA-Z_][a-zA-Z0-9_]*[ :=]/ { print ident($3) }
	  /^pub fn [a-zA-Z_][a-zA-Z0-9_]*\(/ { print ident($3) }
	  /^pub inline fn [a-zA-Z_][a-zA-Z0-9_]*\(/ { print ident($4) }
	' "$src" | sort -u)

	# Identifiers already declared by the mirrored import block. They are
	# skipped when re-binding so the sibling never declares a name twice
	# (`duplicate struct member name`).
	import_names=$(awk '
	  /^(pub )?const [a-zA-Z_][a-zA-Z0-9_]*/ {
	    name = ($1 == "pub") ? $3 : $2
	    gsub(/:/, "", name)
	    print name
	  }
	' <<< "$imports" | sort -u)

	# Write the rewritten source to "$src.new": the head (everything above
	# the test region), a `test {}` stub importing the sibling, then any
	# production code that lives AFTER the tests (rare; dialog.zig does this).
	post_tail_start=$((tail_end + 1))
	# Do not leave the scratch file behind if a later step fails.
	trap 'rm -f -- "$src.new"' EXIT
	{
	  # With an empty head (test region starts on line 1) `sed -n "1,0p"`
	  # would still print line 1 under GNU sed, so skip the head entirely.
	  if [ "$head_end" -ge 1 ]; then
	    sed -n "1,${head_end}p" "$src"
	  fi
	  printf '%s\n' \
	    "" \
	    "// ===========================================================================" \
	    "// Tests — extracted to \`${base}_tests.zig\` for readability." \
	    "// ===========================================================================" \
	    "" \
	    "test {" \
	    "    _ = @import(\"${base}_tests.zig\");" \
	    "}"
	  # Emit a blank separator plus the remainder only when something follows
	  # the test region. awk reads to EOF, so a final line without a trailing
	  # newline is kept.
	  awk -v from="$post_tail_start" '
	    NR == from { print "" }
	    NR >= from { print }
	  ' "$src"
	} > "$src.new"

	# Write the sibling tests file, then swap both files into place.
	# The sibling is built under a scratch name first: were it written
	# straight to "$out" and a step failed midway, the leftover file would
	# make every later run report "sibling already exists" and skip.
	trap 'rm -f -- "$src.new" "$out.new"' EXIT
	{
	  printf '%s\n' \
	    "//! Tests for \`${src#src/}\`. Lifted into a sibling so the" \
	    "//! source file stays a manageable size." \
	    "" \
	    'const std = @import("std");' \
	    'const testing = std.testing;' \
	    "const mod = @import(\"${base}.zig\");" \
	    ""
	  if [ -n "$imports" ]; then
	    printf '%s\n' \
	      "// Imports mirrored from the source file (tests reference" \
	      "// these module-level names by bare identifier)." \
	      "$imports" \
	      ""
	  fi
	  if [ -n "$pub_names" ]; then
	    echo "// Re-binds of pub decls so test bodies stay short."
	    while IFS= read -r n; do
	      [ -z "$n" ] && continue
	      # Names the import block already declares must not be re-bound.
	      # A here-string replaces `echo | grep -q`: grep -q may exit before
	      # echo finishes writing, and under pipefail that SIGPIPE would be
	      # read as "no match".
	      if grep -qFx -- "$n" <<< "$import_names"; then continue; fi
	      echo "const $n = mod.$n;"
	    done <<< "$pub_names"
	    echo ""
	  fi
	  # Copy the test region, dropping the two declarations the standard
	  # header above already provides:
	  # `const std = @import("std")` and `const testing = std.testing`.
	  awk -v from="$header_start" -v to="$tail_end" '
	    NR < from { next }
	    NR > to { exit }
	    /^const std = @import\("std"\)/ { next }
	    /^const testing = std\.testing/ { next }
	    { print }
	  ' "$src"
	} > "$out.new"

	mv -- "$out.new" "$out"
	# If the source cannot be replaced, remove the sibling again so a later
	# run is not skipped with the source still unsplit.
	if ! mv -- "$src.new" "$src"; then
	  rm -f -- "$out"
	  exit 1
	fi
	echo "$src ($test_count tests) -> $out"
No results found