disinfeqt · May 9, 2026 07:46
diff --git a/list-ai-models.sh b/list-ai-models.sh
 #!/usr/bin/env bash
 # Lists AI/LLM models referenced in source code, grouped per project by default.
 #
 # Usage:
 #   ./list-ai-models.sh [path]            # auto: groups by project unless path
 #                                         #       looks like a single repo root
 #   ./list-ai-models.sh --flat [path]     # force a single deduped table
 #   ./list-ai-models.sh --projects [path] # force per-project grouping
 #   ./list-ai-models.sh --detail [path]   # add per-line file:line dump
 # Defaults path to the current working directory.
 #
 # Requires ripgrep (`brew install ripgrep`).

 set -euo pipefail

 MODE="auto"
 DETAIL=0
 ROOT=""

 # parse_args [--projects|--flat|--detail]... [--] [path]
 #
 # Consumes leading option flags and records the result in globals:
 #   MODE    "projects" or "flat" when forced by a flag, otherwise unchanged
 #   DETAIL  set to 1 by --detail
 #   ROOT    the first non-option argument; defaults to the current directory
 # -h/--help prints lines 2-12 of this script (the header comment) and exits 0.
 # An unrecognized option is reported on stderr and exits 2 instead of being
 # mistaken for the path; use `--` before a path that starts with a dash.
 parse_args() {
   while [[ $# -gt 0 ]]; do
     case "$1" in
       --projects) MODE="projects"; shift ;;
       --flat)     MODE="flat"; shift ;;
       --detail)   DETAIL=1; shift ;;
       -h|--help)
         # -E with a plain `?` works in both GNU and BSD sed; the BRE escape
         # `\?` is a GNU extension and leaves the "# " prefix in place elsewhere.
         sed -n '2,12p' "$0" | sed -E 's/^# ?//'
         exit 0
         ;;
       --) shift; break ;;
       -?*)
         printf 'Unknown option: %s\n' "$1" >&2
         exit 2
         ;;
       *) break ;;
     esac
   done

   ROOT="${1:-.}"
   # Drop one trailing slash, but keep "/" itself from collapsing to "".
   if [[ "$ROOT" != "/" ]]; then
     ROOT="${ROOT%/}"
   fi
   if [[ "$ROOT" == "." ]]; then
     ROOT="$PWD"
   fi
 }

 parse_args "$@"

 # Auto mode: a project marker directly under ROOT (a manifest file or a .git
 # directory) means ROOT is itself one repository, so a single flat table is
 # used. With no marker, ROOT is taken to be a parent directory and matches
 # are grouped by top-level subdirectory.
 if [[ "$MODE" == "auto" ]]; then
   if [[ ! -d "$ROOT/.git" && ! -f "$ROOT/package.json" && ! -f "$ROOT/Cargo.toml" \
         && ! -f "$ROOT/pyproject.toml" && ! -f "$ROOT/go.mod" ]]; then
     MODE="projects"
   else
     MODE="flat"
   fi
 fi

 # Stop early when ripgrep is not on PATH; the search itself shells out to rg.
 if ! command -v rg >/dev/null; then
   echo "ripgrep (rg) is required" >&2
   exit 1
 fi

 # Load ignore list (project names, one per line) from list-ai-models.ignore
 # next to this script. Comments (# ...) and blank lines are skipped.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 IGNORE_FILE="$SCRIPT_DIR/list-ai-models.ignore"

 # load_ignore_list FILE
 #
 # Prints the project names found in FILE joined by commas on a single line.
 # Text from "#" to end of line is dropped, surrounding whitespace (and the CR
 # of a CRLF line ending) is trimmed, and empty lines are skipped. Whitespace
 # inside a name is preserved so directory names containing spaces still match.
 # Names containing a comma cannot be represented in the comma-joined result.
 load_ignore_list() {
   awk '
     {
       sub(/#.*/, "")
       gsub(/^[ \t\r]+|[ \t\r]+$/, "")
     }
     $0 != "" { print }
   ' "$1" | paste -sd, -
 }

 IGNORE_LIST=""
 if [[ -f "$IGNORE_FILE" ]]; then
   IGNORE_LIST=$(load_ignore_list "$IGNORE_FILE")
 fi

 # Each model family must start with a recognized prefix AND require a version-ish
 # suffix (digit, or known qualifier like -mini/-pro). Bare brand words like "qwen"
 # or random IDs like "claude-desktop" / "o3-JIicAM..." should NOT match.
 #
 # ID_TAIL is the optional remainder of an id: any run of letters, digits, "."
 # and "-" that ENDS in a letter or digit. Requiring an alphanumeric last
 # character keeps sentence punctuation ("... uses gpt-4o.") and dangling
 # separators ("claude-3-5-sonnet-${DATE}") out of the reported model name, so
 # such mentions are counted together with the plain id.
 ID_TAIL='([a-zA-Z0-9.-]*[a-zA-Z0-9])?'

 MODELS="claude-(opus|sonnet|haiku|instant|[0-9])${ID_TAIL}"
 MODELS+="|gpt-[0-9]${ID_TAIL}|gpt-image${ID_TAIL}"
 MODELS+="|sora(-[0-9a-z]${ID_TAIL})?"
 MODELS+="|dall-e${ID_TAIL}"
 MODELS+="|o[134]-(mini|preview|high|low|medium|reasoning|pro)${ID_TAIL}"
 MODELS+="|whisper-[0-9]${ID_TAIL}"
 MODELS+="|gemini-[0-9]${ID_TAIL}|gemma-?[0-9]${ID_TAIL}"
 MODELS+="|nano-banana${ID_TAIL}|imagen-?[0-9]${ID_TAIL}|veo-?[0-9]${ID_TAIL}"
 MODELS+="|llama-?[0-9]${ID_TAIL}|codellama-?[0-9a-z]${ID_TAIL}"
 MODELS+="|grok-(image|vision|[0-9])${ID_TAIL}"
 MODELS+="|deepseek-(r1|v[0-9]|chat|coder|reasoner)${ID_TAIL}"
 MODELS+="|kimi-[a-z0-9]${ID_TAIL}|moonshot-v[0-9]${ID_TAIL}"
 MODELS+="|mistral-[a-z0-9]${ID_TAIL}|mixtral-[0-9]${ID_TAIL}|pixtral-[a-z0-9]${ID_TAIL}"
 MODELS+="|qwen-?[0-9]${ID_TAIL}|qwq-[0-9a-z]${ID_TAIL}"

 PROVIDERS='anthropic|openai|google|meta-llama|x-ai|deepseek|moonshot|mistral|qwen'

 # Match either a provider-prefixed model id or a bare model id. The bare form
 # must begin at a word boundary.
 PATTERN="($PROVIDERS)/($MODELS)|\\b($MODELS)"

 # Fixed ripgrep flags: one "<path>:<line>:<match>" record per match, no
 # colour, hidden files included.
 RG_ARGS=(
   --no-heading
   --line-number
   --only-matching
   --color=never
   --hidden
   # Allow-list of file types goes FIRST. When several globs match the same
   # path ripgrep lets the later one win, so the exclusions below must follow
   # it; otherwise "*.json" / "*.yaml" would re-include the lock files.
   -g '*.{ts,tsx,js,jsx,mjs,cjs,py,go,rs,json,toml,yaml,yml,md,env,env.*}'
   -g '!node_modules' -g '!.next' -g '!.turbo' -g '!dist' -g '!build'
   -g '!.git' -g '!_generated' -g '!.venv' -g '!venv' -g '!__pycache__'
   -g '!pnpm-lock.yaml' -g '!package-lock.json' -g '!yarn.lock' -g '!bun.lockb'
 )

 # run ROOT MODE DETAIL
 #
 # Searches ROOT with ripgrep and renders the matches as box-drawn tables.
 #   ROOT    directory to scan; the function changes into it so that rg
 #           reports paths relative to it
 #   MODE    "projects" groups matches by their first path component and
 #           prints a summary table first; any other value gives one table
 #   DETAIL  "1" appends a table listing every file:line reference per model
 # Reads the globals RG_ARGS, PATTERN and IGNORE_LIST.
 # Returns the status of the awk stage: 0 when tables were printed, 1 when no
 # match survived filtering. Exits the script with status 2 when ROOT cannot
 # be entered.
 #
 # rg outputs:  <relpath>:<line>:<match>
 run() {
   local root="$1" mode="$2" detail="$3"
   local -a stage_rc

   # run is invoked as a condition, where `set -e` is not in effect, so a
   # failed cd has to be handled here; otherwise rg would silently scan
   # whatever directory the script was started from.
   cd -- "$root" || exit 2

   rg "${RG_ARGS[@]}" -e "$PATTERN" 2>/dev/null | awk -v mode="$mode" -v detail="$detail" -v ignore="$IGNORE_LIST" '
     # ---- layout helpers --------------------------------------------------
     # pad: right-pad s with spaces up to width w (never shortens s).
     function pad(s, w,    n) { n=w-length(s); if (n<1) return s; return s rule(n, " ") }
     # trunc: cut s to w characters, marking the cut with an ellipsis.
     function trunc(s, w)     { if (length(s) <= w) return s; return substr(s,1,w-1) "…" }
     # rule: ch repeated w times.
     function rule(w, ch,   r,i) { for (i=0;i<w;i++) r=r ch; return r }
     # Three-column box: borders and one content row.
     function box_top(w1,w2,w3)    { return "┌" rule(w1+2,"─") "┬" rule(w2+2,"─") "┬" rule(w3+2,"─") "┐" }
     function box_mid(w1,w2,w3)    { return "├" rule(w1+2,"─") "┼" rule(w2+2,"─") "┼" rule(w3+2,"─") "┤" }
     function box_bot(w1,w2,w3)    { return "└" rule(w1+2,"─") "┴" rule(w2+2,"─") "┴" rule(w3+2,"─") "┘" }
     function row(c1,c2,c3,w1,w2,w3) {
       return "│ " pad(trunc(c1,w1),w1) " │ " pad(trunc(c2,w2),w2) " │ " pad(trunc(c3,w3),w3) " │"
     }
     # Two-column box used by the detail listing.
     function box2_top(w1,w2)      { return "┌" rule(w1+2,"─") "┬" rule(w2+2,"─") "┐" }
     function box2_mid(w1,w2)      { return "├" rule(w1+2,"─") "┼" rule(w2+2,"─") "┤" }
     function box2_bot(w1,w2)      { return "└" rule(w1+2,"─") "┴" rule(w2+2,"─") "┘" }
     function row2(c1,c2,w1,w2)    { return "│ " pad(trunc(c1,w1),w1) " │ " pad(trunc(c2,w2),w2) " │" }

     BEGIN {
       FS=":"
       # Turn the comma-joined ignore list into a lookup set.
       n_ig = split(ignore, ig_arr, ",")
       for (ii=1; ii<=n_ig; ii++) if (ig_arr[ii] != "") ignored[ig_arr[ii]] = 1
     }

     # ---- one record per match ----------------------------------------------
     {
       file=$1; line=$2; m=""
       # Re-join fields 3..NF so a match containing ":" stays intact.
       for (i=3;i<=NF;i++) m = m (i>3?":":"") $i

       if (mode=="projects") {
         slash=index(file,"/")
         if (slash==0) next              # file sits directly in ROOT: no project
         proj=substr(file,1,slash-1); rel=substr(file,slash+1)
         if (proj=="scripts") next       # top-level "scripts" dir is always skipped
         if (proj in ignored) next
       } else { proj="."; rel=file }

       # Remember projects, and models within a project, in first-seen order.
       if (!(proj in seen_proj)) { seen_proj[proj]=1; pord[++np]=proj }
       pmkey = proj SUBSEP m
       if (!(pmkey in seen_pm)) {
         seen_pm[pmkey]=1
         proj_unique[proj]++
         pmord[proj] = pmord[proj] (pmord[proj]?SUBSEP:"") m
         pm_first[pmkey] = rel ":" line
       }
       pm_count[pmkey]++
       proj_total[proj]++
       lines[pmkey] = lines[pmkey] (lines[pmkey]?", ":"") rel ":" line
     }

     END {
       # Non-zero exit tells the shell side that there is nothing to show.
       if (np==0) exit 1

       if (mode=="projects") {
         # ── Summary ──
         w1=14; for (i=1;i<=np;i++) if (length(pord[i])>w1) w1=length(pord[i])
         w2=6; w3=8
         print box_top(w1,w2,w3)
         print row("Project","Models","Mentions",w1,w2,w3)
         print box_mid(w1,w2,w3)
         for (i=1;i<=np;i++) {
           p=pord[i]
           print row(p, sprintf("%d",proj_unique[p]), sprintf("%d",proj_total[p]), w1,w2,w3)
         }
         print box_bot(w1,w2,w3)
         print ""
       }

       # ── Per-project model breakdown ──
       for (i=1;i<=np;i++) {
         p=pord[i]
         n_models = split(pmord[p], models_arr, SUBSEP)

         # Column widths: model column grows with the longest id, capped at 50.
         mw=14; for (j=1;j<=n_models;j++) if (length(models_arr[j])>mw) mw=length(models_arr[j])
         if (mw>50) mw=50
         cw=4
         fw=44

         if (mode=="projects") print "── " p " " rule(78-length(p)-4, "─")
         print box_top(mw,cw,fw)
         print row("Model","#","First seen at",mw,cw,fw)
         print box_mid(mw,cw,fw)
         for (j=1;j<=n_models;j++) {
           m=models_arr[j]
           k=p SUBSEP m
           print row(m, sprintf("%d", pm_count[k]), pm_first[k], mw,cw,fw)
         }
         print box_bot(mw,cw,fw)

         if (detail=="1") {
           print ""
           # Detail box: per model -> all file:line refs
           dmw=mw; dfw=70
           print box2_top(dmw, dfw)
           print row2("Model","All references", dmw, dfw)
           print box2_mid(dmw, dfw)
           for (j=1;j<=n_models;j++) {
             m=models_arr[j]
             k=p SUBSEP m
             # Wrap the reference list over several rows of at most dfw chars.
             refs = lines[k]
             first=1
             while (length(refs) > 0) {
               chunk = substr(refs, 1, dfw)
               if (length(refs) > dfw) {
                 # Break at the last comma inside the window; with no comma
                 # available, hard-split at the column limit.
                 pos = 0
                 for (q=length(chunk); q>0; q--) {
                   if (substr(chunk,q,1)==",") { pos=q; break }
                 }
                 if (pos>0) chunk = substr(refs, 1, pos-1)
                 # pos+2 skips the ", " separator itself.
                 refs = (pos>0) ? substr(refs, pos+2) : substr(refs, dfw+1)
               } else {
                 refs = ""
               }
               # The model name is shown on the first row of its group only.
               print row2(first?m:"", chunk, dmw, dfw)
               first=0
             }
           }
           print box2_bot(dmw, dfw)
         }
         if (i<np) print ""
       }
     }
   '
   stage_rc=("${PIPESTATUS[@]}")

   # ripgrep exits with 2 on errors even when it also produced matches. That
   # must not turn an already printed report into a failure, so only warn and
   # let awk decide the return status.
   if (( stage_rc[0] > 1 )); then
     printf 'warning: ripgrep reported errors (exit %s); results may be incomplete\n' \
       "${stage_rc[0]}" >&2
   fi
   return "${stage_rc[1]}"
 }

 # Run the scan and interpret its status. Status 1 is what the awk stage
 # returns when there was nothing to report. With pipefail enabled, any other
 # non-zero status can come from the search stage even though tables were
 # already printed, so it must not be reported as "nothing found".
 rc=0
 run "$ROOT" "$MODE" "$DETAIL" || rc=$?
 case "$rc" in
   0) ;;
   1) echo "No AI/LLM model references found under $ROOT" ;;
   *) echo "Search under $ROOT finished with errors (status $rc); results may be incomplete" >&2 ;;
 esac
	#!/usr/bin/env bash
	# Lists AI/LLM models referenced in source code, grouped per project by default.
	#
	# Usage:
	# ./list-ai-models.sh [path] # auto: groups by project unless path
	# # looks like a single repo root
	# ./list-ai-models.sh --flat [path] # force a single deduped table
	# ./list-ai-models.sh --projects [path] # force per-project grouping
	# ./list-ai-models.sh --detail [path] # add per-line file:line dump
	# Defaults path to the current working directory.
	#
	# Requires ripgrep (`brew install ripgrep`).

	set -euo pipefail

	MODE="auto"
	DETAIL=0
	ROOT=""

	# parse_args [--projects|--flat|--detail]... [--] [path]
	#
	# Consumes leading option flags and records the result in globals:
	#   MODE    "projects" or "flat" when forced by a flag, otherwise unchanged
	#   DETAIL  set to 1 by --detail
	#   ROOT    the first non-option argument; defaults to the current directory
	# -h/--help prints lines 2-12 of this script (the header comment) and exits 0.
	# An unrecognized option is reported on stderr and exits 2 instead of being
	# mistaken for the path; use `--` before a path that starts with a dash.
	parse_args() {
	  while [[ $# -gt 0 ]]; do
	    case "$1" in
	      --projects) MODE="projects"; shift ;;
	      --flat)     MODE="flat"; shift ;;
	      --detail)   DETAIL=1; shift ;;
	      -h|--help)
	        # -E with a plain `?` works in both GNU and BSD sed; the BRE escape
	        # `\?` is a GNU extension and leaves the "# " prefix in place elsewhere.
	        sed -n '2,12p' "$0" | sed -E 's/^# ?//'
	        exit 0
	        ;;
	      --) shift; break ;;
	      -?*)
	        printf 'Unknown option: %s\n' "$1" >&2
	        exit 2
	        ;;
	      *) break ;;
	    esac
	  done

	  ROOT="${1:-.}"
	  # Drop one trailing slash, but keep "/" itself from collapsing to "".
	  if [[ "$ROOT" != "/" ]]; then
	    ROOT="${ROOT%/}"
	  fi
	  if [[ "$ROOT" == "." ]]; then
	    ROOT="$PWD"
	  fi
	}

	parse_args "$@"

	# Auto: if ROOT looks like a single repo (has a project marker file or a
	# .git directory), use flat. Otherwise treat it as a parent dir and group
	# by top-level subdir.
	if [[ "$MODE" == "auto" ]]; then
	  if [[ -f "$ROOT/package.json" || -f "$ROOT/Cargo.toml" || -f "$ROOT/pyproject.toml" \
	        || -f "$ROOT/go.mod" || -d "$ROOT/.git" ]]; then
	    MODE="flat"
	  else
	    MODE="projects"
	  fi
	fi

	# Stop early when ripgrep is not on PATH; the search itself shells out to rg.
	if ! command -v rg >/dev/null; then
	  echo "ripgrep (rg) is required" >&2
	  exit 1
	fi

	# Load ignore list (project names, one per line) from list-ai-models.ignore
	# next to this script. Comments (# ...) and blank lines are skipped.
	SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
	IGNORE_FILE="$SCRIPT_DIR/list-ai-models.ignore"

	# load_ignore_list FILE
	#
	# Prints the project names found in FILE joined by commas on a single line.
	# Text from "#" to end of line is dropped, surrounding whitespace (and the CR
	# of a CRLF line ending) is trimmed, and empty lines are skipped. Whitespace
	# inside a name is preserved so directory names containing spaces still match.
	# Names containing a comma cannot be represented in the comma-joined result.
	load_ignore_list() {
	  awk '
	    {
	      sub(/#.*/, "")
	      gsub(/^[ \t\r]+|[ \t\r]+$/, "")
	    }
	    $0 != "" { print }
	  ' "$1" | paste -sd, -
	}

	IGNORE_LIST=""
	if [[ -f "$IGNORE_FILE" ]]; then
	  IGNORE_LIST=$(load_ignore_list "$IGNORE_FILE")
	fi

	# Each model family must start with a recognized prefix AND require a version-ish
	# suffix (digit, or known qualifier like -mini/-pro). Bare brand words like "qwen"
	# or random IDs like "claude-desktop" / "o3-JIicAM..." should NOT match.
	#
	# ID_TAIL is the optional remainder of an id: any run of letters, digits, "."
	# and "-" that ENDS in a letter or digit. Requiring an alphanumeric last
	# character keeps sentence punctuation ("... uses gpt-4o.") and dangling
	# separators ("claude-3-5-sonnet-${DATE}") out of the reported model name, so
	# such mentions are counted together with the plain id.
	ID_TAIL='([a-zA-Z0-9.-]*[a-zA-Z0-9])?'

	MODELS="claude-(opus|sonnet|haiku|instant|[0-9])${ID_TAIL}"
	MODELS+="|gpt-[0-9]${ID_TAIL}|gpt-image${ID_TAIL}"
	MODELS+="|sora(-[0-9a-z]${ID_TAIL})?"
	MODELS+="|dall-e${ID_TAIL}"
	MODELS+="|o[134]-(mini|preview|high|low|medium|reasoning|pro)${ID_TAIL}"
	MODELS+="|whisper-[0-9]${ID_TAIL}"
	MODELS+="|gemini-[0-9]${ID_TAIL}|gemma-?[0-9]${ID_TAIL}"
	MODELS+="|nano-banana${ID_TAIL}|imagen-?[0-9]${ID_TAIL}|veo-?[0-9]${ID_TAIL}"
	MODELS+="|llama-?[0-9]${ID_TAIL}|codellama-?[0-9a-z]${ID_TAIL}"
	MODELS+="|grok-(image|vision|[0-9])${ID_TAIL}"
	MODELS+="|deepseek-(r1|v[0-9]|chat|coder|reasoner)${ID_TAIL}"
	MODELS+="|kimi-[a-z0-9]${ID_TAIL}|moonshot-v[0-9]${ID_TAIL}"
	MODELS+="|mistral-[a-z0-9]${ID_TAIL}|mixtral-[0-9]${ID_TAIL}|pixtral-[a-z0-9]${ID_TAIL}"
	MODELS+="|qwen-?[0-9]${ID_TAIL}|qwq-[0-9a-z]${ID_TAIL}"

	PROVIDERS='anthropic|openai|google|meta-llama|x-ai|deepseek|moonshot|mistral|qwen'

	# Match either a provider-prefixed model id or a bare model id. The bare form
	# must begin at a word boundary.
	PATTERN="($PROVIDERS)/($MODELS)|\\b($MODELS)"

	# Fixed ripgrep flags: one "<path>:<line>:<match>" record per match, no
	# colour, hidden files included.
	RG_ARGS=(
	  --no-heading
	  --line-number
	  --only-matching
	  --color=never
	  --hidden
	  # Allow-list of file types goes FIRST. When several globs match the same
	  # path ripgrep lets the later one win, so the exclusions below must follow
	  # it; otherwise "*.json" / "*.yaml" would re-include the lock files.
	  -g '*.{ts,tsx,js,jsx,mjs,cjs,py,go,rs,json,toml,yaml,yml,md,env,env.*}'
	  -g '!node_modules' -g '!.next' -g '!.turbo' -g '!dist' -g '!build'
	  -g '!.git' -g '!_generated' -g '!.venv' -g '!venv' -g '!__pycache__'
	  -g '!pnpm-lock.yaml' -g '!package-lock.json' -g '!yarn.lock' -g '!bun.lockb'
	)

	# run ROOT MODE DETAIL
	#
	# Searches ROOT with ripgrep and renders the matches as box-drawn tables.
	#   ROOT    directory to scan; the function changes into it so that rg
	#           reports paths relative to it
	#   MODE    "projects" groups matches by their first path component and
	#           prints a summary table first; any other value gives one table
	#   DETAIL  "1" appends a table listing every file:line reference per model
	# Reads the globals RG_ARGS, PATTERN and IGNORE_LIST.
	# Returns the status of the awk stage: 0 when tables were printed, 1 when no
	# match survived filtering. Exits the script with status 2 when ROOT cannot
	# be entered.
	#
	# rg outputs: <relpath>:<line>:<match>
	run() {
	  local root="$1" mode="$2" detail="$3"
	  local -a stage_rc

	  # run is invoked as a condition, where `set -e` is not in effect, so a
	  # failed cd has to be handled here; otherwise rg would silently scan
	  # whatever directory the script was started from.
	  cd -- "$root" || exit 2

	  rg "${RG_ARGS[@]}" -e "$PATTERN" 2>/dev/null | awk -v mode="$mode" -v detail="$detail" -v ignore="$IGNORE_LIST" '
	    # ---- layout helpers --------------------------------------------------
	    # pad: right-pad s with spaces up to width w (never shortens s).
	    function pad(s, w,    n) { n=w-length(s); if (n<1) return s; return s rule(n, " ") }
	    # trunc: cut s to w characters, marking the cut with an ellipsis.
	    function trunc(s, w)     { if (length(s) <= w) return s; return substr(s,1,w-1) "…" }
	    # rule: ch repeated w times.
	    function rule(w, ch,   r,i) { for (i=0;i<w;i++) r=r ch; return r }
	    # Three-column box: borders and one content row.
	    function box_top(w1,w2,w3)    { return "┌" rule(w1+2,"─") "┬" rule(w2+2,"─") "┬" rule(w3+2,"─") "┐" }
	    function box_mid(w1,w2,w3)    { return "├" rule(w1+2,"─") "┼" rule(w2+2,"─") "┼" rule(w3+2,"─") "┤" }
	    function box_bot(w1,w2,w3)    { return "└" rule(w1+2,"─") "┴" rule(w2+2,"─") "┴" rule(w3+2,"─") "┘" }
	    function row(c1,c2,c3,w1,w2,w3) {
	      return "│ " pad(trunc(c1,w1),w1) " │ " pad(trunc(c2,w2),w2) " │ " pad(trunc(c3,w3),w3) " │"
	    }
	    # Two-column box used by the detail listing.
	    function box2_top(w1,w2)      { return "┌" rule(w1+2,"─") "┬" rule(w2+2,"─") "┐" }
	    function box2_mid(w1,w2)      { return "├" rule(w1+2,"─") "┼" rule(w2+2,"─") "┤" }
	    function box2_bot(w1,w2)      { return "└" rule(w1+2,"─") "┴" rule(w2+2,"─") "┘" }
	    function row2(c1,c2,w1,w2)    { return "│ " pad(trunc(c1,w1),w1) " │ " pad(trunc(c2,w2),w2) " │" }

	    BEGIN {
	      FS=":"
	      # Turn the comma-joined ignore list into a lookup set.
	      n_ig = split(ignore, ig_arr, ",")
	      for (ii=1; ii<=n_ig; ii++) if (ig_arr[ii] != "") ignored[ig_arr[ii]] = 1
	    }

	    # ---- one record per match ----------------------------------------------
	    {
	      file=$1; line=$2; m=""
	      # Re-join fields 3..NF so a match containing ":" stays intact.
	      for (i=3;i<=NF;i++) m = m (i>3?":":"") $i

	      if (mode=="projects") {
	        slash=index(file,"/")
	        if (slash==0) next              # file sits directly in ROOT: no project
	        proj=substr(file,1,slash-1); rel=substr(file,slash+1)
	        if (proj=="scripts") next       # top-level "scripts" dir is always skipped
	        if (proj in ignored) next
	      } else { proj="."; rel=file }

	      # Remember projects, and models within a project, in first-seen order.
	      if (!(proj in seen_proj)) { seen_proj[proj]=1; pord[++np]=proj }
	      pmkey = proj SUBSEP m
	      if (!(pmkey in seen_pm)) {
	        seen_pm[pmkey]=1
	        proj_unique[proj]++
	        pmord[proj] = pmord[proj] (pmord[proj]?SUBSEP:"") m
	        pm_first[pmkey] = rel ":" line
	      }
	      pm_count[pmkey]++
	      proj_total[proj]++
	      lines[pmkey] = lines[pmkey] (lines[pmkey]?", ":"") rel ":" line
	    }

	    END {
	      # Non-zero exit tells the shell side that there is nothing to show.
	      if (np==0) exit 1

	      if (mode=="projects") {
	        # ── Summary ──
	        w1=14; for (i=1;i<=np;i++) if (length(pord[i])>w1) w1=length(pord[i])
	        w2=6; w3=8
	        print box_top(w1,w2,w3)
	        print row("Project","Models","Mentions",w1,w2,w3)
	        print box_mid(w1,w2,w3)
	        for (i=1;i<=np;i++) {
	          p=pord[i]
	          print row(p, sprintf("%d",proj_unique[p]), sprintf("%d",proj_total[p]), w1,w2,w3)
	        }
	        print box_bot(w1,w2,w3)
	        print ""
	      }

	      # ── Per-project model breakdown ──
	      for (i=1;i<=np;i++) {
	        p=pord[i]
	        n_models = split(pmord[p], models_arr, SUBSEP)

	        # Column widths: model column grows with the longest id, capped at 50.
	        mw=14; for (j=1;j<=n_models;j++) if (length(models_arr[j])>mw) mw=length(models_arr[j])
	        if (mw>50) mw=50
	        cw=4
	        fw=44

	        if (mode=="projects") print "── " p " " rule(78-length(p)-4, "─")
	        print box_top(mw,cw,fw)
	        print row("Model","#","First seen at",mw,cw,fw)
	        print box_mid(mw,cw,fw)
	        for (j=1;j<=n_models;j++) {
	          m=models_arr[j]
	          k=p SUBSEP m
	          print row(m, sprintf("%d", pm_count[k]), pm_first[k], mw,cw,fw)
	        }
	        print box_bot(mw,cw,fw)

	        if (detail=="1") {
	          print ""
	          # Detail box: per model -> all file:line refs
	          dmw=mw; dfw=70
	          print box2_top(dmw, dfw)
	          print row2("Model","All references", dmw, dfw)
	          print box2_mid(dmw, dfw)
	          for (j=1;j<=n_models;j++) {
	            m=models_arr[j]
	            k=p SUBSEP m
	            # Wrap the reference list over several rows of at most dfw chars.
	            refs = lines[k]
	            first=1
	            while (length(refs) > 0) {
	              chunk = substr(refs, 1, dfw)
	              if (length(refs) > dfw) {
	                # Break at the last comma inside the window; with no comma
	                # available, hard-split at the column limit.
	                pos = 0
	                for (q=length(chunk); q>0; q--) {
	                  if (substr(chunk,q,1)==",") { pos=q; break }
	                }
	                if (pos>0) chunk = substr(refs, 1, pos-1)
	                # pos+2 skips the ", " separator itself.
	                refs = (pos>0) ? substr(refs, pos+2) : substr(refs, dfw+1)
	              } else {
	                refs = ""
	              }
	              # The model name is shown on the first row of its group only.
	              print row2(first?m:"", chunk, dmw, dfw)
	              first=0
	            }
	          }
	          print box2_bot(dmw, dfw)
	        }
	        if (i<np) print ""
	      }
	    }
	  '
	  stage_rc=("${PIPESTATUS[@]}")

	  # ripgrep exits with 2 on errors even when it also produced matches. That
	  # must not turn an already printed report into a failure, so only warn and
	  # let awk decide the return status.
	  if (( stage_rc[0] > 1 )); then
	    printf 'warning: ripgrep reported errors (exit %s); results may be incomplete\n' \
	      "${stage_rc[0]}" >&2
	  fi
	  return "${stage_rc[1]}"
	}

	# Run the scan and interpret its status. Status 1 is what the awk stage
	# returns when there was nothing to report. With pipefail enabled, any other
	# non-zero status can come from the search stage even though tables were
	# already printed, so it must not be reported as "nothing found".
	rc=0
	run "$ROOT" "$MODE" "$DETAIL" || rc=$?
	case "$rc" in
	  0) ;;
	  1) echo "No AI/LLM model references found under $ROOT" ;;
	  *) echo "Search under $ROOT finished with errors (status $rc); results may be incomplete" >&2 ;;
	esac
No results found