Skip to content

Instantly share code, notes, and snippets.

@disinfeqt
Created May 9, 2026 07:46
Show Gist options
  • Select an option

  • Save disinfeqt/82e518eef15fcf80842d669e8fe3f91c to your computer and use it in GitHub Desktop.

Select an option

Save disinfeqt/82e518eef15fcf80842d669e8fe3f91c to your computer and use it in GitHub Desktop.
Lists AI/LLM models referenced in source code
#!/usr/bin/env bash
# Lists AI/LLM models referenced in source code, grouped per project by default.
#
# Usage:
# ./list-ai-models.sh [path] # auto: groups by project unless path
# # looks like a single repo root
# ./list-ai-models.sh --flat [path] # force a single deduped table
# ./list-ai-models.sh --projects [path] # force per-project grouping
# ./list-ai-models.sh --detail [path] # add per-line file:line dump
# Defaults path to the current working directory.
#
# Requires ripgrep (`brew install ripgrep`).
set -euo pipefail

# Option parsing. Flags may appear in any order before the optional path.
#   MODE   - "auto" (default), "projects" or "flat"
#   DETAIL - 1 when --detail was given, otherwise 0
# After the loop, "$1" (if present) is the path to scan.
MODE="auto"
DETAIL=0
while [[ $# -gt 0 ]]; do
  case "$1" in
    --projects)
      MODE="projects"
      shift
      ;;
    --flat)
      MODE="flat"
      shift
      ;;
    --detail)
      DETAIL=1
      shift
      ;;
    -h | --help)
      # Print the leading comment block: every line after the shebang up to
      # the first non-comment line, with the "# " prefix removed. Using awk
      # avoids the non-POSIX `\?` BRE operator and a hard-coded line range.
      awk 'NR == 1 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' "$0"
      exit 0
      ;;
    --)
      # Explicit end of options: whatever follows is the path.
      shift
      break
      ;;
    -*)
      # Reject typos instead of silently treating them as the path.
      printf 'unknown option: %s (try --help)\n' "$1" >&2
      exit 2
      ;;
    *)
      break
      ;;
  esac
done
# Directory to scan: first remaining argument, defaulting to ".".
ROOT="${1:-.}"
# Drop trailing slashes, but never reduce the filesystem root "/" to an empty
# string (a plain ${ROOT%/} would).
while [[ "$ROOT" == */ && "$ROOT" != "/" ]]; do
  ROOT="${ROOT%/}"
done
# Show an absolute path instead of "." in messages.
if [[ "$ROOT" == "." ]]; then
  ROOT="$PWD"
fi
# Auto: if ROOT looks like a single repo (has a project marker file), use flat.
# Otherwise treat it as a parent dir and group by top-level subdir.
# `.git` is tested with -e rather than -d because it is a regular file, not a
# directory, inside git worktrees and submodules.
if [[ "$MODE" == "auto" ]]; then
  if [[ -f "$ROOT/package.json" || -f "$ROOT/Cargo.toml" ||
    -f "$ROOT/pyproject.toml" || -f "$ROOT/go.mod" ||
    -e "$ROOT/.git" ]]; then
    MODE="flat"
  else
    MODE="projects"
  fi
fi
# Bail out early when ripgrep is not on PATH; everything below depends on it.
if ! command -v rg >/dev/null; then
  echo "ripgrep (rg) is required" >&2
  exit 1
fi
# Load ignore list (project names, one per line) from list-ai-models.ignore
# next to this script. Comments (# ...) and blank lines are skipped.
# Only leading/trailing whitespace is trimmed, so names containing spaces
# survive intact, and a trailing carriage return (CRLF files) is dropped.
# The names are joined with commas into IGNORE_LIST.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
IGNORE_FILE="$SCRIPT_DIR/list-ai-models.ignore"
IGNORE_LIST=""
if [[ -f "$IGNORE_FILE" ]]; then
  IGNORE_LIST=$(awk '
    {
      sub(/\r$/, "")
      sub(/#.*/, "")
      gsub(/^[ \t]+|[ \t]+$/, "")
    }
    $0 != "" { out = (out == "" ? "" : out ",") $0 }
    END { print out }
  ' "$IGNORE_FILE")
fi
# Model-id alternatives, one regex per entry. Every entry begins with a known
# family prefix and then demands something version-like (a digit or a known
# qualifier such as -mini/-pro), so bare brand words like "qwen" or unrelated
# ids like "claude-desktop" / "o3-JIicAM..." are not meant to match.
model_families=(
  'claude-(opus|sonnet|haiku|instant|[0-9])[a-zA-Z0-9.-]*'
  'gpt-[0-9][a-zA-Z0-9.-]*'
  'gpt-image[a-zA-Z0-9.-]*'
  'sora(-[0-9a-z][a-zA-Z0-9.-]*)?'
  'dall-e[a-zA-Z0-9.-]*'
  'o[134]-(mini|preview|high|low|medium|reasoning|pro)[a-zA-Z0-9.-]*'
  'whisper-[0-9][a-zA-Z0-9.-]*'
  'gemini-[0-9][a-zA-Z0-9.-]*'
  'gemma-?[0-9][a-zA-Z0-9.-]*'
  'nano-banana[a-zA-Z0-9.-]*'
  'imagen-?[0-9][a-zA-Z0-9.-]*'
  'veo-?[0-9][a-zA-Z0-9.-]*'
  'llama-?[0-9][a-zA-Z0-9.-]*'
  'codellama-?[0-9a-z][a-zA-Z0-9.-]*'
  'grok-(image|vision|[0-9])[a-zA-Z0-9.-]*'
  'deepseek-(r1|v[0-9]|chat|coder|reasoner)[a-zA-Z0-9.-]*'
  'kimi-[a-z0-9][a-zA-Z0-9.-]*'
  'moonshot-v[0-9][a-zA-Z0-9.-]*'
  'mistral-[a-z0-9][a-zA-Z0-9.-]*'
  'mixtral-[0-9][a-zA-Z0-9.-]*'
  'pixtral-[a-z0-9][a-zA-Z0-9.-]*'
  'qwen-?[0-9][a-zA-Z0-9.-]*'
  'qwq-[0-9a-z][a-zA-Z0-9.-]*'
)
# Join the alternatives with "|" into a single alternation.
MODELS=""
for family in "${model_families[@]}"; do
  MODELS+="${MODELS:+|}${family}"
done
PROVIDERS='anthropic|openai|google|meta-llama|x-ai|deepseek|moonshot|mistral|qwen'
# Two alternatives: "<provider>/<model>", or a model id that starts at a word
# boundary.
PATTERN="(${PROVIDERS})/(${MODELS})|"'\b'"(${MODELS})"
# ripgrep options: emit bare "<path>:<line>:<match>" records, without colour,
# and descend into hidden files.
RG_ARGS=(--no-heading --line-number --only-matching --color=never --hidden)
# Paths and lockfiles excluded from the scan (each becomes a "!" glob).
for excluded in \
  node_modules .next .turbo dist build \
  .git _generated .venv venv __pycache__ \
  pnpm-lock.yaml package-lock.json yarn.lock bun.lockb; do
  RG_ARGS+=(-g '!'"${excluded}")
done
# File extensions that are searched.
RG_ARGS+=(-g '*.{ts,tsx,js,jsx,mjs,cjs,py,go,rs,json,toml,yaml,yml,md,env,env.*}')
# rg outputs: <relpath>:<line>:<match>
run() {
local root="$1" mode="$2" detail="$3"
cd "$root"
rg "${RG_ARGS[@]}" -e "$PATTERN" 2>/dev/null | awk -v mode="$mode" -v detail="$detail" -v root="$root" -v ignore="$IGNORE_LIST" '
function pad(s, w, n) { n=w-length(s); if (n<1) return s; return s sprintf("%*s", n, "") }
function trunc(s, w) { if (length(s) <= w) return s; return substr(s,1,w-1) "…" }
function rule(w, ch, r,i) { for (i=0;i<w;i++) r=r ch; return r }
function box_top(w1,w2,w3) { return "┌" rule(w1+2,"─") "┬" rule(w2+2,"─") "┬" rule(w3+2,"─") "┐" }
function box_mid(w1,w2,w3) { return "├" rule(w1+2,"─") "┼" rule(w2+2,"─") "┼" rule(w3+2,"─") "┤" }
function box_bot(w1,w2,w3) { return "└" rule(w1+2,"─") "┴" rule(w2+2,"─") "┴" rule(w3+2,"─") "┘" }
function row(c1,c2,c3,w1,w2,w3) {
return "│ " pad(trunc(c1,w1),w1) " │ " pad(trunc(c2,w2),w2) " │ " pad(trunc(c3,w3),w3) " │"
}
function box2_top(w1,w2) { return "┌" rule(w1+2,"─") "┬" rule(w2+2,"─") "┐" }
function box2_mid(w1,w2) { return "├" rule(w1+2,"─") "┼" rule(w2+2,"─") "┤" }
function box2_bot(w1,w2) { return "└" rule(w1+2,"─") "┴" rule(w2+2,"─") "┘" }
function row2(c1,c2,w1,w2) { return "│ " pad(trunc(c1,w1),w1) " │ " pad(trunc(c2,w2),w2) " │" }
BEGIN {
FS=":"
n_ig = split(ignore, ig_arr, ",")
for (ii=1; ii<=n_ig; ii++) if (ig_arr[ii] != "") ignored[ig_arr[ii]] = 1
}
{
file=$1; line=$2; m=""
for (i=3;i<=NF;i++) m = m (i>3?":":"") $i
if (mode=="projects") {
slash=index(file,"/")
if (slash==0) next
proj=substr(file,1,slash-1); rel=substr(file,slash+1)
if (proj=="scripts") next
if (proj in ignored) next
} else { proj="."; rel=file }
if (!(proj in seen_proj)) { seen_proj[proj]=1; pord[++np]=proj }
pmkey = proj SUBSEP m
if (!(pmkey in seen_pm)) {
seen_pm[pmkey]=1
proj_unique[proj]++
pmord[proj] = pmord[proj] (pmord[proj]?SUBSEP:"") m
pm_first[pmkey] = rel ":" line
}
pm_count[pmkey]++
proj_total[proj]++
lines[pmkey] = lines[pmkey] (lines[pmkey]?", ":"") rel ":" line
}
END {
if (np==0) exit 1
if (mode=="projects") {
# ── Summary ──
w1=14; for (i=1;i<=np;i++) if (length(pord[i])>w1) w1=length(pord[i])
w2=6; w3=8
print box_top(w1,w2,w3)
print row("Project","Models","Mentions",w1,w2,w3)
print box_mid(w1,w2,w3)
for (i=1;i<=np;i++) {
p=pord[i]
print row(p, sprintf("%d",proj_unique[p]), sprintf("%d",proj_total[p]), w1,w2,w3)
}
print box_bot(w1,w2,w3)
print ""
}
# ── Per-project model breakdown ──
for (i=1;i<=np;i++) {
p=pord[i]
n_models = split(pmord[p], models_arr, SUBSEP)
# column widths
mw=14; for (j=1;j<=n_models;j++) if (length(models_arr[j])>mw) mw=length(models_arr[j])
if (mw>50) mw=50
cw=4
fw=44
if (mode=="projects") print "── " p " " rule(78-length(p)-4, "─")
print box_top(mw,cw,fw)
print row("Model","#","First seen at",mw,cw,fw)
print box_mid(mw,cw,fw)
for (j=1;j<=n_models;j++) {
m=models_arr[j]
k=p SUBSEP m
print row(m, sprintf("%d", pm_count[k]), pm_first[k], mw,cw,fw)
}
print box_bot(mw,cw,fw)
if (detail=="1") {
print ""
# Detail box: per model -> all file:line refs
dmw=mw; dfw=70
print box2_top(dmw, dfw)
print row2("Model","All references", dmw, dfw)
print box2_mid(dmw, dfw)
for (j=1;j<=n_models;j++) {
m=models_arr[j]
k=p SUBSEP m
# split refs across rows if too long
refs = lines[k]
first=1
while (length(refs) > 0) {
chunk = substr(refs, 1, dfw)
if (length(refs) > dfw) {
# break at last comma in window
cut = chunk
pos = 0
for (q=length(chunk); q>0; q--) {
if (substr(chunk,q,1)==",") { pos=q; break }
}
if (pos>0) chunk = substr(refs, 1, pos-1)
refs = (pos>0) ? substr(refs, pos+2) : substr(refs, dfw+1)
} else {
refs = ""
}
print row2(first?m:"", chunk, dmw, dfw)
first=0
}
}
print box2_bot(dmw, dfw)
}
if (i<np) print ""
}
}
'
}
# Entry point: render the report; when run reports failure, say that nothing
# was found.
run "$ROOT" "$MODE" "$DETAIL" ||
  echo "No AI/LLM model references found under $ROOT"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment