Created
May 9, 2026 07:46
-
-
Save disinfeqt/82e518eef15fcf80842d669e8fe3f91c to your computer and use it in GitHub Desktop.
Lists AI/LLM models referenced in source code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # Lists AI/LLM models referenced in source code, grouped per project by default. | |
| # | |
| # Usage: | |
| # ./list-ai-models.sh [path] # auto: groups by project unless path | |
| # # looks like a single repo root | |
| # ./list-ai-models.sh --flat [path] # force a single deduped table | |
| # ./list-ai-models.sh --projects [path] # force per-project grouping | |
| # ./list-ai-models.sh --detail [path] # add per-line file:line dump | |
| # Defaults path to the current working directory. | |
| # | |
| # Requires ripgrep (`brew install ripgrep`). | |
set -euo pipefail

MODE="auto"
DETAIL=0
ROOT=""

#######################################
# Parse the command line into the MODE, DETAIL and ROOT globals.
# Globals:
#   MODE   (written) "auto", "flat" or "projects"
#   DETAIL (written) 1 when --detail was given, else 0
#   ROOT   (written) directory to scan; defaults to the current directory
# Arguments:
#   The script's arguments: [--projects|--flat|--detail]... [--] [path]
# Outputs:
#   --help prints the header comment of this script (lines 2-12) to stdout.
# Returns:
#   0 on success. Exits 0 after --help and 2 on an unknown option.
#######################################
parse_args() {
  MODE="auto"
  DETAIL=0

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --projects) MODE="projects"; shift ;;
      --flat) MODE="flat"; shift ;;
      --detail) DETAIL=1; shift ;;
      -h | --help)
        # -E with a plain "?" is portable; "\?" in a basic regex is a GNU-only
        # extension and leaves the "# " prefix in place on other seds.
        sed -n '2,12p' "$0" | sed -E 's/^# ?//'
        exit 0
        ;;
      --)
        # Explicit end of options, so a path may start with a dash.
        shift
        break
        ;;
      -?*)
        # A mistyped flag used to be taken as the path to scan.
        printf 'Unknown option: %s (try --help)\n' "$1" >&2
        exit 2
        ;;
      *) break ;;
    esac
  done

  ROOT="${1:-.}"
  # Drop one trailing slash, but never reduce the filesystem root to "".
  if [[ "$ROOT" != "/" ]]; then
    ROOT="${ROOT%/}"
  fi
  if [[ "$ROOT" == "." ]]; then
    ROOT="$PWD"
  fi
}

parse_args "$@"
#######################################
# Report whether a directory looks like the root of a single project.
# Arguments:
#   $1 - directory to inspect
# Returns:
#   0 when a project marker (package.json, Cargo.toml, pyproject.toml,
#   go.mod or .git) is present in the directory, 1 otherwise.
#######################################
is_repo_root() {
  local dir="$1" marker
  for marker in package.json Cargo.toml pyproject.toml go.mod; do
    if [[ -f "$dir/$marker" ]]; then
      return 0
    fi
  done
  # .git is a directory in an ordinary clone but a regular file in linked
  # worktrees and submodules, so test for existence rather than -d.
  [[ -e "$dir/.git" ]]
}

# Auto: if ROOT looks like a single repo (has a project marker), use flat.
# Otherwise treat it as a parent dir and group by top-level subdir.
if [[ "$MODE" == "auto" ]]; then
  if is_repo_root "$ROOT"; then
    MODE="flat"
  else
    MODE="projects"
  fi
fi
# ripgrep does all of the searching, so stop early when it is not on PATH.
if ! command -v rg >/dev/null; then
  echo "ripgrep (rg) is required" >&2
  exit 1
fi
#######################################
# Print the project names from an ignore file as one comma-separated line.
# Comments (# ...) and blank lines are skipped. Leading and trailing
# whitespace, including the CR of CRLF line endings, is trimmed; whitespace
# inside a name is kept so directory names containing spaces can be listed.
# Arguments:
#   $1 - path to the ignore file; it does not have to exist
# Outputs:
#   The names joined by commas on stdout (nothing when the file is missing).
# Returns:
#   0 when the file is missing, otherwise the status of the awk | paste
#   pipeline.
#######################################
load_ignore_list() {
  local file="$1"
  if [[ ! -f "$file" ]]; then
    return 0
  fi
  awk '
    {
      sub(/#.*/, "")                    # strip comments
      gsub(/^[ \t\r]+|[ \t\r]+$/, "")   # trim both ends, CR included
    }
    $0 != "" { print }
  ' "$file" | paste -sd, -
}

# Load ignore list (project names, one per line) from list-ai-models.ignore
# next to this script.
# CDPATH is cleared for the cd because a CDPATH hit makes cd print the target
# directory, which would end up duplicated inside SCRIPT_DIR.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
IGNORE_FILE="$SCRIPT_DIR/list-ai-models.ignore"
IGNORE_LIST="$(load_ignore_list "$IGNORE_FILE")"
# Each model family must start with a recognized prefix AND require a
# version-ish suffix (digit, or known qualifier like -mini/-pro). Bare brand
# words like "qwen" or random IDs like "claude-desktop" / "o3-JIicAM..."
# should NOT match. ("sora" is the one family whose suffix is optional.)
MODELS='claude-(opus|sonnet|haiku|instant|[0-9])[a-zA-Z0-9.-]*'
MODELS+='|gpt-[0-9][a-zA-Z0-9.-]*|gpt-image[a-zA-Z0-9.-]*'
MODELS+='|sora(-[0-9a-z][a-zA-Z0-9.-]*)?'
MODELS+='|dall-e[a-zA-Z0-9.-]*'
MODELS+='|o[134]-(mini|preview|high|low|medium|reasoning|pro)[a-zA-Z0-9.-]*'
MODELS+='|whisper-[0-9][a-zA-Z0-9.-]*'
MODELS+='|gemini-[0-9][a-zA-Z0-9.-]*|gemma-?[0-9][a-zA-Z0-9.-]*'
MODELS+='|nano-banana[a-zA-Z0-9.-]*|imagen-?[0-9][a-zA-Z0-9.-]*|veo-?[0-9][a-zA-Z0-9.-]*'
MODELS+='|llama-?[0-9][a-zA-Z0-9.-]*|codellama-?[0-9a-z][a-zA-Z0-9.-]*'
MODELS+='|grok-(image|vision|[0-9])[a-zA-Z0-9.-]*'
MODELS+='|deepseek-(r1|v[0-9]|chat|coder|reasoner)[a-zA-Z0-9.-]*'
MODELS+='|kimi-[a-z0-9][a-zA-Z0-9.-]*|moonshot-v[0-9][a-zA-Z0-9.-]*'
MODELS+='|mistral-[a-z0-9][a-zA-Z0-9.-]*|mixtral-[0-9][a-zA-Z0-9.-]*|pixtral-[a-z0-9][a-zA-Z0-9.-]*'
MODELS+='|qwen-?[0-9][a-zA-Z0-9.-]*|qwq-[0-9a-z][a-zA-Z0-9.-]*'

PROVIDERS='anthropic|openai|google|meta-llama|x-ai|deepseek|moonshot|mistral|qwen'

# Match either a provider-prefixed model id or a bare model id. The bare form
# is anchored with a leading word boundary only.
PATTERN="($PROVIDERS)/($MODELS)|\\b($MODELS)"

RG_ARGS=(
  --no-heading
  --line-number
  --only-matching
  --color=never
  --hidden
  # The extension allow-list must come BEFORE the excludes: when several -g
  # globs match the same path ripgrep lets the later one win, so a trailing
  # allow-list would re-include package-lock.json and pnpm-lock.yaml.
  -g '*.{ts,tsx,js,jsx,mjs,cjs,py,go,rs,json,toml,yaml,yml,md,env,env.*}'
  -g '!node_modules' -g '!.next' -g '!.turbo' -g '!dist' -g '!build'
  -g '!.git' -g '!_generated' -g '!.venv' -g '!venv' -g '!__pycache__'
  -g '!pnpm-lock.yaml' -g '!package-lock.json' -g '!yarn.lock' -g '!bun.lockb'
)
#######################################
# Search a directory tree with ripgrep and render the matches as box tables.
# ripgrep emits one "<relpath>:<line>:<match>" record per hit; awk aggregates
# them per project and per model and prints the report.
# Globals:
#   RG_ARGS     (read) ripgrep options
#   PATTERN     (read) regular expression for model ids
#   IGNORE_LIST (read) comma-separated project names to skip
# Arguments:
#   $1 - directory to scan
#   $2 - "projects" to group by top-level subdirectory; anything else gives
#        one flat table
#   $3 - "1" to add a table with every file:line reference per model
# Outputs:
#   The tables on stdout; an error on stderr when $1 is not a directory.
# Returns:
#   0 when at least one reference was rendered, 1 when none was found,
#   2 when $1 is not a usable directory.
#######################################
run() {
  local root="$1" mode="$2" detail="$3"

  # The caller invokes run inside "if !", which switches errexit off in here,
  # so a failing cd would otherwise go unnoticed and the current directory
  # would be scanned instead.
  if [[ ! -d "$root" ]]; then
    printf 'Not a directory: %s\n' "$root" >&2
    return 2
  fi

  {
    # This group is a pipeline stage and therefore runs in a subshell: the
    # working directory of the caller is left alone.
    cd -- "$root" || exit 2
    # rg exits 1 on "no match" and 2 when any path could not be read, even if
    # other files matched. Under pipefail that status would mask the result
    # of awk, which is the real answer to "was anything found?".
    rg "${RG_ARGS[@]}" -e "$PATTERN" 2>/dev/null || true
  } | awk -v mode="$mode" -v detail="$detail" -v ignore="$IGNORE_LIST" '
    # ── Layout helpers ──
    # rule: ch repeated w times (empty string when w < 1).
    function rule(w, ch,    r, i) { for (i = 0; i < w; i++) r = r ch; return r }
    # pad: right-pad s with blanks to width w; longer strings pass through.
    function pad(s, w) { return s rule(w - length(s), " ") }
    # trunc: cut s to width w, marking the cut with an ellipsis.
    function trunc(s, w) { if (length(s) <= w) return s; return substr(s, 1, w - 1) "…" }

    function box_top(w1, w2, w3) { return "┌" rule(w1+2, "─") "┬" rule(w2+2, "─") "┬" rule(w3+2, "─") "┐" }
    function box_mid(w1, w2, w3) { return "├" rule(w1+2, "─") "┼" rule(w2+2, "─") "┼" rule(w3+2, "─") "┤" }
    function box_bot(w1, w2, w3) { return "└" rule(w1+2, "─") "┴" rule(w2+2, "─") "┴" rule(w3+2, "─") "┘" }
    function row(c1, c2, c3, w1, w2, w3) {
      return "│ " pad(trunc(c1, w1), w1) " │ " pad(trunc(c2, w2), w2) " │ " pad(trunc(c3, w3), w3) " │"
    }

    function box2_top(w1, w2) { return "┌" rule(w1+2, "─") "┬" rule(w2+2, "─") "┐" }
    function box2_mid(w1, w2) { return "├" rule(w1+2, "─") "┼" rule(w2+2, "─") "┤" }
    function box2_bot(w1, w2) { return "└" rule(w1+2, "─") "┴" rule(w2+2, "─") "┘" }
    function row2(c1, c2, w1, w2) { return "│ " pad(trunc(c1, w1), w1) " │ " pad(trunc(c2, w2), w2) " │" }

    BEGIN {
      FS = ":"
      n_ig = split(ignore, ig_arr, ",")
      for (ii = 1; ii <= n_ig; ii++) if (ig_arr[ii] != "") ignored[ig_arr[ii]] = 1
    }

    # ── One record per match: <relpath>:<line>:<match> ──
    {
      file = $1; line = $2
      # The match may itself contain ":", so take everything after field 2.
      m = substr($0, length(file) + length(line) + 3)

      if (mode == "projects") {
        slash = index(file, "/")
        if (slash == 0) next              # file directly under root: no project
        proj = substr(file, 1, slash - 1)
        rel = substr(file, slash + 1)
        if (proj == "scripts") next       # hard-coded skip of a scripts dir
        if (proj in ignored) next
      } else {
        proj = "."
        rel = file
      }

      # Remember first-seen order of projects and of models within a project.
      if (!(proj in seen_proj)) { seen_proj[proj] = 1; pord[++np] = proj }
      pmkey = proj SUBSEP m
      if (!(pmkey in seen_pm)) {
        seen_pm[pmkey] = 1
        proj_unique[proj]++
        pmord[proj] = pmord[proj] (pmord[proj] ? SUBSEP : "") m
        pm_first[pmkey] = rel ":" line
      }
      pm_count[pmkey]++
      proj_total[proj]++
      lines[pmkey] = lines[pmkey] (lines[pmkey] ? ", " : "") rel ":" line
    }

    END {
      # Exit status 1 tells the shell that nothing was found.
      if (np == 0) exit 1

      if (mode == "projects") {
        # ── Summary ──
        w1 = 14
        for (i = 1; i <= np; i++) if (length(pord[i]) > w1) w1 = length(pord[i])
        w2 = 6; w3 = 8
        print box_top(w1, w2, w3)
        print row("Project", "Models", "Mentions", w1, w2, w3)
        print box_mid(w1, w2, w3)
        for (i = 1; i <= np; i++) {
          p = pord[i]
          print row(p, sprintf("%d", proj_unique[p]), sprintf("%d", proj_total[p]), w1, w2, w3)
        }
        print box_bot(w1, w2, w3)
        print ""
      }

      # ── Per-project model breakdown ──
      for (i = 1; i <= np; i++) {
        p = pord[i]
        n_models = split(pmord[p], models_arr, SUBSEP)

        # Column widths: the model column grows with its content, capped at 50.
        mw = 14
        for (j = 1; j <= n_models; j++) if (length(models_arr[j]) > mw) mw = length(models_arr[j])
        if (mw > 50) mw = 50
        cw = 4
        fw = 44

        if (mode == "projects") print "── " p " " rule(78 - length(p) - 4, "─")
        print box_top(mw, cw, fw)
        print row("Model", "#", "First seen at", mw, cw, fw)
        print box_mid(mw, cw, fw)
        for (j = 1; j <= n_models; j++) {
          m = models_arr[j]
          k = p SUBSEP m
          print row(m, sprintf("%d", pm_count[k]), pm_first[k], mw, cw, fw)
        }
        print box_bot(mw, cw, fw)

        if (detail == "1") {
          print ""
          # Detail box: per model -> all file:line refs
          dmw = mw; dfw = 70
          print box2_top(dmw, dfw)
          print row2("Model", "All references", dmw, dfw)
          print box2_mid(dmw, dfw)
          for (j = 1; j <= n_models; j++) {
            m = models_arr[j]
            k = p SUBSEP m
            # Wrap the reference list across rows of at most dfw characters.
            refs = lines[k]
            first = 1
            while (length(refs) > 0) {
              if (length(refs) <= dfw) {
                chunk = refs
                refs = ""
              } else {
                chunk = substr(refs, 1, dfw)
                # Break at the last comma inside the window, if there is one.
                pos = 0
                for (q = length(chunk); q > 0; q--) {
                  if (substr(chunk, q, 1) == ",") { pos = q; break }
                }
                if (pos > 0) {
                  chunk = substr(refs, 1, pos - 1)
                  refs = substr(refs, pos + 2)   # skip the ", " separator
                } else {
                  refs = substr(refs, dfw + 1)   # no comma: hard cut
                }
              }
              # Only the first row of a model carries its name.
              print row2(first ? m : "", chunk, dmw, dfw)
              first = 0
            }
          }
          print box2_bot(dmw, dfw)
        }

        if (i < np) print ""
      }
    }
  '
}
# Produce the report; any non-zero status from run is reported to the user as
# "nothing found". The script itself still finishes with status 0.
run "$ROOT" "$MODE" "$DETAIL" \
  || echo "No AI/LLM model references found under $ROOT"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment