Skip to content

Instantly share code, notes, and snippets.

@jai
Created February 23, 2026 01:31
Show Gist options
  • Select an option

  • Save jai/3241c07ca1eadc227af074c7067eda76 to your computer and use it in GitHub Desktop.

Select an option

Save jai/3241c07ca1eadc227af074c7067eda76 to your computer and use it in GitHub Desktop.
FINN AI Context Collection Script
#!/usr/bin/env bash
# collect-finn-context.sh — Collect all AI context, CI config, and repo metadata
# from the EWA-Services GitHub org for AGENTS.md generation.
#
# Prerequisites: gh CLI authenticated with access to EWA-Services org
# Output: /tmp/finn-context/ directory with all collected data
#
# Usage: bash /tmp/collect-finn-context.sh [MAX_REPOS] [OUTPUT_DIR]
#   $1  maximum number of repos to inventory  (default: 100)
#   $2  output directory for collected data   (default: /tmp/finn-context)
# NB: arguments are positional — the previous usage line advertised
# "--repos N" / "--output-dir DIR" flags that the script never parsed.
set -euo pipefail

# --- Configuration ---
readonly ORG="EWA-Services"
MAX_REPOS="${1:-100}"
OUTPUT_DIR="${2:-/tmp/finn-context}"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
readonly MAX_REPOS OUTPUT_DIR TIMESTAMP

# One subdirectory per artifact category collected below.
mkdir -p "$OUTPUT_DIR"/{ai-context-files,workflows,branch-protection,codeowners,policy-files,pre-commit,linter-configs,commit-samples,pr-templates,test-configs,repo-metadata}

echo "=== FINN Context Collection — $TIMESTAMP ==="
echo "Org: $ORG | Max repos: $MAX_REPOS | Output: $OUTPUT_DIR"
echo ""
# --- Step 1: Repo Inventory ---
# Inventory every repo in the org, then split active from archived and
# write name/language summaries that later steps iterate over.
echo "[1/10] Collecting repo inventory..."
gh repo list "$ORG" \
  --json name,primaryLanguage,updatedAt,isArchived,defaultBranchRef \
  --limit "$MAX_REPOS" \
  > "$OUTPUT_DIR/repo-metadata/all-repos.json"
# Filter active repos. The embedded Python requires real indentation —
# the previous (whitespace-stripped) version raised IndentationError.
python3 -c "
import json
from collections import Counter

with open('$OUTPUT_DIR/repo-metadata/all-repos.json') as f:
    repos = json.load(f)

active = [r for r in repos if not r.get('isArchived', False)]

with open('$OUTPUT_DIR/repo-metadata/active-repos.json', 'w') as f:
    json.dump(active, f, indent=2)

# Write just names for iteration
with open('$OUTPUT_DIR/repo-metadata/active-repo-names.txt', 'w') as f:
    f.write('\n'.join(r['name'] for r in active))

# Language summary (primaryLanguage may be null in the gh JSON output)
langs = Counter(
    r['primaryLanguage']['name'] if r.get('primaryLanguage') else 'None'
    for r in active
)
with open('$OUTPUT_DIR/repo-metadata/language-summary.json', 'w') as f:
    json.dump(dict(langs.most_common()), f, indent=2)

print(f' Found {len(active)} active repos, {len(repos)-len(active)} archived')
"
# Plain-text list of active repo names, consumed by later steps.
ACTIVE_REPOS="$OUTPUT_DIR/repo-metadata/active-repo-names.txt"
# --- Step 2: Search for existing AI context files ---
# Search the org for per-repo agent instruction files, then pull the
# canonical copies kept in EWA-Actions and agent-resources.
echo "[2/10] Searching for existing AI context files..."
for ctx_file in AGENTS.md CLAUDE.md .cursorrules copilot-instructions.md .github/copilot-instructions.md; do
  echo " Searching for $ctx_file..."
  # Slashes are not valid in output filenames; flatten them to dashes.
  gh search code "filename:$ctx_file" --owner "$ORG" --json repository,path 2>/dev/null \
    > "$OUTPUT_DIR/ai-context-files/search-${ctx_file//\//-}.json" || true
done
# Also check EWA-Actions (the centralized source)
for canon_doc in AGENTS.md CLAUDE.md; do
  echo " Fetching EWA-Actions/$canon_doc..."
  gh api "repos/$ORG/EWA-Actions/contents/$canon_doc" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/ai-context-files/EWA-Actions-$canon_doc" 2>/dev/null || true
done
# Check agent-resources repo
echo " Fetching agent-resources..."
gh api "repos/$ORG/agent-resources/contents/README.md" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/ai-context-files/agent-resources-README.md" 2>/dev/null || true
# Directory entries under skills-src are the available skill names.
gh api "repos/$ORG/agent-resources/contents/skills-src" 2>/dev/null \
  | python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data if d['type']=='dir']" \
  > "$OUTPUT_DIR/ai-context-files/agent-resources-skills-list.txt" 2>/dev/null || true
# --- Step 3: Collect workflow files from key repos ---
# Pulls the centralized CI templates from EWA-Actions plus a sample of
# per-repo workflow listings. Every gh call is followed by "|| true" so a
# missing file or 404 never aborts the run under set -e.
echo "[3/10] Collecting CI/CD workflow info..."
# Get the centralized workflow templates
# (directory listing only: prints each entry's name, one per line).
gh api "repos/$ORG/EWA-Actions/contents/workflow-templates" 2>/dev/null \
| python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data]" \
> "$OUTPUT_DIR/workflows/centralized-workflow-templates.txt" 2>/dev/null || true
# Read key workflow files from EWA-Actions
# (the contents API returns base64; the python one-liner decodes to text).
for wf in repository-code-review.yaml repository-semantic-pr.yaml repository-pr-metadata-gate.yaml repository-release.yaml; do
echo " Fetching $wf..."
gh api "repos/$ORG/EWA-Actions/contents/workflow-templates/$wf" 2>/dev/null \
| python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
> "$OUTPUT_DIR/workflows/$wf" 2>/dev/null || true
done
# Get sync config (which repos get which workflows)
for syncfile in sync-workflow-files.yml sync-other-files.yml; do
echo " Fetching $syncfile..."
gh api "repos/$ORG/EWA-Actions/contents/.github/$syncfile" 2>/dev/null \
| python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
> "$OUTPUT_DIR/workflows/$syncfile" 2>/dev/null || true
done
# Per-repo workflow listing (sample top 15 recently updated repos)
# NOTE(review): "recently updated" assumes gh repo list emitted repos in
# update order — TODO confirm; the active-repo-names.txt file preserves
# whatever order gh returned in step 1.
echo " Listing workflows per repo..."
head -15 "$ACTIVE_REPOS" | while read -r repo; do
gh api "repos/$ORG/$repo/contents/.github/workflows" 2>/dev/null \
| python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data]" \
> "$OUTPUT_DIR/workflows/repo-$repo-workflows.txt" 2>/dev/null || true
done
# --- Step 4: Branch protection rules ---
echo "[4/10] Collecting branch protection rules..."
# Use each repo's actual default branch (captured in step 1 via the
# defaultBranchRef field) rather than assuming every repo calls it "main".
python3 -c "
import json
with open('$OUTPUT_DIR/repo-metadata/active-repos.json') as f:
    repos = json.load(f)
for r in repos:
    ref = r.get('defaultBranchRef') or {}
    print(r['name'], ref.get('name') or 'main')
" | while read -r repo branch; do
  # 404 (no protection configured) is expected for some repos; keep going.
  gh api "repos/$ORG/$repo/branches/$branch/protection" \
    > "$OUTPUT_DIR/branch-protection/$repo.json" 2>/dev/null || true
done
# --- Step 5: CODEOWNERS files ---
# For every active repo, save its CODEOWNERS file if one exists.
echo "[5/10] Collecting CODEOWNERS files..."
while read -r repo_name; do
  # GitHub honors CODEOWNERS at the repo root or under .github/;
  # keep the first location that yields content.
  for owners_path in CODEOWNERS .github/CODEOWNERS; do
    owners_body=$(gh api "repos/$ORG/$repo_name/contents/$owners_path" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" 2>/dev/null || true)
    if [[ -n "$owners_body" ]]; then
      echo "$owners_body" > "$OUTPUT_DIR/codeowners/$repo_name-CODEOWNERS.txt"
      break
    fi
  done
done < "$ACTIVE_REPOS"
# --- Step 6: Policy files ---
# Policy templates live in EWA-Actions/other-templates; list their names
# first, then download each one verbatim.
echo "[6/10] Collecting policy files..."
# Substring filter: keeps any entry whose name contains ".policy".
# NOTE(review): presumably the files are named like "<x>.policy.yml" —
# confirm the naming convention. The redirect creates the list file even
# when the gh call fails, so the read loop below always has a file to read.
gh api "repos/$ORG/EWA-Actions/contents/other-templates" 2>/dev/null \
| python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data if '.policy' in d['name']]" \
> "$OUTPUT_DIR/policy-files/policy-file-list.txt" 2>/dev/null || true
# Fetch each listed policy file; "|| true" tolerates individual failures.
while read -r pf; do
# Defensive skip of blank lines (&& list failure is exempt under set -e).
[ -z "$pf" ] && continue
gh api "repos/$ORG/EWA-Actions/contents/other-templates/$pf" 2>/dev/null \
| python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
> "$OUTPUT_DIR/policy-files/$pf" 2>/dev/null || true
done < "$OUTPUT_DIR/policy-files/policy-file-list.txt"
# --- Step 7: Pre-commit configs ---
echo "[7/10] Collecting pre-commit configurations..."
# Centralized pre-commit templates published by EWA-Actions, one per stack.
pre_commit_templates=(
  pre-commit-config-python.yaml
  pre-commit-config-nodejs.yaml
  pre-commit-config-infrastructure.yaml
  pre-commit-config-serverless.yaml
  pre-commit-config-php.yaml
)
for template in "${pre_commit_templates[@]}"; do
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$template" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/pre-commit/$template" 2>/dev/null || true
done
# --- Step 8: Linter/formatter configs ---
echo "[8/10] Collecting linter and formatter configs..."
# Org-wide configs kept in EWA-Actions: ruff (Python lint) and the
# bulldozer auto-merge config.
for shared_cfg in ruff.toml bulldozer-general.yml; do
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$shared_cfg" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/linter-configs/$shared_cfg" 2>/dev/null || true
done
# Sample repo-specific configs (ESLint, Prettier, TypeScript).
for sample_repo in FINN-Web-App Core; do
  for lint_cfg in .eslintrc.json .prettierrc .prettierrc.json tsconfig.json; do
    gh api "repos/$ORG/$sample_repo/contents/$lint_cfg" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
      > "$OUTPUT_DIR/linter-configs/$sample_repo-$lint_cfg" 2>/dev/null || true
  done
done
# Go linting config lives in the banking-integrations repo.
gh api "repos/$ORG/banking-integrations/contents/.golangci.yml" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/linter-configs/banking-integrations-.golangci.yml" 2>/dev/null || true
# --- Step 9: Commit history samples ---
# Sample recent commit messages from representative repos so the AGENTS.md
# generator can learn the org's commit-message conventions.
echo "[9/10] Sampling recent commits..."
for repo in FINN-Web-App Core Statement-Service banking-integrations Serverless-Operations Infrastructure; do
  # per_page=20 already caps the sample at 20 commits. The previous
  # "| head -20" counted LINES, not commits, so multi-line commit messages
  # were truncated and later commits silently dropped — removed.
  gh api "repos/$ORG/$repo/commits?per_page=20" --jq '.[].commit.message' 2>/dev/null \
    > "$OUTPUT_DIR/commit-samples/$repo-commits.txt" || true
done
# --- Step 10: PR templates and test configs ---
echo "[10/10] Collecting PR templates and test configs..."
# Org-wide PR template from EWA-Actions.
gh api "repos/$ORG/EWA-Actions/contents/other-templates/pull_request_template.md" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/pr-templates/pull_request_template.md" 2>/dev/null || true
# Test-runner configs from key repos; only write files that actually exist.
for sample_repo in FINN-Web-App Core Statement-Service; do
  for runner_cfg in jest.config.js jest.config.ts vitest.config.ts pytest.ini setup.cfg pyproject.toml; do
    cfg_body=$(gh api "repos/$ORG/$sample_repo/contents/$runner_cfg" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" 2>/dev/null || true)
    if [[ -n "$cfg_body" ]]; then
      echo "$cfg_body" > "$OUTPUT_DIR/test-configs/$sample_repo-$runner_cfg"
    fi
  done
done
# --- Summary ---
# Report what was collected and how to use it.
echo ""
echo "=== Collection Complete ==="
echo "Output directory: $OUTPUT_DIR"
echo ""
echo "Contents:"
# tr strips wc's leading padding (BSD wc pads the count with spaces);
# this replaces the previous find|wc|xargs echo round-trip.
total_files=$(find "$OUTPUT_DIR" -type f | wc -l | tr -d '[:space:]')
echo " $total_files files collected"
echo ""
echo "Directory breakdown:"
for dir in "$OUTPUT_DIR"/*/; do
  dirname=$(basename "$dir")
  count=$(find "$dir" -type f | wc -l | tr -d '[:space:]')
  # Proper if/fi instead of "[ ... ] && echo": the && form leaves the loop
  # with a non-zero exit status when the last directory is empty, which is
  # fragile in a set -e script and pollutes the script's exit status.
  if [ "$count" -gt 0 ]; then
    echo " $dirname: $count files"
  fi
done
echo ""
echo "Next step: Feed the contents of $OUTPUT_DIR to the AGENTS.md generator prompt"
# NOTE(review): the ** in the suggested command requires "shopt -s globstar"
# in bash; without it the pattern matches only one directory level.
echo " Example: cat $OUTPUT_DIR/**/* | head -50000 > /tmp/finn-context-bundle.txt"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment