Skip to content

Instantly share code, notes, and snippets.

@jai
Created February 23, 2026 01:31
Show Gist options
  • Select an option

  • Save jai/3241c07ca1eadc227af074c7067eda76 to your computer and use it in GitHub Desktop.

Select an option

Save jai/3241c07ca1eadc227af074c7067eda76 to your computer and use it in GitHub Desktop.
FINN AI Context Collection Script
#!/usr/bin/env bash
# collect-finn-context.sh — Collect all AI context, CI config, and repo metadata
# from the EWA-Services GitHub org for AGENTS.md generation.
#
# Prerequisites: gh CLI authenticated with access to EWA-Services org
# Output: /tmp/finn-context/ directory with all collected data
#
# Usage: bash /tmp/collect-finn-context.sh [MAX_REPOS] [OUTPUT_DIR]
#   $1  maximum number of repos to inventory  (default: 100)
#   $2  output directory for collected data   (default: /tmp/finn-context)
# NB: arguments are positional — the previous usage line advertised
# "--repos N" / "--output-dir DIR" flags that the script never parsed.
set -euo pipefail

# --- Configuration ---
readonly ORG="EWA-Services"
MAX_REPOS="${1:-100}"
OUTPUT_DIR="${2:-/tmp/finn-context}"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
readonly MAX_REPOS OUTPUT_DIR TIMESTAMP

# One subdirectory per artifact category collected below.
mkdir -p "$OUTPUT_DIR"/{ai-context-files,workflows,branch-protection,codeowners,policy-files,pre-commit,linter-configs,commit-samples,pr-templates,test-configs,repo-metadata}

echo "=== FINN Context Collection — $TIMESTAMP ==="
echo "Org: $ORG | Max repos: $MAX_REPOS | Output: $OUTPUT_DIR"
echo ""
# --- Step 1: Repo Inventory ---
# Inventory every repo in the org, then split active from archived and
# write name/language summaries that later steps iterate over.
echo "[1/10] Collecting repo inventory..."
gh repo list "$ORG" \
  --json name,primaryLanguage,updatedAt,isArchived,defaultBranchRef \
  --limit "$MAX_REPOS" \
  > "$OUTPUT_DIR/repo-metadata/all-repos.json"
# Filter active repos. The embedded Python requires real indentation —
# the previous (whitespace-stripped) version raised IndentationError.
python3 -c "
import json
from collections import Counter

with open('$OUTPUT_DIR/repo-metadata/all-repos.json') as f:
    repos = json.load(f)

active = [r for r in repos if not r.get('isArchived', False)]

with open('$OUTPUT_DIR/repo-metadata/active-repos.json', 'w') as f:
    json.dump(active, f, indent=2)

# Write just names for iteration
with open('$OUTPUT_DIR/repo-metadata/active-repo-names.txt', 'w') as f:
    f.write('\n'.join(r['name'] for r in active))

# Language summary (primaryLanguage may be null in the gh JSON output)
langs = Counter(
    r['primaryLanguage']['name'] if r.get('primaryLanguage') else 'None'
    for r in active
)
with open('$OUTPUT_DIR/repo-metadata/language-summary.json', 'w') as f:
    json.dump(dict(langs.most_common()), f, indent=2)

print(f' Found {len(active)} active repos, {len(repos)-len(active)} archived')
"
# Plain-text list of active repo names, consumed by later steps.
ACTIVE_REPOS="$OUTPUT_DIR/repo-metadata/active-repo-names.txt"
# --- Step 2: Search for existing AI context files ---
# Search the org for per-repo agent instruction files, then pull the
# canonical copies kept in EWA-Actions and agent-resources.
echo "[2/10] Searching for existing AI context files..."
for ctx_file in AGENTS.md CLAUDE.md .cursorrules copilot-instructions.md .github/copilot-instructions.md; do
  echo " Searching for $ctx_file..."
  # Slashes are not valid in output filenames; flatten them to dashes.
  gh search code "filename:$ctx_file" --owner "$ORG" --json repository,path 2>/dev/null \
    > "$OUTPUT_DIR/ai-context-files/search-${ctx_file//\//-}.json" || true
done
# Also check EWA-Actions (the centralized source)
for canon_doc in AGENTS.md CLAUDE.md; do
  echo " Fetching EWA-Actions/$canon_doc..."
  gh api "repos/$ORG/EWA-Actions/contents/$canon_doc" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/ai-context-files/EWA-Actions-$canon_doc" 2>/dev/null || true
done
# Check agent-resources repo
echo " Fetching agent-resources..."
gh api "repos/$ORG/agent-resources/contents/README.md" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/ai-context-files/agent-resources-README.md" 2>/dev/null || true
# Directory entries under skills-src are the available skill names.
gh api "repos/$ORG/agent-resources/contents/skills-src" 2>/dev/null \
  | python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data if d['type']=='dir']" \
  > "$OUTPUT_DIR/ai-context-files/agent-resources-skills-list.txt" 2>/dev/null || true
# --- Step 3: Collect workflow files from key repos ---
# Pulls the centralized CI templates from EWA-Actions plus a sample of
# per-repo workflow listings. Every gh call is followed by "|| true" so a
# missing file or 404 never aborts the run under set -e.
echo "[3/10] Collecting CI/CD workflow info..."
# Get the centralized workflow templates
# (directory listing only: prints each entry's name, one per line).
gh api "repos/$ORG/EWA-Actions/contents/workflow-templates" 2>/dev/null \
| python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data]" \
> "$OUTPUT_DIR/workflows/centralized-workflow-templates.txt" 2>/dev/null || true
# Read key workflow files from EWA-Actions
# (the contents API returns base64; the python one-liner decodes to text).
for wf in repository-code-review.yaml repository-semantic-pr.yaml repository-pr-metadata-gate.yaml repository-release.yaml; do
echo " Fetching $wf..."
gh api "repos/$ORG/EWA-Actions/contents/workflow-templates/$wf" 2>/dev/null \
| python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
> "$OUTPUT_DIR/workflows/$wf" 2>/dev/null || true
done
# Get sync config (which repos get which workflows)
for syncfile in sync-workflow-files.yml sync-other-files.yml; do
echo " Fetching $syncfile..."
gh api "repos/$ORG/EWA-Actions/contents/.github/$syncfile" 2>/dev/null \
| python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
> "$OUTPUT_DIR/workflows/$syncfile" 2>/dev/null || true
done
# Per-repo workflow listing (sample top 15 recently updated repos)
# NOTE(review): "recently updated" assumes gh repo list emitted repos in
# update order — TODO confirm; the active-repo-names.txt file preserves
# whatever order gh returned in step 1.
echo " Listing workflows per repo..."
head -15 "$ACTIVE_REPOS" | while read -r repo; do
gh api "repos/$ORG/$repo/contents/.github/workflows" 2>/dev/null \
| python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data]" \
> "$OUTPUT_DIR/workflows/repo-$repo-workflows.txt" 2>/dev/null || true
done
# --- Step 4: Branch protection rules ---
echo "[4/10] Collecting branch protection rules..."
# Use each repo's actual default branch (captured in step 1 via the
# defaultBranchRef field) rather than assuming every repo calls it "main".
python3 -c "
import json
with open('$OUTPUT_DIR/repo-metadata/active-repos.json') as f:
    repos = json.load(f)
for r in repos:
    ref = r.get('defaultBranchRef') or {}
    print(r['name'], ref.get('name') or 'main')
" | while read -r repo branch; do
  # 404 (no protection configured) is expected for some repos; keep going.
  gh api "repos/$ORG/$repo/branches/$branch/protection" \
    > "$OUTPUT_DIR/branch-protection/$repo.json" 2>/dev/null || true
done
# --- Step 5: CODEOWNERS files ---
# For every active repo, save its CODEOWNERS file if one exists.
echo "[5/10] Collecting CODEOWNERS files..."
while read -r repo_name; do
  # GitHub honors CODEOWNERS at the repo root or under .github/;
  # keep the first location that yields content.
  for owners_path in CODEOWNERS .github/CODEOWNERS; do
    owners_body=$(gh api "repos/$ORG/$repo_name/contents/$owners_path" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" 2>/dev/null || true)
    if [[ -n "$owners_body" ]]; then
      echo "$owners_body" > "$OUTPUT_DIR/codeowners/$repo_name-CODEOWNERS.txt"
      break
    fi
  done
done < "$ACTIVE_REPOS"
# --- Step 6: Policy files ---
# Policy templates live in EWA-Actions/other-templates; list their names
# first, then download each one verbatim.
echo "[6/10] Collecting policy files..."
# Substring filter: keeps any entry whose name contains ".policy".
# NOTE(review): presumably the files are named like "<x>.policy.yml" —
# confirm the naming convention. The redirect creates the list file even
# when the gh call fails, so the read loop below always has a file to read.
gh api "repos/$ORG/EWA-Actions/contents/other-templates" 2>/dev/null \
| python3 -c "import json,sys; data=json.load(sys.stdin); [print(d['name']) for d in data if '.policy' in d['name']]" \
> "$OUTPUT_DIR/policy-files/policy-file-list.txt" 2>/dev/null || true
# Fetch each listed policy file; "|| true" tolerates individual failures.
while read -r pf; do
# Defensive skip of blank lines (&& list failure is exempt under set -e).
[ -z "$pf" ] && continue
gh api "repos/$ORG/EWA-Actions/contents/other-templates/$pf" 2>/dev/null \
| python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
> "$OUTPUT_DIR/policy-files/$pf" 2>/dev/null || true
done < "$OUTPUT_DIR/policy-files/policy-file-list.txt"
# --- Step 7: Pre-commit configs ---
echo "[7/10] Collecting pre-commit configurations..."
# Centralized pre-commit templates published by EWA-Actions, one per stack.
pre_commit_templates=(
  pre-commit-config-python.yaml
  pre-commit-config-nodejs.yaml
  pre-commit-config-infrastructure.yaml
  pre-commit-config-serverless.yaml
  pre-commit-config-php.yaml
)
for template in "${pre_commit_templates[@]}"; do
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$template" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/pre-commit/$template" 2>/dev/null || true
done
# --- Step 8: Linter/formatter configs ---
echo "[8/10] Collecting linter and formatter configs..."
# Org-wide configs kept in EWA-Actions: ruff (Python lint) and the
# bulldozer auto-merge config.
for shared_cfg in ruff.toml bulldozer-general.yml; do
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$shared_cfg" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/linter-configs/$shared_cfg" 2>/dev/null || true
done
# Sample repo-specific configs (ESLint, Prettier, TypeScript).
for sample_repo in FINN-Web-App Core; do
  for lint_cfg in .eslintrc.json .prettierrc .prettierrc.json tsconfig.json; do
    gh api "repos/$ORG/$sample_repo/contents/$lint_cfg" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
      > "$OUTPUT_DIR/linter-configs/$sample_repo-$lint_cfg" 2>/dev/null || true
  done
done
# Go linting config lives in the banking-integrations repo.
gh api "repos/$ORG/banking-integrations/contents/.golangci.yml" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/linter-configs/banking-integrations-.golangci.yml" 2>/dev/null || true
# --- Step 9: Commit history samples ---
# Sample recent commit messages from representative repos so the AGENTS.md
# generator can learn the org's commit-message conventions.
echo "[9/10] Sampling recent commits..."
for repo in FINN-Web-App Core Statement-Service banking-integrations Serverless-Operations Infrastructure; do
  # per_page=20 already caps the sample at 20 commits. The previous
  # "| head -20" counted LINES, not commits, so multi-line commit messages
  # were truncated and later commits silently dropped — removed.
  gh api "repos/$ORG/$repo/commits?per_page=20" --jq '.[].commit.message' 2>/dev/null \
    > "$OUTPUT_DIR/commit-samples/$repo-commits.txt" || true
done
# --- Step 10: PR templates and test configs ---
echo "[10/10] Collecting PR templates and test configs..."
# Org-wide PR template from EWA-Actions.
gh api "repos/$ORG/EWA-Actions/contents/other-templates/pull_request_template.md" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/pr-templates/pull_request_template.md" 2>/dev/null || true
# Test-runner configs from key repos; only write files that actually exist.
for sample_repo in FINN-Web-App Core Statement-Service; do
  for runner_cfg in jest.config.js jest.config.ts vitest.config.ts pytest.ini setup.cfg pyproject.toml; do
    cfg_body=$(gh api "repos/$ORG/$sample_repo/contents/$runner_cfg" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" 2>/dev/null || true)
    if [[ -n "$cfg_body" ]]; then
      echo "$cfg_body" > "$OUTPUT_DIR/test-configs/$sample_repo-$runner_cfg"
    fi
  done
done
# --- Summary ---
# Report what was collected and how to use it.
echo ""
echo "=== Collection Complete ==="
echo "Output directory: $OUTPUT_DIR"
echo ""
echo "Contents:"
# tr strips wc's leading padding (BSD wc pads the count with spaces);
# this replaces the previous find|wc|xargs echo round-trip.
total_files=$(find "$OUTPUT_DIR" -type f | wc -l | tr -d '[:space:]')
echo " $total_files files collected"
echo ""
echo "Directory breakdown:"
for dir in "$OUTPUT_DIR"/*/; do
  dirname=$(basename "$dir")
  count=$(find "$dir" -type f | wc -l | tr -d '[:space:]')
  # Proper if/fi instead of "[ ... ] && echo": the && form leaves the loop
  # with a non-zero exit status when the last directory is empty, which is
  # fragile in a set -e script and pollutes the script's exit status.
  if [ "$count" -gt 0 ]; then
    echo " $dirname: $count files"
  fi
done
echo ""
echo "Next step: Feed the contents of $OUTPUT_DIR to the AGENTS.md generator prompt"
# NOTE(review): the ** in the suggested command requires "shopt -s globstar"
# in bash; without it the pattern matches only one directory level.
echo " Example: cat $OUTPUT_DIR/**/* | head -50000 > /tmp/finn-context-bundle.txt"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment