Created
February 23, 2026 01:31
-
-
Save jai/3241c07ca1eadc227af074c7067eda76 to your computer and use it in GitHub Desktop.
FINN AI Context Collection Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# collect-finn-context.sh — Collect all AI context, CI config, and repo metadata
# from the EWA-Services GitHub org for AGENTS.md generation.
#
# Prerequisites: gh CLI authenticated with access to EWA-Services org
# Output: /tmp/finn-context/ directory with all collected data
#
# Usage: bash /tmp/collect-finn-context.sh [MAX_REPOS] [OUTPUT_DIR]
#   MAX_REPOS   maximum number of repos to inventory (positional $1, default 100)
#   OUTPUT_DIR  destination directory for collected files (positional $2,
#               default /tmp/finn-context)
# NOTE: arguments are positional; the previous usage line advertised
# --repos/--output-dir flags that were never parsed.
set -euo pipefail

# --- Configuration ---
readonly ORG="EWA-Services"
MAX_REPOS="${1:-100}"
OUTPUT_DIR="${2:-/tmp/finn-context}"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
readonly MAX_REPOS OUTPUT_DIR TIMESTAMP

# One subdirectory per collection category (steps 1-10 below).
mkdir -p "$OUTPUT_DIR"/{ai-context-files,workflows,branch-protection,codeowners,policy-files,pre-commit,linter-configs,commit-samples,pr-templates,test-configs,repo-metadata}

echo "=== FINN Context Collection — $TIMESTAMP ==="
echo "Org: $ORG | Max repos: $MAX_REPOS | Output: $OUTPUT_DIR"
echo ""
# --- Step 1: Repo Inventory ---
echo "[1/10] Collecting repo inventory..."
gh repo list "$ORG" \
  --json name,primaryLanguage,updatedAt,isArchived,defaultBranchRef \
  --limit "$MAX_REPOS" \
  > "$OUTPUT_DIR/repo-metadata/all-repos.json"

# Filter active repos. The output dir is passed as argv[1] instead of being
# interpolated into the Python source: a path containing quotes or other
# shell-significant characters would otherwise corrupt (or inject into) the
# embedded program. The quoted heredoc delimiter keeps the Python literal.
python3 - "$OUTPUT_DIR" <<'PYEOF'
import json
import sys
from collections import Counter

out = sys.argv[1]

with open(f'{out}/repo-metadata/all-repos.json') as f:
    repos = json.load(f)

active = [r for r in repos if not r.get('isArchived', False)]
with open(f'{out}/repo-metadata/active-repos.json', 'w') as f:
    json.dump(active, f, indent=2)

# Write just names for iteration by later steps.
with open(f'{out}/repo-metadata/active-repo-names.txt', 'w') as f:
    f.write('\n'.join(r['name'] for r in active))

# Language summary: histogram of primary languages, most common first.
langs = Counter(
    r['primaryLanguage']['name'] if r.get('primaryLanguage') else 'None'
    for r in active
)
with open(f'{out}/repo-metadata/language-summary.json', 'w') as f:
    json.dump(dict(langs.most_common()), f, indent=2)

print(f' Found {len(active)} active repos, {len(repos)-len(active)} archived')
PYEOF

ACTIVE_REPOS="$OUTPUT_DIR/repo-metadata/active-repo-names.txt"
# --- Step 2: Search for existing AI context files ---
echo "[2/10] Searching for existing AI context files..."
# Code-search each known AI-context filename; slashes in paths are flattened
# to dashes so the result files have flat names.
for ctx in AGENTS.md CLAUDE.md .cursorrules copilot-instructions.md .github/copilot-instructions.md; do
  echo " Searching for $ctx..."
  gh search code "filename:$ctx" --owner "$ORG" --json repository,path 2>/dev/null \
    > "$OUTPUT_DIR/ai-context-files/search-${ctx//\//-}.json" || true
done

# Also check EWA-Actions (the centralized source)
for doc in AGENTS.md CLAUDE.md; do
  echo " Fetching EWA-Actions/$doc..."
  gh api "repos/$ORG/EWA-Actions/contents/$doc" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/ai-context-files/EWA-Actions-$doc" 2>/dev/null || true
done

# Check agent-resources repo: README plus the list of skill directories.
echo " Fetching agent-resources..."
gh api "repos/$ORG/agent-resources/contents/README.md" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/ai-context-files/agent-resources-README.md" 2>/dev/null || true
gh api "repos/$ORG/agent-resources/contents/skills-src" 2>/dev/null \
  | python3 -c "import json,sys; entries=json.load(sys.stdin); [print(e['name']) for e in entries if e['type']=='dir']" \
  > "$OUTPUT_DIR/ai-context-files/agent-resources-skills-list.txt" 2>/dev/null || true
# --- Step 3: Collect workflow files from key repos ---
echo "[3/10] Collecting CI/CD workflow info..."
# Get the centralized workflow templates
gh api "repos/$ORG/EWA-Actions/contents/workflow-templates" 2>/dev/null \
  | python3 -c "import json,sys; entries=json.load(sys.stdin); [print(e['name']) for e in entries]" \
  > "$OUTPUT_DIR/workflows/centralized-workflow-templates.txt" 2>/dev/null || true

# Read key workflow files from EWA-Actions
for tmpl in repository-code-review.yaml repository-semantic-pr.yaml repository-pr-metadata-gate.yaml repository-release.yaml; do
  echo " Fetching $tmpl..."
  gh api "repos/$ORG/EWA-Actions/contents/workflow-templates/$tmpl" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/workflows/$tmpl" 2>/dev/null || true
done

# Get sync config (which repos get which workflows)
for sync_cfg in sync-workflow-files.yml sync-other-files.yml; do
  echo " Fetching $sync_cfg..."
  gh api "repos/$ORG/EWA-Actions/contents/.github/$sync_cfg" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/workflows/$sync_cfg" 2>/dev/null || true
done

# Per-repo workflow listing (sample top 15 recently updated repos).
# Process substitution keeps the loop in the current shell.
echo " Listing workflows per repo..."
while read -r repo; do
  gh api "repos/$ORG/$repo/contents/.github/workflows" 2>/dev/null \
    | python3 -c "import json,sys; entries=json.load(sys.stdin); [print(e['name']) for e in entries]" \
    > "$OUTPUT_DIR/workflows/repo-$repo-workflows.txt" 2>/dev/null || true
done < <(head -15 "$ACTIVE_REPOS")
# --- Step 4: Branch protection rules ---
echo "[4/10] Collecting branch protection rules..."
# The protection endpoint is branch-specific. Resolve each repo's actual
# default branch instead of hard-coding "main", so repos whose default is
# master/develop/etc. are not silently skipped; fall back to "main" if the
# lookup fails.
while read -r repo; do
  [ -z "$repo" ] && continue
  branch=$(gh api "repos/$ORG/$repo" --jq '.default_branch' 2>/dev/null || echo "main")
  gh api "repos/$ORG/$repo/branches/$branch/protection" 2>/dev/null \
    > "$OUTPUT_DIR/branch-protection/$repo.json" || true
done < "$ACTIVE_REPOS"
# --- Step 5: CODEOWNERS files ---
echo "[5/10] Collecting CODEOWNERS files..."
while read -r repo; do
  # Check both root and .github/ locations; keep the first one found.
  for location in CODEOWNERS .github/CODEOWNERS; do
    content=$(gh api "repos/$ORG/$repo/contents/$location" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" 2>/dev/null || true)
    if [ -n "$content" ]; then
      # printf (not echo) so fetched content beginning with "-n"/"-e" is
      # written verbatim rather than being eaten as echo options.
      printf '%s\n' "$content" > "$OUTPUT_DIR/codeowners/$repo-CODEOWNERS.txt"
      break
    fi
  done
done < "$ACTIVE_REPOS"
# --- Step 6: Policy files ---
echo "[6/10] Collecting policy files..."
# List every template whose name contains ".policy", then fetch each one.
gh api "repos/$ORG/EWA-Actions/contents/other-templates" 2>/dev/null \
  | python3 -c "import json,sys; entries=json.load(sys.stdin); [print(e['name']) for e in entries if '.policy' in e['name']]" \
  > "$OUTPUT_DIR/policy-files/policy-file-list.txt" 2>/dev/null || true

while read -r policy; do
  [ -z "$policy" ] && continue
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$policy" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/policy-files/$policy" 2>/dev/null || true
done < "$OUTPUT_DIR/policy-files/policy-file-list.txt"
# --- Step 7: Pre-commit configs ---
echo "[7/10] Collecting pre-commit configurations..."
# Centralized templates: one pre-commit config per technology stack.
for stack in python nodejs infrastructure serverless php; do
  pcf="pre-commit-config-$stack.yaml"
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$pcf" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/pre-commit/$pcf" 2>/dev/null || true
done
# --- Step 8: Linter/formatter configs ---
echo "[8/10] Collecting linter and formatter configs..."
# Org-wide shared configs from EWA-Actions: ruff (Python) and bulldozer.
for shared_cfg in ruff.toml bulldozer-general.yml; do
  gh api "repos/$ORG/EWA-Actions/contents/other-templates/$shared_cfg" 2>/dev/null \
    | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
    > "$OUTPUT_DIR/linter-configs/$shared_cfg" 2>/dev/null || true
done

# Sample repo-specific configs (ESLint, Prettier, etc.)
for sample_repo in FINN-Web-App Core; do
  for cfg_name in .eslintrc.json .prettierrc .prettierrc.json tsconfig.json; do
    gh api "repos/$ORG/$sample_repo/contents/$cfg_name" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
      > "$OUTPUT_DIR/linter-configs/$sample_repo-$cfg_name" 2>/dev/null || true
  done
done

# Go linting config
gh api "repos/$ORG/banking-integrations/contents/.golangci.yml" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/linter-configs/banking-integrations-.golangci.yml" 2>/dev/null || true
# --- Step 9: Commit history samples ---
echo "[9/10] Sampling recent commits..."
# per_page=20 already caps the sample at 20 commits. The previous
# `head -20` counted output LINES, so multi-line commit bodies caused
# later commits to be silently truncated from the sample; it is dropped.
for repo in FINN-Web-App Core Statement-Service banking-integrations Serverless-Operations Infrastructure; do
  gh api "repos/$ORG/$repo/commits?per_page=20" --jq '.[].commit.message' 2>/dev/null \
    > "$OUTPUT_DIR/commit-samples/$repo-commits.txt" 2>/dev/null || true
done
# --- Step 10: PR templates and test configs ---
echo "[10/10] Collecting PR templates and test configs..."
# PR template
gh api "repos/$ORG/EWA-Actions/contents/other-templates/pull_request_template.md" 2>/dev/null \
  | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" \
  > "$OUTPUT_DIR/pr-templates/pull_request_template.md" 2>/dev/null || true

# Test configs from key repos
for repo in FINN-Web-App Core Statement-Service; do
  for testcfg in jest.config.js jest.config.ts vitest.config.ts pytest.ini setup.cfg pyproject.toml; do
    content=$(gh api "repos/$ORG/$repo/contents/$testcfg" 2>/dev/null \
      | python3 -c "import json,sys,base64; d=json.load(sys.stdin); print(base64.b64decode(d.get('content','')).decode())" 2>/dev/null || true)
    if [ -n "$content" ]; then
      # printf (not echo) so config content beginning with "-n"/"-e" is
      # written verbatim rather than being eaten as echo options.
      printf '%s\n' "$content" > "$OUTPUT_DIR/test-configs/$repo-$testcfg"
    fi
  done
done
# --- Summary ---
echo ""
echo "=== Collection Complete ==="
echo "Output directory: $OUTPUT_DIR"
echo ""
echo "Contents:"
# $((...)) normalizes any leading padding some wc implementations emit.
total_files=$(find "$OUTPUT_DIR" -type f | wc -l)
echo " $((total_files)) files collected"
echo ""
echo "Directory breakdown:"
for subdir in "$OUTPUT_DIR"/*/; do
  label=$(basename "$subdir")
  count=$(find "$subdir" -type f | wc -l)
  [ "$count" -gt 0 ] && echo " $label: $count files"
done
echo ""
echo "Next step: Feed the contents of $OUTPUT_DIR to the AGENTS.md generator prompt"
echo " Example: cat $OUTPUT_DIR/**/* | head -50000 > /tmp/finn-context-bundle.txt"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment