Last active
April 6, 2026 20:26
-
-
Save StudioEtrange/7cec41a83ee3a731a203badd03f94b9c to your computer and use it in GitHub Desktop.
Analyse forks of a GitHub project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # analyse-github-fork.sh | |
| # Analyse forks of a GitHub project | |
| # https://gist.github.com/StudioEtrange/7cec41a83ee3a731a203badd03f94b9c | |
| # Author: StudioEtrange https://github.com/StudioEtrange | |
| # License: MIT | |
# Strict mode: stop at the first failing command and treat any use of an
# unset variable as an error.
set -o errexit -o nounset
# Print the help text on stdout.
# The here-document delimiter is deliberately unquoted so that $0 expands to
# the name the script was invoked with.
usage() {
  cat <<EOF
Analyse forks of a GitHub project

Usage:
  $0 owner/repo

Help:
  $0 -h|--help

Example:
  Use a GitHub token to analyse forks
    GITHUB_TOKEN="ghp_xxx" $0 owner/repo
  Look only for active forks that are ahead of the main repository
    GITHUB_TOKEN="ghp_xxx" AHEAD_ONLY=1 PARALLEL=8 $0 owner/repo
  Analyse only the first 2 forks returned by the GitHub API
    LIMIT=2 $0 owner/repo

Output:
  fork     ahead  behind  stars  last_commit_date      pushed_at             url
  foo/bar  12     3       5      2026-03-01T12:00:00Z  2026-03-02T10:00:00Z  https://github.com/foo/bar

Environment variables:
  GITHUB_TOKEN : optional GitHub token
  PARALLEL     : number of workers (default 4)
  LIMIT        : max number of forks analysed after GitHub API pagination and our deduplication, before final sorting (default 0 = all)
  AHEAD_ONLY   : set to 1 to analyse only forks which have code ahead (default 0 = all forks)

Requirements:
  bash, curl, jq, xargs, column

Notes:
  - "ahead" = comparison from upstream default branch to the fork's default branch
  - "last_commit_date" = date of the latest commit on the fork's default branch
  - "pushed_at" = last push date reported by GitHub for the fork repository
  - With many forks, a GitHub token is strongly recommended
  - The final result is sorted by ahead (desc), behind (asc), stars (desc), then pushed_at (desc)

Author:
  StudioEtrange (c) 2025
EOF
}
# --- Command line -----------------------------------------------------------
# Exactly one argument is expected: either a help flag or "owner/repo".
if [ "$#" -ne 1 ]; then
  usage >&2
  exit 1
fi
case "$1" in
  -h|--help) usage; exit 0 ;;
esac

UPSTREAM="$1"
# Reject anything that is not of the form "owner/repo" before it is spliced
# into API URLs.
case "$UPSTREAM" in
  */*/* | /* | */)
    echo "ERROR: expected owner/repo, got '${UPSTREAM}'" >&2
    exit 1
    ;;
  */*) ;;
  *)
    echo "ERROR: expected owner/repo, got '${UPSTREAM}'" >&2
    exit 1
    ;;
esac
OWNER="${UPSTREAM%%/*}"

# --- Configuration (overridable through the environment) --------------------
API_BASE="https://api.github.com"
PARALLEL="${PARALLEL:-4}"
LIMIT="${LIMIT:-0}"
AHEAD_ONLY="${AHEAD_ONLY:-0}"

# PARALLEL and LIMIT are later used as integers (xargs -P, head -n, [ -gt ]).
case "$PARALLEL" in
  '' | *[!0-9]*)
    echo "ERROR: PARALLEL must be a non-negative integer (got '${PARALLEL}')" >&2
    exit 1
    ;;
esac
case "$LIMIT" in
  '' | *[!0-9]*)
    echo "ERROR: LIMIT must be a non-negative integer (got '${LIMIT}')" >&2
    exit 1
    ;;
esac

# Fail early if an external tool used by the script is missing.
for required_tool in curl jq xargs column; do
  command -v "$required_tool" >/dev/null 2>&1 || {
    echo "ERROR: required command not found: ${required_tool}" >&2
    exit 1
  }
done
unset required_tool

# --- Work directory ---------------------------------------------------------
TMPDIR_ROOT="${TMPDIR:-/tmp}"
WORKDIR="$(mktemp -d "${TMPDIR_ROOT%/}/forks.XXXXXX")"

# Remove the work directory on every exit path.
cleanup() {
  rm -rf -- "${WORKDIR:?}"
}
trap cleanup EXIT
# A signal handler that only cleans up would let the script keep running
# without its work directory; exit instead, which in turn fires the EXIT trap.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# --- HTTP headers -----------------------------------------------------------
AUTH_HEADER=""
if [ -n "${GITHUB_TOKEN:-}" ]; then
  AUTH_HEADER="Authorization: Bearer ${GITHUB_TOKEN}"
fi
accept_header="Accept: application/vnd.github+json"
api_version_header="X-GitHub-Api-Version: 2022-11-28"
# Print delimiter-separated text read from stdin as an aligned table.
# Input:
#   the text to format, received on stdin (pipe)
# Arguments:
#   $1 - optional string of space-separated options:
#          ALIGN_RIGHT       : align the text of every column to the right
#          CELL_DELIMITER c  : print c between the columns of the table
#          SEPARATOR c       : character separating the columns in the input
#                              text (default: TAB)
# Requires: sed, column (and rev, cut when ALIGN_RIGHT is used)
# Samples:
#   printf 'head_1\thead_2\nval_1\tval_2\n' | __format_table "ALIGN_RIGHT CELL_DELIMITER |"
#   printf 'head_1|head_2|head_3\nval_1||val_3\n' | __format_table "SEPARATOR |"
__format_table() {
  local __str
  __str=$(</dev/stdin)
  local __opt="${1:-}"
  local __align_right=""
  local __cell_delim=""
  local __separator=$'\t'
  local __pending=""
  local __o
  local -a __words=()

  # read -a splits the option string on whitespace without glob-expanding
  # tokens such as "*".
  read -r -a __words <<<"$__opt" || true
  for __o in ${__words[@]+"${__words[@]}"}; do
    # A token following CELL_DELIMITER / SEPARATOR is that option's value and
    # must not be interpreted as a keyword itself.
    case "$__pending" in
      CELL_DELIMITER)
        __cell_delim="$__o"
        __pending=""
        continue
        ;;
      SEPARATOR)
        __separator="$__o"
        __pending=""
        continue
        ;;
    esac
    case "$__o" in
      ALIGN_RIGHT) __align_right="1" ;;
      CELL_DELIMITER | SEPARATOR) __pending="$__o" ;;
    esac
  done

  # Escape separator and delimiter so they are taken literally by sed, both
  # as a pattern (__sep_re) and as replacement text (__sep_out, __delim_out).
  local __sep_re __sep_out __delim_out
  __sep_re=$(printf '%s' "$__separator" | sed -e 's|[][\\/.*^$]|\\&|g')
  __sep_out=$(printf '%s' "$__separator" | sed -e 's|[\\/&]|\\&|g')
  __delim_out=$(printf '%s' "$__cell_delim" | sed -e 's|[\\/&]|\\&|g')

  # column mishandles empty cells, so a blank is inserted between two
  # adjacent separators. The expression is applied twice because a single
  # pass cannot fix overlapping pairs (three or more separators in a row).
  local __pad="s/${__sep_re}${__sep_re}/${__sep_out} ${__sep_out}/g"

  printf '%s\n' "$__str" |
    sed -e "$__pad" -e "$__pad" |
    if [ -n "$__cell_delim" ]; then
      # Turn the visible delimiter into a column of its own.
      sed -e "s/${__sep_re}/${__sep_out}${__delim_out}${__sep_out}/g"
    else
      cat
    fi |
    if [ "$__align_right" = "1" ]; then
      # Right alignment = reverse each line, left-align, reverse back.
      # Entries in the leftmost column must have equal width for this to
      # work, so a dummy "FOO" column is inserted and cut off afterwards.
      # https://stackoverflow.com/a/18022947/5027535
      sed -e "s/^/FOO${__sep_out}/" |
        rev |
        column -s "$__separator" -t |
        rev |
        cut -c4-
    else
      column -s "$__separator" -t
    fi
  return 0
}
# Perform a GET request against the GitHub API.
# Globals:
#   accept_header, api_version_header (read) - headers sent with every request
#   AUTH_HEADER (read)                       - sent only when non-empty
# Arguments:
#   $1 - URL to request
#   $2 - file receiving the response body
#   $3 - file receiving the response headers
#   $4 - file receiving the HTTP status code
# Outputs:
#   Nothing on stdout. The status file contains "000" when curl itself
#   failed, so callers can treat it like any other non-200 status.
curl_api() {
  local url="$1"
  local body_file="$2"
  local header_file="$3"
  local code_file="$4"
  local http_code=""
  local -a curl_args=(
    -sS -L
    -H "$accept_header"
    -H "$api_version_header"
  )

  if [ -n "${AUTH_HEADER:-}" ]; then
    curl_args+=(-H "$AUTH_HEADER")
  fi
  curl_args+=(-D "$header_file" -o "$body_file" -w '%{http_code}')

  # A transport error (DNS, TLS, interrupted transfer...) must neither abort
  # the caller before the status file exists nor leave a misleading status
  # behind for a truncated body.
  if ! http_code="$(curl "${curl_args[@]}" "$url")"; then
    http_code="000"
  fi
  [ -n "$http_code" ] || http_code="000"

  printf '%s' "$http_code" > "$code_file"
}
# Apply a jq filter to a JSON file and print the raw (unquoted) result.
# Arguments:
#   $1 - path of the JSON file
#   $2 - jq filter
json_get() {
  local json_file="$1"
  local filter="$2"
  jq -r "$filter" "$json_file"
}
# Extract the URL of the rel="next" entry from the Link header of a saved
# HTTP response.
# Arguments:
#   $1 - file containing the response headers (as written by curl -D)
# Outputs:
#   The URL on stdout, or nothing when there is no next page.
get_next_link() {
  # The header name is matched case-insensitively and the whitespace after
  # the colon is optional. When the file holds several header blocks (curl -L
  # appends one per redirect hop) only the last "next" URL is printed, so the
  # result is always at most one line.
  tr -d '\r' < "$1" |
    awk '
      tolower($0) ~ /^link:/ {
        sub(/^[^:]*:[ \t]*/, "")
        n = split($0, parts, ",")
        for (i = 1; i <= n; i++) {
          if (parts[i] ~ /rel="next"/) {
            url = parts[i]
            sub(/^[^<]*</, "", url)
            sub(/>.*$/, "", url)
            next_url = url
          }
        }
      }
      END { if (next_url != "") print next_url }
    '
}
# Print the default branch of the upstream repository.
# Globals:
#   WORKDIR, API_BASE, UPSTREAM (read)
# Outputs:
#   The branch name on stdout; an error message on stderr on failure.
# Exits:
#   1 when the repository cannot be read or reports no default branch.
get_upstream_default_branch() {
  local body="$WORKDIR/upstream.json"
  local hdr="$WORKDIR/upstream.hdr"
  local code="$WORKDIR/upstream.code"
  local status branch

  curl_api "${API_BASE}/repos/${UPSTREAM}" "$body" "$hdr" "$code"
  status="$(cat "$code")"
  if [ "$status" != "200" ]; then
    echo "ERROR: cannot read ${UPSTREAM} (HTTP ${status})" >&2
    exit 1
  fi

  branch="$(json_get "$body" '.default_branch')"
  # jq -r prints the literal string "null" for a missing value; do not let it
  # be used as a branch name in later API calls.
  case "$branch" in
    '' | null)
      echo "ERROR: cannot read ${UPSTREAM}: no default branch reported" >&2
      exit 1
      ;;
  esac
  printf '%s\n' "$branch"
}
# Download the list of forks of the upstream repository, following the
# pagination links, and store one compact JSON object per fork.
# Globals:
#   WORKDIR, API_BASE, UPSTREAM (read)
# Arguments:
#   $1 - output file (JSON lines: full_name, default_branch, pushed_at,
#        stargazers_count), de-duplicated on full_name, first occurrence kept
# Exits:
#   1 with a message on stderr when a page cannot be fetched.
fetch_all_forks() {
  local out="$1"
  local body="$WORKDIR/page.json"
  local hdr="$WORKDIR/page.hdr"
  local code="$WORKDIR/page.code"
  local url status

  : > "$out"
  url="${API_BASE}/repos/${UPSTREAM}/forks?per_page=100"
  while [ -n "$url" ]; do
    curl_api "$url" "$body" "$hdr" "$code"
    status="$(cat "$code")"
    if [ "$status" != "200" ]; then
      echo "ERROR: cannot list forks of ${UPSTREAM} (HTTP ${status} for ${url})" >&2
      exit 1
    fi
    jq -c '.[] | {full_name, default_branch, pushed_at, stargazers_count}' "$body" >> "$out"
    # No next link (last page) must end the loop, not the script.
    url="$(get_next_link "$hdr" || true)"
  done

  # Every line starts with {"full_name":"<owner>/<repo>", so with '"' as the
  # field separator the repository name is field 4. Keep the first line seen
  # for each name.
  awk -F'"' '!seen[$4]++' "$out" > "$out.tmp" && mv "$out.tmp" "$out"
}
# Print the committer date of the latest commit on a branch of a repository.
# Globals:
#   WORKDIR, API_BASE (read)
# Arguments:
#   $1 - repository as owner/repo
#   $2 - branch name
#   $3 - suffix making the temporary file names unique for this call
# Outputs:
#   The date on stdout; "UNKNOWN" when the response carries no date;
#   "ERROR" when the request fails or the response cannot be parsed.
get_last_commit() {
  local repo="$1"
  local branch="$2"
  local suffix="$3"
  local body="$WORKDIR/commit.${suffix}.json"
  local hdr="$WORKDIR/commit.${suffix}.hdr"
  local code="$WORKDIR/commit.${suffix}.code"
  local commit_date

  curl_api "${API_BASE}/repos/${repo}/commits?sha=${branch}&per_page=1" \
    "$body" "$hdr" "$code"
  if [ "$(cat "$code")" != "200" ]; then
    echo "ERROR"
    return
  fi

  # An unparsable body would otherwise produce an empty table cell.
  if commit_date="$(jq -r '.[0].commit.committer.date // "UNKNOWN"' "$body")" \
    && [ -n "$commit_date" ]; then
    printf '%s\n' "$commit_date"
  else
    echo "ERROR"
  fi
}
# Compare the default branch of a fork with the upstream default branch.
# Globals:
#   WORKDIR, API_BASE, UPSTREAM, OWNER (read)
# Arguments:
#   $1 - fork as owner/repo
#   $2 - upstream branch
#   $3 - fork branch
#   $4 - suffix making the temporary file names unique for this call
# Outputs:
#   "<ahead>TAB<behind>" on stdout; "NA TAB NA" on HTTP 404;
#   "ERROR TAB ERROR" on any other failure.
get_ahead_behind() {
  local repo="$1"
  local upstream_branch="$2"
  local fork_branch="$3"
  local suffix="$4"
  local fork_owner="${repo%%/*}"
  local body="$WORKDIR/compare.${suffix}.json"
  local hdr="$WORKDIR/compare.${suffix}.hdr"
  local code="$WORKDIR/compare.${suffix}.code"
  local counts

  curl_api \
    "${API_BASE}/repos/${UPSTREAM}/compare/${OWNER}:${upstream_branch}...${fork_owner}:${fork_branch}" \
    "$body" "$hdr" "$code"

  case "$(cat "$code")" in
    200)
      # An unparsable body would otherwise leave both columns empty.
      if counts="$(jq -r '[.ahead_by // 0, .behind_by // 0] | @tsv' "$body")" \
        && [ -n "$counts" ]; then
        printf '%s\n' "$counts"
      else
        printf 'ERROR\tERROR\n'
      fi
      ;;
    404) printf 'NA\tNA\n' ;;
    *) printf 'ERROR\tERROR\n' ;;
  esac
}
# Analyse one fork and print its result row.
# Globals:
#   WORKDIR (read), AHEAD_ONLY (read)
# Arguments:
#   $1 - one JSON line describing the fork (full_name, default_branch,
#        pushed_at, stargazers_count)
#   $2 - upstream default branch
# Outputs:
#   One TAB-separated row: fork, ahead, behind, stars, last_commit_date,
#   pushed_at, url. Nothing when the line carries no repository name, or when
#   AHEAD_ONLY is 1 and the fork is not ahead.
process_one() {
  local line="$1"
  local upstream_branch="$2"
  local fields repo branch ahead behind last suffix pushed_at html_url stars

  # Extract everything with a single jq process. Fields are joined with the
  # ASCII unit separator: unlike TAB it is not IFS whitespace, so read keeps
  # empty fields in place.
  fields="$(printf '%s\n' "$line" | jq -r '
    [ .full_name // "",
      .default_branch // "main",
      .pushed_at // "UNKNOWN",
      .stargazers_count // 0
    ] | map(tostring) | join("\u001f")')" || return 0
  IFS=$'\037' read -r repo branch pushed_at stars <<<"$fields"

  [ -n "$repo" ] || return 0
  html_url="https://github.com/${repo}"

  # Unique name for the temporary files of this fork (workers run in parallel).
  suffix="${$}.$RANDOM"

  IFS=$'\t' read -r ahead behind \
    <<<"$(get_ahead_behind "$repo" "$upstream_branch" "$branch" "$suffix")"

  if [ "${AHEAD_ONLY:-0}" = "1" ]; then
    case "$ahead" in
      0 | NA | ERROR | '')
        rm -f "$WORKDIR/compare.${suffix}".*
        return 0
        ;;
    esac
  fi

  last="$(get_last_commit "$repo" "$branch" "$suffix")"
  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$repo" "$ahead" "$behind" "$stars" "$last" "$pushed_at" "$html_url"

  # The responses are no longer needed; without this, several files per fork
  # pile up in WORKDIR until the script ends.
  rm -f "$WORKDIR/compare.${suffix}".* "$WORKDIR/commit.${suffix}".*
}
# Fetch the forks of the upstream repository, analyse them in parallel and
# print the result as TAB-separated rows preceded by a header line.
# Globals:
#   WORKDIR (read), LIMIT (read, default 0 = all), PARALLEL (read, default 4),
#   plus every variable and function exported below for the workers.
# Outputs:
#   Header + one row per fork on stdout, sorted by ahead (desc), behind (asc),
#   stars (desc), then pushed_at (desc).
# Exits:
#   1 when LIMIT or PARALLEL is not a non-negative integer.
main() {
  local upstream_branch forks_file
  local limit="${LIMIT:-0}"
  local parallel="${PARALLEL:-4}"

  # A non-numeric LIMIT would make the [ -gt ] test fail and silently select
  # every fork; reject it instead.
  case "$limit" in
    '' | *[!0-9]*)
      echo "ERROR: LIMIT must be a non-negative integer (got '${limit}')" >&2
      exit 1
      ;;
  esac
  case "$parallel" in
    '' | *[!0-9]*)
      echo "ERROR: PARALLEL must be a non-negative integer (got '${parallel}')" >&2
      exit 1
      ;;
  esac

  upstream_branch="$(get_upstream_default_branch)"
  forks_file="$WORKDIR/forks.jsonl"
  fetch_all_forks "$forks_file"

  printf 'fork\tahead\tbehind\tstars\tlast_commit_date\tpushed_at\turl\n'

  # Nothing to analyse; also avoids xargs implementations that run the
  # command once on empty input.
  [ -s "$forks_file" ] || return 0

  # The workers are separate bash processes started by xargs: everything they
  # need has to be exported.
  export WORKDIR API_BASE OWNER UPSTREAM AUTH_HEADER AHEAD_ONLY accept_header api_version_header
  export -f curl_api json_get get_next_link get_last_commit get_ahead_behind process_one

  # Each JSON line becomes one NUL-terminated item. xargs appends the item
  # after the fixed arguments, so inside the worker $1 is the upstream branch
  # and $2 the JSON line. This avoids -I{}, whose substituted argument is
  # size-limited on some xargs implementations.
  if [ "$limit" -gt 0 ]; then
    head -n "$limit" "$forks_file"
  else
    cat "$forks_file"
  fi |
    tr '\n' '\0' |
    xargs -0 -n 1 -P "$parallel" bash -c 'process_one "$2" "$1"' _ "$upstream_branch" |
    sort -t $'\t' -k2,2nr -k3,3n -k4,4nr -k6,6r
}
printf 'Analyse forks of https://github.com/%s\n' "$UPSTREAM"
printf '%s\n' "--===-------------------------------------------===--"
# Run main in the current shell and format its output afterwards: in a
# "main | __format_table" pipeline the exit status of main is discarded, so
# the script would report success even when the GitHub API calls failed.
main > "$WORKDIR/result.tsv"
__format_table < "$WORKDIR/result.tsv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment