Skip to content

Instantly share code, notes, and snippets.

@StudioEtrange
Last active April 6, 2026 20:26
Show Gist options
  • Select an option

  • Save StudioEtrange/7cec41a83ee3a731a203badd03f94b9c to your computer and use it in GitHub Desktop.

Select an option

Save StudioEtrange/7cec41a83ee3a731a203badd03f94b9c to your computer and use it in GitHub Desktop.
Analyse forks of a GitHub project
#!/usr/bin/env bash
# analyse-github-fork.sh
# Analyse forks of a GitHub project
# https://gist.github.com/StudioEtrange/7cec41a83ee3a731a203badd03f94b9c
# Author: StudioEtrange https://github.com/StudioEtrange
# License: MIT
#
# External tools invoked by this script: curl, jq, xargs, sort, awk, sed, head,
# column (plus rev and cut when a right-aligned table is requested).
# Configuration is read from the environment: GITHUB_TOKEN, PARALLEL, LIMIT,
# AHEAD_ONLY and TMPDIR.
#
# Strict mode: -e aborts on an unhandled command failure, -u treats the use of
# an unset variable as an error. pipefail is NOT enabled, so a pipeline reports
# the status of its last stage only.
set -eu
# Print the help text to stdout: synopsis, examples, output format,
# environment variables and notes.
# Arguments: none. The program name shown in the text is taken from $0.
# The caller decides the exit status; this function only prints.
usage() {
  local prog="$0"
  cat <<EOF
Analyse forks of a GitHub project
Usage:
${prog} owner/repo
Help:
${prog} -h|--help
Example:
Use a GitHub token to analyse forks
GITHUB_TOKEN="ghp_xxx" ${prog} owner/repo
Look only for active forks that are ahead of the main repository
GITHUB_TOKEN="ghp_xxx" AHEAD_ONLY=1 PARALLEL=8 ${prog} owner/repo
Analyse only the first 2 forks returned by the GitHub API
LIMIT=2 ${prog} owner/repo
Output:
fork ahead behind stars last_commit_date pushed_at url
foo/bar 12 3 5 2026-03-01T12:00:00Z 2026-03-02T10:00:00Z https://github.com/foo/bar
Environment variables:
GITHUB_TOKEN : optional GitHub token
PARALLEL : number of workers (default 4)
LIMIT : max number of forks analysed after GitHub API pagination and our deduplication, before final sorting (default 0 = all)
AHEAD_ONLY : analyse only forks which have code ahead
Notes:
- "ahead" = comparison from upstream default branch to the fork's default branch
- "last_commit_date" = date of the latest commit on the fork's default branch
- "pushed_at" = last push date reported by GitHub for the fork repository
- With many forks, a GitHub token is strongly recommended
- The final result is sorted by ahead (desc), behind (asc), stars (desc), then pushed_at (desc)
Author:
StudioEtrange (c) 2025
EOF
}
# ---------------------------------------------------------------------------
# Command line and environment handling.
# Exactly one argument is accepted: a help flag or the upstream "owner/repo".
# ---------------------------------------------------------------------------
if [ "$#" -ne 1 ]; then
  usage
  exit 1
fi
case "$1" in
  -h|--help)
    usage
    exit 0
    ;;
esac

UPSTREAM="$1"
# UPSTREAM is interpolated into API URLs, so insist on the "owner/repo" shape
# (one slash, both sides non-empty, only characters valid in GitHub names).
if ! [[ "$UPSTREAM" =~ ^[A-Za-z0-9._-]+/[A-Za-z0-9._-]+$ ]]; then
  printf 'ERROR: invalid repository "%s" (expected owner/repo)\n' "$UPSTREAM" >&2
  exit 1
fi
OWNER="${UPSTREAM%%/*}"
API_BASE="https://api.github.com"

PARALLEL="${PARALLEL:-4}"
LIMIT="${LIMIT:-0}"
AHEAD_ONLY="${AHEAD_ONLY:-0}"
# PARALLEL feeds "xargs -P" and LIMIT feeds "[ -gt ]" / "head -n": both must be
# plain non-negative integers, otherwise those commands fail in confusing ways.
for _name in PARALLEL LIMIT; do
  if ! [[ "${!_name}" =~ ^[0-9]+$ ]]; then
    printf 'ERROR: %s must be a non-negative integer (got "%s")\n' "$_name" "${!_name}" >&2
    exit 1
  fi
done
unset _name

# Fail early, with a clear message, when a required external tool is missing.
for _tool in curl jq xargs column; do
  if ! command -v "$_tool" >/dev/null 2>&1; then
    printf 'ERROR: required command not found: %s\n' "$_tool" >&2
    exit 1
  fi
done
unset _tool

# Private scratch directory for API responses; removed when the script exits.
TMPDIR_ROOT="${TMPDIR:-/tmp}"
WORKDIR="$(mktemp -d "${TMPDIR_ROOT%/}/forks.XXXXXX")"
trap 'rm -rf -- "$WORKDIR"' EXIT
# A signal handler that only cleans up would let the script carry on without
# its work directory. Exit instead; the EXIT trap then performs the cleanup.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# HTTP headers sent with every API request. AUTH_HEADER stays empty when no
# token is supplied, in which case requests are sent unauthenticated.
AUTH_HEADER=""
if [ -n "${GITHUB_TOKEN:-}" ]; then
  AUTH_HEADER="Authorization: Bearer ${GITHUB_TOKEN}"
fi
accept_header="Accept: application/vnd.github+json"
api_version_header="X-GitHub-Api-Version: 2022-11-28"
# Render separator-delimited text read from stdin as an aligned table.
#
# Input:
#   stdin : rows to format, one per line, columns split on SEPARATOR
#   $1    : optional, whitespace-separated list of options
#             ALIGN_RIGHT         right-align the text of every column
#             CELL_DELIMITER <c>  print <c> between the columns
#             SEPARATOR <c>       column separator of the input (default: TAB)
# Output:
#   the aligned table on stdout, laid out by column(1)
# Notes:
#   - Options are split on whitespace (default IFS), so a whitespace character
#     cannot be passed as SEPARATOR through $1; TAB is already the default.
#   - Separator and delimiter are interpolated into sed expressions: avoid "/"
#     and characters that are special to sed.
# Samples:
#   printf 'head_1\thead_2\nval_1\tval_2\n' | __format_table "ALIGN_RIGHT CELL_DELIMITER |"
#   printf 'head_1|head_2|head_3\nval_1||val_3\n' | __format_table "SEPARATOR |"
__format_table() {
  local __str
  __str="$(cat)"
  local __opt="${1:-}"
  local __align_right=""
  local __cell_delim=""
  local __flag_cel_delim="OFF"
  local __separator=$'\t'
  local __flag_separator="OFF"
  local __o

  # Option parsing: a keyword arms a flag, the following word is its value.
  # shellcheck disable=SC2086 # word splitting of the option list is intended
  for __o in ${__opt}; do
    if [ "$__o" = "ALIGN_RIGHT" ]; then
      __align_right="1"
    fi
    if [ "$__flag_cel_delim" = "ON" ]; then
      __cell_delim="$__o"
      __flag_cel_delim="OFF"
    fi
    if [ "$__o" = "CELL_DELIMITER" ]; then
      __flag_cel_delim="ON"
    fi
    if [ "$__flag_separator" = "ON" ]; then
      __separator="$__o"
      __flag_separator="OFF"
    fi
    if [ "$__o" = "SEPARATOR" ]; then
      __flag_separator="ON"
    fi
  done

  # column(1) collapses empty cells, so put a blank between two adjacent
  # separators. The expression is applied twice to also catch overlapping runs
  # of three or more separators.
  local __pad_empty="s/${__separator}${__separator}/${__separator} ${__separator}/g"

  # printf rather than echo: input that is exactly "-n" or "-e" must be data.
  printf '%s\n' "${__str}" \
    | sed -e "${__pad_empty}" -e "${__pad_empty}" \
    | if [ -n "${__cell_delim}" ]; then
        # Insert the visible delimiter as an extra column between all columns.
        sed -e "s/${__separator}/${__separator}${__cell_delim}${__separator}/g"
      else
        cat
      fi \
    | if [ "${__align_right}" = "1" ]; then
        # Right alignment = reverse each line, left-align, reverse back.
        # The dummy FOO column gives every line a leftmost cell of equal width
        # and is cut away afterwards.
        # https://stackoverflow.com/a/18022947/5027535
        sed -e "s/^/FOO${__separator}/" \
          | rev \
          | column -s "${__separator}" -t \
          | rev \
          | cut -c4-
      else
        column -s "${__separator}" -t
      fi
  return 0
}
# Perform one GET request against the GitHub API and store the result in files.
#
# Globals (read): accept_header, api_version_header, AUTH_HEADER
# Arguments:
#   $1 - URL to request
#   $2 - file receiving the response body
#   $3 - file receiving the response headers
#   $4 - file receiving the HTTP status code
# Outputs:
#   nothing on stdout; curl's own error text (if any) goes to stderr.
# The status file is always written: "000" when no HTTP response was obtained
# (DNS failure, refused connection, ...), so callers only inspect that file.
curl_api() {
  local url="$1"
  local body_file="$2"
  local header_file="$3"
  local code_file="$4"
  local http_code
  local -a curl_args=(
    -sS -L
    -H "$accept_header"
    -H "$api_version_header"
  )

  # The Authorization header is only sent when a token was configured.
  if [ -n "$AUTH_HEADER" ]; then
    curl_args+=(-H "$AUTH_HEADER")
  fi
  curl_args+=(-D "$header_file" -o "$body_file" -w '%{http_code}')

  # Do not let "set -e" abort here on a transport error: the caller decides
  # what to do based on the recorded status code.
  http_code="$(curl "${curl_args[@]}" "$url")" || true
  printf '%s' "${http_code:-000}" > "$code_file"
}
# Apply a jq filter to a JSON file and print the raw (unquoted) result.
# Arguments:
#   $1 - path of the JSON file
#   $2 - jq filter expression
json_get() {
  local json_file="$1"
  local filter="$2"
  jq -r "$filter" "$json_file"
}
# Extract the URL of the next page from a saved HTTP header dump.
#
# Arguments:
#   $1 - header file written by curl -D
# Outputs:
#   the rel="next" URL of the Link header on stdout, or nothing when there is
#   no next page.
# Details:
#   - the header name is matched case-insensitively and the blank after the
#     colon is optional;
#   - with "curl -L" the dump may hold several response blocks (redirects):
#     only the last "next" link, i.e. the one of the final response, is kept.
get_next_link() {
  tr -d '\r' < "$1" \
    | grep -i '^link:' \
    | sed -e 's/^[^:]*:[[:space:]]*//' \
    | tr ',' '\n' \
    | sed -n 's/.*<\([^>]*\)>;[[:space:]]*rel="next".*/\1/p' \
    | tail -n 1
}
# Print the default branch name of the upstream repository.
#
# Globals (read): WORKDIR, API_BASE, UPSTREAM
# Outputs: the value of .default_branch on stdout
# Exits with status 1 (after a message on stderr) when the repository request
# does not answer with HTTP 200.
get_upstream_default_branch() {
  local prefix="$WORKDIR/upstream"

  curl_api "${API_BASE}/repos/${UPSTREAM}" \
    "${prefix}.json" "${prefix}.hdr" "${prefix}.code"

  case "$(cat "${prefix}.code")" in
    200) ;;
    *)
      echo "ERROR: cannot read ${UPSTREAM}" >&2
      exit 1
      ;;
  esac

  json_get "${prefix}.json" '.default_branch'
}
# Download the complete fork list of the upstream repository.
#
# Globals (read): WORKDIR, API_BASE, UPSTREAM
# Arguments:
#   $1 - output file; receives one compact JSON object per fork with the keys
#        full_name, default_branch, pushed_at and stargazers_count
# Behaviour:
#   - follows the pagination links until no "next" page is announced;
#   - keeps only the first entry seen for each full_name;
#   - exits with status 1 and a message on stderr as soon as a page does not
#     answer with HTTP 200 (for instance when the rate limit is exhausted).
fetch_all_forks() {
  local out="$1"
  local url status
  local body="$WORKDIR/page.json"
  local hdr="$WORKDIR/page.hdr"
  local code="$WORKDIR/page.code"

  : > "$out"
  url="${API_BASE}/repos/${UPSTREAM}/forks?per_page=100"
  while [ -n "$url" ]; do
    curl_api "$url" "$body" "$hdr" "$code"
    status="$(cat "$code" 2>/dev/null)" || status=""
    if [ "$status" != "200" ]; then
      printf 'ERROR: cannot list forks of %s (HTTP %s) from %s\n' \
        "$UPSTREAM" "${status:-unknown}" "$url" >&2
      exit 1
    fi
    jq -c '.[] | {full_name, default_branch, pushed_at, stargazers_count}' "$body" >> "$out"
    # An absent Link header simply means this was the last page.
    url="$(get_next_link "$hdr" || true)"
  done

  # Deduplicate on full_name. Every line starts with {"full_name":"<name>",
  # so splitting on double quotes puts the name in field 4.
  awk -F'"' '!seen[$4]++' "$out" > "$out.tmp" && mv "$out.tmp" "$out"
}
# Print the committer date of the newest commit on a branch of a repository.
#
# Globals (read): WORKDIR, API_BASE
# Arguments:
#   $1 - repository as owner/name
#   $2 - branch name
#   $3 - suffix making the scratch file names unique for this call
# Outputs:
#   the commit date, "UNKNOWN" when the response carries none, or "ERROR" when
#   the request does not answer with HTTP 200.
get_last_commit() {
  local repo="$1" branch="$2" suffix="$3"
  local stem="$WORKDIR/commit.${suffix}"

  curl_api "${API_BASE}/repos/${repo}/commits?sha=${branch}&per_page=1" \
    "${stem}.json" "${stem}.hdr" "${stem}.code"

  case "$(cat "${stem}.code")" in
    200) jq -r '.[0].commit.committer.date // "UNKNOWN"' "${stem}.json" ;;
    *) echo "ERROR" ;;
  esac
}
# Compare a fork branch with the upstream branch through the compare endpoint.
#
# Globals (read): WORKDIR, API_BASE, UPSTREAM, OWNER
# Arguments:
#   $1 - fork repository as owner/name
#   $2 - upstream branch (base of the comparison)
#   $3 - fork branch (head of the comparison)
#   $4 - suffix making the scratch file names unique for this call
# Outputs:
#   one line "<ahead>TAB<behind>"; "NA TAB NA" on HTTP 404 and
#   "ERROR TAB ERROR" on any other non-200 status.
get_ahead_behind() {
  local fork="$1" base_branch="$2" head_branch="$3" tag="$4"
  local head_owner="${fork%%/*}"
  local stem="$WORKDIR/compare.${tag}"
  local endpoint="${API_BASE}/repos/${UPSTREAM}/compare/${OWNER}:${base_branch}...${head_owner}:${head_branch}"
  local status

  curl_api "$endpoint" "${stem}.json" "${stem}.hdr" "${stem}.code"
  status="$(cat "${stem}.code")" || true

  if [ "$status" = "200" ]; then
    jq -r '[.ahead_by // 0, .behind_by // 0] | @tsv' "${stem}.json"
  elif [ "$status" = "404" ]; then
    printf 'NA\tNA\n'
  else
    printf 'ERROR\tERROR\n'
  fi
}
# Analyse a single fork and print its result row.
#
# Globals (read): AHEAD_ONLY
# Arguments:
#   $1 - one JSON object describing the fork (full_name, default_branch,
#        pushed_at, stargazers_count)
#   $2 - default branch of the upstream repository
# Outputs:
#   one TAB-separated line: fork, ahead, behind, stars, last commit date,
#   pushed_at, url. Nothing is printed when AHEAD_ONLY is "1" and the fork is
#   not ahead (ahead is 0, NA, ERROR or empty).
# Note: an empty fork name terminates the calling shell with status 0; the
# function is meant to run in its own worker shell.
process_one() {
  local fork_json="$1"
  local base_branch="$2"
  local fork_name fork_branch pushed stars ahead_by behind_by commit_date tag

  fork_name="$(jq -r '.full_name' <<<"$fork_json")"
  fork_branch="$(jq -r '.default_branch' <<<"$fork_json")"
  pushed="$(jq -r '.pushed_at // "UNKNOWN"' <<<"$fork_json")"
  stars="$(jq -r '.stargazers_count // 0' <<<"$fork_json")"

  if [ -z "$fork_name" ]; then
    exit 0
  fi
  # jq -r prints the literal string "null" for a missing default branch.
  if [ "$fork_branch" = "null" ]; then
    fork_branch="main"
  fi

  # Process id plus a random number keeps the scratch files of parallel
  # workers apart.
  tag="${$}.$RANDOM"

  IFS=$'\t' read -r ahead_by behind_by \
    <<<"$(get_ahead_behind "$fork_name" "$base_branch" "$fork_branch" "$tag")"

  if [ "$AHEAD_ONLY" = "1" ]; then
    if [ -z "$ahead_by" ] || [ "$ahead_by" = "0" ] \
      || [ "$ahead_by" = "NA" ] || [ "$ahead_by" = "ERROR" ]; then
      return 0
    fi
  fi

  commit_date="$(get_last_commit "$fork_name" "$fork_branch" "$tag")"
  printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \
    "$fork_name" "$ahead_by" "$behind_by" "$stars" "$commit_date" "$pushed" \
    "https://github.com/${fork_name}"
}
# Drive the whole analysis and print the result as TAB-separated rows.
#
# Globals (read): WORKDIR, LIMIT, PARALLEL and everything exported below
# Outputs:
#   a header row followed by one row per analysed fork, sorted by ahead
#   (descending), behind (ascending), stars (descending) and pushed_at
#   (descending).
# Steps: resolve the upstream default branch, download the fork list, then
# hand each fork to process_one in PARALLEL worker shells started by xargs.
main() {
  local upstream_branch forks_file
  local tab=$'\t'

  forks_file="$WORKDIR/forks.jsonl"
  upstream_branch="$(get_upstream_default_branch)"
  fetch_all_forks "$forks_file"

  printf 'fork\tahead\tbehind\tstars\tlast_commit_date\tpushed_at\turl\n'

  # The workers are fresh bash processes: they only see what is exported.
  export WORKDIR API_BASE OWNER UPSTREAM AUTH_HEADER AHEAD_ONLY accept_header api_version_header
  export -f curl_api json_get get_next_link get_last_commit get_ahead_behind process_one

  {
    # LIMIT > 0 keeps only the first LIMIT forks of the list.
    if [ "$LIMIT" -gt 0 ]; then
      head -n "$LIMIT" "$forks_file"
    else
      cat "$forks_file"
    fi
  } \
    | while IFS= read -r fork_json; do
        # NUL-terminate each JSON line so xargs passes it through verbatim.
        printf '%s\0' "$fork_json"
      done \
    | xargs -0 -I{} -P "$PARALLEL" bash -c 'process_one "$1" "$2"' _ "{}" "$upstream_branch" \
    | sort -t "$tab" -k2,2nr -k3,3n -k4,4nr -k6,6r
}
# Entry point: print a banner, then run the analysis and format its
# TAB-separated output as an aligned table.
echo "Analyse forks of https://github.com/$UPSTREAM"
echo "--===-------------------------------------------===--"
main | __format_table
# pipefail is not enabled, so the pipeline above reports the status of
# __format_table only. Propagate the status of main so that a failed analysis
# (unreadable upstream, failed fork listing) yields a non-zero exit status.
exit "${PIPESTATUS[0]}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment