Skip to content

Instantly share code, notes, and snippets.

@nate-double-u
Last active February 4, 2026 20:38
Show Gist options
  • Select an option

  • Save nate-double-u/b31ef0642091dbb465672fdaa15bf3e8 to your computer and use it in GitHub Desktop.

Select an option

Save nate-double-u/b31ef0642091dbb465672fdaa15bf3e8 to your computer and use it in GitHub Desktop.
Find stale PRs where the author hasn't commented in 2 weeks
#!/usr/bin/env bash
# Shell options:
#   -u           expanding an unset variable is an error
#   -o pipefail  a pipeline's status is that of the last stage that failed
# NOTE(review): -e (exit on error) is not enabled; several commands below rely
# on explicit `|| true` / `|| echo ...` fallbacks instead - confirm this is
# intentional before adding it.
set -uo pipefail
# =============================================================================
# Kubernetes PR Wrangler Helper Script
# Finds PRs that should be closed according to:
# https://kubernetes.io/docs/contribute/participate/pr-wranglers/#when-to-close-pull-requests
# =============================================================================
#######################################
# Print the usage text for this script.
# Arguments: none
# Outputs:   usage, option list and closing criteria on stdout
#######################################
help_message() {
  # One printf argument per output line; only the first line needs expansion.
  printf '%s\n' \
    "Usage: $0 [-r <repository>] [-d <inactivity days>] [-c <cla days>] [-v]" \
    '-r Repository in the format owner/repo (default: kubernetes/website)' \
    '-d Inactivity period in days for no response (default: 14)' \
    '-c CLA unsigned period in days (default: 14)' \
    '-v Enable verbose output for troubleshooting' \
    '-h Show this help message' \
    'Criteria for closing PRs (per Kubernetes docs):' \
    "1. Author hasn't signed the CLA for two weeks" \
    '2. Author has not responded to comments or feedback in 2 or more weeks' \
    'Note: Authors listed in OWNERS_ALIASES are skipped (trusted contributors).'
}
# Default values (overridable from the command line)
REPO="kubernetes/website"
INACTIVE_DAYS=14
CLA_DAYS=14
VERBOSE=0

# Parse command-line flags; unknown flags print the usage text and exit 1.
while getopts "r:d:c:vh" opt; do
  case "$opt" in
    r) REPO="$OPTARG" ;;
    d) INACTIVE_DAYS="$OPTARG" ;;
    c) CLA_DAYS="$OPTARG" ;;
    v) VERBOSE=1 ;;
    h) help_message; exit 0 ;;
    *) help_message; exit 1 ;;
  esac
done

# Reject malformed values up front: the day counts are later handed to `date`
# and compared arithmetically, and REPO is spliced into API paths, so a typo
# would otherwise surface as a confusing failure much further down.
if ! [[ "$INACTIVE_DAYS" =~ ^[0-9]+$ ]]; then
  echo "Error: -d expects a whole number of days, got '$INACTIVE_DAYS'" >&2
  exit 1
fi
if ! [[ "$CLA_DAYS" =~ ^[0-9]+$ ]]; then
  echo "Error: -c expects a whole number of days, got '$CLA_DAYS'" >&2
  exit 1
fi
if ! [[ "$REPO" =~ ^[^/[:space:]]+/[^/[:space:]]+$ ]]; then
  echo "Error: -r expects a repository in the form owner/repo, got '$REPO'" >&2
  exit 1
fi
#######################################
# Emit a debug line, but only when verbose mode is on.
# Globals:   VERBOSE (read)
# Arguments: message words
# Outputs:   "[DEBUG] <message>" on stderr when VERBOSE is 1
#######################################
log_verbose() {
  # Guard clause: stay silent (and successful) outside verbose mode.
  [[ $VERBOSE -eq 1 ]] || return 0
  printf '%s\n' "[DEBUG] $*" >&2
}
#######################################
# Print an informational message.
# Arguments: message words
# Outputs:   "[INFO] <message>" on stdout
#######################################
log_info() {
  printf '[INFO] %s\n' "$*"
}
#######################################
# Print a line announcing a PR that matched a closing criterion.
# Arguments: message words
# Outputs:   "[FLAG] <message>" on stdout
#######################################
log_flag() {
  printf '[FLAG] %s\n' "$*"
}
# Validate dependencies: stop at the first required tool that is not on PATH
for cmd in gh jq date; do
  command -v "$cmd" > /dev/null 2>&1 && continue
  echo "Error: Required command '$cmd' not found" >&2
  exit 1
done
# Echo the effective configuration so the run is self-describing
log_info "Repository: $REPO"
log_info "Inactivity period (days): $INACTIVE_DAYS"
log_info "CLA unsigned period (days): $CLA_DAYS"
# Calculate threshold dates (portable for macOS and Linux)
#######################################
# Print the UTC timestamp that lies a given number of days in the past.
# Arguments: $1 - number of days to go back
# Outputs:   timestamp as YYYY-MM-DDTHH:MM:SSZ on stdout
#######################################
get_threshold_date() {
  local days=$1
  # The format ends in a literal "Z" (UTC designator), so the clock must be
  # read in UTC (-u); otherwise local time would be mislabelled as UTC and the
  # threshold would be off by the local UTC offset.
  if date -v-1d +%s &> /dev/null; then
    # BSD/macOS date: relative adjustment via -v
    date -u -v-"${days}"d +%Y-%m-%dT%H:%M:%SZ
  else
    # GNU date: relative adjustment via -d
    date -u -d "${days} days ago" +%Y-%m-%dT%H:%M:%SZ
  fi
}
#######################################
# Convert a UTC timestamp (YYYY-MM-DDTHH:MM:SSZ) to seconds since the epoch.
# Arguments: $1 - timestamp string
# Outputs:   epoch seconds on stdout, or 0 when the input is empty or
#            cannot be parsed
#######################################
date_to_seconds() {
  local date_str=$1
  # GNU `date -d ""` succeeds (it means "today at midnight"), so an empty
  # input has to be rejected explicitly to honour the "0 on failure" contract.
  if [[ -z "$date_str" ]]; then
    echo 0
    return
  fi
  if date -v-1d +%s &> /dev/null; then
    # BSD/macOS: the trailing "Z" in the format is only a literal character,
    # so -u is required for the fields to be interpreted as UTC.
    date -u -jf "%Y-%m-%dT%H:%M:%SZ" "$date_str" +%s 2>/dev/null || echo 0
  else
    # GNU date understands the "Z" suffix itself.
    date -d "$date_str" +%s 2>/dev/null || echo 0
  fi
}
#######################################
# Render epoch seconds as a calendar date in the local time zone.
# Arguments: $1 - seconds since the epoch
# Outputs:   YYYY-MM-DD on stdout, or "unknown" if conversion fails
#######################################
format_timestamp() {
  local ts=$1
  # `date -r` means "epoch seconds" on BSD/macOS but "mtime of this file" on
  # GNU, so pick the right spelling per platform instead of trying -r first
  # (a file named like the timestamp would otherwise yield a bogus date).
  if date -v-1d +%s &> /dev/null; then
    date -r "$ts" +%Y-%m-%d 2>/dev/null || echo "unknown"
  else
    date -d "@$ts" +%Y-%m-%d 2>/dev/null || echo "unknown"
  fi
}
# Cut-off instants: anything older than these counts as overdue.
# Each threshold is kept both as a timestamp string and as epoch seconds.
INACTIVE_DATE="$(get_threshold_date "$INACTIVE_DAYS")"
CLA_DATE="$(get_threshold_date "$CLA_DAYS")"
INACTIVE_DATE_SEC="$(date_to_seconds "$INACTIVE_DATE")"
CLA_DATE_SEC="$(date_to_seconds "$CLA_DATE")"
log_verbose "Inactive threshold date: $INACTIVE_DATE ($INACTIVE_DATE_SEC)"
log_verbose "CLA threshold date: $CLA_DATE ($CLA_DATE_SEC)"
# =============================================================================
# Fetch and parse OWNERS_ALIASES to skip trusted contributors
# =============================================================================
# Newline-separated, lower-cased logins taken from OWNERS_ALIASES
TRUSTED_AUTHORS_LIST=""

#######################################
# Download OWNERS_ALIASES from the repository and collect every login in it.
# Globals:   REPO (read), TRUSTED_AUTHORS_LIST (written)
# Arguments: none
# Outputs:   progress via log_info / log_verbose
# Returns:   0; when the file cannot be fetched the list is left untouched
#######################################
fetch_trusted_authors() {
  log_info "Fetching OWNERS_ALIASES from $REPO..."
  local owners_content
  # Best effort by design: a missing file or API error just disables the
  # trusted-author shortcut, it must not abort the run.
  owners_content=$(gh api "repos/$REPO/contents/OWNERS_ALIASES" --jq '.content' 2>/dev/null \
    | base64 -d 2>/dev/null || echo "")
  if [[ -z "$owners_content" ]]; then
    log_verbose "Could not fetch OWNERS_ALIASES file, skipping trusted author check"
    return
  fi

  # Extract all unique usernames from lines like " - username" or
  # " - username # comment". POSIX character classes are used instead of \s,
  # which BSD grep/sed (macOS) do not reliably support.
  TRUSTED_AUTHORS_LIST=$(printf '%s\n' "$owners_content" \
    | grep -E '^[[:space:]]*-[[:space:]]+[a-zA-Z0-9]' \
    | sed -e 's/^[^-]*-[[:space:]]*//' -e 's/[[:space:]]*#.*$//' \
    | tr -d ' \t\r' \
    | tr '[:upper:]' '[:lower:]' \
    | sort -u || true)

  # `grep -c` prints "0" AND exits non-zero when nothing matches, so a
  # `|| echo 0` fallback would produce two lines; count only when non-empty.
  local count=0
  if [[ -n "$TRUSTED_AUTHORS_LIST" ]]; then
    count=$(grep -c . <<< "$TRUSTED_AUTHORS_LIST")
  fi
  log_info "Loaded $count trusted authors from OWNERS_ALIASES"
  log_verbose "Trusted authors: ${TRUSTED_AUTHORS_LIST//$'\n'/ }"
}
#######################################
# Tell whether a login appears in the trusted-author list.
# Globals:   TRUSTED_AUTHORS_LIST (read; one login per line)
# Arguments: $1 - GitHub login to look up
# Returns:   0 if trusted, 1 otherwise (including an empty login)
#######################################
is_trusted_author() {
  local author=$1
  # An empty pattern would match the empty line produced by an empty list.
  if [[ -z "$author" ]]; then
    return 1
  fi
  # -F: logins such as "dependabot[bot]" must be compared literally, not as a
  # regex; -i already makes the match case-insensitive; -x requires the whole
  # line to match; -- protects against a login starting with "-".
  if grep -qixF -- "$author" <<< "$TRUSTED_AUTHORS_LIST"; then
    return 0 # trusted
  fi
  return 1 # not trusted
}
fetch_trusted_authors
# =============================================================================
# Accumulators for flagged PRs, one array per closing criterion.
# Each element is a single record of the form:
#   "timestamp|pr_number|url|author|reason|has_lgtm"
# =============================================================================
declare -a cla_flagged_prs=() inactive_flagged_prs=()
# =============================================================================
# Check author activity
# =============================================================================
#######################################
# Classify how recently the PR author last acted on their pull request.
# Activity = issue comments, review comments, reviews and commits whose
# user/author/committer login equals the PR author.
# Globals:   REPO (read)
# Arguments: $1 - PR number
#            $2 - author login
#            $3 - threshold in epoch seconds; later activity counts as recent
# Outputs:   exactly one status word on stdout:
#              none            - no activity by the author was found
#              recent:<stamp>  - latest activity is newer than the threshold
#              stale:<stamp>   - latest activity is at or before the threshold
#              error           - the GitHub API could not be queried
# Returns:   0, or 1 when the status is "error"
#######################################
check_author_activity() {
  local pr_number=$1
  local author=$2
  local threshold_sec=$3
  log_verbose "Checking activity for PR #$pr_number by $author"

  # The login is embedded in a jq string literal; escape \ and " so an
  # unexpected character cannot break out of the literal.
  local login=${author//\\/\\\\}
  login=${login//\"/\\\"}
  local by_user="select(.user.login == \"$login\")"
  local by_committer="select(.author.login == \"$login\" or .committer.login == \"$login\")"

  # Parallel arrays: API path (below repos/$REPO/) and the jq filter that
  # turns each result page into newline-separated timestamps.
  local -a endpoints=(
    "issues/$pr_number/comments"
    "pulls/$pr_number/comments"
    "pulls/$pr_number/reviews"
    "pulls/$pr_number/commits"
  )
  local -a filters=(
    "[.[] | $by_user | .created_at] | join(\"\n\")"
    "[.[] | $by_user | .created_at] | join(\"\n\")"
    "[.[] | $by_user | .submitted_at] | join(\"\n\")"
    "[.[] | $by_committer | .commit.committer.date] | join(\"\n\")"
  )

  local collected="" page_output i
  for i in "${!endpoints[@]}"; do
    # A failed request (rate limit, network, auth) must not be mistaken for
    # "the author never responded" - that would flag the PR for closure.
    if ! page_output=$(gh api --paginate "repos/$REPO/${endpoints[$i]}" \
      --jq "${filters[$i]}" 2>/dev/null); then
      echo "[WARN] Could not query ${endpoints[$i]} in $REPO; activity of PR #$pr_number is unknown" >&2
      echo "error"
      return 1
    fi
    collected+="$page_output"$'\n'
  done

  # ISO-8601 UTC stamps sort chronologically as plain strings; newest first.
  local all_timestamps
  all_timestamps=$(grep -v '^$' <<< "$collected" | sort -r || true)
  log_verbose "All activity timestamps for PR #$pr_number: $all_timestamps"
  if [[ -z "$all_timestamps" ]]; then
    echo "none"
    return
  fi

  local last_activity
  last_activity=$(head -n 1 <<< "$all_timestamps")
  local last_activity_sec
  last_activity_sec=$(date_to_seconds "$last_activity")
  if (( last_activity_sec > threshold_sec )); then
    echo "recent:$last_activity"
  else
    echo "stale:$last_activity"
  fi
}
# =============================================================================
# Process a single PR
# =============================================================================
# Helper: succeed when any label name in JSON array $1 satisfies jq predicate $2.
_pr_labels_any() {
  jq -e "any(.[]; $2)" <<< "$1" > /dev/null
}

#######################################
# Evaluate one pull request against the closing criteria and record hits.
# Globals:   REPO, INACTIVE_DATE_SEC, CLA_DATE_SEC, INACTIVE_DAYS, CLA_DAYS
#            (read); cla_flagged_prs, inactive_flagged_prs (appended to)
# Arguments: $1 - one PR as a JSON object with number, url, author.login,
#                 createdAt and labels[].name
# Outputs:   [FLAG] lines for every criterion met; debug lines via log_verbose
#######################################
process_pr() {
  local pr_json=$1
  local pr_number url author created_at labels_json
  pr_number=$(jq -r '.number' <<< "$pr_json")
  url=$(jq -r '.url' <<< "$pr_json")
  author=$(jq -r '.author.login' <<< "$pr_json")
  created_at=$(jq -r '.createdAt' <<< "$pr_json")
  labels_json=$(jq -c '[.labels[].name]' <<< "$pr_json")
  local created_sec
  created_sec=$(date_to_seconds "$created_at")
  log_verbose "Processing PR #$pr_number by $author (created: $created_at)"
  log_verbose "Labels: $labels_json"

  # Skip PRs from trusted authors (OWNERS_ALIASES members)
  if is_trusted_author "$author"; then
    log_verbose "Skipping PR #$pr_number: author ($author) is in OWNERS_ALIASES"
    return
  fi

  # Skip PRs that are too new for EVERY criterion. The two periods are
  # independent options, so a PR younger than the inactivity period can still
  # be overdue on the CLA (and must not be dropped here).
  if (( created_sec > INACTIVE_DATE_SEC && created_sec >= CLA_DATE_SEC )); then
    log_verbose "Skipping PR #$pr_number: created within the inactivity period"
    return
  fi

  # Skip work-in-progress PRs
  if _pr_labels_any "$labels_json" 'startswith("do-not-merge") or . == "wip"'; then
    log_verbose "Skipping PR #$pr_number: marked as work in progress or do-not-merge"
    return
  fi

  # Filter: only process PRs with the "language/en" label (for kubernetes/website)
  if [[ "$REPO" == "kubernetes/website" ]] \
    && ! _pr_labels_any "$labels_json" '. == "language/en"'; then
    log_verbose "Skipping PR #$pr_number: not labeled as language/en"
    return
  fi

  # Remember whether the PR already carries an lgtm label (shown in the summary)
  local has_lgtm="no"
  if _pr_labels_any "$labels_json" '. == "lgtm"'; then
    has_lgtm="yes"
  fi

  # Condition 1: CLA not signed for CLA_DAYS
  if _pr_labels_any "$labels_json" '. == "cncf-cla: no"'; then
    if (( created_sec < CLA_DATE_SEC )); then
      log_flag "PR #$pr_number: $url"
      log_flag " → Author ($author) hasn't signed the CLA (PR older than $CLA_DAYS days)"
      # Store with timestamp for sorting (oldest first) and lgtm status
      cla_flagged_prs+=("$created_sec|$pr_number|$url|$author|CLA not signed since $created_at|$has_lgtm")
    else
      log_verbose "PR #$pr_number has unsigned CLA but is within grace period"
    fi
  fi

  # Condition 2: Author hasn't responded in INACTIVE_DAYS.
  # A PR younger than the inactivity period cannot be stale yet; returning
  # here also avoids the API calls made by check_author_activity.
  if (( created_sec > INACTIVE_DATE_SEC )); then
    log_verbose "PR #$pr_number: created within the inactivity period, activity check skipped"
    return
  fi
  local activity_status
  activity_status=$(check_author_activity "$pr_number" "$author" "$INACTIVE_DATE_SEC")
  case "$activity_status" in
    none)
      log_flag "PR #$pr_number: $url"
      log_flag " → Author ($author) has never commented or committed since PR creation ($created_at)"
      inactive_flagged_prs+=("$created_sec|$pr_number|$url|$author|No activity since creation|$has_lgtm")
      ;;
    stale:*)
      local last_date=${activity_status#stale:}
      log_flag "PR #$pr_number: $url"
      log_flag " → Author ($author) last active on $last_date (over $INACTIVE_DAYS days ago)"
      inactive_flagged_prs+=("$created_sec|$pr_number|$url|$author|Last active: $last_date|$has_lgtm")
      ;;
    recent:*)
      log_verbose "PR #$pr_number: Author has recent activity"
      ;;
    *)
      # Any other status means activity could not be determined: do not flag.
      log_verbose "PR #$pr_number: activity status '$activity_status' not recognised, not flagging"
      ;;
  esac
}
# =============================================================================
# Open URLs in Firefox (oldest first)
# =============================================================================
#######################################
# Open each given URL in a browser tab, in the order given.
# Uses Firefox when available, otherwise the platform's default opener.
# Globals:   OSTYPE (read)
# Arguments: URLs to open
# Outputs:   progress messages on stdout; a warning per URL if no browser
#            launcher is found
#######################################
open_in_firefox() {
  local -a urls=("$@")
  # Keep the loop variable local so the caller's own $url is not clobbered.
  local url
  if [[ ${#urls[@]} -eq 0 ]]; then
    echo "No URLs to open."
    return
  fi
  echo ""
  echo "Opening ${#urls[@]} PRs in Firefox (oldest first)..."
  for url in "${urls[@]}"; do
    log_verbose "Opening: $url"
    if [[ "$OSTYPE" == "darwin"* ]]; then
      # macOS: prefer Firefox, fall back to the default browser
      open -a "Firefox" "$url" 2>/dev/null || open "$url" 2>/dev/null
    elif command -v firefox &> /dev/null; then
      firefox "$url" &>/dev/null &
    elif command -v xdg-open &> /dev/null; then
      xdg-open "$url" &>/dev/null &
    else
      echo "Warning: Could not open $url - no suitable browser found"
    fi
    # Small delay to maintain tab order
    sleep 0.3
  done
  echo "Done! Opened ${#urls[@]} tabs."
}
# =============================================================================
# Print summary for a category
# =============================================================================
#######################################
# Print the flagged PRs of one category, oldest first.
# Arguments: $1  - category title
#            $2+ - records "timestamp|pr_number|url|author|reason|has_lgtm"
# Outputs:   a titled listing on stdout; nothing when there are no records
#######################################
print_category_summary() {
  local title=$1
  shift
  local -a entries=("$@")
  if [[ ${#entries[@]} -eq 0 ]]; then
    return
  fi
  # The read loop runs in this shell (process substitution, not a pipe), so
  # its variables must be declared local to avoid overwriting globals.
  local entry ts num url auth reason has_lgtm lgtm_indicator
  echo ""
  echo "$title (${#entries[@]}):"
  echo "----------------------------------------------"
  # Sort numerically on the timestamp (first field only) - oldest first
  while IFS= read -r entry; do
    IFS='|' read -r ts num url auth reason has_lgtm <<< "$entry"
    lgtm_indicator=""
    if [[ "$has_lgtm" == "yes" ]]; then
      lgtm_indicator=" [LGTM]"
    fi
    echo " PR #$num - $auth (created: $(format_timestamp "$ts"))$lgtm_indicator"
    echo " $url"
    echo " Reason: $reason"
  done < <(printf '%s\n' "${entries[@]}" | sort -t'|' -k1,1n)
}
# =============================================================================
# Main execution
# =============================================================================
# Upper bound on the number of open PRs requested from GitHub
PR_LIMIT=1000

log_info "Fetching open PRs from $REPO..."
# A failed listing must stop the run: treating it as "no open PRs" would
# report a clean result when in fact nothing was checked.
if ! PR_JSON=$(gh pr list -R "$REPO" \
  --state open \
  --limit "$PR_LIMIT" \
  --json number,url,author,createdAt,labels); then
  echo "Error: could not list open PRs for $REPO (check 'gh auth status' and the repository name)" >&2
  exit 1
fi
PR_COUNT=$(jq 'length' <<< "$PR_JSON")
if ! [[ "$PR_COUNT" =~ ^[0-9]+$ ]]; then
  echo "Error: unexpected response while listing PRs for $REPO" >&2
  exit 1
fi
log_info "Found $PR_COUNT open PRs to evaluate"
if (( PR_COUNT == 0 )); then
  log_info "No open PRs found."
  exit 0
fi
if (( PR_COUNT >= PR_LIMIT )); then
  echo "[WARN] Reached the limit of $PR_LIMIT PRs; older PRs may not have been listed" >&2
fi

# Feed the PRs one compact JSON object per line; process substitution keeps
# the loop in the current shell so process_pr can append to the global arrays.
PROCESSED=0
while IFS= read -r pr; do
  [[ -n "$pr" ]] || continue
  process_pr "$pr"
  PROCESSED=$(( PROCESSED + 1 ))
  if (( PROCESSED % 25 == 0 )); then
    log_info "Processed $PROCESSED/$PR_COUNT PRs..."
  fi
done < <(jq -c '.[]' <<< "$PR_JSON")
log_info "Processed $PROCESSED/$PR_COUNT PRs."
# =============================================================================
# Summary
# =============================================================================
# Helper: print the URLs of the given records, oldest first, each URL once.
#   $1  - "yes" to keep only records whose has_lgtm field is "yes", else "no"
#   $2+ - records "timestamp|pr_number|url|author|reason|has_lgtm"
# Prints nothing for an empty record list (so readarray yields an empty array
# instead of one empty element).
_flagged_urls() {
  local lgtm_only=$1
  shift
  (( $# > 0 )) || return 0
  printf '%s\n' "$@" \
    | sort -t'|' -k1,1n \
    | awk -F'|' -v lgtm_only="$lgtm_only" '
        lgtm_only == "yes" && $6 != "yes" { next }
        $3 != "" && !seen[$3]++ { print $3 }
      '
}

# Helper: render a menu count, using "none" for zero.
_menu_count() {
  if (( $1 > 0 )); then
    echo "$1"
  else
    echo "none"
  fi
}

echo ""
echo "=============================================="
echo " SUMMARY"
echo "=============================================="
# ${arr[@]+"${arr[@]}"} expands to nothing for an empty array without
# tripping `set -u` on bash releases older than 4.4.
print_category_summary "PRs with unsigned CLA" ${cla_flagged_prs[@]+"${cla_flagged_prs[@]}"}
print_category_summary "PRs with inactive authors" ${inactive_flagged_prs[@]+"${inactive_flagged_prs[@]}"}

# Build URL arrays for each category (sorted oldest first). A PR can be in
# both categories, so the combined lists are sorted together and de-duplicated.
declare -a all_flagged=(
  ${cla_flagged_prs[@]+"${cla_flagged_prs[@]}"}
  ${inactive_flagged_prs[@]+"${inactive_flagged_prs[@]}"}
)
declare -a all_urls=() cla_urls=() inactive_urls=() lgtm_urls=()
readarray -t cla_urls < <(_flagged_urls no ${cla_flagged_prs[@]+"${cla_flagged_prs[@]}"})
readarray -t inactive_urls < <(_flagged_urls no ${inactive_flagged_prs[@]+"${inactive_flagged_prs[@]}"})
readarray -t all_urls < <(_flagged_urls no ${all_flagged[@]+"${all_flagged[@]}"})
readarray -t lgtm_urls < <(_flagged_urls yes ${all_flagged[@]+"${all_flagged[@]}"})

# Count for display
cla_count=${#cla_urls[@]}
inactive_count=${#inactive_urls[@]}
lgtm_count=${#lgtm_urls[@]}
all_count=${#all_urls[@]}
# Number of distinct PRs flagged (a PR hit by both criteria counts once)
TOTAL_FLAGGED=$all_count

if [[ $TOTAL_FLAGGED -eq 0 ]]; then
  echo ""
  echo "✅ No PRs found meeting the closure criteria."
else
  echo ""
  echo "⚠️ Total PRs to review for closure: $TOTAL_FLAGGED"
  echo ""
  echo "Remember: Leave a /close comment on PRs that should be closed."
  echo "Contributors can reopen PRs if they resume work."

  # Interactive menu
  echo ""
  echo "Open PRs in Firefox (oldest first):"
  echo "------------------------------------"
  echo " [a] ALL flagged PRs ($all_count)"
  echo " [c] CLA unsigned PRs only ($(_menu_count "$cla_count"))"
  echo " [i] Inactive author PRs only ($(_menu_count "$inactive_count"))"
  echo " [l] PRs with LGTM label only ($(_menu_count "$lgtm_count"))"
  echo " [n] Don't open any PRs"
  echo ""
  # -r keeps backslashes literal; on EOF (e.g. stdin is not a terminal) fall
  # back to "n" so $response is always set under `set -u`.
  read -r -p "Your choice [a/c/i/l/n]: " response || response="n"
  case "$response" in
    [aA])
      open_in_firefox "${all_urls[@]}"
      ;;
    [cC])
      if [[ $cla_count -gt 0 ]]; then
        open_in_firefox "${cla_urls[@]}"
      else
        echo "No CLA unsigned PRs to open."
      fi
      ;;
    [iI])
      if [[ $inactive_count -gt 0 ]]; then
        open_in_firefox "${inactive_urls[@]}"
      else
        echo "No inactive author PRs to open."
      fi
      ;;
    [lL])
      if [[ $lgtm_count -gt 0 ]]; then
        open_in_firefox "${lgtm_urls[@]}"
      else
        echo "No PRs with LGTM label to open."
      fi
      ;;
    *)
      echo "Skipping browser opening."
      ;;
  esac
fi
exit 0
@nate-double-u
Copy link
Author

Added check for CLA

@nate-double-u
Copy link
Author

Written with the help of ChatGPT.

@nate-double-u
Copy link
Author

Update assisted by Copilot & Claude Opus 4.5

@nate-double-u
Copy link
Author

Usage: ./pr-wrangler.sh [-r <repository>] [-d <inactivity days>] [-c <cla days>] [-v]
-r Repository in the format owner/repo (default: kubernetes/website)
-d Inactivity period in days for no response (default: 14)
-c CLA unsigned period in days (default: 14)
-v Enable verbose output for troubleshooting
-h Show this help message

Criteria for closing PRs (per Kubernetes docs):

  1. Author hasn't signed the CLA for two weeks
  2. Author has not responded to comments or feedback in 2 or more weeks

Note: Authors listed in OWNERS_ALIASES are skipped (trusted contributors).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment