Skip to content

Instantly share code, notes, and snippets.

@jdavcs
Last active May 9, 2025 15:50
Show Gist options
  • Save jdavcs/49d52803dd7d95ab1df3d236f0091fa8 to your computer and use it in GitHub Desktop.
Save jdavcs/49d52803dd7d95ab1df3d236f0091fa8 to your computer and use it in GitHub Desktop.
Get list of PRs for release, save as csv file
"""
Must be run with a GitHub token (see _get_token()).
Alternatively, the token can be loaded into the GITHUB_AUTH env var.
"""
import csv
import datetime
from collections import namedtuple
from pathlib import Path
from github import Github
from github.PullRequest import PullRequest
from packaging.version import Version
from typing import (
List,
Optional,
Set,
)
# Repository whose pull requests are examined ("<owner>/<name>" on GitHub).
PROJECT_OWNER = "galaxyproject"
PROJECT_NAME = "galaxy"

# One row of the generated report; the field order is also the column order.
PRItem = namedtuple(
    "PR",
    [
        "number",
        "title",
        "url",
        "is_highlight",
        "is_uiux",
        "is_manual",
        "admin_required",
        "labels_kind",
        "labels_area",
        "labels_other",
        "author",
    ],
)

# Milestone title of the release and the pull-request state of interest.
MILESTONE = "24.2"
STATE = "closed"
def run():
    """Build the pull-request report for the release milestone and write it out.

    Collects the pull requests returned by ``_get_prs`` for ``MILESTONE``,
    skips those carrying the ``merge`` label, converts every remaining one
    into a ``PRItem`` row and passes all rows, preceded by a header row made
    of the ``PRItem`` field names, to ``write_csv``.
    """
    version = Version(MILESTONE)
    release_date = datetime.date(2025, 1, 15)

    # Header row first: one column per PRItem field, in field order.
    items = [list(PRItem._fields)]

    for pr in _get_prs(version, release_date, state=STATE):
        labels = pr.labels
        # Collect the names once; they are consulted several times below.
        label_names = [label.name for label in labels]
        if "merge" in label_names:  # ignore merge PRs
            continue

        # A pull request without a description has ``body`` set to None, and
        # ``"..." in None`` raises TypeError; treat it as an empty description.
        body = pr.body or ""
        labels_kind, labels_area, labels_other = _get_pr_labels(labels)

        items.append(
            PRItem(
                number=pr.number,
                title=pr.title,
                url=pr.html_url,
                is_highlight=_to_y(any(name.startswith("highlight") for name in label_names)),
                is_uiux=_to_y("area/UI-UX" in label_names),
                is_manual=_to_y("[x] Instructions for manual testing" in body),
                # Not derived from the pull request; the column is left empty
                # (presumably filled in by hand afterwards -- TODO confirm).
                admin_required=None,
                labels_kind=labels_kind,
                labels_area=labels_area,
                labels_other=labels_other,
                author=pr.user.login,
            )
        )

    write_csv(items)
def _to_y(value):
    """Map a truthy *value* to the marker ``"y"``; any falsy value yields ``None``."""
    if value:
        return "y"
    return None
def _get_prs(release_version: Version, release_date: datetime.date, state: str = "closed") -> List[PullRequest]:
    """Return the pull requests assigned to the milestone of *release_version*.

    Pull requests are listed most-recently-updated first, and the scan stops
    at the first one last updated more than a year before *release_date*.

    Args:
        release_version: Version whose string form must equal the milestone
            title of a pull request for it to be selected.
        release_date: Date of the release; the scan cutoff is midnight of the
            same day one year earlier.
        state: Pull-request state passed to the listing call. When it is
            ``"closed"`` only merged pull requests are selected.

    Returns:
        The selected pull requests, in the order they were listed.
    """
    github = Github(_get_token())
    repo = github.get_repo(f"{PROJECT_OWNER}/{PROJECT_NAME}")

    try:
        cutoff_date = release_date.replace(year=release_date.year - 1)
    except ValueError:
        # release_date is Feb 29 and the previous year has no such day.
        cutoff_date = release_date.replace(year=release_date.year - 1, day=28)
    cutoff_time = datetime.datetime.combine(cutoff_date, datetime.time.min)
    milestone_title = str(release_version)

    prs: List[PullRequest] = []
    print("Collecting relevant pull requests...")
    for counter, pr in enumerate(repo.get_pulls(state=state, sort="updated", direction="desc"), start=1):
        assert pr.updated_at
        # The listing is sorted by update time, newest first, so everything
        # after the first too-old pull request is too old as well.
        # The timestamp is compared as a naive datetime against the naive cutoff.
        if pr.updated_at.replace(tzinfo=None) < cutoff_time:
            break
        if counter % 100 == 0:
            print(
                f"Examined {counter} PRs; collected {len(prs)} (currently on #{pr.number} updated on {pr.updated_at.date()})"
            )
        # Select PRs that have the correct milestone and, when listing closed
        # PRs, have actually been merged (closed-but-unmerged ones are skipped).
        proper_state = state != "closed" or pr.merged_at
        if proper_state and pr.milestone and pr.milestone.title == milestone_title:
            prs.append(pr)

    print(f"Collected {len(prs)} pull requests")
    return prs
def write_csv(items):
    """Write *items* as tab-separated rows to ``prs.csv`` in the working directory.

    Args:
        items: Iterable of rows; each row is an iterable of cell values.
            ``None`` cells are written as empty fields by the csv module.

    An existing ``prs.csv`` is overwritten.
    """
    # newline="" leaves line endings to the csv module; the explicit UTF-8
    # encoding keeps non-ASCII titles and author names from failing (or being
    # mis-encoded) on platforms whose default encoding is not UTF-8.
    with open("prs.csv", "w", newline="", encoding="utf-8") as f:
        csv.writer(f, dialect="excel-tab").writerows(items)
def _get_pr_labels(labels):
    """Split *labels* into kind, area and other labels.

    Labels named ``kind/<x>`` or ``area/<x>`` go into the matching group with
    the prefix removed; every other label keeps its full name in the "other"
    group. The whole ``kind/`` / ``area/`` prefix, slash included, must match,
    so a label that merely starts with the same letters is not truncated.

    Args:
        labels: Iterable of objects exposing a ``name`` attribute.

    Returns:
        A 3-tuple ``(kind, area, other)`` of strings, each produced by
        ``_stringify`` from the names of one group.
    """
    kind, area, other = [], [], []
    for label in labels:
        name = label.name
        for prefix, bucket in (("kind/", kind), ("area/", area)):
            if name.startswith(prefix):
                bucket.append(name[len(prefix):])
                break
        else:
            # No known prefix matched: keep the full label name.
            other.append(name)
    return _stringify(kind), _stringify(area), _stringify(other)
def _stringify(items):
    """Join the strings in *items* into one string, separated by ``", "``."""
    separator = ", "
    return separator.join(items)
def _get_token():
    """Return the GitHub token used to authenticate API calls.

    The ``GITHUB_AUTH`` environment variable is used when it is set to a
    non-blank value; otherwise the token is read from the ``.github`` file in
    the user's home directory. Surrounding whitespace is stripped either way.

    Raises:
        OSError: If the environment variable is not set (or blank) and the
            token file cannot be read (e.g. ``FileNotFoundError``).
    """
    import os  # local import keeps this block self-contained

    token = os.environ.get("GITHUB_AUTH", "").strip()
    if token:
        return token

    path = Path.home() / ".github"
    with open(path, "r", encoding="utf-8") as f:
        return f.read().strip()
# Generate the report only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment