Skip to content

Instantly share code, notes, and snippets.

@amotl
Last active February 10, 2025 23:50
Show Gist options
  • Save amotl/36bba37d662c1e1e480e4bf8c2a9447d to your computer and use it in GitHub Desktop.
Save amotl/36bba37d662c1e1e480e4bf8c2a9447d to your computer and use it in GitHub Desktop.
Transfer pull requests from one repository to another
"""
# Transfer GitHub Pull Requests
## About
Transfer pull requests on GitHub from one repository to another.
## Details
Here: Transfer PRs closed by stale bot on the PyCaret repository,
modulo updates submitted by Dependabot, to the fork at sktime.
## Synopsis
```shell
uv run transfer_pull_requests.py
```
## Usage guidelines
Q: Will this appear as PR by whichever person provides the token?
A: Yes, the program can't impersonate the original author.
However, it inserts a preamble into the PR body like this:
> Contributed by @foobar on 2020-01-01 per https://github.com/pycaret/pycaret/pull/42. Thank you very much.
Q: Which data and metadata are transferred?
A: Other than the patch commits themselves, the original PR's title and body are transferred.
Q: What about comments on PRs?
A: The program does not transfer comments yet. On PRs where they matter,
we can run a subsequent refinement procedure to transfer the comments as well.
Q: Can we group relevant rescued PRs somehow?
A: The label `no-pr-activity` has been created on `sktime/pycaret`,
in order to bundle all PRs under the same label as the original ones.
Q: Will the program duplicate source PRs into the target fork when invoked multiple times?
A: The program is idempotent, in the sense that it ignores PRs that have already been created.
It does not duplicate them on the target fork when invoked multiple times.
This feature is courtesy of GitHub: the program merely tolerates the
`already exists` error response, so that it does not fail.
"""
# /// script
# requires-python = ">=3.8"
# dependencies = [
# "munch",
# "requests-cache",
# ]
# ///
import datetime as dt
import shlex
import subprocess
import sys
import tempfile
from typing import List

import requests_cache
from munch import Munch, munchify
# Define source and target repositories as "owner/name" slugs.
REPOSITORY_SOURCE = "pycaret/pycaret"
REPOSITORY_TARGET = "sktime/pycaret"

# Owner and name of the source repository. The API URLs below are built from
# them, so that they cannot drift away from REPOSITORY_SOURCE.
_SOURCE_OWNER, _SOURCE_NAME = REPOSITORY_SOURCE.split("/")

# GitHub API query to select pull requests.
# org:pycaret repo:pycaret is:pr is:closed label:no-pr-activity -author:dependabot[bot]
GITHUB_API_PRS = (
    "https://api.github.com/search/issues"
    f"?q=org%3A{_SOURCE_OWNER}+repo%3A{_SOURCE_NAME}+is%3Apr+is%3Aclosed"
    "+label%3Ano-pr-activity+-author%3Adependabot%5Bbot%5D"
    "&type=pullrequests&ref=advsearch&s=created&o=asc"
)

# GitHub API template URL to retrieve information about a pull request.
# The doubled braces keep `{number}` as a placeholder for `str.format`.
GITHUB_API_PR_TPL = f"https://api.github.com/repos/{REPOSITORY_SOURCE}/pulls/{{number}}"
# Shared HTTP session used for all GitHub API requests in this module.
# Responses are cached through the "sqlite" backend of requests-cache.
# NOTE(review): No cache expiry is configured here -- confirm that re-using
# previously cached search results across invocations is intended.
http = requests_cache.CachedSession(backend="sqlite")
def run(command: str):
    """
    Execute a command line and echo its combined output to stderr.

    The command string is tokenized with `shlex.split` and executed directly,
    without a shell. The child's stderr is merged into its stdout.

    :param command: The command line to execute.
    :return: The captured output of the command, as raw bytes.
    :raises subprocess.CalledProcessError: When the command exits with a non-zero status.
    """
    argv = shlex.split(command)
    completed = subprocess.run(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        check=True,
    )
    captured = completed.stdout
    text = captured.decode("utf-8")
    print(text, file=sys.stderr)
    return captured
def select_pull_requests() -> List[int]:
    """
    Return the numbers of all pull requests matching the search query.

    The GitHub search API is paginated, so this walks through all result
    pages instead of only reading the first one.

    :return: Pull request numbers, sorted in ascending order.
    :raises requests.HTTPError: When GitHub responds with an error status,
        for example when the API rate limit is exceeded.
    """
    per_page = 100  # Largest page size the GitHub API accepts.
    max_results = 1000  # The search API never serves more than the first 1000 matches.

    numbers: List[int] = []
    page = 1
    while True:
        response = http.get(GITHUB_API_PRS, params={"per_page": per_page, "page": page})
        # Surface rate limiting and similar errors right here, instead of
        # failing with a `KeyError` on the missing "items" field.
        response.raise_for_status()
        payload = response.json()
        items = payload["items"]
        numbers.extend(item["number"] for item in items)

        # Stop when everything reachable has been collected. Requesting a
        # page beyond the result cap would be rejected by GitHub.
        total = min(payload.get("total_count", len(numbers)), max_results)
        if not items or len(numbers) >= total:
            break
        page += 1

    return sorted(numbers)
def get_pull_request(number: int) -> Munch:
    """
    Fetch the metadata of a single pull request from the GitHub API.

    :param number: Number of the pull request to look up.
    :return: The decoded JSON response, wrapped for attribute-style access.
    :raises requests.HTTPError: When GitHub responds with an error status,
        for example for an unknown pull request or when rate limited.
    """
    response = http.get(GITHUB_API_PR_TPL.format(number=number))
    # Fail here on 403/404 and friends, instead of handing an error payload
    # to the caller, where it would break later with an `AttributeError`.
    response.raise_for_status()
    return munchify(response.json())
def transfer_pull_request(number: int, dry_run: bool = False) -> None:
    """
    Re-create a single pull request from the source repository on the target repository.

    Checks out the head of the pull request into a local branch, pushes it to
    the `sktime` git remote, and opens a draft pull request labeled
    `no-pr-activity` on the target repository. The new pull request carries
    the original title and body, prefixed by an attribution preamble.

    :param number: Number of the pull request on the source repository.
    :param dry_run: When true, `--dry-run` is passed to `gh pr create`.
        The branch is still checked out and pushed beforehand.
    :raises subprocess.CalledProcessError: When a `gh` or `git` command fails,
        unless `gh pr create` reports that the pull request already exists.
    """
    pr = get_pull_request(number)

    # GitHub timestamps end with "Z", which `fromisoformat` only accepts on
    # Python 3.11+. An explicit UTC offset parses on all supported versions.
    created = dt.datetime.fromisoformat(pr.created_at.replace("Z", "+00:00"))
    created_at = created.strftime("%Y-%m-%d")
    preamble = f"Contributed by @{pr.user.login} on {created_at} per {pr.html_url}. Thank you very much."

    # A pull request without description has a `null` body.
    # Don't render that as the literal text "None".
    body = f"_{preamble}_\n\n{pr.body or ''}"

    branch = pr.head.label.replace(":", "/").lower()

    # `run` tokenizes its argument with `shlex.split`. Contributor-controlled
    # values (branch name, title) must therefore go through `shlex.quote`,
    # otherwise an apostrophe breaks tokenization or smuggles in extra arguments.
    run(f"gh --repo {REPOSITORY_SOURCE} pr checkout {number} --branch={shlex.quote(branch)}")
    run("git push sktime")

    # The temporary file is removed when the `with` block is left,
    # so `gh` needs to be invoked while it is still open.
    with tempfile.NamedTemporaryFile(suffix=".md") as f:
        f.write(body.encode("utf-8"))
        f.flush()
        command = (
            f"gh --repo {REPOSITORY_TARGET} pr create "
            f"--head={shlex.quote(branch)} --title={shlex.quote(pr.title)} "
            f"--body-file={shlex.quote(f.name)} --label='no-pr-activity' --draft"
        )
        if dry_run:
            command += " --dry-run"
        try:
            run(command)
        except subprocess.CalledProcessError as ex:
            print(ex.output.decode("utf-8"), file=sys.stderr)
            # An existing pull request for this branch is fine: this is
            # what permits invoking the program multiple times.
            if b"already exists" not in ex.output:
                raise
def transfer_pull_requests() -> None:
    """
    Transfer the selected pull requests, one after another.

    As long as the marked `break` statement is in place, processing stops
    after the first selected pull request.
    """
    pr_numbers = select_pull_requests()
    for pr_number in pr_numbers:
        transfer_pull_request(pr_number)
        # MARKER: Remove the `break` below to work through ALL pull requests.
        break
def main():
    """Program entry point: run the pull request transfer."""
    transfer_pull_requests()


# Only start transferring when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment