Last active
February 10, 2025 23:50
-
-
Save amotl/36bba37d662c1e1e480e4bf8c2a9447d to your computer and use it in GitHub Desktop.
Transfer pull requests from one repository to another
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
# Transfer GitHub Pull Requests | |
## About | |
Transfer pull requests on GitHub from one repository to another. | |
## Details | |
Here: Transfer PRs closed by stale bot on the PyCaret repository, | |
modulo updates submitted by Dependabot, to the fork at sktime. | |
## Synopsis | |
```shell | |
uv run transfer_pull_requests.py | |
``` | |
## Usage guidelines | |
Q: Will this appear as PR by whichever person provides the token? | |
A: Yes, the program can't impersonate the original author. | |
However, it inserts a preamble into the PR body like this: | |
> Contributed by @foobar on 2020-01-01 per repo#42. Thank you very much. | |
Q: Which data and metadata are transferred?
A: Other than the patch commits themselves, the original PR's title and body are transferred. | |
Q: What about comments on PRs? | |
A: The program does not transfer PR comments yet. On relevant PRs where this is important,
we can run subsequent refinement procedures, in order to also transfer PR comments.
Q: Can we group relevant rescued PRs somehow? | |
A: The label `no-pr-activity` has been created on `sktime/pycaret`, | |
in order to bundle all PRs under the same label as the original ones. | |
Q: Will the program duplicate source PRs into the target fork when invoked multiple times? | |
A: The program is idempotent, in the sense that it ignores PRs that have already been created. | |
It does not duplicate them on the target fork when invoked multiple times. | |
This feature is courtesy of GitHub, the program just handles relevant API | |
responses so that it will not croak. | |
""" | |
# /// script | |
# requires-python = ">=3.8" | |
# dependencies = [ | |
# "munch", | |
# "requests-cache", | |
# ] | |
# /// | |
import shlex | |
import datetime as dt | |
import sys | |
import tempfile | |
from munch import Munch, munchify | |
import requests_cache | |
import subprocess | |
# Source and target repositories, in `owner/name` notation.
REPOSITORY_SOURCE = "pycaret/pycaret"
REPOSITORY_TARGET = "sktime/pycaret"

# GitHub search API URL selecting the pull requests to transfer.
# The URL-encoded query reads:
# org:pycaret repo:pycaret is:pr is:closed label:no-pr-activity -author:dependabot[bot]
GITHUB_API_PRS = (
    "https://api.github.com/search/issues"
    "?q=org%3Apycaret+repo%3Apycaret+is%3Apr+is%3Aclosed"
    "+label%3Ano-pr-activity+-author%3Adependabot%5Bbot%5D"
    "&type=pullrequests&ref=advsearch&s=created&o=asc"
)

# GitHub API URL template to retrieve a single pull request of the source
# repository; `{number}` is filled in with the pull request number.
GITHUB_API_PR_TPL = "https://api.github.com/repos/" + REPOSITORY_SOURCE + "/pulls/{number}"
# Shared HTTP session used for all GitHub API requests. It is a requests-cache
# `CachedSession` configured with the `sqlite` backend.
http = requests_cache.CachedSession(backend="sqlite")
def run(command: str):
    """Run an external command and return its combined output.

    The command line is tokenized with `shlex.split` and executed without
    a shell. Standard error is merged into standard output. The captured
    output is echoed to this process' stderr for logging purposes.

    :param command: Command line to execute, using POSIX shell quoting rules.
    :return: The raw bytes the command wrote to stdout and stderr.
    :raises subprocess.CalledProcessError: When the command exits non-zero.
        The captured output is available on the exception's `output` attribute.
    """
    output = subprocess.check_output(shlex.split(command), stderr=subprocess.STDOUT)
    # The decoded text is only used for logging. Decode leniently, so that a
    # command emitting non-UTF-8 bytes does not fail after it already succeeded.
    print(output.decode("utf-8", errors="replace"), file=sys.stderr)
    return output
def select_pull_requests() -> "list[int]":
    """Return the numbers of all pull requests matching `GITHUB_API_PRS`.

    The GitHub search API is paginated, so all result pages are fetched
    rather than only the first one.

    :return: Pull request numbers, de-duplicated and sorted ascending.
    :raises requests.HTTPError: When GitHub responds with an error status,
        for example when the rate limit has been exceeded.
    """
    # NOTE: The return annotation is quoted, because subscripting the builtin
    # `list` at definition time fails on Python 3.8, which the script metadata
    # declares as supported.
    per_page = 100  # Largest page size accepted by the GitHub API.
    max_results = 1000  # The search API does not serve more results per query.
    numbers = set()
    for page in range(1, max_results // per_page + 1):
        response = http.get(GITHUB_API_PRS, params={"per_page": per_page, "page": page})
        # Fail loudly instead of running into a `KeyError` on an error payload.
        response.raise_for_status()
        items = response.json()["items"]
        numbers.update(item["number"] for item in items)
        # A short page is the last page.
        if len(items) < per_page:
            break
    return sorted(numbers)
def get_pull_request(number: int) -> Munch:
    """Fetch a single pull request from the GitHub API.

    :param number: Number of the pull request, inserted into `GITHUB_API_PR_TPL`.
    :return: The decoded JSON response, wrapped by `munchify` to permit
        attribute-style access.
    :raises requests.HTTPError: When GitHub responds with an error status.
    """
    response = http.get(GITHUB_API_PR_TPL.format(number=number))
    # Without this check, an error payload would be returned as if it was a
    # pull request, and only fail later on a missing attribute.
    response.raise_for_status()
    return munchify(response.json())
def transfer_pull_request(number: int, dry_run: bool = False) -> None:
    """Transfer a single pull request from the source to the target repository.

    The procedure checks out the pull request from `REPOSITORY_SOURCE` into a
    local branch, pushes it using `git push sktime`, and creates a draft pull
    request on `REPOSITORY_TARGET`. The new pull request carries the original
    title, the label `no-pr-activity`, and the original body prefixed by a
    preamble crediting the original author.

    :param number: Number of the pull request on the source repository.
    :param dry_run: When true, `--dry-run` is appended to `gh pr create`.
        Checking out and pushing the branch still takes place.
    :raises subprocess.CalledProcessError: When any invoked command fails,
        unless `gh pr create` reports that the pull request "already exists".
    """
    pr = get_pull_request(number)

    # GitHub timestamps end with "Z", which `datetime.fromisoformat` rejects
    # on Python versions before 3.11. Use an explicit UTC offset instead.
    created = dt.datetime.fromisoformat(pr.created_at.replace("Z", "+00:00"))
    created_at = created.strftime("%Y-%m-%d")
    preamble = f"Contributed by @{pr.user.login} on {created_at} per {pr.html_url}. Thank you very much."

    # A pull request without a description has a `null` body. Do not render
    # it as the literal text "None".
    original_body = pr.body or ""
    body = f"_{preamble}_\n\n{original_body}"

    # Derive the local branch name from the head label: `:` becomes `/`, lowercased.
    branch = pr.head.label.replace(":", "/").lower()

    # All interpolated values are quoted with `shlex.quote`, because `run`
    # tokenizes the command line using `shlex.split`. A title containing a
    # single quote would otherwise break or alter the command line.
    run(f"gh --repo {REPOSITORY_SOURCE} pr checkout {number} --branch={shlex.quote(branch)}")
    run("git push sktime")

    # The body is handed over through a temporary file, which is removed
    # when leaving the `with` block.
    with tempfile.NamedTemporaryFile(suffix=".md") as f:
        f.write(body.encode("utf-8"))
        f.flush()
        command = (
            f"gh --repo {REPOSITORY_TARGET} pr create "
            f"--head={shlex.quote(branch)} --title={shlex.quote(pr.title)} "
            f"--body-file={shlex.quote(f.name)} --label='no-pr-activity' --draft"
        )
        if dry_run:
            command += " --dry-run"
        try:
            run(command)
        except subprocess.CalledProcessError as ex:
            # `run` does not echo the output of failed commands, so do it here.
            print(ex.output.decode("utf-8", errors="replace"), file=sys.stderr)
            # An existing pull request is not an error: Ignoring it permits
            # invoking the program multiple times.
            if b"already exists" not in ex.output:
                raise
def transfer_pull_requests() -> None:
    """Transfer the selected pull requests, one after another.

    As long as the MARKER line below is in place, only the first selected
    pull request is processed.
    """
    numbers = select_pull_requests()
    # MARKER: Remove this line to work through ALL pull requests.
    numbers = numbers[:1]
    for pr_number in numbers:
        transfer_pull_request(pr_number)
def main():
    """Program entry point: Run the pull request transfer."""
    transfer_pull_requests()


# Only run the transfer when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment