Last active
March 2, 2023 13:20
-
-
Save Remi-Gau/6194563cabcfb1b89ce885a4f762ecbc to your computer and use it in GitHub Desktop.
Script to get all files touched by PRs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Script to get all files touched by PRs.""" | |
import shutil | |
from pathlib import Path | |
from warnings import warn | |
import requests | |
from rich import print | |
# GitHub account used for authenticated requests (paired with the token below).
USERNAME = "Remi-Gau"

# may require a token if run often
# The token is read from this file by main() when the file exists.
TOKEN_FILE = Path(__file__).parent / "token.txt"

# repo to check
GH_USERNAME = "nilearn"
GH_REPO = "nilearn"

# When True, save_diffs() stops after the first two pull requests.
DEBUG = False

# When True, main() skips the download step and reuses the diffs already
# present in OUTPUT_FOLDER.
USE_LOCAL = True

# Where the ``*.diff`` files and the final markdown table are stored.
OUTPUT_FOLDER = Path(__file__).parent / "tmp"
OUTPUT_FILE = OUTPUT_FOLDER / "output.md"

# Pull requests whose title contains one of these words are ignored.
EXCLUDE_PR = {"title": ["format"]}
# Backward-compatible alias: the constant was originally spelled this way.
EXLUDE_PR = EXCLUDE_PR
def print_to_output(output_file, all_files):
    """Write a markdown table of files and the number of PRs touching each.

    Parameters
    ----------
    output_file : path-like or None
        Passed through to ``print_line_to_output`` for every table row.
    all_files : iterable of str
        One entry per (pull request, file) pair; a file touched by several
        pull requests appears several times.
    """
    # Local import so this function does not depend on the module import block.
    from collections import Counter

    # Count every file in a single pass instead of calling ``list.count``
    # once per unique file, which is quadratic in the number of entries.
    pr_counts = Counter(all_files)

    print_line_to_output(
        output_file=output_file, text="| file | nb of PR found in |"
    )
    print_line_to_output(
        output_file=output_file, text="| ---- | ----------------- |"
    )
    for file in sorted(pr_counts):
        print_line_to_output(
            output_file=output_file,
            text=f"| {file} | {pr_counts[file]} |",
        )
def print_line_to_output(output_file, text):
    """Append one line to a file, or print it when no file is given.

    Parameters
    ----------
    output_file : path-like or None
        File to append to. When ``None`` the text is printed instead.
    text : str
        Line to emit, without the trailing newline.
    """
    if output_file is None:
        print(text)
        return

    # Use the plain file API rather than ``print(..., file=f)``: this module
    # imports ``print`` from rich, which renders through a console that
    # interprets ``[...]`` markup and may wrap long lines, either of which
    # would corrupt a markdown table row.
    with open(output_file, "a", encoding="utf-8") as f:
        f.write(f"{text}\n")
def save_diffs(pulls, auth, output_folder):
    """Download the diff of each pull request and store it on disk.

    Parameters
    ----------
    pulls : list of dict or None
        Pull request descriptions; each must provide the ``"number"``,
        ``"title"`` and ``"diff_url"`` keys. ``None`` or an empty list is
        accepted and results in no work being done.
    auth : tuple or None
        Credentials forwarded to ``get_this_pr_diff``.
    output_folder : pathlib.Path
        Folder forwarded to ``save_diff_to_file``.
    """
    # get_list_of_prs() returns None when the request fails; iterating over
    # that would raise a TypeError, so bail out early instead.
    if not pulls:
        return

    for i, pull_ in enumerate(pulls):
        # In debug mode only the first two pull requests are processed.
        if DEBUG and i == 2:
            break

        print(f"\n{pull_['number']}, {pull_['title']}")

        diff = get_this_pr_diff(url=pull_["diff_url"], auth=auth)
        if diff is None:
            # The download failed (a warning was already emitted): skip it.
            continue

        save_diff_to_file(
            number=pull_["number"],
            name=pull_["title"],
            diff=diff,
            output_folder=output_folder,
        )
def save_diff_to_file(number, name, diff, output_folder):
    """Write the diff of one pull request to ``<number>_<name>.diff``.

    Parameters
    ----------
    number : int or str
        Pull request number, used as the file name prefix.
    name : str
        Pull request title; characters that are unsafe in file names are
        replaced by underscores.
    diff : str
        Content of the diff.
    output_folder : pathlib.Path
        Folder in which the file is created.
    """
    # Spaces and "/" were already replaced; the other characters are not
    # allowed in file names on Windows.
    unsafe = ' /\\:*?"<>|'
    safe_name = name.translate(str.maketrans(unsafe, "_" * len(unsafe)))

    filename = output_folder / f"{number}_{safe_name}.diff"

    # Explicit encoding: diffs may contain non-ASCII text and the platform
    # default encoding is not guaranteed to be able to represent it.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(diff)
def get_list_of_prs(gh_username, gh_repo, auth=None, timeout=30):
    """Return the pull requests listed by the GitHub API for a repository.

    Parameters
    ----------
    gh_username : str
        Owner of the repository.
    gh_repo : str
        Name of the repository.
    auth : tuple or None
        Credentials passed to ``requests.get``.
    timeout : float
        Seconds to wait for each request before giving up.

    Returns
    -------
    list or None
        The decoded JSON items of all result pages, or ``None`` (after a
        warning) if any request does not return status 200.
    """
    url = f"https://api.github.com/repos/{gh_username}/{gh_repo}/pulls?per_page=100"

    pulls = []
    # A page holds at most 100 items; keep following the "next" link of the
    # response headers until there is none left.
    while url:
        response = requests.get(url, auth=auth, timeout=timeout)
        if response.status_code != 200:
            warn(f"Error {response.status_code}: {response.text}")
            return None
        pulls.extend(response.json())
        url = response.links.get("next", {}).get("url")

    return pulls
def get_this_pr_diff(url, auth=None, timeout=30):
    """Download the diff found at ``url``.

    Parameters
    ----------
    url : str
        Address of the diff.
    auth : tuple or None
        Credentials passed to ``requests.get``.
    timeout : float
        Seconds to wait for the request before giving up.

    Returns
    -------
    str or None
        The text of the response, or ``None`` (after a warning) if the
        request does not return status 200.
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, auth=auth, timeout=timeout)
    if response.status_code != 200:
        warn(f"Error {response.status_code}: {response.text}")
        return None
    return response.text
def list_all_files_in_prs(input_folder, exclude_pr):
    """List the files touched by the ``*.diff`` files found in a folder.

    Diff files are expected to be named ``<number>_<title words joined by
    underscores>.diff``.

    Parameters
    ----------
    input_folder : pathlib.Path
        Folder containing the ``*.diff`` files.
    exclude_pr : dict
        Its optional ``"title"`` entry lists words; a diff is skipped when
        one of them is exactly one of the underscore-separated words of its
        title.

    Returns
    -------
    list of str
        One entry per ``diff --git`` header found, so a file touched by
        several pull requests appears several times.
    """
    header = "diff --git "
    excluded_words = exclude_pr.get("title", [])

    all_files = []

    # Sorted so the log and the returned list do not depend on the order in
    # which the file system happens to list the folder.
    for diff_path in sorted(input_folder.glob("*.diff")):
        pr_number, *pr_title = diff_path.stem.split("_")

        if any(ex in pr_title for ex in excluded_words):
            print(f"[red]skipping {pr_number}, {' '.join(pr_title)}[/red]")
            continue

        print(f"{pr_number}, {' '.join(pr_title)}")

        # Explicit encoding, and undecodable bytes replaced rather than
        # fatal: only the header lines matter here.
        diff = diff_path.read_text(encoding="utf-8", errors="replace")

        for line in diff.splitlines():
            if not line.startswith(header):
                continue
            # Header looks like "diff --git a/<path> b/<path>". Cut at the
            # " b/" separator instead of the first space so that paths
            # containing spaces are kept whole, then drop the "a/" prefix.
            old_path = line[len(header):].split(" b/", 1)[0]
            all_files.append(old_path[2:])

    return all_files
def main():
    """Collect the diffs of the pull requests and tabulate the touched files.

    Unless ``USE_LOCAL`` is set, the output folder is wiped and the diff of
    every listed pull request is downloaded into it. The diffs found in the
    output folder are then parsed and a markdown table is written to
    ``OUTPUT_FILE``.
    """
    if not USE_LOCAL:
        shutil.rmtree(OUTPUT_FOLDER, ignore_errors=True)
    # The table is written in append mode, so always start from a clean file.
    OUTPUT_FILE.unlink(missing_ok=True)
    OUTPUT_FOLDER.mkdir(exist_ok=True)

    token = None
    if TOKEN_FILE.exists():
        # An empty token file is treated the same as a missing one.
        token = TOKEN_FILE.read_text().strip() or None

    auth = None if USERNAME is None or token is None else (USERNAME, token)

    if not USE_LOCAL:
        pulls = get_list_of_prs(
            gh_username=GH_USERNAME, gh_repo=GH_REPO, auth=auth
        )
        if pulls is None:
            # The listing failed (a warning was already emitted) and the
            # output folder was just wiped: there is nothing to process.
            raise SystemExit("Could not get the list of pull requests.")
        save_diffs(pulls=pulls, auth=auth, output_folder=OUTPUT_FOLDER)

    all_files = list_all_files_in_prs(
        input_folder=OUTPUT_FOLDER, exclude_pr=EXLUDE_PR
    )

    print_to_output(output_file=OUTPUT_FILE, all_files=all_files)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment