Created
May 14, 2021 23:15
-
-
Save vitchyr/60b9ba3facf97492fccacbeba741d151 to your computer and use it in GitHub Desktop.
Save Git Info in Python for Reproducible Experiments
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Helper functions to save git information every time you run an experiment.
Requirements: | |
- GitPython==2.1.12 | |
(Probably works on other GitPython versions, but this is the version I've tested.) | |
Usage: | |
``` | |
from git_info import save_git_info

def experiment(*args, log_dir, **kwargs):
    save_git_info(
        [
            '/home/user/code_dir1',
            '/home/user/code_dir2',
        ],
        log_dir,
    )
    # rest of experiment code
``` | |
This will save a bunch of git repository information to `log_dir` so that your | |
experiment is reproducible. In particular, for each code directory listed, it | |
will save: | |
- the path to the code directory | |
- the git branch and hash | |
- any unstaged and staged diff in the repo | |
Ideally this information will be enough to reproduce any results. | |
""" | |
import json | |
import os | |
import os.path as osp | |
from typing import NamedTuple, List, Union | |
class GitInfo(NamedTuple):
    """Snapshot of one code directory's git state.

    Fields are positional and are unpacked in this order by the code that
    writes them to disk, so the order must not change.
    """

    # Path of the code directory that was inspected.
    directory: str
    # Diff text for unstaged changes (empty string when there is none).
    code_diff: str
    # Diff text for staged changes (empty string when there is none).
    code_diff_staged: str
    # Hex SHA of the commit HEAD points at (empty for non-repositories).
    commit_hash: str
    # Active branch name, or a bracketed/parenthesised placeholder.
    branch_name: str
def _generate_git_infos(code_dirs): | |
try: | |
import git | |
git_infos = [] | |
for directory in code_dirs: | |
# Idk how to query these things, so I'm just doing try-catch | |
try: | |
repo = git.Repo(directory) | |
try: | |
branch_name = repo.active_branch.name | |
except TypeError: | |
branch_name = '[DETACHED]' | |
git_infos.append(GitInfo( | |
directory=directory, | |
code_diff=repo.git.diff(None), | |
code_diff_staged=repo.git.diff('--staged'), | |
commit_hash=repo.head.commit.hexsha, | |
branch_name=branch_name, | |
)) | |
except git.exc.InvalidGitRepositoryError: | |
git_infos.append(GitInfo( | |
directory=directory, | |
code_diff='', | |
code_diff_staged='', | |
commit_hash='', | |
branch_name='(not a git repo)', | |
)) | |
pass | |
except (ImportError, UnboundLocalError, NameError) as e: | |
print("Install GitPython to automatically save git information.") | |
git_infos = [] | |
return git_infos | |
def save_git_info(code_dirs, log_dir: str):
    """Write the git state of each directory in ``code_dirs`` into ``log_dir``.

    For every directory this writes:
      * ``<name>.patch`` with the unstaged diff (only if non-empty),
      * ``<name>_staged.patch`` with the staged diff (only if non-empty),
      * an entry appended to ``git_infos.txt`` with the directory, commit
        hash and branch name.

    ``<name>`` is the directory path with leading/trailing ``/`` removed and
    the remaining ``/`` replaced by ``-``.

    Args:
        code_dirs: Iterable of code directory paths to record.
        log_dir: Directory to write into; created if it does not exist.

    Returns:
        The list of collected git info records (empty when nothing could be
        collected, in which case nothing is written).
    """
    git_infos = _generate_git_infos(code_dirs)
    if not git_infos:
        return git_infos

    os.makedirs(log_dir, exist_ok=True)
    summary_path = osp.join(log_dir, "git_infos.txt")

    for (
        directory, code_diff, code_diff_staged, commit_hash, branch_name
    ) in git_infos:
        # strip('/') instead of slicing off the first character: relative
        # paths keep their first letter and an empty string cannot raise
        # IndexError. Absolute paths yield the same names as before.
        stem = str(directory).strip("/").replace("/", "-")
        patches = (
            (stem + ".patch", code_diff),
            (stem + "_staged.patch", code_diff_staged),
        )
        for file_name, diff in patches:
            if not diff:
                continue
            # Explicit UTF-8 so non-ASCII diff content does not depend on
            # the locale's default encoding.
            with open(
                osp.join(log_dir, file_name), "w", encoding="utf-8"
            ) as f:
                f.write(diff + '\n')

        # Append mode: repeated calls accumulate entries in one summary file.
        with open(summary_path, "a", encoding="utf-8") as f:
            f.write("directory: {}".format(directory))
            f.write('\n')
            f.write("git hash: {}".format(commit_hash))
            f.write('\n')
            f.write("git branch name: {}".format(branch_name))
            f.write('\n\n')
    return git_infos
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment