Last active
February 16, 2018 20:32
-
-
Save dhimmel/5cf98acc58f60ede9504422e7a0a9f41 to your computer and use it in GitHub Desktop.
Convert Manubot gh-pages to use versioned directories
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Created versioned gh-pages directories for legacy Manubot repos\n", | |
"\n", | |
"Designed to be run from the root of a repository to be upgraded.\n", | |
"\n", | |
"See https://github.com/greenelab/manubot-rootstock/pull/101 and https://github.com/greenelab/manubot-rootstock/issues/96.\n", | |
"\n", | |
"```\n", | |
"# First make sure you have a local gh-pages branch\n", | |
"# Checkout from upstream like\n", | |
"git checkout --track upstream/gh-pages\n", | |
"# Pull to make sure its up to date\n", | |
"git pull\n", | |
"```\n", | |
"\n", | |
"You must be in the gh-pages branch for this to work. This notebook is online as a [GitHub gist](https://gist.github.com/dhimmel/5cf98acc58f60ede9504422e7a0a9f41) and is released under a [CC0 1.0 License](https://creativecommons.org/publicdomain/zero/1.0/legalcode)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import collections\n", | |
"import re\n", | |
"import pathlib\n", | |
"import subprocess\n", | |
"\n", | |
"import git\n", | |
"import pandas" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Set parameters\n", | |
"\n", | |
"This notebook was developed for https://github.com/greenelab/meta-review/. Update for your own repository." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"b74e916beb5c5e070b51ca42fc6b32655b3256ee\n" | |
] | |
} | |
], | |
"source": [ | |
"# Set to path of repository\n", | |
"repo = git.Repo(path = '.')\n", | |
"# State of gh-pages branch\n", | |
"print(repo.heads['gh-pages'].commit)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"slug = 'greenelab/meta-review'\n", | |
"# Commit range in gh-pages to convert to versioned directories\n", | |
"first_commit = '3cac19dad403e4a0262c287a05a1b0adce355744' # oldest\n", | |
"last_commit = '8b6861af46df7ec2664aca2d2d5e08a0bf3bb238' # most recent" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Identify commits to port to directories" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>commit</th>\n", | |
" <th>author_name</th>\n", | |
" <th>author_email</th>\n", | |
" <th>authored_datetime</th>\n", | |
" <th>summary</th>\n", | |
" <th>count</th>\n", | |
" <th>merge</th>\n", | |
" <th>parents</th>\n", | |
" <th>source_commit</th>\n", | |
" <th>files</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>90f15d32caa3f4de7d23e9e3f58986f72daeaf63</td>\n", | |
" <td>Daniel</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>2017-07-16 18:02:55+00:00</td>\n", | |
" <td>Second attempt for Travis deploy timeout (#23)</td>\n", | |
" <td>16</td>\n", | |
" <td>0</td>\n", | |
" <td>3cac19dad403e4a0262c287a05a1b0adce355744</td>\n", | |
" <td>123b84b73fde43e2f5df9fc47a62fe3b45295d24</td>\n", | |
" <td>[github-pandoc.css, index.html, index.html.ots...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>27f6d900eaf7cec3e5a2f40fa92d259d75735e59</td>\n", | |
" <td>David</td>\n", | |
" <td>[email protected]</td>\n", | |
" <td>2017-07-16 18:54:36+00:00</td>\n", | |
" <td>Fix typo in 03.figures.md (#22)</td>\n", | |
" <td>17</td>\n", | |
" <td>0</td>\n", | |
" <td>90f15d32caa3f4de7d23e9e3f58986f72daeaf63</td>\n", | |
" <td>6c732a3a4eebe8a23ef5a0dbc37330746d629ead</td>\n", | |
" <td>[github-pandoc.css, index.html, index.html.ots...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" commit author_name \\\n", | |
"0 90f15d32caa3f4de7d23e9e3f58986f72daeaf63 Daniel \n", | |
"1 27f6d900eaf7cec3e5a2f40fa92d259d75735e59 David \n", | |
"\n", | |
" author_email authored_datetime \\\n", | |
"0 [email protected] 2017-07-16 18:02:55+00:00 \n", | |
"1 [email protected] 2017-07-16 18:54:36+00:00 \n", | |
"\n", | |
" summary count merge \\\n", | |
"0 Second attempt for Travis deploy timeout (#23) 16 0 \n", | |
"1 Fix typo in 03.figures.md (#22) 17 0 \n", | |
"\n", | |
" parents \\\n", | |
"0 3cac19dad403e4a0262c287a05a1b0adce355744 \n", | |
"1 90f15d32caa3f4de7d23e9e3f58986f72daeaf63 \n", | |
"\n", | |
" source_commit \\\n", | |
"0 123b84b73fde43e2f5df9fc47a62fe3b45295d24 \n", | |
"1 6c732a3a4eebe8a23ef5a0dbc37330746d629ead \n", | |
"\n", | |
" files \n", | |
"0 [github-pandoc.css, index.html, index.html.ots... \n", | |
"1 [github-pandoc.css, index.html, index.html.ots... " | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"rows = list()\n", | |
"\n", | |
"# commit in backticks regex finder\n", | |
"pattern = re.compile('`([0-9a-f]{40})`')\n", | |
" \n", | |
"for commit in repo.iter_commits(rev=f'{first_commit}...{last_commit}'):\n", | |
" row = collections.OrderedDict()\n", | |
" row['commit'] = commit.hexsha\n", | |
" row['author_name'] = commit.author.name\n", | |
" row['author_email'] = commit.author.email\n", | |
" row['authored_datetime'] = commit.authored_datetime\n", | |
" row['summary'] = commit.summary\n", | |
" row['count'] = commit.count()\n", | |
" row['merge'] = int(len(commit.parents) > 1)\n", | |
" row['parents'] = ', '.join(x.hexsha for x in commit.parents)\n", | |
" readme = repo.git.show(f'{commit}:README.md')\n", | |
" match = pattern.search(readme)\n", | |
" row['source_commit'] = match.group(1) if match else None\n", | |
" files = repo.git.execute(['git', 'ls-tree', '--name-only', commit.hexsha]).splitlines()\n", | |
" files = sorted(set(files) - {'README.md', 'README.md.ots'})\n", | |
" row['files'] = files\n", | |
" rows.append(row)\n", | |
"\n", | |
"rows = list(reversed(rows))\n", | |
"commit_df = pandas.DataFrame(rows)\n", | |
"commit_df = commit_df.drop_duplicates('source_commit', keep='last')\n", | |
"commit_df.head(2)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"38" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(commit_df)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Output shell commands" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"assert repo.active_branch.name == 'gh-pages'\n", | |
"\n", | |
"for row in commit_df.itertuples():\n", | |
" directory = pathlib.Path(f'v/{row.source_commit}')\n", | |
" directory.mkdir(exist_ok=True)\n", | |
" args = [\n", | |
" 'git',\n", | |
" f'--work-tree={directory}',\n", | |
" 'checkout',\n", | |
" row.commit,\n", | |
" '--',\n", | |
" ] + row.files\n", | |
" process = subprocess.run(args, stderr=subprocess.PIPE)\n", | |
" if process.stderr:\n", | |
" print(' '.join(process.args))\n", | |
" print(process.stderr.decode())\n", | |
" subprocess.run(['git', 'add', '--update'])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Commit and upgrade timestamps\n", | |
"\n", | |
"```\n", | |
"git add v\n", | |
"git commit\n", | |
"```\n", | |
"\n", | |
"Upgrade timestamps\n", | |
"\n", | |
"```\n", | |
"## The following may fail due to https://github.com/opentimestamps/opentimestamps-client/issues/71\n", | |
"# ots upgrade v/*/*.ots\n", | |
"## Instead use\n", | |
"ls -t --reverse v/*/*.ots | xargs ots upgrade\n", | |
"rm v/*/*.ots.bak\n", | |
"git add v/*/*.ots\n", | |
"git commit\n", | |
"```" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda root]", | |
"language": "python", | |
"name": "conda-root-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment