Last active
August 29, 2015 14:07
-
-
Save cjerdonek/76608610df43fd5b0fc3 to your computer and use it in GitHub Desktop.
Python Pandoc filter for converting GitHub markdown to Python reST long_description for PyPI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""
Python Pandoc filter [1] for converting a GitHub markdown file to a Python
reST long_description (suitable for display on PyPI).

Sample usage:

    $ pandoc --filter ./md2rst.py --write=rst --output=long_description.rst README.md

PyPI's reST rendering breaks on things like relative links (supported by
GitHub [2]) and anchor fragments.  This filter converts these links
to links that will continue to work once on PyPI.

See also this PyPI bug report [3].

[1]: https://github.com/jgm/pandocfilters
[2]: https://github.com/blog/1395-relative-links-in-markup-files
[3]: https://bitbucket.org/pypa/pypi/issue/161/rest-formatting-fails-and-there-is-no-way

"""
import logging | |
import os | |
import sys | |
from urllib.parse import urljoin, urlparse, urlunparse | |
from pandocfilters import toJSONFilter, Link | |
# Base URL against which links to repository files (e.g. "docs/contributing.md")
# are resolved, so that they point at the file as rendered on GitHub.
GITHUB_URL = "https://github.com/cjerdonek/open-rcv/blob/master/"

# Base URL against which fragment-only links (e.g. "#license") are resolved,
# so that they point back at a section of the project's PyPI page.
PYPI_URL = "https://pypi.python.org/pypi/OpenRCV/"

# Module logger, named after this script's file name.
log = logging.getLogger(os.path.basename(__file__))
def configure_logging():
    """Configure logging at DEBUG level and log a confirmation message.

    Records are formatted as ``<logger name>: [<LEVEL>] <message>``.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(name)s: [%(levelname)s] %(message)s",
    )
    log.debug("Debug logging enabled.")
# This function can be used to create other Pandoc filters that
# transform URLs in hyperlinks.
def init_action(convert_url):
    """
    Return a Pandoc "action" suitable for passing to toJSONFilter.

    Logging is configured as a side effect of calling this function.

    Arguments:
      convert_url: a function that accepts a link URL and returns a new
        one, or None if the link should be left unchanged.
    """
    configure_logging()

    def transform_url(key, value, format, meta):
        """Rewrite the URL of a Link element; ignore every other element.

        Returns None (meaning "leave the element as it is") for non-Link
        elements and for links that convert_url declines to change.
        """
        if key != 'Link':
            return None
        # The link target -- a [url, title] pair -- is the last item of
        # value.  Older Pandoc versions emit
        #   [[{'t': 'Str', 'c': 'Contributing'}], ['docs/contributing.md', '']]
        # while Pandoc 1.16+ puts an attribute triple in front:
        #   [attr, [{'t': 'Str', ...}], ['docs/contributing.md', '']]
        # Indexing from the end picks the target in both layouts.
        target = value[-1]
        url = target[0]
        new_url = convert_url(url)
        if new_url is None:
            return None
        # Lazy %-style arguments: the message is only built if it is emitted.
        log.info("converting URL:\n"
                 " %s\n"
                 "-->%s", url, new_url)
        target[0] = new_url
        # Everything except the URL is passed back to Link untouched.
        return Link(*value)

    return transform_url
def convert_url(url):
    """Convert URL appearing in a markdown file to a new URL.

    URLs with an empty path (e.g. the fragment "#license") are resolved
    against PYPI_URL so that they link back to a section of the PyPI page.
    URLs whose path ends in ".md", or is exactly "LICENSE", are resolved
    against GITHUB_URL so that they link to the original source on GitHub.

    Returns None if URL should remain same.
    """
    parsed_url = urlparse(url)
    log.debug("%r", parsed_url)
    url_path = parsed_url.path
    if not url_path:
        # Then we assume it is a fragment (e.g. "#license") that should
        # link back to a section on the same PyPI page.
        base_url = PYPI_URL
    elif url_path.endswith(".md") or url_path == "LICENSE":
        # We link back to the original source GitHub page.
        base_url = GITHUB_URL
    else:
        return None
    new_url = urljoin(base_url, urlunparse(parsed_url))
    # urljoin() hands back an already-absolute URL as it is; report that
    # case as "no change" instead of as a conversion to the same URL.
    if new_url == url:
        return None
    return new_url
if __name__ == "__main__":
    # Run as a script (e.g. via "pandoc --filter"): build the link-rewriting
    # action from convert_url and hand it to the pandocfilters driver.
    url_action = init_action(convert_url)
    toJSONFilter(url_action)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment