|
import os.path |
|
import re |
|
from glob import glob |
|
|
|
|
|
def replace(basedir, replacements):
    """Apply regex substitutions in place to every HTML file under a directory.

    Recursively finds ``*.html`` files below *basedir*, applies each
    pattern/replacement pair with ``re.MULTILINE`` set (so ``^`` and ``$``
    match at line boundaries), and overwrites each file with the result.

    :param basedir: directory searched recursively for ``*.html`` files
    :param replacements: mapping of regular-expression pattern to replacement
        string (``re.sub`` syntax, so backreferences such as ``\\1`` work);
        pairs are applied sequentially in the mapping's iteration order, so
        later patterns operate on the output of earlier ones
    :raises re.error: if a pattern is not a valid regular expression
    """
    # Compile every pattern once up front instead of once per file.
    substitutions = [
        (re.compile(pattern, flags=re.MULTILINE), replacement)
        for pattern, replacement in replacements.items()
    ]

    for filename in glob(os.path.join(basedir, '**', '*.html'), recursive=True):
        # Be explicit about the encoding: replacement strings may contain
        # non-ASCII characters, and the locale's default encoding is not
        # guaranteed to represent them. Assumes the HTML files are UTF-8.
        with open(filename, encoding='utf-8') as f:
            content = f.read()

        for regex, replacement in substitutions:
            content = regex.sub(replacement, content)

        with open(filename, 'w', encoding='utf-8') as f:
            f.write(content)
|
|
|
|
|
# Post-process the HTML files under 'build'. Every key is a regular expression
# and every value an `re.sub` replacement string. The pairs are applied in the
# order written, so later entries operate on the output of earlier ones.
# NOTE(review): presumably this normalizes 'build' so it can be compared with
# the 't' tree processed below -- confirm.
replace('build', {
    r'\.\.\.': '…',
    # URLs: insert the trailing slash before the fragment.
    '/codelists#': '/codelists/#',
    '/schema#': '/schema/#',
    # HTML entities: decode quote character references (decimal, hex or named)
    # to the literal characters.
    # NOTE(review): these four keys had been the literal quote characters
    # mapping to themselves, which is a no-op; they are restored here as entity
    # references to match this section's purpose -- confirm against the
    # generated HTML.
    r'&(?:#8216|#x2018|lsquo);': '‘',
    r'&(?:#8217|#x2019|rsquo);': '’',
    r'&(?:#8220|#x201[Cc]|ldquo);': '“',
    r'&(?:#8221|#x201[Dd]|rdquo);': '”',
    '> <': '><',
    # Classes: drop the "first" / "last" marker classes.
    ' class="first"': '',
    ' class="last"': '',
    ' class="first ': ' class="',
    ' class="last ': ' class="',
    ' class="first last ': ' class="',
    ' class="hide last"': ' class="hide"',
    '<dl class="docutils">': '<dl class="simple">',
    # Links: relabel internal links as external and unwrap "doc" spans.
    ' internal"': ' external"',
    r'<span class="doc">([^<]+)</span>': r'\1',
    # Blockquotes: wrap bare content in a paragraph.
    r'<blockquote>\n<div>(?!<p>)(.+)</div>': r'<blockquote>\n<div><p>\1</p>\n</div>',
    # Lists: wrap bare item content in a paragraph (`^` relies on MULTILINE).
    r'^<dd>(?!<p>)(.+)</dd>': r'<dd><p>\1</p>\n</dd>',
    r'^<li>(?!<p>)(.+)</li>': r'<li><p>\1</p></li>',
    r'^<li>(?!<p>)(.+)<ul>': r'<li><p>\1</p>\n<ul>',
    r'^<li>(<p>.+</p>)\n</li>': r'<li>\1</li>',
    # Tables: replace presentational attributes and wrap cell content in
    # paragraphs.
    r'<table border="1" class="([^"]*)docutils">': r'<table class="\1docutils align-default">',
    '<col width="': '<col style="width: ',
    '<thead valign="bottom">': '<thead>',
    '<tbody valign="top">': '<tbody>',
    r'<th class="head"([^>]*)>(?!<p>)(.+?)\n?</th>': r'<th class="head"\1><p>\2</p></th>',
    r'<td>(?!<p>)(.+?)\n?</td>': r'<td><p>\1</p></td>',
    r'<td colspan="([14])">(?!<p>)(.+)</td>': r'<td colspan="\1"><p>\2</p></td>',
    # Cleanup: collapse repeated <p> / </p> tags. This works around
    # over-wrapping by the rules above rather than fixing those rules.
    r'(<p>)+': '<p>',
    r'(</p>)+': '</p>',
    # Whitespace: join each </dt> to the <dd> that follows it.
    '</dt>\n<dd>': '</dt><dd>',
})
|
|
|
# Post-process the HTML files under 't'. Keys are regular expressions, values
# are `re.sub` replacement strings, and the pairs are applied in the order
# written.
replace(
    basedir='t',
    replacements={
        # Remove the language_data.js <script> line, at any "../" depth.
        r' <script type="text/javascript" src="(\.\./)*_static/language_data\.js"></script>\n': '',
        # Classes: drop "notranslate" and a leading "first" marker.
        ' notranslate': '',
        ' class="first ': ' class="',
        # Links: relabel internal links as external and unwrap "doc" spans.
        ' internal"': ' external"',
        r'<span class="doc">([^<]+)</span>': r'\1',
    },
)