Created
October 19, 2015 13:27
-
-
Save beshrkayali/6e2261f0b704d6aa7f90 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python2 | |
""" | |
This is a modified version of https://moinmo.in/ActionMarket/HTML2MoinMoin that takes in an HTML file instead of a URL.
Usage: | |
./html2moinmoin.py FILE.html | |
./html2moinmoin.py FILE.html > file.moin | |
Reads the given FILE.html and converts it to MoinMoin markup. The result is written to stdout.
""" | |
import htmlentitydefs, sys | |
from HTMLParser import HTMLParser | |
class HTML2MoinMoin(HTMLParser):
    """Convert HTML fed to the parser into MoinMoin wiki markup.

    Markup is written incrementally to ``self.output`` (``sys.stdout`` by
    default) as the parser encounters tags and text.

    Tag handling is resolved in this order:

    1. a method named ``do_<tag>_start`` / ``do_<tag>_end``, if one exists
       (looked up on the instance, so subclasses may add their own);
    2. the ``start_tags`` / ``end_tags`` tables below;
    3. ``do_default_start`` / ``do_default_end``, which only report the tag
       when ``self.verbose`` is set.
    """

    # Markup written when a tag opens.  Templates are %-formatted with a
    # dict holding the tag's attributes both by name and by position
    # ("0", "1", ...).  For "a", key "0" is always the href.
    start_tags = {
        "a": " [%(0)s ",
        "b": "'''",
        "em": "''",
        "tt": "{{{",
        "pre": "\n{{{",
        "p": "\n\n",
        "br": "\n\n",
        "h1": "\n\n= ",
        "h2": "\n\n== ",
        "h3": "\n\n=== ",
        "h4": "\n\n==== ",
        "h5": "\n\n===== ",
        "title": "TITLE: ",
        "table": "\n",
        "tr": "",
        "td": "||",
    }

    # Markup written when a tag closes.
    end_tags = {
        "a": ']',
        "b": "'''",
        "em": "''",
        "tt": "}}}",
        "pre": "}}}\n",
        "p": "",
        "h1": " =\n\n",
        "h2": " ==\n\n",
        "h3": " ===\n\n",
        "h4": " ====\n\n",
        "h5": " =====\n\n",
        "table": "\n",
        "tr": "||\n",
        "dt": ":: ",
    }

    def __init__(self):
        HTMLParser.__init__(self)
        self.output = sys.stdout   # file-like object receiving the markup
        self.list_mode = []        # stack of markers for the open lists
        self.link_stack = []       # per open <a>: True if a link was emitted
        self.preformatted = False  # True while inside <pre>
        self.verbose = 0           # non-zero: report unhandled tags

    def write(self, text):
        """Write *text* to the output stream."""
        self.output.write(text)

    # -- lists ---------------------------------------------------------

    def do_ul_start(self, attrs, tag):
        """Open a bulleted list."""
        self.list_mode.append("*")

    def do_ol_start(self, attrs, tag):
        """Open a numbered list."""
        self.list_mode.append("1.")

    def do_dl_start(self, attrs, tag):
        """Open a definition list (its items carry no leading marker)."""
        self.list_mode.append("")

    def do_ul_end(self, tag):
        """Close the innermost list; a stray end tag is ignored."""
        self.list_mode = self.list_mode[:-1]

    do_ol_end = do_ul_end
    do_dl_end = do_ul_end

    def _start_list_item(self):
        """Begin a new line indented to the list depth, then the marker."""
        if self.list_mode:
            depth = len(self.list_mode)
            marker = self.list_mode[-1]
        else:
            # An item outside any list used to raise IndexError; render it
            # as a first-level bullet instead.
            depth = 1
            marker = "*"
        self.write("\n" + " " * depth + marker)

    def do_li_start(self, args, tag):
        """Start a list item."""
        self._start_list_item()

    def do_dt_start(self, args, tag):
        """Start a definition term."""
        self._start_list_item()

    # -- preformatted text ---------------------------------------------

    def do_pre_start(self, args, tag):
        """Enter preformatted mode so newlines in text are preserved."""
        self.preformatted = True
        self.write(self.start_tags["pre"])

    def do_pre_end(self, tag):
        """Leave preformatted mode."""
        self.preformatted = False
        self.write(self.end_tags["pre"])

    # -- links -----------------------------------------------------------

    def do_a_start(self, attrs, tag):
        """Open a link using the tag's ``href`` attribute.

        Anchors without an href (for example ``<a name="x">``) produce no
        markup; their text is written as plain text.
        """
        href = dict(attrs).get("href")
        # Remember whether markup was opened so do_a_end can match it.
        self.link_stack.append(bool(href))
        if href:
            self.write(self.start_tags["a"] % {"0": href, "href": href})

    def do_a_end(self, tag):
        """Close a link, but only if the matching start tag opened one."""
        if self.link_stack and self.link_stack.pop():
            self.write(self.end_tags["a"])

    # -- HTMLParser callbacks --------------------------------------------

    def handle_starttag(self, tag, attrs):
        """Dispatch a start tag to its handler or markup template."""
        handler = getattr(self, "do_%s_start" % tag, None)
        if handler is not None:
            handler(attrs, tag)
        elif tag in self.start_tags:
            values = {}
            for position, (key, value) in enumerate(attrs):
                values[key] = value
                values[str(position)] = value
            self.write(self.start_tags[tag] % values)
        else:
            self.do_default_start(attrs, tag)

    def handle_endtag(self, tag):
        """Dispatch an end tag to its handler or markup template."""
        handler = getattr(self, "do_%s_end" % tag, None)
        if handler is not None:
            handler(tag)
        elif tag in self.end_tags:
            self.write(self.end_tags[tag])
        else:
            self.do_default_end(tag)

    def handle_data(self, data):
        """Write text, folding newlines to spaces outside ``<pre>``."""
        if self.preformatted:
            self.write(data)
        else:
            self.write(data.replace("\n", " "))

    def handle_charref(self, name):
        """Handle a numeric character reference such as ``&#65;``.

        *name* is the reference without ``&#`` and ``;`` (``"65"`` or
        ``"x41"``).  Printable ASCII is written as the character itself;
        everything else is written back as the original reference, which
        avoids any dependence on the output stream's encoding.
        """
        try:
            if name[:1] in ("x", "X"):
                code = int(name[1:], 16)
            else:
                code = int(name)
        except ValueError:
            code = None
        if code is not None and 32 <= code < 127:
            self.write(chr(code))
        else:
            self.write("&#%s;" % name)

    def handle_entityref(self, name):
        """Handle a named entity such as ``&amp;``.

        Known entities are replaced by their definition; unknown ones are
        written back unchanged, including the terminating semicolon.
        """
        if name in htmlentitydefs.entitydefs:
            self.write(htmlentitydefs.entitydefs[name])
        else:
            self.write("&%s;" % name)

    # -- fallbacks -------------------------------------------------------

    def do_default_start(self, attrs, tag):
        """Report an unhandled start tag when verbose (printed to stdout)."""
        if self.verbose:
            print("Encountered the beginning of a %s tag" % tag)
            print("Attribs: %s" % attrs)

    def do_default_end(self, tag):
        """Report an unhandled end tag when verbose (printed to stdout)."""
        if self.verbose:
            print("Encountered the end of a %s tag" % tag)
def main():
    """Convert the HTML file named on the command line to MoinMoin markup.

    The file path is taken from ``sys.argv[1]``; the converted markup is
    written to stdout.  Exits with a usage message on stderr when no file
    argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit("Usage: %s FILE.html" % sys.argv[0])

    # The with-block closes the file; no explicit close() is needed.
    with open(sys.argv[1], 'r') as htmlfile:
        htmldata = htmlfile.read()

    converter = HTML2MoinMoin()
    converter.feed(htmldata)
    converter.close()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment