-
-
Save kijuky/cac1779689cdc2fc4e9f to your computer and use it in GitHub Desktop.
index.htmlを解析してメソッド一覧を作る。フレームに依存しているため、フレームがないjavadocだと落ちそう。
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import os.path
import sys

import lxml.html
import xlwt
def is_method(method):
    """Return True if the anchor name *method* looks like a method anchor.

    A name qualifies when it contains a ``-`` (dash-separated signature,
    e.g. ``name-type-``) or when it contains both ``(`` and ``)``
    (parenthesised signature, e.g. ``name(type)``).
    """
    if '-' in method:
        return True
    return '(' in method and ')' in method
def _anchor_to_signature(anchor):
    """Convert one anchor name into a ``name(type,type)`` signature string."""
    # Only dash-separated anchors need rewriting.  The membership test is
    # deliberate: ``str.find`` returns -1 (truthy) when nothing is found,
    # which would wrongly rewrite ``foo(int)`` into ``foo(int)()``.
    if '-' not in anchor:
        return anchor
    if anchor.startswith('Z:Z'):
        anchor = anchor[len('Z:Z'):]
    parts = anchor.split('-')
    # parts[0] is the method name; the last element is dropped because it is
    # what follows the final dash.  ':A' marks an array type.
    params = [param.replace(':A', '[]') for param in parts[1:-1]]
    return parts[0] + '(' + ','.join(params) + ')'


def write_on_sheet(doc_file, sheet, start, xpath_exp='//a/@name', debug=True):
    """Write the methods found in one HTML page onto *sheet*.

    The class name (file name of *doc_file* without its extension) is written
    to column 1 of row *start*.  Each method signature is written to column 2,
    one per row, beginning at row *start*, in sorted order of the raw anchor
    names.

    Args:
        doc_file: Address of the HTML page; passed to ``lxml.html.parse``.
        sheet: Object exposing ``write(row, col, value)``.
        start: Index of the first row to write to.
        xpath_exp: XPath expression selecting the candidate anchor names.
        debug: When true, print every signature that is written.

    Returns:
        The index of the next free row.  A page without any method still
        consumes one row, so its class name is not overwritten by the
        next call.
    """
    root = lxml.html.parse(doc_file)
    methods = sorted(name for name in root.xpath(xpath_exp) if is_method(name))

    # Strip only the final extension: ``Outer.Inner.html`` keeps the full
    # ``Outer.Inner`` name, and a path without any dot no longer raises.
    class_name = os.path.splitext(os.path.basename(doc_file))[0]
    sheet.write(start, 1, class_name)

    row = start
    for method in methods:
        method_sig = _anchor_to_signature(method)
        if debug:
            print(method_sig)
        sheet.write(row, 2, method_sig)
        row += 1
    return max(row, start + 1)
def main():
    """Build ``method_list.xls`` from the frame index page given as argv[1].

    The index page must contain an element named ``packageListFrame``; its
    ``src`` page lists the packages, and each package's
    ``package-frame.html`` lists the class pages.  An IndexError is raised
    when the index page has no such frame element.

    The sheet 'All Methods' receives the package name in column 0; columns 1
    and 2 are filled by ``write_on_sheet``.  Exits with status 1 when the
    argument count is wrong or an existing workbook cannot be removed.
    """
    if len(sys.argv) != 2:
        print('Error')
        sys.exit(1)

    index_addr = sys.argv[1]  # sys.argv[1] = index.html
    if index_addr.endswith("index.html"):
        root_addr = index_addr[:-len("index.html")]
    else:
        root_addr = index_addr

    overview = lxml.html.parse(index_addr)
    package_list_frame = overview.xpath('//*[@name="packageListFrame"]')[0]
    package_list_frame_addr = root_addr + package_list_frame.xpath('@src')[0]
    package_list = lxml.html.parse(package_list_frame_addr)

    # Map each package name to the addresses of its class pages.
    package_doc_files = {}
    package_links = package_list.xpath(
        '//a[@target="packageFrame" and contains(@href, "/package-frame.html")]')
    for package_link in package_links:
        package_overview_addr = root_addr + package_link.xpath('@href')[0]
        package_overview = lxml.html.parse(package_overview_addr)
        # Class hrefs are appended to the directory of package-frame.html.
        package_dir = package_overview_addr[:-len("package-frame.html")]
        class_links = package_overview.xpath(
            '//a[@target="classFrame" and not(contains(@href, "/package-summary.html"))]')
        package_doc_files[package_link.text] = [
            package_dir + class_link.xpath("@href")[0]
            for class_link in class_links
        ]

    # excel file
    book_name = 'method_list.xls'
    sheet_name = 'All Methods'
    if os.path.exists(book_name):
        try:
            os.remove(book_name)
        except OSError as ex:
            # The original ``except ex:`` referenced an undefined name and
            # raised NameError instead of reporting the failure.
            print("Sheet can't be removed")
            print(ex)
            sys.exit(1)

    wb = xlwt.Workbook()
    ws = wb.add_sheet(sheet_name, cell_overwrite_ok=True)
    start = 0
    # sorted() works on both Python 2 and 3, unlike keys().sort().
    for package_name in sorted(package_doc_files):
        print(package_name)
        ws.write(start, 0, package_name)
        for doc_file in package_doc_files[package_name]:
            print(doc_file)
            start = write_on_sheet(doc_file, ws, start)
    wb.save(book_name)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment