pearswj · June 30, 2018 11:46
diff --git a/docjet2xml.py b/docjet2xml.py
 #!/usr/bin/env python

 # Reformat c++ documentation comments from docjet (also supported by
 # doc-o-matic) to c#-style xml (supported by doxygen).
 #
 # Configure doxygen filter like so...
 #
 #   FILTER_PATTERNS        = *.h="python docjet2xml.py"
 #
 #
 # Will Pearson <[email protected]>

 import re
 import sys

 def main():
    """Convert every file named on the command line and print the result.

    Each path in ``sys.argv[1:]`` is read and trailing whitespace is removed
    from every line.  The first ``skip`` lines are echoed unchanged and the
    remaining lines are handed to ``process_lines``.  All output goes to
    STDOUT.
    """
    # number of leading lines (header and license) copied through untouched
    skip = 15

    for fname in sys.argv[1:]:
        with open(fname) as f:
            content = [line.rstrip() for line in f]

        # uncomment to rewrite the original file, instead of printing to STDOUT
        #sys.stdout = open(fname, 'w')

        # a parenthesised single argument makes print valid both as the
        # Python 2 statement and as the Python 3 function
        for line in content[:skip]:
            print(line)

        process_lines(content[skip:])

 def _flush(block):
    """Print ``block`` when it holds any lines and return a new, empty Block."""
    if len(block) > 0:
        block.pprint()
    return Block()


 def process_lines(content):
    """Scan source lines, reformat comment blocks and print everything.

    Consecutive ``//`` lines and multi-line ``/* ... */`` comments are
    gathered into a ``Block`` and printed through ``Block.pprint``.  Existing
    ``///`` comments only get a space inserted after the slashes when one is
    missing.  Every other line is printed as is (after tab expansion).
    Lines are always printed in the order in which they were read.

    Parameters:
        content: iterable of source lines without trailing newlines.
    """
    block = Block()
    in_block = False  # True while inside a /* ... */ comment

    for l in content:
        # replace tabs with 2 spaces
        l = l.replace('\t', '  ')

        if in_block:
            if '*/' not in l:
                block.lines.append(l)
                continue

            # end current block and flush
            in_block = False
            closing = l.rstrip()
            if closing.endswith('*/'):
                # comment text sharing a line with the closing */ belongs to
                # the block; a run of decorative asterisks does not
                text = closing[:-2].rstrip()
                if text.strip('* '):
                    block.lines.append(text)
                block.pprint()
            else:
                block.pprint()
                # something follows the */ on this line: flag it for manual
                # review and keep the line, minus the comment terminator
                print('// TODODOC: ' + l)
                print(l.replace('*/', ''))
            block = Block()
            continue

        m = re.match(r'^( *)/\*(.*)', l)  # match /*
        if m:
            # print a pending // block first so the output keeps input order
            block = _flush(block)
            if '*/' in l:  # no one-liners
                print(l)
                continue
            # forcefully start a new block
            block = Block(True)
            in_block = True
            block.indent = len(m.group(1))
            if m.group(2).strip():
                block.lines.append(m.group(2))
            continue

        m = re.match(r'^( *)//(.*)', l)  # match //
        if m:
            if not m.group(2).startswith('/'):
                block.indent = len(m.group(1))
                block.lines.append(m.group(2))
                continue
            if re.search('^ *///[^ /]', l):  # fixup existing /// comments
                block = _flush(block)
                # only the leading marker is touched, not a later '///'
                print(l.replace('///', '/// ', 1))
                continue

        # ordinary line, or a /// comment that needs no fixup
        block = _flush(block)
        print(l)

    # a comment block may run up to the very end of the input
    if in_block or len(block) > 0:
        block.pprint()

 class Block:
    """A group of consecutive comment lines and the logic to print them.

    Attributes:
      lines   -- list of comment text lines collected for this block
      indent  -- number of spaces written in front of every emitted line
      asterix -- True when the block should be re-emitted as ``/* ... */``
                 if it turns out not to be doc-o-matic documentation
    """

    # section headers (lower-case, without the trailing colon) that mark a
    # comment as doc-o-matic documentation
    ALLOWED_KEYS = ['description', 'details', 'parameters', 'input', 'returns',
                    'remarks', 'comments', 'example', 'see also']

    def __init__(self, asterix=False):
        self.lines = []
        self.indent = 0
        self.asterix = asterix

    def __len__(self):
        return len(self.lines)

    def _emit(self, text):
        """Print ``text`` behind ``self.indent`` spaces."""
        # single-argument print(...) is valid on Python 2 and Python 3
        print(' ' * self.indent + text)

    def _emit_section(self, tag, lines):
        """Print ``lines`` as ``///`` comments wrapped in an XML ``tag``."""
        self._emit('/// <' + tag + '>')
        for l in lines:
            self._emit('/// ' + l)
        self._emit('/// </' + tag + '>')

    def _parse(self):
        """Split ``self.lines`` into doc-o-matic sections.

        Returns a tuple ``(store, orphans)``: ``store`` maps each recognised
        section key to its list of lines, ``orphans`` holds the lines seen
        before the first recognised section header.  A line ending in ':'
        that is not a recognised header is reported with a TODODOC note and
        then treated like any other line.
        """
        store = {}
        orphans = []
        key = None
        for l in self.lines:
            stripped = l.strip()
            if stripped.endswith(':'):
                tmp_key = stripped[:-1].lower()
                if tmp_key in self.ALLOWED_KEYS:
                    key = tmp_key
                    # a repeated header continues the section instead of
                    # throwing away what was collected so far
                    store.setdefault(key, [])
                    continue
                self._emit('// TODODOC: ' + tmp_key)
            if key:
                # preserve indent: lines indented 3 or more columns beyond
                # the block keep their leading whitespace minus 2 characters
                indent = max(len(l) - len(l.lstrip()) - self.indent, 0)
                store[key].append(stripped if indent < 3 else l.rstrip()[2:])
            else:
                orphans.append(l)
        return store, orphans

    def pprint(self):
        """Print the block, converting doc-o-matic sections to XML comments.

        * indent > 10: every line is printed as a plain ``//`` comment.
        * exactly one line: printed as a ``///`` comment.
        * otherwise the lines are parsed for section headers.  Without any
          recognised header the block is printed back as ``/* ... */`` or
          ``//`` lines; with headers the sections are printed as ``///`` XML
          (summary, param, returns, remarks, example, seealso), preceded by a
          TODODOC note listing lines that belong to no section.
        """
        if self.indent > 10:  # comments with large indents are better left alone
            for l in self.lines:
                self._emit('//' + l.rstrip())
            return

        if len(self) == 1:  # don't try to parse one line comments
            self._emit('///' + self.lines[0].rstrip())
            return

        store, orphans = self._parse()

        # not a doc-o-matic comment?
        if not store:
            if self.asterix:
                self._emit('/*')
                for l in self.lines:
                    print(l)
                self._emit('*/')
            else:
                for l in self.lines:
                    self._emit('//' + l)
            return

        # doc-o-matic comments found, but some lines didn't parse correctly
        # print them with a note to come back later
        if orphans:
            self._emit('// TODODOC: partial doc')
            for l in orphans:
                self._emit('//' + l)

        # Description (+ Details) => <summary>
        if 'description' in store or 'details' in store:
            self._emit('/// <summary>')
            for l in store.get('description', []):
                self._emit('/// ' + l)
            if 'details' in store:
                self._emit('///')
                for l in store['details']:
                    self._emit('/// ' + l)
            self._emit('/// </summary>')

        # Parameters / Input => <param>...
        for key in ('parameters', 'input'):
            if key in store:
                process_params(store[key], self.indent)

        # Returns => <returns>, Remarks and Comments => <remarks>
        for key, tag in (('returns', 'returns'),
                         ('remarks', 'remarks'),
                         ('comments', 'remarks')):
            if key in store:
                self._emit_section(tag, store[key])

        # Example => <example><code>
        if 'example' in store:
            self._emit('/// <example>')
            self._emit_section('code', store['example'])
            self._emit('/// </example>')

        # See also => <seealso>..., one per non-blank line
        for l in store.get('see also', []):
            if l.strip():
                self._emit('/// <seealso cref="{}"/>'.format(l.strip()))

 def process_params(lines, indent):
    """Print the lines of a parameter section as ``/// <param>`` comments.

    A line of the form ``name - description`` opens a new ``<param>``
    element; any other line after the first parameter is kept as a
    continuation of the current element.  Non-blank lines seen before the
    first parameter are printed with a TODODOC note.

    Parameters:
        lines:  list of text lines belonging to the section
        indent: number of spaces written in front of every emitted line
    """
    prefix = ' ' * indent + '/// '
    params = []
    for l in lines:
        # the dash needs whitespace on at least one side, otherwise a
        # continuation line starting with a hyphenated word ("well-known")
        # would be taken for a parameter called "well"
        m = re.match(r'^([a-zA-Z0-9_]+)(?: +- *| *- +)(.*)', l.strip())
        if m:
            if params:
                params[-1] += '</param>'
            params.append('<param name="{}">{}'.format(m.group(1), m.group(2)))
        elif params:
            params.append(l)
        elif l.strip():
            # text before the first parameter cannot be attached to anything;
            # blank lines are not worth a note
            print(prefix + 'TODODOC: ' + l)
    if params:
        params[-1] += '</param>'
        for p in params:
            print(prefix + p)


 # run the conversion only when executed as a script, not when imported
 if __name__ == '__main__':
    main()
	#!/usr/bin/env python

	# Reformat c++ documentation comments from docjet (also supported by
	# doc-o-matic) to c#-style xml (supported by doxygen).
	#
	# Configure doxygen filter like so...
	#
	# FILTER_PATTERNS = *.h="python docjet2xml.py"
	#
	#
	# Will Pearson <[email protected]>

	import re
	import sys

	def main():
	    """Convert every file named on the command line and print the result.

	    Each path in ``sys.argv[1:]`` is read and trailing whitespace is removed
	    from every line.  The first ``skip`` lines are echoed unchanged and the
	    remaining lines are handed to ``process_lines``.  All output goes to
	    STDOUT.
	    """
	    # number of leading lines (header and license) copied through untouched
	    skip = 15

	    for fname in sys.argv[1:]:
	        with open(fname) as f:
	            content = [line.rstrip() for line in f]

	        # uncomment to rewrite the original file, instead of printing to STDOUT
	        #sys.stdout = open(fname, 'w')

	        # a parenthesised single argument makes print valid both as the
	        # Python 2 statement and as the Python 3 function
	        for line in content[:skip]:
	            print(line)

	        process_lines(content[skip:])

	def _flush(block):
	    """Print ``block`` when it holds any lines and return a new, empty Block."""
	    if len(block) > 0:
	        block.pprint()
	    return Block()


	def process_lines(content):
	    """Scan source lines, reformat comment blocks and print everything.

	    Consecutive ``//`` lines and multi-line ``/* ... */`` comments are
	    gathered into a ``Block`` and printed through ``Block.pprint``.  Existing
	    ``///`` comments only get a space inserted after the slashes when one is
	    missing.  Every other line is printed as is (after tab expansion).
	    Lines are always printed in the order in which they were read.

	    Parameters:
	        content: iterable of source lines without trailing newlines.
	    """
	    block = Block()
	    in_block = False  # True while inside a /* ... */ comment

	    for l in content:
	        # replace tabs with 2 spaces
	        l = l.replace('\t', '  ')

	        if in_block:
	            if '*/' not in l:
	                block.lines.append(l)
	                continue

	            # end current block and flush
	            in_block = False
	            closing = l.rstrip()
	            if closing.endswith('*/'):
	                # comment text sharing a line with the closing */ belongs to
	                # the block; a run of decorative asterisks does not
	                text = closing[:-2].rstrip()
	                if text.strip('* '):
	                    block.lines.append(text)
	                block.pprint()
	            else:
	                block.pprint()
	                # something follows the */ on this line: flag it for manual
	                # review and keep the line, minus the comment terminator
	                print('// TODODOC: ' + l)
	                print(l.replace('*/', ''))
	            block = Block()
	            continue

	        m = re.match(r'^( *)/\*(.*)', l)  # match /*
	        if m:
	            # print a pending // block first so the output keeps input order
	            block = _flush(block)
	            if '*/' in l:  # no one-liners
	                print(l)
	                continue
	            # forcefully start a new block
	            block = Block(True)
	            in_block = True
	            block.indent = len(m.group(1))
	            if m.group(2).strip():
	                block.lines.append(m.group(2))
	            continue

	        m = re.match(r'^( *)//(.*)', l)  # match //
	        if m:
	            if not m.group(2).startswith('/'):
	                block.indent = len(m.group(1))
	                block.lines.append(m.group(2))
	                continue
	            if re.search('^ *///[^ /]', l):  # fixup existing /// comments
	                block = _flush(block)
	                # only the leading marker is touched, not a later '///'
	                print(l.replace('///', '/// ', 1))
	                continue

	        # ordinary line, or a /// comment that needs no fixup
	        block = _flush(block)
	        print(l)

	    # a comment block may run up to the very end of the input
	    if in_block or len(block) > 0:
	        block.pprint()

	class Block:
	    """A group of consecutive comment lines and the logic to print them.

	    Attributes:
	      lines   -- list of comment text lines collected for this block
	      indent  -- number of spaces written in front of every emitted line
	      asterix -- True when the block should be re-emitted as ``/* ... */``
	                 if it turns out not to be doc-o-matic documentation
	    """

	    # section headers (lower-case, without the trailing colon) that mark a
	    # comment as doc-o-matic documentation
	    ALLOWED_KEYS = ['description', 'details', 'parameters', 'input', 'returns',
	                    'remarks', 'comments', 'example', 'see also']

	    def __init__(self, asterix=False):
	        self.lines = []
	        self.indent = 0
	        self.asterix = asterix

	    def __len__(self):
	        return len(self.lines)

	    def _emit(self, text):
	        """Print ``text`` behind ``self.indent`` spaces."""
	        # single-argument print(...) is valid on Python 2 and Python 3
	        print(' ' * self.indent + text)

	    def _emit_section(self, tag, lines):
	        """Print ``lines`` as ``///`` comments wrapped in an XML ``tag``."""
	        self._emit('/// <' + tag + '>')
	        for l in lines:
	            self._emit('/// ' + l)
	        self._emit('/// </' + tag + '>')

	    def _parse(self):
	        """Split ``self.lines`` into doc-o-matic sections.

	        Returns a tuple ``(store, orphans)``: ``store`` maps each recognised
	        section key to its list of lines, ``orphans`` holds the lines seen
	        before the first recognised section header.  A line ending in ':'
	        that is not a recognised header is reported with a TODODOC note and
	        then treated like any other line.
	        """
	        store = {}
	        orphans = []
	        key = None
	        for l in self.lines:
	            stripped = l.strip()
	            if stripped.endswith(':'):
	                tmp_key = stripped[:-1].lower()
	                if tmp_key in self.ALLOWED_KEYS:
	                    key = tmp_key
	                    # a repeated header continues the section instead of
	                    # throwing away what was collected so far
	                    store.setdefault(key, [])
	                    continue
	                self._emit('// TODODOC: ' + tmp_key)
	            if key:
	                # preserve indent: lines indented 3 or more columns beyond
	                # the block keep their leading whitespace minus 2 characters
	                indent = max(len(l) - len(l.lstrip()) - self.indent, 0)
	                store[key].append(stripped if indent < 3 else l.rstrip()[2:])
	            else:
	                orphans.append(l)
	        return store, orphans

	    def pprint(self):
	        """Print the block, converting doc-o-matic sections to XML comments.

	        * indent > 10: every line is printed as a plain ``//`` comment.
	        * exactly one line: printed as a ``///`` comment.
	        * otherwise the lines are parsed for section headers.  Without any
	          recognised header the block is printed back as ``/* ... */`` or
	          ``//`` lines; with headers the sections are printed as ``///`` XML
	          (summary, param, returns, remarks, example, seealso), preceded by a
	          TODODOC note listing lines that belong to no section.
	        """
	        if self.indent > 10:  # comments with large indents are better left alone
	            for l in self.lines:
	                self._emit('//' + l.rstrip())
	            return

	        if len(self) == 1:  # don't try to parse one line comments
	            self._emit('///' + self.lines[0].rstrip())
	            return

	        store, orphans = self._parse()

	        # not a doc-o-matic comment?
	        if not store:
	            if self.asterix:
	                self._emit('/*')
	                for l in self.lines:
	                    print(l)
	                self._emit('*/')
	            else:
	                for l in self.lines:
	                    self._emit('//' + l)
	            return

	        # doc-o-matic comments found, but some lines didn't parse correctly
	        # print them with a note to come back later
	        if orphans:
	            self._emit('// TODODOC: partial doc')
	            for l in orphans:
	                self._emit('//' + l)

	        # Description (+ Details) => <summary>
	        if 'description' in store or 'details' in store:
	            self._emit('/// <summary>')
	            for l in store.get('description', []):
	                self._emit('/// ' + l)
	            if 'details' in store:
	                self._emit('///')
	                for l in store['details']:
	                    self._emit('/// ' + l)
	            self._emit('/// </summary>')

	        # Parameters / Input => <param>...
	        for key in ('parameters', 'input'):
	            if key in store:
	                process_params(store[key], self.indent)

	        # Returns => <returns>, Remarks and Comments => <remarks>
	        for key, tag in (('returns', 'returns'),
	                         ('remarks', 'remarks'),
	                         ('comments', 'remarks')):
	            if key in store:
	                self._emit_section(tag, store[key])

	        # Example => <example><code>
	        if 'example' in store:
	            self._emit('/// <example>')
	            self._emit_section('code', store['example'])
	            self._emit('/// </example>')

	        # See also => <seealso>..., one per non-blank line
	        for l in store.get('see also', []):
	            if l.strip():
	                self._emit('/// <seealso cref="{}"/>'.format(l.strip()))

	def process_params(lines, indent):
	    """Print the lines of a parameter section as ``/// <param>`` comments.

	    A line of the form ``name - description`` opens a new ``<param>``
	    element; any other line after the first parameter is kept as a
	    continuation of the current element.  Non-blank lines seen before the
	    first parameter are printed with a TODODOC note.

	    Parameters:
	        lines:  list of text lines belonging to the section
	        indent: number of spaces written in front of every emitted line
	    """
	    prefix = ' ' * indent + '/// '
	    params = []
	    for l in lines:
	        # the dash needs whitespace on at least one side, otherwise a
	        # continuation line starting with a hyphenated word ("well-known")
	        # would be taken for a parameter called "well"
	        m = re.match(r'^([a-zA-Z0-9_]+)(?: +- *| *- +)(.*)', l.strip())
	        if m:
	            if params:
	                params[-1] += '</param>'
	            params.append('<param name="{}">{}'.format(m.group(1), m.group(2)))
	        elif params:
	            params.append(l)
	        elif l.strip():
	            # text before the first parameter cannot be attached to anything;
	            # blank lines are not worth a note
	            print(prefix + 'TODODOC: ' + l)
	    if params:
	        params[-1] += '</param>'
	        for p in params:
	            print(prefix + p)


	# run the conversion only when executed as a script, not when imported
	if __name__ == '__main__':
	    main()