jwhitlock · February 6, 2019 18:37 · escattone · Feb 6, 2019
diff --git a/find_macro_usage.py b/find_macro_usage.py
 import re

 from kuma.wiki.models import Document

 # Verbose-mode regex template matching one macro call with an argument list,
 # i.e. text shaped like {{ Name(...) }}. The single format placeholder takes
 # the macro name; the result must be compiled with re.VERBOSE.
 # It is a raw string so the backslash escapes reach the regex engine intact,
 # and the argument match is non-greedy so that two calls on the same line
 # are reported as two matches instead of one long one.
 macro_re_pattern = r"""
     \{\{  # Opening braces
     \s*   # Optional whitespace
     %s    # The macro name
     \s*   # Optional whitespace
     \(    # Open parens
     .*?   # Arguments: anything on the same line, as little as possible
     \)    # Close parens
     \s*   # Optional whitespace
     \}\}  # Closing braces
 """


 def find_macros(macro):
     """Find every call of a macro in non-template documents.

     :param macro: Macro name, matched case-insensitively as literal text.
     :return: A list of ``(doc_id, doc_url, offset, text)`` tuples, one per
         match, where ``offset`` is the start of the match within the
         document HTML and ``text`` is the full matched macro call.
     """
     # Escape the name so regex metacharacters in it are matched literally
     # instead of altering (or breaking) the compiled pattern.
     macro_re = re.compile(macro_re_pattern % re.escape(macro),
                           re.IGNORECASE | re.VERBOSE)
     usage = []
     # Database-side pre-filter; the regex below does the real matching.
     docs = (Document.objects
             .exclude(is_template=True)
             .filter(html__icontains=macro.lower()))
     # Documents are fetched one at a time by id rather than by iterating
     # the queryset itself -- presumably to limit memory use; TODO confirm.
     for doc_id in docs.values_list('id', flat=True):
         doc = Document.objects.get(id=doc_id)
         doc_url = doc.get_full_url()
         usage.extend(
             (doc.id, doc_url, match.start(), match.group(0))
             for match in macro_re.finditer(doc.html))
     return usage


 def print_usage(usage, comma_count=None, max_len=None):
     """Print macro matches, with a heading line whenever the document changes.

     :param usage: Iterable of ``(doc_id, doc_url, pos, text)`` tuples.
     :param comma_count: When not None, only print matches whose text
         contains exactly this many commas.
     :param max_len: When not None, only print matches whose text is at
         most this many characters long.
     """
     heading_id = None
     for doc_id, doc_url, _pos, snippet in usage:
         commas_ok = comma_count is None or snippet.count(',') == comma_count
         if commas_ok and (max_len is None or len(snippet) <= max_len):
             if doc_id != heading_id:
                 # First printed match for this document: emit its heading.
                 print('%s (%s)' % (doc_url, doc_id))
                 heading_id = doc_id
             print((u"  " + snippet).encode('utf8'))


 def find_and_print_usage(macro, comma_count=None, max_len=None):
     """Search for ``macro`` with find_macros() and print what was found.

     ``comma_count`` and ``max_len`` are passed through to print_usage().
     """
     matches = find_macros(macro)
     print_usage(matches, comma_count, max_len)

 # Name of the macro that the example invocations below search for.
 macro = 'EmbedLiveSample'
 # Example invocations, left commented out:
 # find_and_print_usage(macro)  # All instances

 # All instances less than 1000 chars (avoid syntax and parsing errors)
 # find_and_print_usage(macro, max_len=1000)

 # All instances with a single comma (~2 args), limit length
 # find_and_print_usage(macro, 1, 1000)
	import re

	from kuma.wiki.models import Document

	# Verbose-mode regex template matching one macro call with an argument list,
	# i.e. text shaped like {{ Name(...) }}. The single format placeholder takes
	# the macro name; the result must be compiled with re.VERBOSE.
	# It is a raw string so the backslash escapes reach the regex engine intact,
	# and the argument match is non-greedy so that two calls on the same line
	# are reported as two matches instead of one long one.
	macro_re_pattern = r"""
	    \{\{  # Opening braces
	    \s*   # Optional whitespace
	    %s    # The macro name
	    \s*   # Optional whitespace
	    \(    # Open parens
	    .*?   # Arguments: anything on the same line, as little as possible
	    \)    # Close parens
	    \s*   # Optional whitespace
	    \}\}  # Closing braces
	"""


	def find_macros(macro):
	    """Find every call of a macro in non-template documents.

	    :param macro: Macro name, matched case-insensitively as literal text.
	    :return: A list of ``(doc_id, doc_url, offset, text)`` tuples, one per
	        match, where ``offset`` is the start of the match within the
	        document HTML and ``text`` is the full matched macro call.
	    """
	    # Escape the name so regex metacharacters in it are matched literally
	    # instead of altering (or breaking) the compiled pattern.
	    macro_re = re.compile(macro_re_pattern % re.escape(macro),
	                          re.IGNORECASE | re.VERBOSE)
	    usage = []
	    # Database-side pre-filter; the regex below does the real matching.
	    docs = (Document.objects
	            .exclude(is_template=True)
	            .filter(html__icontains=macro.lower()))
	    # Documents are fetched one at a time by id rather than by iterating
	    # the queryset itself -- presumably to limit memory use; TODO confirm.
	    for doc_id in docs.values_list('id', flat=True):
	        doc = Document.objects.get(id=doc_id)
	        doc_url = doc.get_full_url()
	        usage.extend(
	            (doc.id, doc_url, match.start(), match.group(0))
	            for match in macro_re.finditer(doc.html))
	    return usage


	def print_usage(usage, comma_count=None, max_len=None):
	    """Print macro matches, with a heading line whenever the document changes.

	    :param usage: Iterable of ``(doc_id, doc_url, pos, text)`` tuples.
	    :param comma_count: When not None, only print matches whose text
	        contains exactly this many commas.
	    :param max_len: When not None, only print matches whose text is at
	        most this many characters long.
	    """
	    last_doc_id = None
	    for doc_id, doc_url, _pos, text in usage:
	        if comma_count is not None and text.count(',') != comma_count:
	            continue
	        if max_len is not None and len(text) > max_len:
	            continue
	        if doc_id != last_doc_id:
	            # First printed match for this document: emit its heading.
	            print('%s (%s)' % (doc_url, doc_id))
	            last_doc_id = doc_id
	        print((u" " + text).encode('utf8'))


	def find_and_print_usage(macro, comma_count=None, max_len=None):
	    """Search for ``macro`` with find_macros() and print what was found.

	    ``comma_count`` and ``max_len`` are passed through to print_usage().
	    """
	    print_usage(find_macros(macro), comma_count, max_len)

	# Name of the macro that the example invocations below search for.
	macro = 'EmbedLiveSample'
	# Example invocations, left commented out:
	# find_and_print_usage(macro) # All instances

	# All instances less than 1000 chars (avoid syntax and parsing errors)
	# find_and_print_usage(macro, max_len=1000)

	# All instances with a single comma (~2 args), limit length
	# find_and_print_usage(macro, 1, 1000)