Skip to content

Instantly share code, notes, and snippets.

@jwhitlock
Last active February 6, 2019 18:37
Find macro usage in MDN pages
import re
from kuma.wiki.models import Document
# Verbose-mode regex template matching one macro call, e.g. ``{{ Name(args) }}``.
# The single ``%s`` placeholder is filled with the macro name, and the result
# must be compiled with re.VERBOSE so the layout and inline comments below are
# ignored by the regex engine.
# It is a raw string so that ``\s``, ``\(`` and ``\)`` reach the regex engine
# unchanged instead of being read as (invalid) string escape sequences.
macro_re_pattern = r"""
\{\{    # Opening braces
\s*     # Optional whitespace
%s      # The macro name
\s*     # Optional whitespace
\(      # Open parens
.*?     # Arguments, non-greedy so a match ends at this call's own close
\)      # Close parens
\s*     # Optional whitespace
\}\}    # Closing braces
"""
def find_macros(macro):
    """Collect every call of ``macro`` found in the HTML of non-template documents.

    Documents are pre-filtered in the database with a case-insensitive
    substring match on the macro name, then each candidate's ``html`` is
    scanned with the compiled ``macro_re_pattern`` (case-insensitive).

    :param macro: The macro name to look for; it is matched literally.
    :return: A list of ``(doc_id, doc_url, position, text)`` tuples, where
        ``doc_url`` is the value of ``doc.get_full_url()``, ``position`` is
        the match's start offset in ``doc.html`` and ``text`` is the full
        matched macro call.
    """
    # re.escape keeps any regex metacharacter in the name literal, which is
    # consistent with the literal substring filter used on the database side.
    macro_re = re.compile(
        macro_re_pattern % re.escape(macro), re.IGNORECASE | re.VERBOSE)
    usage = []
    docs = Document.objects.exclude(is_template=True).filter(
        html__icontains=macro.lower())
    # Only ids are listed up front and each document is then fetched on its
    # own -- presumably to avoid holding every matching document in memory
    # at once (NOTE(review): confirm this was the intent).
    for doc_id in docs.values_list('id', flat=True):
        doc = Document.objects.get(id=doc_id)
        doc_url = doc.get_full_url()
        usage.extend(
            (doc.id, doc_url, match.start(), match.group(0))
            for match in macro_re.finditer(doc.html))
    return usage
def print_usage(usage, comma_count=None, max_len=None):
    """Print macro usages grouped under a header line per document.

    :param usage: Iterable of ``(doc_id, doc_url, position, text)`` tuples.
        Entries of the same document are expected to be adjacent; a new
        header is printed whenever the document id changes.
    :param comma_count: When not None, only print entries whose text
        contains exactly this many commas.
    :param max_len: When not None, skip entries whose text is longer than
        this many characters.
    """
    last_doc_id = None
    for doc_id, doc_url, _pos, text in usage:
        if comma_count is not None and text.count(',') != comma_count:
            continue
        if max_len is not None and len(text) > max_len:
            continue
        # The header is printed only once an entry survives the filters, so
        # documents with no printable entries produce no output at all.
        if doc_id != last_doc_id:
            print('%s (%s)' % (doc_url, doc_id))
            last_doc_id = doc_id
        line = u" " + text
        if str is bytes:
            # Python 2 only: encode explicitly so non-ASCII text can be
            # printed. On Python 3, printing encoded bytes would emit a
            # b'...' repr instead of the text, so the str is printed as is.
            line = line.encode('utf8')
        print(line)
def find_and_print_usage(macro, comma_count=None, max_len=None):
    """Search for calls of ``macro`` and print those that pass the filters.

    The result of ``find_macros(macro)`` is handed to ``print_usage``
    together with ``comma_count`` and ``max_len``.
    """
    found = find_macros(macro)
    print_usage(found, comma_count=comma_count, max_len=max_len)
# Name of the macro used by the example invocations below.
macro = 'EmbedLiveSample'
# Example invocations (commented out; uncomment one to run it).
# find_and_print_usage(macro) # All instances
# All instances of at most 1000 chars (avoid syntax and parsing errors)
# find_and_print_usage(macro, max_len=1000)
# All instances with exactly one comma (~2 args), at most 1000 chars
# find_and_print_usage(macro, 1, 1000)
@escattone
Copy link

I love the way you created macro_re_pattern, with each key part of the regex briefly explained with a comment. Nice idea!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment