Skip to content

Instantly share code, notes, and snippets.

@waxpancake
Created February 23, 2011 17:41

Revisions

  1. waxpancake created this gist Feb 23, 2011.
    63 changes: 63 additions & 0 deletions parse_daily.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,63 @@
    #!/usr/bin/python

    import json, sys, re
    filename = sys.argv[1]
    f = open(filename, 'r')
    contents = json.load(f)

    contentIndex = {}

    for content in contents['content']:
    if content['type'] == 'text':
    identifier = content['identifier']
    contentIndex[identifier] = content['text']

    headlineIndex = {}
    deckIndex = {}
    bylineIndex = {}

    for section in contents['sections']:
    print '<h3>' + section['name'] + '</h3>'
    print '<ol>'
    for page in section['pages']:
    if page['displayOnCarousel'] == True:
    if 'webURL' in page:
    webURL = page['webURL']
    layoutIndex = {}
    for layout in page['layouts']:
    for component in layout['components']:
    if 'contentRefs' in component:
    if len(component['contentRefs']) > 0:
    contentID = component['contentRefs'][0]['contentID']
    if contentID in contentIndex:
    if re.search("Head", component['title'], re.IGNORECASE):
    headline = re.sub('\n', ' ', contentIndex[contentID])
    headlineIndex[webURL] = headline
    if re.search("Hed", component['title'], re.IGNORECASE):
    headline = re.sub('\n', ' ', contentIndex[contentID])
    headlineIndex[webURL] = headline
    if re.search("Deck", component['title'], re.IGNORECASE):
    deck = re.sub('\n', ' ', contentIndex[contentID])
    deckIndex[webURL] = deck
    if re.search("Byline", component['title'], re.IGNORECASE):
    byline = re.sub('\n', ' ', contentIndex[contentID])
    bylineIndex[webURL] = byline

    print '<li>'
    if 'headline' in page:
    print '<a href="' + webURL + '">' + page['headline'] + '</a>'
    elif webURL in headlineIndex:
    print '<a href="' + webURL + '">' + headlineIndex[webURL] + '</a>'
    else:
    print '<a href="' + webURL + '">' + page['title'] + '</a>'

    if webURL in bylineIndex:
    print bylineIndex[webURL]

    if webURL in deckIndex:
    print '&mdash;', deckIndex[webURL]
    elif 'openingLine' in page:
    print '&mdash;', page['openingLine']

    print '</li>'
    print '</ol>'