Last active
March 21, 2016 16:13
-
-
Save riconroy/bafc6c64e74a4fda0945 to your computer and use it in GitHub Desktop.
This recursive Python routine parses an XML file to a JSON file, with the added requirement that the XML can have multiple equal tags that become a list (array). See example in listing below.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# example XML: | |
# <maximum-temperatures> | |
# <temperature year="2012" units="C">37</temperature> | |
# <temperature year="2013" units="C">38</temperature> | |
# </maximum-temperatures> | |
# | |
# would output:
# | |
# "maximum-temperatures": { | |
# "temperature": [ | |
# { | |
# "year": "2012", | |
# "units": "C", | |
# "value": "37" | |
# }, | |
# { | |
# "year": "2013", | |
# "units": "C", | |
# "value": "38" | |
# } | |
# ] | |
# } | |
def createInnerDictionary(element):
    """Recursively convert an ElementTree element into a plain dictionary.

    Example: ``<myTag name="foo" zone="bar">myText</myTag>``
      * ``element.attrib`` is the attribute mapping: {"name": "foo", "zone": "bar"}
      * ``element.tag`` is the element name: "myTag"
      * ``element.text`` is the element text: "myText"

    The result starts from a copy of the element's attributes. Non-blank
    element text is stored, stripped, under the key ``'value'``. Every child
    is stored under its tag name; when the same tag occurs more than once
    the entries are collected into a list, in document order.

    A child is represented as:
      * the result of a recursive call, if it has children of its own;
      * a dict of its attributes (plus ``'value'`` for non-blank text), if it
        is a leaf with attributes;
      * its raw ``text`` (unstripped, possibly ``None``), if it is a leaf
        without attributes.

    Note that attributes, the ``'value'`` key and child tags share a single
    namespace in the result: an attribute named ``value`` is overwritten by
    the element text, and an attribute whose name equals a child tag is
    folded into that tag's list.

    Args:
        element: an ``xml.etree.ElementTree.Element`` (or compatible object
            exposing ``attrib``, ``text``, ``tag``, ``len()`` and iteration
            over children).

    Returns:
        A new dict describing ``element``. The element tree itself is left
        unmodified.
    """
    # Copy the attributes: writing 'value' or child keys straight into
    # element.attrib would silently alter the caller's parsed tree.
    out_dict = dict(element.attrib)

    # If one exists, add the (stripped) text of the main element.
    own_text = (element.text or '').strip()
    if own_text:
        out_dict['value'] = own_text

    for child in element:
        if len(child) > 0:
            # The child has children of its own: recurse.
            elementToAdd = createInnerDictionary(child)
        elif child.attrib:
            # Leaf with attributes: copy them (same aliasing concern as above).
            elementToAdd = dict(child.attrib)
            child_text = (child.text or '').strip()
            if child_text:
                elementToAdd['value'] = child_text
        else:
            # Leaf without attributes: the raw text is the whole value.
            elementToAdd = child.text

        if child.tag not in out_dict:
            # First occurrence of this key: store the entry directly.
            out_dict[child.tag] = elementToAdd
            continue

        # Repeated key: make sure the slot is a list, then extend it.
        existing = out_dict[child.tag]
        if isinstance(existing, list):
            existing.append(elementToAdd)
        else:
            out_dict[child.tag] = [existing, elementToAdd]

    return out_dict
# our 'main' | |
import json | |
try: | |
import xml.etree.cElementTree as etree | |
except ImportError: | |
import xml.etree.ElementTree as etree | |
# Here we assume the input file is local: the relative path is resolved
# against the current working directory.
tree = etree.parse('myInputFile.xml')
root = tree.getroot()
# Create a dictionary from the XML file.
my_dict = createInnerDictionary(root)
# Parenthesised single-argument print works both as the Python 2 print
# statement and as the Python 3 print() function.
print(json.dumps(my_dict, sort_keys=True, indent=4))
# done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment