-
-
Save davidrielo/167b0d8ba4c57a5b80988c1b8df40b29 to your computer and use it in GitHub Desktop.
Given an XML Document, returns a (mostly) accurate JSON representation. WIP.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import _ from "lodash"; | |
// flattens an object (recursively!), similarly to Array#flatten | |
// e.g. flatten({ a: { b: { c: "hello!" } } }); // => "hello!" | |
function flatten(object) { | |
const check = _.isPlainObject(object) && _.size(object) === 1; | |
return check ? flatten(_.values(object)[0]) : object; | |
} | |
export function parse(xml) { | |
let data = {}; | |
const isText = xml.nodeType === 3, | |
isElement = xml.nodeType === 1, | |
body = xml.textContent.trim(), | |
hasChildren = xml.children.length, | |
hasAttributes = xml.attributes.length; | |
// if it's text just return it | |
if (isText) { return xml.nodeValue.trim(); } | |
// if it doesn't have any children or attributes, just return the contents | |
if (!hasChildren && !hasAttributes) { return xml.textContent; } | |
// if it doesn't have children but _does_ have body content, we'll use that | |
if (!hasChildren && body.length) { data.text = body; } | |
// if it's an element with attributes, add them to data.attributes | |
if (isElement && hasAttributes) { | |
data.attributes = _.reduce(xml.attributes, function(obj, name, id) { | |
let attr = xml.attributes.item(id); | |
obj[attr.name] = attr.value; | |
return obj; | |
}, {}); | |
} | |
// recursively call #parse over children, adding results to data | |
_.each(xml.children, function(child) { | |
const name = child.nodeName; | |
// if we've not come across a child with this nodeType, add it as an object | |
// and return here | |
if (!_.has(data, name)) { | |
data[name] = parse(child); | |
return; | |
} | |
// if we've encountered a second instance of the same nodeType, make our | |
// representation of it an array | |
if (!_.isArray(data[name])) { data[name] = [data[name]]; } | |
// and finally, append the new child | |
data[name].push(parse(child)); | |
}); | |
// if we can, let's fold some attributes into the body | |
_.each(data.attributes, (value, key) => { | |
if (data[key] != null) { return; } | |
data[key] = value; | |
delete data.attributes[key]; | |
}); | |
// if data.attributes is now empty, get rid of it | |
if (_.isEmpty(data.attributes)) { delete data.attributes; } | |
// simplify to reduce number of final leaf nodes and return | |
return flatten(data); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment