Created
August 17, 2019 17:45
-
-
Save swashcap/d6786f14ff340e285d4b0ab6d785a6bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Transform RSS-style XML with geolocation data to YAML
 *
 * {@link https://yaml.org}
 *
 * ```
 * mkdir -p data
 * yarn add hard-rejection cheerio he fast-xml-parser js-yaml
 * # ...
 * node xml-to-yaml.js
 * ```
 */
// Registers hard-rejection so a rejection of the async entry point below is
// not silently ignored.
require('hard-rejection/register')
const cheerio = require('cheerio')
const he = require('he')
const path = require('path')
const xml = require('fast-xml-parser')
const yaml = require('js-yaml')
const { readFile, writeFile } = require('fs').promises

// Opening and closing <address> wrapper tags.
const ADDRESS_TAG = /<\/?address>/g
// <br>, <br/> and <br /> separators between address lines.
const BR_TAG = /<br\s*\/?>/g
// A 5-digit ZIP, optionally followed by a "-digits" suffix, at the end of a line.
const ZIP_PATTERN = /(\d{5})(-\d+)?$/

/**
 * Split an item's `content:encoded` markup into trimmed, non-empty lines.
 *
 * @param {object} item - One parsed `<item>` from the feed.
 * @returns {string[]} Address lines with tags and blank entries removed.
 * @throws {Error} When `content:encoded` is missing or not a string.
 */
function toAddressLines (item) {
  const encoded = item['content:encoded']
  if (typeof encoded !== 'string') {
    throw new Error(`${item.title} lacks an address`)
  }
  return encoded
    .replace(ADDRESS_TAG, '')
    .split(BR_TAG)
    .map(line => line.trim())
    .filter(Boolean)
}

/**
 * Build one location record (name, street, zip, coordinates) from a feed item.
 *
 * @param {object} item - One parsed `<item>` from the feed.
 * @param {string} fallbackName - Used when the item has no description.
 * @returns {{name: string, address: {street: string, zip: number}, geo: {latitude: number, longitude: number}}}
 * @throws {Error} When coordinates are not numbers, or no ZIP can be found on
 *   the last address line.
 */
function toLocation (item, fallbackName) {
  if (
    typeof item['geo:lat'] !== 'number' ||
    typeof item['geo:long'] !== 'number'
  ) {
    throw new Error(`${item.title} lacks coordinates`)
  }

  const lines = toAddressLines(item)
  const zipMatch =
    lines.length > 0 ? lines[lines.length - 1].match(ZIP_PATTERN) : null
  if (!zipMatch) {
    // Previously this surfaced as an opaque TypeError on `null[1]`.
    throw new Error(`${item.title} lacks a ZIP code`)
  }

  return {
    // String() guards against the XML parser coercing an all-digit
    // description to a number, which he.decode cannot handle.
    name: item.description ? he.decode(String(item.description)) : fallbackName,
    address: {
      // With exactly three lines the street is the second one; otherwise the
      // first line is used.
      street: lines[lines.length === 3 ? 1 : 0],
      // NOTE(review): the ZIP is emitted as a number, as before, so a leading
      // zero (e.g. "02134") is dropped — confirm consumers expect that.
      zip: parseInt(zipMatch[1], 10),
    },
    geo: {
      latitude: item['geo:lat'],
      longitude: item['geo:long'],
    },
  }
}

/**
 * Turn a display name into a lowercase, dash-separated file name stem.
 *
 * @param {string} name - Decoded item title.
 * @returns {string} Slug without extension.
 */
function toSlug (name) {
  return name
    .toLowerCase()
    // Path separators are replaced too, so a title cannot escape `data/`.
    .replace(/[.&/\\]/g, ' ')
    .replace(/’/g, '')
    .trim()
    .replace(/ +/g, '-')
}

/**
 * Entry point: read `data.xml` next to this script, group its items by
 * decoded title, and write one YAML file per group into `data/`, then print
 * the written file paths.
 */
;(async () => {
  const data = await readFile(path.join(__dirname, 'data.xml'), 'utf8')

  // validate() yields `true` on success and a (truthy) error object on
  // failure, so the result must be compared against `true` explicitly.
  const validation = xml.validate(data)
  if (validation !== true) {
    const reason = validation && validation.err && validation.err.msg
    throw new Error(
      reason ? `data is invalid XML: ${reason}` : 'data is invalid XML'
    )
  }

  // A feed with a single <item> is parsed as one object rather than an
  // array; normalise so zero, one, or many items are all handled.
  const parsedItems = xml.parse(data).rss.channel.item
  const feedItems = parsedItems == null ? [] : [].concat(parsedItems)

  // A Map avoids collisions with inherited keys such as "constructor" that a
  // plain-object `in` check would report as already present.
  const groups = new Map()
  for (const item of feedItems) {
    const name = he.decode(String(item.title))
    if (!groups.has(name)) {
      groups.set(name, {
        name,
        // NOTE(review): only the first item's link is kept for a group —
        // confirm later items with the same title never carry a different one.
        links: [item.link],
        locations: [],
      })
    }
    groups.get(name).locations.push(toLocation(item, name))
  }

  const filenames = await Promise.all(
    [...groups.values()].map(async entry => {
      const filename = path.join(
        __dirname,
        'data',
        toSlug(entry.name) + '.yml'
      )
      await writeFile(filename, yaml.safeDump(entry))
      return filename
    })
  )

  console.log(filenames.join('\n'))
})()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment