Last active
May 6, 2021 19:33
-
-
Save dontcallmedom/290986d35a8991a163f805e1692ff53a to your computer and use it in GitHub Desktop.
Process editors extracted from a custom reffy run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Third-party dependencies: jsdom parses each editor's HTML markup and
// json2csv serialises the flattened result as CSV.
const {JSDOM} = require("jsdom");
const {Parser} = require("json2csv");
const fs = require("fs");

// The spec data file is given as the first command-line argument. Without it,
// require(undefined) fails with an opaque TypeError, so print a usage hint and
// exit with a non-zero status instead.
if (!process.argv[2]) {
  console.error("Usage: node <script> <path-to-spec-data.json>");
  process.exit(1);
}

// Loaded through require(), so the path follows Node's module resolution
// rules. The rest of the script iterates over it as an array and reads
// `editors`, `title` and `nightly.url` from each entry.
const specData = require(process.argv[2]);

// Accumulator filled by the extraction loop:
// affiliation -> editor name -> list of {url, title} entries.
const affiliations = {};
/**
 * Returns a copy of `o` whose own enumerable string keys are inserted in
 * sorted order, recursing into nested non-array objects. This gives
 * JSON.stringify a stable, alphabetical key order.
 *
 * Primitives, null and arrays are returned untouched; in particular, objects
 * held inside arrays are NOT re-ordered.
 *
 * @param {*} o - Any value.
 * @returns {*} `o` itself when it is not a non-array object, otherwise a new
 *   object with sorted keys.
 */
const sortKeys = (o) => {
  // Object(o) !== o is true for every primitive as well as null/undefined.
  if (Object(o) !== o || Array.isArray(o)) {
    return o;
  }
  // Build the result in one pass rather than re-spreading the accumulator
  // for every key, which made the original quadratic in the number of keys.
  return Object.fromEntries(
    Object.keys(o).sort().map((k) => [k, sortKeys(o[k])])
  );
};
// Maps the domain part of an editor's mailto: address to an affiliation label.
// The table is frozen and has no prototype, so a bracket lookup with an
// arbitrary domain string can only ever return one of the entries below
// (never an inherited member such as `constructor`).
const domainToAffiliation = Object.freeze({
  __proto__: null,
  "adobe.com": "Adobe",
  // NOTE(review): presumably covers a misspelled address found in the crawled
  // data rather than being a typo in this table; confirm before removing.
  "adboe.com": "Adobe",
  "google.com": "Google",
  "mozilla.com": "Mozilla",
  "mozilla.org": "Mozilla",
  // NOTE(review): the next two look like personal domains of individual
  // editors mapped to their employer; verify they are still accurate.
  "cs.stanford.edu": "Mozilla",
  "mcc.id.au": "Mozilla",
  "apple.com": "Apple",
  "w3.org": "W3C",
  "microsoft.com": "Microsoft",
  "chromium.org": "Google",
  "disruptive-innovations.com": "Disruptive Innovations",
  "compuware.com": "Compuware",
  // A webmail domain says nothing about the employer; "undetermined" is the
  // value the extraction loop treats as "fall back to parsing the text".
  "gmail.com": "undetermined"
});
/**
 * Splits an editor's plain-text description into a name and an affiliation.
 *
 * Tries "Name (Affiliation)" first, then "Name, Affiliation" (letters and
 * spaces only after the comma). Anything else is kept whole as the name with
 * the affiliation "undetermined".
 *
 * @param {string} text - Plain-text description of the editor.
 * @returns {{name: string, affiliation: string}}
 */
function parseEditorText(text) {
  const parenthesized = text.match(/^([^\(]*) \(([^\)]*)\)/);
  if (parenthesized) {
    return {name: parenthesized[1], affiliation: parenthesized[2]};
  }
  const commaSeparated = text.match(/^([^,]*), ([a-zA-Z ]*)/);
  if (commaSeparated) {
    return {name: commaSeparated[1], affiliation: commaSeparated[2]};
  }
  return {name: text, affiliation: "undetermined"};
}

/**
 * Canonicalises an affiliation label so that spelling variants of one
 * organisation share a key: strips company suffixes, bracketed and
 * parenthesised notes and anything after a comma, then lowercases.
 *
 * @param {string} raw - Affiliation as found in the markup or text.
 * @returns {string} Normalised label, or "undetermined" when nothing is left.
 */
function normalizeAffiliation(raw) {
  const normalized = raw
    .trim()
    .replace(/ Systems/, '')
    .replace(/ LLC/, '')
    .replace(/ inc\.?/i, '')
    .replace(/ Corp.*/, '')
    .replace(/ \[[^\]]*\]/, '')
    .replace(/ \([^\)]*\)/, '')
    .replace(/,.*/, '')
    .replace(/ Foundation/, '')
    .replace(/ ASA/, '')
    .replace("W3C Invited Expert", "Invited Expert")
    .toLowerCase()
    .trim();
  // The comma pattern in parseEditorText can capture an empty affiliation;
  // file those under "undetermined" rather than under an empty key.
  return normalized || "undetermined";
}

/**
 * Works out the name and affiliation of one editor entry.
 *
 * Sources, in order of preference: an element with class "org" in the markup
 * (with ".p-name" for the name), the domain of a mailto: link looked up in
 * `domainToAffiliation`, and finally the plain text parsed by
 * parseEditorText. The text fallback replaces both name and affiliation.
 *
 * @param {{markup: string, text: string}} e - Editor entry from the spec data.
 * @returns {?{name: string, affiliation: string}} null when the entry should
 *   be skipped (former editors, "see contributors" placeholders, no name).
 */
function describeEditor(e) {
  const text = e.text || "";
  const edNode = JSDOM.fragment(e.markup);
  const orgNode = edNode.querySelector(".org");
  const mailLink = edNode.querySelector('a[href^="mailto:"]');
  let affiliation;
  let name;

  if (orgNode) {
    affiliation = orgNode.textContent;
    // Markup may carry an organisation without a ".p-name" element; take the
    // name from the text description instead of failing on null.
    const nameNode = edNode.querySelector(".p-name");
    name = nameNode ? nameNode.textContent : parseEditorText(text).name;
  } else if (mailLink) {
    // Drop any "?subject=..." part and ignore case so the lookup sees a bare
    // domain.
    const domain = (mailLink.href.split('@')[1] || "")
      .split('?')[0]
      .trim()
      .toLowerCase();
    affiliation = Object.prototype.hasOwnProperty.call(domainToAffiliation, domain)
      ? domainToAffiliation[domain]
      : "undetermined";
    // When the link text is itself the e-mail address, the name is whatever
    // precedes the first comma of the surrounding text.
    name = mailLink.textContent.includes('@')
      ? edNode.textContent.split(',')[0]
      : mailLink.textContent;
  }

  if (!affiliation || affiliation === "undetermined") {
    ({name, affiliation} = parseEditorText(text));
  }

  affiliation = normalizeAffiliation(affiliation);
  // Entries such as "until 2019" or "former editor" describe past editors.
  if (/^until /.test(affiliation) || /former /.test(affiliation)) {
    return null;
  }

  // Trim and unify apostrophes so the same person is not filed under
  // several slightly different keys.
  name = (name || "").trim().replace(/’/g, "'");
  if (!name || /see contributors/i.test(name)) {
    return null;
  }
  return {name, affiliation};
}

// Fill `affiliations` as affiliation -> editor name -> list of
// {url, title} entries, one per spec the editor is listed on.
for (const s of specData) {
  if (!s.editors) {
    continue;
  }
  for (const e of s.editors) {
    const editor = describeEditor(e);
    if (!editor) {
      continue;
    }
    const {name, affiliation} = editor;
    if (!affiliations[affiliation]) {
      affiliations[affiliation] = {};
    }
    if (!affiliations[affiliation][name]) {
      affiliations[affiliation][name] = [];
    }
    affiliations[affiliation][name].push({url: s.nightly.url, title: s.title});
  }
}
// Write the nested affiliation -> editor -> specs structure as JSON, with
// keys sorted so the output is stable between runs.
fs.writeFileSync("editors-affiliations.json", JSON.stringify(sortKeys(affiliations), null, 2));

// Write the same data as CSV, one row per (spec, editor) pair, ordered by
// affiliation and then by editor name. The columns are named explicitly so
// their order is fixed and an empty result still produces a header row
// instead of making json2csv throw on empty data.
const json2csv = new Parser({fields: ["url", "title", "editor", "affiliation"]});
const flat = Object.keys(affiliations).sort().flatMap((aff) =>
  Object.keys(affiliations[aff]).sort().flatMap((ed) =>
    // Build a new row object rather than adding fields to the stored entry.
    affiliations[aff][ed].map((spec) => ({...spec, editor: ed, affiliation: aff}))
  )
);
const csv = json2csv.parse(flat);
fs.writeFileSync("editors-affiliations.csv", csv);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment