Last active
March 7, 2025 11:24
-
-
Save sebilasse/670f0a5821d12bd36a0d68eab2fc85b0 to your computer and use it in GitHub Desktop.
abstracted wd (v1.1) to `as`, as described to max in fedi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { Redaktor } from "@/API/context/index.ts"; // Context Factory | |
import { AsLinkObject } from "@AS"; // Type | |
import { WBK, simplifySnak, simplifyQualifiers } from "https://esm.sh/[email protected]"; | |
import wellKnownIDS from "@/data/wellKnownIDS.json" with { type: "json" }; | |
// useful for well known ids like ISO, ISBN etc ... {"Q142": "FR" ... ...} | |
import { wdPropertyToAS, wdToNameQualifier } from "@/data/PROPERTY/_PROPERTY.ts"; | |
// what goes to as containers like image, icon, attachment etc. and which Qualifiers to use ... | |
import { wdTypeToAS } from "@/data/TYPE/_TYPE.ts"; | |
// as above and "rich types" | |
import { toQuantity } from '@/API/Quantity/Unit.ts'; | |
// > | |
import { | |
WDPROP, wikiIdRegex, wdFilter, wdReduce, wdMap, defaultWdOptions, getId, getWD, | |
withHreflang, qualifyFactory, createWikiMedia, getPropertiesFromContext | |
} from './wikiUtil.ts'; | |
// See other gist https://gist.github.com/sebilasse/b5370e2123145539598137c53ffb49a3 | |
// TODO labelcache from deno.kv | |
// In-memory label cache, looked up by qualifier value / entity id further down in this file.
// It is seeded from the JSON file written at the end of `wikiDetails`, when that file exists.
let labelcache = {};
try {
  const cachePath = `${Deno.cwd()}/data/PROPERTY/qualifierLabel.json`;
  const cached = JSON.parse(Deno.readTextFileSync(cachePath));
  if (cached) { labelcache = cached; }
} catch (_err) {
  // Best effort: a missing or unreadable cache file simply means starting with an empty cache.
}
// wikibase-sdk client configured for the public Wikidata instance and its SPARQL endpoint.
const wikidataEndpoints = {
  instance: 'https://www.wikidata.org',
  sparqlEndpoint: 'https://query.wikidata.org/sparql'
};
const wikidata = WBK(wikidataEndpoints);
/**
 * Fetches Wikidata entities in bulk and returns them simplified, keyed by entity id.
 *
 * On top of the plain simplification this function
 *  - rebuilds the claims of selected properties (those present in `properties` or in
 *    `wdPropertyToAS`) from the raw response whenever the simplified entity holds fewer
 *    claims than the raw one, keeping `value`, `type` and `qualifiers` per claim,
 *  - narrows monolingual text claims to the requested languages (only when at least one
 *    claim matches),
 *  - fetches the labels of qualifier values that are not in `labelcache` yet and stores
 *    them there.
 *
 * @param ids        entity ids to fetch
 * @param languages  language codes; 'mul' is always requested first
 * @param properties map keyed by property id selecting the claims to process; may be omitted
 * @returns object mapping entity id to the simplified entity
 */
async function fetchManyEntities(
  ids: string[],
  languages = ['en', 'de', 'fr', 'es', 'pt'],
  properties?: any
) {
  // `properties` is optional: without this fallback every `properties[k]` lookup below
  // would throw and the surrounding catch would silently drop the whole batch.
  const wanted = properties ?? {};
  if (Array.isArray(languages)) {
    // Deduplicate: callers such as `wikiDetails` have already put 'mul' in front.
    languages = Array.from(new Set([
      'mul',
      ...languages.map((l) => l.indexOf('-') < 0 ? l.toLowerCase() : l)
    ]));
  }
  const urls = wikidata.getManyEntities({
    ids,
    languages, // returns all languages if not specified
    redirections: false, // defaults to true
  });
  const langSet = new Set(languages);
  const res = await Promise.all(urls.map(getWD));
  const o: Record<string, any> = {};
  const multi: Record<string, any> = {};
  // `res` is already resolved by Promise.all, a plain loop is enough.
  for (const entities of res) {
    const simple = wikidata.simplify.entities(entities, defaultWdOptions);
    try {
      let qualifiers: any = [];
      for (const key in simple) {
        multi[key] = {};
        o[key] = simple[key];
        const rawClaims = entities[key].claims || {};
        for (const k in rawClaims) {
          if (!wanted[k] && !wdPropertyToAS[k]) continue;
          const claims = rawClaims[k];
          const oClaims = o[key].claims[k];
          // The simplified entity dropped claims: rebuild the full list from the raw data.
          if (Array.isArray(claims) && oClaims && claims.length > oClaims.length) {
            o[key].claims[k] = claims.map((claim) => {
              const {value, type} = simplifySnak(claim.mainsnak, defaultWdOptions);
              return {
                value, type,
                // Claims without qualifiers have no `qualifiers` key at all;
                // only hand real input to the simplifier.
                qualifiers: claim.qualifiers
                  ? simplifyQualifiers(claim.qualifiers, defaultWdOptions)
                  : {}
              }
            });
          }
        }
        for (const k in (o[key]?.claims || {})) {
          const q = o[key].claims[k];
          try {
            if (q[0]?.type === 'monolingualtext') {
              const mk = q.filter(({value}) => !!value && langSet.has(value.language));
              if (mk.length) multi[key][k] = mk;
            }
          } catch (e) { console.log(e) }
          if (!wanted[k]) continue;
          try {
            // Collect qualifier values whose labels are not cached yet.
            for (const x of q) {
              if (!x?.qualifiers) continue;
              for (const xk in x.qualifiers) {
                const nQuali = x.qualifiers[xk].filter((r) => {
                  const isQuali = !!wdToNameQualifier[xk] || r.type === 'wikibase-item';
                  return isQuali && !!r.value && !labelcache[r.value];
                });
                qualifiers = qualifiers.concat(nQuali);
              }
            }
          } catch (e) { console.log(e) }
        }
      }
      const labelIds = Array.from(new Set(qualifiers.map((r) => r.value)));
      if (labelIds.length) {
        const qualiUrls = wikidata.getManyEntities({
          ids: labelIds,
          languages, // returns all languages if not specified
          redirections: false, // defaults to true
        });
        const qualiRes = await Promise.all(qualiUrls.map(getWD));
        for (const q of qualiRes) {
          for (const k in q) { labelcache[k] = q[k].labels || {}; }
        }
      }
    } catch (e) {
      console.log(e)
    }
  }
  // The language-filtered monolingual claims take precedence over the unfiltered ones.
  for (const key in o) {
    o[key].claims = {...o[key].claims, ...(multi[key] || {})};
  }
  return o
}
// TODO from cache
// TODO : also feed the WikidataSPARQL context, i.e. [WikidataSPARQL, Redaktor.$context]
// Property lookup derived from the Redaktor context: mapped, filtered and reduced
// into a single object by the helpers from wikiUtil.
const contextProperties = getPropertiesFromContext(Redaktor.$context);
const properties = contextProperties
  .map(wdMap)
  .filter(wdFilter)
  .reduce(wdReduce, {});
// console.log(properties); | |
// TODO | |
/** Maps a property id to the qualifier key whose `nameMap` names the value. */
const nameQualifiers: Record<string, string> = {
  P3086: 'wdt:P3005'
};
/**
 * Creates a mapper for the values of `property`.
 *
 * For properties listed in `nameQualifiers` the mapper copies the qualifier's `nameMap`
 * onto the value object and replaces the qualifier entry by its `id`. The value object
 * is modified in place and returned. Anything that is not an object passes through
 * unchanged, including `null`, for which `typeof` also reports 'object'.
 */
const qualiNameFactory = (property: string) => ((o: any) => {
  if (o === null || typeof o !== 'object') return o;
  const qualifierKey = nameQualifiers[property];
  const qualifier = qualifierKey ? o[qualifierKey] : undefined;
  if (qualifier?.nameMap) {
    o.nameMap = qualifier.nameMap;
    o[qualifierKey] = qualifier.id;
  }
  return o
});
// https://www.wikidata.org/wiki/Help:Data_type | |
// TODO musical-notation : string describing music following LilyPond syntax | |
// TODO math : formatted string that displays as formula | |
/**
 * Converters from a simplified Wikidata claim (`{value, type, qualifiers}`) to its
 * ActivityStreams-side representation, keyed by Wikidata data type.
 * Most converters take the claim and the id map; `url` and `time` accept a third argument.
 */
const toASType = {
  string: ({value}) => value,
  // Accepts one claim or a list of claims and groups the texts by language.
  // 'mul' is stored under 'und'; a bare string value is stored as-is under 'und'.
  monolingualtext: (claim) => (Array.isArray(claim) ? claim : [claim]).reduce((r, {value}) => {
    if (typeof value === 'string') {
      r.und = value;
      return r;
    }
    if (!value) { return r; }
    const {text = '', language = 'mul'} = value;
    if (text) {
      const l = language === 'mul' ? 'und' : language;
      if (!Array.isArray(r[l])) { r[l] = []; }
      r[l].push(text);
    }
    return r
  }, {}),
  'external-id': ({value}, idMap) => getId(value, idMap),
  'wikibase-item': ({value}, idMap) => getId(value, idMap),
  // `extraProperties` are merged into the link before hreflang handling.
  url: ({value, qualifiers}, idMap, extraProperties = {}) => {
    const link: AsLinkObject = withHreflang({
      type: ['Link'],
      href: value,
      ...extraProperties
    }, qualifiers, idMap);
    // TODO rel ?
    return link
  },
  // Returns a cleaned copy of the coordinate: `precision` becomes `accuracy` plus
  // 'wdt:precision', null members are removed. A missing value yields ''.
  'globe-coordinate': ({value}) => {
    // Guard first: the value may be absent, and the claim itself must not be mutated.
    if (!value) return '';
    if (typeof value !== 'object') return value;
    const coordinate = {...value};
    if (coordinate.precision) {
      const accuracy = coordinate.precision === 1
        ? 99
        : 100 - (Math.round(coordinate.precision * 111000 / 1000));
      coordinate.accuracy = accuracy || 90;
      coordinate['wdt:precision'] = coordinate.precision;
      delete coordinate.precision;
    }
    for (const k in coordinate) {
      if (coordinate[k] === null) { delete coordinate[k]; }
    }
    return coordinate
  },
  // Converts the amount via `toQuantity` and attaches every qualifier whose first
  // value is a string with cached labels as `wdt:<qualifier>` = {id, nameMap}.
  quantity: ({value = {amount: '-', unit: ''}, qualifiers = {}}, idMap) => {
    const res = toQuantity(value);
    for (const qk in qualifiers) {
      const first = qualifiers[qk].length ? qualifiers[qk][0]?.value : undefined;
      if (typeof first === 'string' && labelcache[first]) {
        res[`wdt:${qk}`] = {
          id: getId(first, idMap),
          nameMap: labelcache[first]
        };
      }
    }
    return res
  },
  // With `functional` set only the time itself is returned, otherwise an object
  // holding time, timezone and the qualified members.
  time: ({value, qualifiers = {}}, idMap, functional = false) => {
    if (functional) return value?.time || value;
    // Claims without qualifiers or value must not make the conversion throw.
    const { time, timezone } = value ?? {};
    const qualify = qualifyFactory(qualifiers, idMap, labelcache);
    const qualified = Object.keys(qualifiers).reduce(qualify, {});
    return { time, timezone, ...qualified };
  },
  commonsMedia: ({value}) => createWikiMedia(value),
  // TODO geo-shape is Data
  'geo-shape': ({value}) => value,
  'wikibase-lexeme': ({value}) => value,
  'wikibase-sense': ({value}) => value
}
/**
 * Resolves Wikidata entities into ActivityStreams-flavoured objects.
 *
 * Every entity is fetched through `fetchManyEntities` and converted: labels and aliases
 * become `nameMap`, descriptions `summaryMap`, and the claims are mapped to location,
 * start/end time, tags, types, links, the containers configured in `wdPropertyToAS`
 * and the context properties from `properties`.
 * When `filepath` is set each result is also written to `<filepath>/<id>.json`.
 * The qualifier label cache is written back to disk at the end (best effort).
 *
 * @param ids             Wikidata entity ids
 * @param type            types every result starts with
 * @param languages       language codes; 'mul' is always put in front
 * @param filepath        directory for the per-entity JSON files, or false to skip writing
 * @param idMap           id lookup handed to `getId` and the converters
 * @param countryCheckMap optional map merged into `idMap`; its first key is the
 *                        Wikidata id expected among the P17 claims
 * @returns one converted object per fetched entity
 */
export async function wikiDetails(
  ids: string[],
  type: string[] = [],
  languages = ['en', 'de', 'fr', 'es', 'pt', 'ar'],
  filepath: false | string = false,
  idMap: {[wdId: string]: string /* ISO */} = wellKnownIDS,
  countryCheckMap?: {[wdId: string]: string /* ISO */}
) {
  idMap = {...countryCheckMap, ...idMap};
  if (Array.isArray(languages)) {
    // Deduplicated so a caller-supplied 'mul' does not produce duplicate sitelink entries.
    languages = Array.from(new Set([
      'mul',
      ...languages.map((l) => l.indexOf('-') < 0 ? l.toLowerCase() : l)
    ]));
  }
  const wdRes = await fetchManyEntities(ids, languages, properties);
  const results: any[] = [];
  const toAS = (o) => !!toASType[o.type] ? toASType[o.type](o, idMap) : o;
  const toTag = (o) => ({ type: 'Hashtag', name: toAS(o) });
  const asArray = (v) => (Array.isArray(v) ? v : [v]);
  const mapFunctional = (o) => toASType.time(o, idMap, true);
  // Claims arrive as `{value, ...}` objects, plain quantities or bare numbers.
  const readAmount = (alt) => {
    const v = alt?.value ?? alt;
    return v?.amount ?? v;
  };
  const propertyToAS = (key, arr, isSet) => {
    if (arr.length && arr[0]?.type === 'monolingualtext') {
      return {
        type: ['Note'],
        nameMap: toASType.monolingualtext(arr)
      }
    }
    if (!Array.isArray(arr)) arr = [arr];
    const target = isSet ? Array.from(new Set(arr)) : arr;
    return target.map(toAS).map(qualiNameFactory(key));
  }
  // Claims with dedicated handling below; the generic claim loop skips them.
  const handled = {
    P625:1,P2044:1,P17:1,P31:1,P1566:1,P1813:1,
    P2572:1,P1332:1,P1333:1,P1334:1,P1335:1
  };
  for (const wdId in wdRes) {
    const wd = wdRes[wdId];
    const id = getId(wd, idMap, wdId);
    const nameMap = {};
    const summaryMap = wd.descriptions ? wd.descriptions : {};
    for (const lang in wd.labels) {
      const label = asArray(wd.labels[lang]);
      // Entities may come without aliases at all.
      const alias = wd.aliases?.[lang] ? asArray(wd.aliases[lang]) : [];
      nameMap[lang] = Array.from(new Set([...label, ...alias, ...(wd?.aliases?.mul || [])]));
    }
    for (const lang in summaryMap) {
      summaryMap[lang] = Array.from(new Set(asArray(summaryMap[lang])));
    }
    const res: any = {
      type,
      id,
      updated: wd.modified || new Date().toISOString(),
      describes: [ `wd:${wdId}` ],
      nameMap,
      summaryMap,
      url: [
        {
          type: ['Link'],
          rel: 'alternate',
          href: `https://www.wikidata.org/wiki/Special:EntityData/${wdId}`,
          nameMap: {
            en: 'details from wikidata',
            fr: 'détails de wikidata',
            de: 'Details von wikidata',
            es: 'detalles de wikidata',
            pt: 'pormenores da wikidata'
          },
          mediaType: 'application/ld+json'
        },
        {
          type: ['Link'],
          rel: 'about',
          href: `https://www.wikidata.org/entity/${wdId}`,
          name: 'wikidata',
          mediaType: 'text/html'
        }
      ]
    };
    if (!wd?.claims) {
      console.log('no claims for', wdId);
      results.push(res);
      continue;
    }
    const {
      P625, P2044, P17, P31, P580, P582, P1566, P1813, P2572,
      P1332, P1333, P1334, P1335
    } = wd.claims
    /* Main location */
    if (P625) {
      res.location = P625.map(toAS);
      const altitude = P2044
        ? (Array.isArray(P2044) && P2044.length ? P2044 : [P2044]).map(readAmount)[0]
        : false;
      if (typeof altitude === 'number') {
        res.location = res.location.map((l) => {
          // Locations without coordinate data are not objects and cannot carry an altitude.
          if (l && typeof l === 'object' && !l.altitude) l.altitude = altitude;
          return l
        });
      }
    }
    if (countryCheckMap && Array.isArray(P17)) {
      const expectedCountry = Object.keys(countryCheckMap)[0];
      // P17 holds claim objects, so compare their values rather than the objects themselves.
      const found = P17.some((c) => (c?.value ?? c) === expectedCountry);
      if (!found) {
        console.log('Country error for:');
        console.log('"'+id+'":["'+wdId+'", '+JSON.stringify(P17)+'],');
      }
    }
    if (P580) {
      console.log('P580',P580);
      const start = asArray(P580).map(mapFunctional)
        .sort((a,b) => a.localeCompare(b));
      res.startTime = start[0];
    }
    if (P582) {
      // Latest end time, taken from the P582 claims.
      const end = asArray(P582).map(mapFunctional)
        .sort((a,b) => b.localeCompare(a));
      res.endTime = end[0];
    }
    // TODO P740 eventLocation to location and // P706 located in/on physical feature
    // TODO P580 startTime and P582 endTime and P585 point in time / duration schema
    // 'schema:startDate', 'schema:endDate', 'schema:previousStartDate'
    /*
    // toponymName
    if (P1705) { // native name
    const nn = Array.isArray(P1705) ? P1705 : [P1705];
    res.nativeName = nn.map(toAS);
    res.name = res.name.concat(res.nativeName);
    }
    if (P1448) { // official name
    const on = Array.isArray(P1448) ? P1448 : [P1448];
    res.officialName = on.map(toAS);
    res.name = res.name.concat(res.officialName);
    }
    */
    // short name
    if (P1813) {
      if (!res.tag) { res.tag = [] }
      res.tag = res.tag.concat(asArray(P1813).map(toTag));
    }
    // hashtag
    if (P2572) {
      console.log('hashtag',P2572);
      if (!res.tag) { res.tag = [] }
      res.tag = res.tag.concat(asArray(P2572).map(toTag));
    }
    if (P31) { res.type = res.type.concat(P31.map(({value}) => getId({id:value}, idMap))); }
    const altType = res.type.filter((t) => typeof t === 'string' && t.startsWith('wd:')).map((t) => {
      const k = t.split(':')[1];
      return wdTypeToAS[k]?.type ? wdTypeToAS[k].type : [];
    }).flat();
    res.type = Array.from(new Set([...res.type, ...altType]));
    // TODO static GN IDs from mapping
    if (P1566) {
      P1566.forEach(({value}) => {
        res.url.push({
          type: ['Link'],
          rel: 'about',
          href: `https://www.geonames.org/${value}`,
          name: 'geonames',
          mediaType: 'text/html'
        });
      });
    }
    // TODO static OSM IDs from mapping
    for (const [osmProperty, osmKind] of [['P402','relation'], ['P11693','node'], ['P10689','way']]) {
      if (wd.claims[osmProperty]) {
        wd.claims[osmProperty].forEach(({value}) => {
          res.url.push({
            type: ['Link'],
            rel: 'about',
            href: `https://openstreetmap.org/${osmKind}/${value}`,
            name: 'OpenStreetMap',
            mediaType: 'text/html'
          });
        });
      }
    }
    if (wd.sitelinks && Array.isArray(languages)) {
      languages.forEach((lang) => {
        for (const site in wd.sitelinks) {
          if (`${site}.`.indexOf(lang) === 0) {
            try {
              const title = wd.sitelinks[site];
              const href = wikidata.getSitelinkUrl({site, title});
              res.url.push({
                type: ['Link'],
                name: title,
                hreflang: lang,
                href,
                mediaType: 'text/html',
              });
            } catch (e) {
              // Sites the SDK cannot build a URL for are skipped.
              //console.log(e);
            }
          }
        }
      });
    }
    // Loop-invariant, so set once up front instead of on every pass of the claim loop.
    if (P17) { res.country = asArray(P17).map(toAS); }
    for (const key in wd.claims) {
      if (handled[key]) continue;
      if (!Array.isArray(wd.claims[key])) {
        wd.claims[key] = [wd.claims[key]];
      }
      const asMapping = wdPropertyToAS[key];
      if (asMapping) {
        const {asType, wdTypes, container, rel, name, prefix} = asMapping;
        for (let {value, qualifiers} of wd.claims[key]) {
          if (!res[container]) { res[container] = [] }
          if (container === 'url') {
            value = prefix
              ? `${prefix}${value}`
              : (wikiIdRegex.test(value)
                ? `https://www.wikidata.org/wiki/${value}`
                : getId({id:value}, idMap));
            res.url.push(toASType.url({value, qualifiers}, idMap, {
              name, rel, mediaType: asMapping.mediaType || 'text/html'
            }));
            continue;
          }
          if (container === 'attributedTo') {
            res.attributedTo.push({
              id: getId({id:value}, idMap),
              type: [asType, ...wdTypes],
              context: [`${WDPROP}${key}`]
            });
            continue;
          }
          res[container].push({ ...createWikiMedia(`${value}`, `${WDPROP}${key}`),
            ...{type: [asType, ...wdTypes]}});
        }
        continue;
      }
      // P1813 / P2572 are part of `handled` and never reach this point.
      if (!properties[key]) { continue; }
      const { name, functional, container = [] } = properties[key];
      try {
        const isSet = container && container.indexOf('@set') > -1;
        if (!res[name]) { res[name] = []; }
        const asRes = propertyToAS(key, wd.claims[key], isSet);
        res[name] = res[name].concat(asRes);
      } catch(e) {
        console.log('err:', key, name, e);
      }
      // Functional properties carry a single value; only collapse what really is a list.
      if (functional && Array.isArray(res[name])) res[name] = res[name][0];
      /*
      Disabled sketch; needs `type`, `prefix: p` and `suffix` from properties[key]
      and isId = container.indexOf('@id') > -1 :
      const prefix = (!p && isId)
      ? 'https://www.wikidata.org/wiki/'
      : p;
      if (res[name]) {
      if (type.indexOf('xsd:decimal') > -1) {
      if (Array.isArray(res[name])) {
      res[name] = res[name].map((s: string) => !s ? 0 : parseFloat(s));
      } else if (typeof res[name] === 'string') {
      res[name] = !res[name] ? 0 : parseFloat(res[name]);
      }
      } else if (type.indexOf('xsd:nonNegativeInteger') > -1) {
      if (Array.isArray(res[name])) {
      res[name] = res[name].map((s: string) =>
      !s ? 0 : parseInt(s, 10)
      );
      } else if (typeof res[name] === 'string') {
      res[name] = !res[name] ? 0 : parseInt(res[name], 10);
      }
      } else if (type.indexOf('xsd:positiveInteger') > -1) {
      if (Array.isArray(res[name])) {
      res[name] = res[name].map((s: string) => parseInt(s, 10)||null);
      } else if (typeof res[name] === 'string') {
      res[name] = parseInt(res[name], 10)||null;
      }
      } else if (prefix || suffix) {
      if (Array.isArray(res[name])) {
      res[name] = res[name].map((s: string) =>
      !s ? '' : `${prefix}${s}${suffix}`
      );
      } else {
      res[name] = `${prefix}${res[name]}${suffix}`;
      }
      }
      }
      */
    }
    if (!res.bbox && P1332 && P1333 && P1334 && P1335) {
      // N, S, E, W
      if (P1332.length && P1333.length && P1334.length && P1335.length) {
        const coord = (v, p) => (typeof v[0] === 'number' ? v[0] : v[0]?.value?.[p]);
        res.bbox = {
          north: coord(P1332, 'latitude'), south: coord(P1333, 'latitude'),
          east: coord(P1334, 'longitude'), west: coord(P1335, 'longitude')
        };
      }
    }
    if (filepath) {
      const filename = (id||wdId).replace('redaktor:','').replace('wd:','');
      await Deno.writeTextFile(`${filepath}/${filename}.json`, JSON.stringify(res, null, 2));
    }
    results.push(res);
  }
  try {
    await Deno.writeTextFile(
      `${Deno.cwd()}/data/PROPERTY/qualifierLabel.json`,
      JSON.stringify(labelcache, null, 2)
    );
  } catch (e) {
    // Persisting the label cache is best effort, but a failure should be visible.
    console.warn('could not write qualifierLabel.json', e);
  }
  return results;
}
/* | |
const getIt = async () => { | |
const r = await wikiDetails(['Q2','Q1055'], ['Place','Profile','CollectionPage','redaktor:Factual','redaktor:Topic','schema:AdministrativeArea']); | |
Deno.writeTextFileSync( | |
`./outputResult.json`, | |
JSON.stringify(r, null, 2), | |
); | |
} | |
getIt(); | |
*/ | |
/* | |
export function getPropertyFromObject(prop: string, o: AsObjectNormalized|AsLinkObject = {}) { | |
const wdt = 'https://www.wikidata.org/prop/direct/'; | |
if (o[`wdt:${prop}`] || o[`${wdt}${prop}`]) return o[`wdt:${prop}`] || o[`${wdt}${prop}`]; | |
if (wdPropertyToASlink[prop] && o?.url) { | |
const {type, rel} = wdPropertyToASlink[prop]; | |
return o.url.filter((r) => { | |
if (typeof r === 'object' && type.length === type.filter((t) => (r?.type||[]).indexOf(t) > -1).length) { | |
return typeof r?.rel === 'string' && (r.rel.indexOf(rel) > -1 || r.rel.indexOf(rel.replace('related', '').trim()) > -1) | |
} | |
return false | |
}); | |
} else { | |
for (const container in wdPropertyToAS) { | |
if ((!wdPropertyToAS[container][prop] || !o[container])) continue; | |
const oContainer = Array.isArray(o[container]) ? o[container] : [o[container]]; | |
const type = wdPropertyToAS[container][prop]; | |
return oContainer.filter((r) => { | |
if (type.length === type.filter((t) => (r?.type||[]).indexOf(t) > -1).length) { | |
return true; | |
} | |
const rc = r?.context||r?.rel||r?.url?.context||r?.url?.rel; | |
if (rc && rc.filter((c) => c === `wdt:${prop}` || c === `${wdt}${prop}`).length) return true; | |
}); | |
} | |
} | |
return []; | |
}*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment