Skip to content

Instantly share code, notes, and snippets.

@sebilasse
Last active March 7, 2025 11:24
Show Gist options
  • Save sebilasse/670f0a5821d12bd36a0d68eab2fc85b0 to your computer and use it in GitHub Desktop.
Save sebilasse/670f0a5821d12bd36a0d68eab2fc85b0 to your computer and use it in GitHub Desktop.
Abstracted Wikidata (wd, v1.1) to ActivityStreams (`as`), as described to Max on the Fediverse.
import { Redaktor } from "@/API/context/index.ts"; // Context Factory
import { AsLinkObject } from "@AS"; // Type
import { WBK, simplifySnak, simplifyQualifiers } from "https://esm.sh/[email protected]";
import wellKnownIDS from "@/data/wellKnownIDS.json" with { type: "json" };
// Lookup table for well-known identifiers (ISO codes, ISBN, etc.), keyed by Wikidata id, e.g. {"Q142": "FR", ...}
import { wdPropertyToAS, wdToNameQualifier } from "@/data/PROPERTY/_PROPERTY.ts";
// Which properties go into `as` containers such as image, icon or attachment, and which qualifiers to use for them.
import { wdTypeToAS } from "@/data/TYPE/_TYPE.ts";
// as above and "rich types"
import { toQuantity } from '@/API/Quantity/Unit.ts';
// >
import {
WDPROP, wikiIdRegex, wdFilter, wdReduce, wdMap, defaultWdOptions, getId, getWD,
withHreflang, qualifyFactory, createWikiMedia, getPropertiesFromContext
} from './wikiUtil.ts';
// See other gist https://gist.github.com/sebilasse/b5370e2123145539598137c53ffb49a3
// TODO labelcache from deno.kv
let labelcache = {};
try {
labelcache = JSON.parse(Deno.readTextFileSync(`${Deno.cwd()}/data/PROPERTY/qualifierLabel.json`))||{};
} catch(e) {}
const wikidata = WBK({
instance: 'https://www.wikidata.org',
sparqlEndpoint: 'https://query.wikidata.org/sparql'
});
async function fetchManyEntities(
ids: string[],
languages = ['en', 'de', 'fr', 'es', 'pt'],
properties?: any
) {
if (Array.isArray(languages)) {
languages = ['mul', ...languages.map((l) => l.indexOf('-') < 0 ? l.toLowerCase() : l)];
}
const urls = wikidata.getManyEntities({
ids,
languages, // returns all languages if not specified
redirections: false, // defaults to true
});
// console.log(Object.keys(properties), Object.keys(properties).indexOf('P18'))
// console.log(urls.length, 'requests following ...');
const langSet = new Set(languages);
const res = await Promise.all(urls.map(getWD));
const o = {};
const multi = {};
for await (const entities of res) {
const simple = wikidata.simplify.entities(entities, defaultWdOptions);
try {
let qualifiers: any = [];
for (const key in simple) {
multi[key] = {};
o[key] = simple[key];
for (const k in (entities[key].claims||{})) {
if (!properties[k] && !wdPropertyToAS[k]) continue;
const claims = entities[key].claims[k];
const oClaims = o[key].claims[k];
if (Array.isArray(claims) && oClaims && claims.length > oClaims.length) {
o[key].claims[k] = entities[key].claims[k].map((claim) => {
const {value, type} = simplifySnak(claim.mainsnak, defaultWdOptions);
return {
value, type,
qualifiers: simplifyQualifiers(claim.qualifiers, defaultWdOptions)
}
});
}
}
for (const k in (o[key]?.claims||{})) {
const q = o[key].claims[k];
try {
if (q[0]?.type === 'monolingualtext') {
const mk = q.filter(({value}) => !!value && langSet.has(value.language));
if (mk.length) multi[key][k] = mk;
}
} catch(e) { console.log(e) }
if (!properties[k]) continue;
try {
for (const x of q) {
if (!x?.qualifiers) continue;
for (const xk in x.qualifiers) {
const nQuali = x.qualifiers[xk].filter((r) => {
const isQuali = !!wdToNameQualifier[xk] || r.type === 'wikibase-item';
return isQuali && !!r.value && !labelcache[r.value];
});
qualifiers = qualifiers.concat(nQuali);
}
}
} catch(e) { console.log(e) }
}
}
const ids = Array.from(new Set(qualifiers.map((r) => r.value)));
if (ids.length) {
const qualiUrls = wikidata.getManyEntities({
ids,
languages, // returns all languages if not specified
redirections: false, // defaults to true
});
const qualiRes = await Promise.all(qualiUrls.map(getWD));
for await (const q of qualiRes) {
for (const k in q) { labelcache[k] = q[k].labels||{}; }
}
}
} catch (e) {
console.log(e)
}
}
for (const key in o) {
o[key].claims = {...o[key].claims, ...(multi[key]||{})};
}
return o
}
// TODO from cache
const properties = getPropertiesFromContext(
// TODO :
/*[WikidataSPARQL,Redaktor.$context],*/
Redaktor.$context
).map(wdMap).filter(wdFilter).reduce(wdReduce, {});
// console.log(properties);
// TODO
const nameQualifiers = {
P3086: 'wdt:P3005'
}
const qualiNameFactory = (property) => ((o) => {
if (typeof o !== 'object') return o;
if (nameQualifiers[property] && o[nameQualifiers[property]]?.nameMap) {
o.nameMap = o[nameQualifiers[property]].nameMap;
o[nameQualifiers[property]] = o[nameQualifiers[property]].id;
}
return o
});
// https://www.wikidata.org/wiki/Help:Data_type
// TODO musical-notation : string describing music following LilyPond syntax
// TODO math : formatted string that displays as formula
const toASType = {
string: ({value}) => value,
monolingualtext: (claim) => (Array.isArray(claim) ? claim : [claim]).reduce((r, {value}) => {
if (typeof value === 'string') {
r.und = value;
return r;
}
if (!value) { return r; }
const {text = '', language = 'mul'} = value;
if (!!text) {
const l = language === 'mul' ? 'und' : language;
if (!Array.isArray(r[l])) { r[l] = []; }
r[l].push(text);
}
return r
}, {}),
'external-id': ({value}, idMap) => getId(value, idMap),
'wikibase-item': ({value}, idMap) => getId(value, idMap),
url: ({value, qualifiers}, idMap, extraProperties = {}) => {
const link: AsLinkObject = withHreflang({
type: ['Link'],
href: value,
...extraProperties
}, qualifiers, idMap);
// TODO rel ?
return link
},
'globe-coordinate': ({value}) => {
if (value.precision) {
const accuracy = value.precision === 1
? 99
: 100-(Math.round(value.precision * 111000 / 1000));
value.accuracy = accuracy||90;
value['wdt:precision'] = value.precision;
delete value.precision;
}
for (const k in (value||{})) {
if (Object.is(value[k], null)) { delete value[k]; }
}
return value||''
},
quantity: ({value = {amount: '-', unit: ''}, qualifiers = {}}, idMap) => {
const res = toQuantity(value);
for (const qk in qualifiers) {
// console.log('---:',qualifiers[qk][0]?.value);
if (qualifiers[qk].length && typeof qualifiers[qk][0]?.value === 'string'
&& labelcache[qualifiers[qk][0].value]) {
res[`wdt:${qk}`] = {
id: getId(qualifiers[qk][0].value, idMap),
nameMap: labelcache[qualifiers[qk][0].value]
};
}
}
return res
},
time: ({value, qualifiers}, idMap, functional = false) => {
if (functional) return value?.time||value;
const { time, timezone } = value;
const qualify = qualifyFactory(qualifiers, idMap, labelcache);
const qualified = Object.keys(qualifiers).reduce(qualify, {});
return { time, timezone, ...qualified };
},
commonsMedia: ({value}) => createWikiMedia(value),
// TODO geo-shape is Data
'geo-shape': ({value}) => value,
'wikibase-lexeme': ({value}) => value,
'wikibase-sense': ({value}) => value
}
export async function wikiDetails(
ids: string[],
type: string[] = [],
languages = ['en', 'de', 'fr', 'es', 'pt', 'ar'],
filepath: false | string = false,
idMap: {[wdId: string]: string /* ISO */} = wellKnownIDS,
countryCheckMap?: {[wdId: string]: string /* ISO */}
) {
idMap = {...countryCheckMap, ...idMap};
if (Array.isArray(languages)) {
languages = ['mul', ...languages.map((l) => l.indexOf('-') < 0 ? l.toLowerCase() : l)];
}
const wdRes = await fetchManyEntities(ids, languages, properties);
let results: any[] = [];
const toAS = (o) => !!toASType[o.type] ? toASType[o.type](o, idMap) : o;
const toTag = (o) => ({ type: 'Hashtag', name: toAS(o) });
const propertyToAS = (key, arr, isSet) => {
if (arr.length && arr[0]?.type === 'monolingualtext') {
return {
type: ['Note'],
nameMap: toASType.monolingualtext(arr)
}
}
if (!Array.isArray(arr)) arr = [arr];
const target = isSet ? Array.from(new Set(arr)) : arr;
return target.map(toAS).map(qualiNameFactory(key));
}
// console.log(wdRes);
for (const wdId in wdRes) {
const wd = wdRes[wdId];
const id = getId(wd, idMap, wdId);
const [nameMap, summaryMap] = [{},(wd.descriptions ? wd.descriptions : {})];
for (const lang in wd.labels) {
const label = (Array.isArray(wd.labels[lang]) ? wd.labels[lang] : [wd.labels[lang]]);
const alias = (!!wd?.aliases[lang]
? (Array.isArray(wd.aliases[lang]) ? wd.aliases[lang] : [wd.aliases[lang]])
: []);
nameMap[lang] = Array.from(new Set([...label, ...alias, ...(wd?.aliases?.mul||[])]));
}
for (const lang in summaryMap) {
summaryMap[lang] = Array.from(new Set((
Array.isArray(summaryMap[lang]) ? summaryMap[lang] : [summaryMap[lang]]
)));
}
const res: any = {
type,
id,
updated: wd.modified || new Date().toISOString(),
describes: [ `wd:${wdId}` ],
nameMap,
summaryMap,
url: [
{
type: ['Link'],
rel: 'alternate',
href: `https://www.wikidata.org/wiki/Special:EntityData/${wdId}`,
nameMap: {
en: 'details from wikidata',
fr: 'détails de wikidata',
de: 'Details von wikidata',
es: 'detalles de wikidata',
pt: 'pormenores da wikidata'
},
mediaType: 'application/ld+json'
},
{
type: ['Link'],
rel: 'about',
href: `https://www.wikidata.org/entity/${wdId}`,
name: 'wikidata',
mediaType: 'text/html'
}
]
};
if (!wd?.claims) {
console.log('no claims for', wdId);
results.push(res);
continue;
}
const {
P625, P2044, P17, P31, P580, P582, P1566, P1813, P2572,
P1332, P1333, P1334, P1335
} = wd.claims
/* Main location */
if (P625) {
const _altitude = !!P2044
? (Array.isArray(P2044) && P2044.length ? P2044 : [P2044]).map((alt) => alt?.amount||alt)[0]
: false;
res.location = P625.map(toAS);
if (typeof _altitude === 'number') {
res.location = res.location.map((l) => {
if (!l.altitude) l.altitude = _altitude;
return l
});
}
}
if (countryCheckMap && P17 && Array.isArray(P17) && P17.indexOf(Object.keys(countryCheckMap)[0]) < 0) {
console.log('Country error for:');
console.log('"'+id+'":["'+wdId+'", '+JSON.stringify(P17)+'],');
};
const mapFunctional = (o) => toASType.time(o, idMap, true);
if (P580) {
console.log('P580',P580);
const start = (Array.isArray(P580) ? P580 : [P580]).map(mapFunctional)
.sort((a,b) => a.localeCompare(b));
res.startTime = start[0];
}
if (P582) {
const end = (Array.isArray(P580) ? P580 : [P580]).map(mapFunctional)
.sort((a,b) => b.localeCompare(a));
res.endTime = end[0];
}
// TODO P740 eventLocation to location and // P706 located in/on physical feature
// TODO P580 startTime and P582 endTime and P585 point in time / duration schema
// 'schema:startDate', 'schema:endDate', 'schema:previousStartDate'
/*
// toponymName
if (P1705) { // native name
const nn = Array.isArray(P1705) ? P1705 : [P1705];
res.nativeName = nn.map(toAS);
res.name = res.name.concat(res.nativeName);
}
if (P1448) { // official name
const on = Array.isArray(P1448) ? P1448 : [P1448];
res.officialName = on.map(toAS);
res.name = res.name.concat(res.officialName);
}
*/
// short name
if (P1813) {
if (!res.tag) { res.tag = [] }
res.tag = res.tag.concat((Array.isArray(P1813) ? P1813 : [P1813]).map(toTag));
}
// hashtag
if (P2572) {
console.log('hashtag',P2572);
if (!res.tag) { res.tag = [] }
res.tag = res.tag.concat((Array.isArray(P2572) ? P2572 : [P2572]).map(toTag));
}
if (P31) { res.type = res.type.concat(P31.map(({value}) => getId({id:value}, idMap))); }
const altType = res.type.filter((t) => typeof t === 'string' && t.startsWith('wd:')).map((t) => {
const [x,k] = t.split(':');
return !!wdTypeToAS[k]?.type ? wdTypeToAS[k]?.type : [];
}).flat();
res.type = Array.from(new Set([...res.type, ...altType]));
// TODO static GN IDs from mapping
if (P1566) {
P1566.forEach(({value}) => {
res.url.push({
type: ['Link'],
rel: 'about',
href: `https://www.geonames.org/${value}`,
name: 'geonames',
mediaType: 'text/html'
});
});
}
// TODO static OSM IDs from mapping
for (const a of [['P402','relation'], ['P11693','node'], ['P10689','way']]) {
if (wd.claims[a[0]]) {
wd.claims[a[0]].forEach(({value}) => {
res.url.push({
type: ['Link'],
rel: 'about',
href: `https://openstreetmap.org/${a[1]}/${value}`,
name: 'OpenStreetMap',
mediaType: 'text/html'
});
});
}
}
if (wd.sitelinks && Array.isArray(languages)) {
languages.forEach((lang) => {
for (const site in wd.sitelinks) {
if (`${site}.`.indexOf(lang) === 0) {
let href;
try {
const title = wd.sitelinks[site];
href = wikidata.getSitelinkUrl({site, title});
res.url.push({
type: ['Link'],
name: title,
hreflang: lang,
href,
mediaType: 'text/html',
});
} catch (e) {
//console.log(e);
}
}
}
});
}
const handled = {
P625:1,P2044:1,P17:1,P31:1,P1566:1,P1813:1,
P2572:1,P1332:1,P1333:1,P1334:1,P1335:1
};
for (const key in wd.claims) {
if (handled[key]) continue;
if (!Array.isArray(wd.claims[key])) {
wd.claims[key] = [wd.claims[key]];
}
if (wdPropertyToAS[key]) {
const {asType, wdTypes, container, rel, name, prefix} = wdPropertyToAS[key];
for (let {value, qualifiers} of wd.claims[key]) {
if (!res[container]) { res[container] = [] }
if (container === 'url') {
value = prefix
? `${prefix}${value}`
: (wikiIdRegex.test(value)
? `https://www.wikidata.org/wiki/${value}`
: getId({id:value}, idMap));
res.url.push(toASType.url({value, qualifiers}, idMap, {
name, rel, mediaType: wdPropertyToAS[key].mediaType || 'text/html'
}));
continue;
}
if (container === 'attributedTo') {
if (!res?.attributedTo) { res.attributedTo = []; }
res.attributedTo.push({
id: getId({id:value}, idMap),
type: [asType, ...wdTypes],
context: [`${WDPROP}${key}`]
});
continue;
}
res[container].push({ ...createWikiMedia(`${value}`, `${WDPROP}${key}`),
...{type: [asType, ...wdTypes]}});
}
continue;
}
if (P17) { res.country = P17.map(toAS); }
if (key === 'P1813' || key === 'P2572' || !properties[key]) { continue; }
const {
name, functional, container = [], type = [], prefix: p = '', suffix = ''
} = properties[key];
try {
const isSet = container && container.indexOf('@set') > -1;
const isId = container && container.indexOf('@id') > -1;
if (!res[name]) { res[name] = []; }
const asRes = propertyToAS(key, wd.claims[key], isSet);
res[name] = res[name].concat(asRes);
} catch(e) {
console.log('err:', key, name, e);
}
if (functional) res[name] = res[name][0];
/*
const prefix = (!p && isId)
? 'https://www.wikidata.org/wiki/'
: p;
if (res[name]) {
if (type.indexOf('xsd:decimal') > -1) {
if (Array.isArray(res[name])) {
res[name] = res[name].map((s: string) => !s ? 0 : parseFloat(s));
} else if (typeof res[name] === 'string') {
res[name] = !res[name] ? 0 : parseFloat(res[name]);
}
} else if (type.indexOf('xsd:nonNegativeInteger') > -1) {
if (Array.isArray(res[name])) {
res[name] = res[name].map((s: string) =>
!s ? 0 : parseInt(s, 10)
);
} else if (typeof res[name] === 'string') {
res[name] = !res[name] ? 0 : parseInt(res[name], 10);
}
} else if (type.indexOf('xsd:positiveInteger') > -1) {
if (Array.isArray(res[name])) {
res[name] = res[name].map((s: string) => parseInt(s, 10)||null);
} else if (typeof res[name] === 'string') {
res[name] = parseInt(res[name], 10)||null;
}
} else if (prefix || suffix) {
if (Array.isArray(res[name])) {
res[name] = res[name].map((s: string) =>
!s ? '' : `${prefix}${s}${suffix}`
);
} else {
res[name] = `${prefix}${res[name]}${suffix}`;
}
}
}
*/
}
if (!res.bbox && P1332 && P1333 && P1334 && P1335) {
// N, S, E, W
if (P1332.length && P1333.length && P1334.length && P1335.length) {
const coord = (v, p) => (typeof v[0] === 'number' ? v[0] : v[0]?.value[p]);
res.bbox = {
north: coord(P1332, 'latitude'), south: coord(P1333, 'latitude'),
east: coord(P1334, 'longitude'), west: coord(P1335, 'longitude')
};
}
}
if (filepath) {
const filename = (id||wdId).replace('redaktor:','').replace('wd:','');
await Deno.writeTextFile(`${filepath}/${filename}.json`, JSON.stringify(res, null, 2));
}
results.push(res);
}
try {
await Deno.writeTextFile(
`${Deno.cwd()}/data/PROPERTY/qualifierLabel.json`,
JSON.stringify(labelcache, null, 2)
);
} catch(e) {}
return results;
}
/*
const getIt = async () => {
const r = await wikiDetails(['Q2','Q1055'], ['Place','Profile','CollectionPage','redaktor:Factual','redaktor:Topic','schema:AdministrativeArea']);
Deno.writeTextFileSync(
`./outputResult.json`,
JSON.stringify(r, null, 2),
);
}
getIt();
*/
/*
export function getPropertyFromObject(prop: string, o: AsObjectNormalized|AsLinkObject = {}) {
const wdt = 'https://www.wikidata.org/prop/direct/';
if (o[`wdt:${prop}`] || o[`${wdt}${prop}`]) return o[`wdt:${prop}`] || o[`${wdt}${prop}`];
if (wdPropertyToASlink[prop] && o?.url) {
const {type, rel} = wdPropertyToASlink[prop];
return o.url.filter((r) => {
if (typeof r === 'object' && type.length === type.filter((t) => (r?.type||[]).indexOf(t) > -1).length) {
return typeof r?.rel === 'string' && (r.rel.indexOf(rel) > -1 || r.rel.indexOf(rel.replace('related', '').trim()) > -1)
}
return false
});
} else {
for (const container in wdPropertyToAS) {
if ((!wdPropertyToAS[container][prop] || !o[container])) continue;
const oContainer = Array.isArray(o[container]) ? o[container] : [o[container]];
const type = wdPropertyToAS[container][prop];
return oContainer.filter((r) => {
if (type.length === type.filter((t) => (r?.type||[]).indexOf(t) > -1).length) {
return true;
}
const rc = r?.context||r?.rel||r?.url?.context||r?.url?.rel;
if (rc && rc.filter((c) => c === `wdt:${prop}` || c === `${wdt}${prop}`).length) return true;
});
}
}
return [];
}*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment