async function getWordInfo(words, langCode = 'en') {
  const endpointUrl = 'https://query.wikidata.org/sparql';
  // NOTE: dct:language is hardcoded to wd:Q1860 (English), so langCode only
  // filters the labels and glosses - it does not switch the lexeme language.
  const sparqlQuery = `
    SELECT ?word ?lemma (GROUP_CONCAT(DISTINCT ?category; separator="|$|") AS ?grammar)
      (GROUP_CONCAT(DISTINCT ?forms; separator="|$|") AS ?LexIDs)
      (GROUP_CONCAT(DISTINCT ?gloss; separator="|$|") AS ?Senses)
      (GROUP_CONCAT(DISTINCT ?feat2; separator="|$|") AS ?Uses)
      (GROUP_CONCAT(DISTINCT ?usagewords; separator="|$|") AS ?SameMeaning)
    WHERE {
      VALUES ?word {${words.map(word => `'${word}'@${langCode}`).join(' ')}}
      ?l a ontolex:LexicalEntry ;
         dct:language wd:Q1860 ;
         wikibase:lemma ?lemma ;
         ontolex:lexicalForm ?form.
      OPTIONAL {
        ?l wikibase:lexicalCategory ?cat .
        ?cat rdfs:label ?category. FILTER(LANG(?category) = "${langCode}").
      }
      ?l ontolex:lexicalForm ?forms .
      ?forms wikibase:grammaticalFeature ?features.
      ?features rdfs:label ?feat2. FILTER(LANG(?feat2) = "${langCode}").
      ?forms ontolex:representation ?usagewords .
      ?form ontolex:representation ?word .
      ?l ontolex:sense ?sense .
      ?sense skos:definition ?gloss.
      FILTER(LANG(?gloss) = "${langCode}")
    }
    GROUP BY ?word ?lemma`;
  const headers = { 'Accept': 'application/sparql-results+json' };
  const fullUrl = endpointUrl + '?query=' + encodeURIComponent(sparqlQuery);
  try {
    const response = await fetch(fullUrl, { headers });
    const results = await response.json();
    return mapToSchema(results);
  } catch (error) {
    console.error('Error fetching data:', error);
  }
}
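// A possible extension (sketch only, not wired into the query above): since
// dct:language is hardcoded to wd:Q1860, other langCodes only change the label
// filters. A small lookup table could make the lexeme language follow langCode
// too - the QIDs here are real Wikidata language items.
const languageQids = {
  en: 'wd:Q1860', // English
  fr: 'wd:Q150',  // French
  de: 'wd:Q188',  // German
};
// e.g. interpolate `dct:language ${languageQids[langCode]} ;` into the query.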
// Reverse lookup: find the first key in obj whose value matches.
const getKeyByValue = (obj, value) => Object.keys(obj).find(key => obj[key] === value);

// Penn Treebank tags -> compromise.js tags
const compromiseMapping = {
  CC: 'Conjunction',
  CD: 'Cardinal',
  DT: 'Determiner',
  EX: 'Preposition', // existential "there"
  FW: 'Expression',
  IN: 'Preposition',
  JJ: 'Adjective',
  JJR: 'Comparative',
  JJS: 'Superlative',
  MD: 'Modal',
  NN: 'Noun',
  NNS: 'Plural',
  NNP: 'Singular',
  NNPS: 'Plural',
  POS: 'Possessive',
  PRP: 'Pronoun',
  RB: 'Adverb',
  RBR: 'Comparative',
  RBS: 'Superlative',
  RP: 'PhrasalVerb',
  PDT: 'Determiner',
  SYM: 'Expression',
  TO: 'Conjunction',
  UH: 'Expression',
  VB: 'Verb',
  VBD: 'PastTense',
  VBG: 'Gerund',
  VBN: 'Participle', // past participle
  VBP: 'PresentTense', // non-3rd person singular present
  VBZ: 'PresentTense', // 3rd person singular present
  'PRP$': 'Pronoun',
  'WP$': 'Possessive',
  WDT: 'Determiner',
  WP: 'Pronoun',
  WRB: 'Adverb',
};
// Wikidata grammatical-feature labels -> Penn Treebank tags
const verbFormsMapping = {
  "simple past": "VBD",
  "past participle in english": "VBN",
  "present participle": "VBG",
  "plural": "NNS",
  "singular": "NNP",
  "third person": "VBZ",
  "first person singular": "VBP",
  "second person singular": "VB",
  "third person plural": "VBP",
  "infinitive": "VB",
  "present": "VBP",
  "past": "VBD",
  "gerund": "VBG",
  "positive": "JJ",
  "comparative": "JJR",
  "superlative": "JJS",
};
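// Illustrative chain (values come straight from the two maps above): a
// Wikidata feature label resolves to a Penn tag, then to a compromise tag.
const demoPenn = verbFormsMapping['simple past']; // 'VBD'
const demoTag = compromiseMapping[demoPenn];      // 'PastTense'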
function mapToSchema(results) {
  results = results.results.bindings;
  const mappedResults = {
    words: []
  };
  results.forEach(result => {
    const wordInfo = {
      word: result.word.value,
      pos: result.grammar.value.split('|$|'),
      tags: null,
      lemma: null,
      // Keep only the lexeme-form ID from each form URI (the last path segment).
      wikidata: result.LexIDs.value.split('|$|').map(uri => uri.split('/').pop()),
      forms: null,
      senses: []
    };
    // Only record a lemma when it differs from the surface word.
    if (wordInfo.word.toLowerCase() !== result.lemma.value.toLowerCase()) {
      wordInfo.lemma = result.lemma.value;
    }
    const senses = result.Senses.value.split('|$|');
    const uses = result.Uses.value.split('|$|');
    const sameMeaning = result.SameMeaning.value.split('|$|');
    const postypes = {};
    wordInfo.senses = [...senses];
    for (let i = 0; i < uses.length; i++) {
      postypes[uses[i]] = sameMeaning[i] || sameMeaning[sameMeaning.length - 1];
      // Need help here spencer - these aren't mapped right. Assuming we need to change the SPARQL query?
    }
    wordInfo.forms = postypes;
    // Work out which grammatical feature produced this surface form, then
    // translate it to a Penn Treebank tag and a compromise tag.
    const type = getKeyByValue(wordInfo.forms, wordInfo.word);
    const penn = verbFormsMapping[type];
    wordInfo.tags = { wikidata: type, penn: penn, compromise: compromiseMapping[penn] };
    mappedResults.words.push(wordInfo);
  });
  console.log(mappedResults);
  return mappedResults;
}
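// Roughly what one entry of mappedResults.words looks like (illustrative
// values only - the gloss and L-number below are placeholders; real ones
// come back from Wikidata):
// {
//   word: 'hated',
//   pos: ['verb'],
//   tags: { wikidata: 'simple past', penn: 'VBD', compromise: 'PastTense' },
//   lemma: 'hate',
//   wikidata: ['L1234-F2'],
//   forms: { 'simple past': 'hated', 'infinitive': 'hate' },
//   senses: ['to dislike greatly']
// }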
// Example usage:
const wordsToQuery = ['hated', 'hate', 'going', 'go'];
getWordInfo(wordsToQuery);
@spencermountain - tagged you in this originally to ask for help, but I think I got it figured out! (revised the gist up top). Would appreciate it if you could test it out and see if there are any errors, etc.
Let me know what you think too! 😄
@spencermountain - don't know if you ever got to peek at this. But if we solved it, I think it would be useful. Plus you can feel free to use it to get all the lemmas for words you need, etc. (works properly).
cool way to get wordnet data - glad it's useful for you!
@spencermountain - don't mean to be bothersome. I was hoping to tag you once I had finished results, but I'm stumped right now (doesn't help that I think I have a concussion at the moment lol).
You can see the features for words like this here.
And you can try visualizing it here.
Some words have multiple uses - but I'm not sure how to properly map them.
For example, the query returns:
It would be ideal to have them mapped in the lexicon -
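One idea (just an untested sketch - ?FormPairs is a made-up variable name) would be to pair each feature with its representation inside GROUP_CONCAT, so the two lists can't drift out of order:

(GROUP_CONCAT(DISTINCT CONCAT(STR(?feat2), "=", STR(?usagewords)); separator="|$|") AS ?FormPairs)

then split the pairs on the JS side instead of zipping two separate lists:

const postypes = {};
for (const pair of result.FormPairs.value.split('|$|')) {
  const [feat, rep] = pair.split('=');
  (postypes[feat] ??= []).push(rep); // keep every representation per feature
}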
Again - apologies for asking for your help, but I figured you wouldn't mind, considering I built this mostly to help you / the compromise.js project lol.
Hopefully this helps somewhat - it took me hours to figure out the proper query without getting crazy timeouts etc. lol (plus you won't have to worry about me making PRs with words that don't have proper tags now hahaha!)