Skip to content

Instantly share code, notes, and snippets.

@oleoneto
Created February 6, 2025 15:43
Show Gist options
  • Save oleoneto/7e161b5c1df2834c31b856ce40b92ba1 to your computer and use it in GitHub Desktop.
Save oleoneto/7e161b5c1df2834c31b856ce40b92ba1 to your computer and use it in GitHub Desktop.
-- Reset step: remove every object this script manages so it can be re-run
-- from scratch. Each statement uses IF EXISTS, so a missing object is not an error.

-- Triggers.
DROP TRIGGER IF EXISTS new_definition_trigger;
DROP TRIGGER IF EXISTS new_dictionary_fts_trigger;

-- Views.
DROP VIEW IF EXISTS words_agg;
DROP VIEW IF EXISTS explanations_agg;
DROP VIEW IF EXISTS dictionary;

-- Full-text search tables.
DROP TABLE IF EXISTS dictionary_;
DROP TABLE IF EXISTS dictionary_words_agg;
DROP TABLE IF EXISTS dictionary_explanations_agg;

-- Core tables; the linking table goes before the tables it references.
DROP TABLE IF EXISTS associations;
DROP TABLE IF EXISTS explanations;
DROP TABLE IF EXISTS words;
DROP TABLE IF EXISTS penn_part_of_speech_tags;

-- Optional tables that the visible part of this script does not create.
-- The "transalations" spelling is intentional here: it matches the table
-- names used in the commented-out definitions further down.
DROP TABLE IF EXISTS pt_transalations;
DROP TABLE IF EXISTS fr_transalations;
DROP TABLE IF EXISTS search_history;
DROP TABLE IF EXISTS saved_queries;
/*
Design notes:
- A word is identified by its text (excluding its part of speech).
- A word can exist independent of its explanation(s).
- A word can have any number of explanations.
- An explanation can be associated with any number of words.
- Each explanation refers to a part of speech.
- The words table is intended to be in English.
- Translations can be added as new pluggable tables.
- Dictionary is a view connecting words and explanations via associations.
- Dictionary can be read from and written to.
- Writing to the dictionary creates the word, the explanation, and their association if they do not already exist.
*/
---------------------------------------
--:: 0. Create Part of Speech Tags
---------------------------------------
-- Lookup table of part-of-speech tags.
--   id          : explicit integer key.
--   tag         : short tag code; required and unique on its own.
--   category    : optional grouping label for the tag.
--   description : required human-readable description.
--   example     : optional free-text example or qualifier.
-- NOTE(review): unique_tag (tag, category) is already implied by the
-- column-level UNIQUE on tag; it is kept so the schema stays unchanged.
CREATE TABLE penn_part_of_speech_tags (
    id          INTEGER PRIMARY KEY,
    tag         VARCHAR(5) NOT NULL UNIQUE,
    category    VARCHAR(15),
    description VARCHAR(100) NOT NULL,
    example     VARCHAR(100) NULL,
    CONSTRAINT unique_tag UNIQUE (tag, category)
);
-- Seed the part-of-speech lookup table with 36 tags.
-- String values use single quotes: in SQL, double quotes denote identifiers,
-- and SQLite only falls back to treating them as strings through a legacy
-- compatibility setting that can be disabled at build or run time.
-- Rows whose tag already exists are skipped (ON CONFLICT (tag) DO NOTHING).
-- NOTE(review): the stored values 'begining' and 'existencial' are misspelled;
-- they are kept verbatim because other code may match on the stored data.
INSERT INTO penn_part_of_speech_tags (id, tag, category, description, example)
VALUES
    (1, 'NN', 'noun', 'Noun (singular or mass)', ''),
    (2, 'NNS', 'noun', 'Noun (plural)', ''),
    (3, 'NNP', 'noun', 'Proper noun (singular)', ''),
    (4, 'NNPS', 'noun', 'Proper noun (plural)', ''),
    (5, 'PRP', 'pronoun', 'Personal pronoun', ''),
    (6, 'PRP$', 'pronoun', 'Possessive pronoun', ''),
    (7, 'WP', 'pronoun', 'Wh-pronoun', 'pronouns begining in WH'),
    (8, 'WP$', 'pronoun', 'Possessive wh-pronoun', 'possessive pronouns begining in WH'),
    (9, 'JJ', 'adjective', 'Adjective', ''),
    (10, 'JJR', 'adjective', 'Adjective', 'comparative'),
    (11, 'JJS', 'adjective', 'Adjective', 'superlative'),
    (12, 'VB', 'verb', 'Verb (base form)', ''),
    (13, 'VBD', 'verb', 'Verb (past tense)', ''),
    (14, 'VBG', 'verb', 'Verb (gerund or present participle)', ''),
    (15, 'VBN', 'verb', 'Verb (past participle)', ''),
    (16, 'VBP', 'verb', 'Verb (non-3rd person singular present)', ''),
    (17, 'VBZ', 'verb', 'Verb (3rd person singular present)', ''),
    (18, 'RB', 'adverb', 'Adverb', ''),
    (19, 'RBR', 'adverb', 'Adverb', 'comparative'),
    (20, 'RBS', 'adverb', 'Adverb', 'superlative'),
    (21, 'WRB', 'adverb', 'Wh-adverb', 'adverbs begining in WH'),
    (22, 'DT', 'determiner', 'Determiner', ''),
    (23, 'PDT', 'determiner', 'Predeterminer', ''),
    (24, 'WDT', 'determiner', 'Wh-determiner', 'determiner pronouns begining in WH'),
    (25, 'CC', 'conjunction', 'Coordinating', 'conjunction'),
    (26, 'IN', 'conjunction', 'Preposition or subordinating conjunction', ''),
    (27, 'CD', 'numeral', 'Cardinal', 'number'),
    (28, 'EX', 'existencial', 'Existential', 'there'),
    (29, 'FW', 'foreign', 'Foreign word', ''),
    (30, 'LS', 'marker', 'List item marker', ''),
    (31, 'MD', 'modal', 'Modal', ''),
    (32, 'POS', 'possessive', 'Possessive ending', ''),
    (33, 'RP', 'particle', 'Particle', ''),
    (34, 'SYM', 'symbol', 'Symbol', ''),
    (35, 'TO', 'preposition', 'to', ''),
    (36, 'UH', 'interjection', 'Interjection', '')
ON CONFLICT (tag) DO NOTHING
;
---------------------------------------
--:: 1. Create words
---------------------------------------
-- Vocabulary table: one row per distinct word text.
--   id   : integer key.
--   text : the word itself; required and unique (unique_word).
CREATE TABLE words (
    id   INTEGER PRIMARY KEY,
    text VARCHAR(100) NOT NULL,
    CONSTRAINT unique_word UNIQUE (text)
);
-- Lookup index on the word text. The column is written as a bare identifier:
-- the previous 'text' form was a string literal that SQLite only interprets
-- as a column name for backward compatibility.
-- NOTE(review): unique_word already gives SQLite a unique index on text, so
-- this index looks redundant; it is kept so the schema's index names are unchanged.
CREATE INDEX words_idx1 ON words(text);
---------------------------------------
--:: 2. Create explanations/definitions
---------------------------------------
-- Definitions that can be attached to words.
--   id             : integer key.
--   part_of_speech : optional part-of-speech label.
--   pos_tag        : optional short tag code.
--   text           : the definition text; required.
-- The same text may appear once per part_of_speech (unique_explanation).
CREATE TABLE explanations (
    id             INTEGER PRIMARY KEY,
    part_of_speech VARCHAR(20),  -- NEW
    pos_tag        VARCHAR(5),   -- NEW
    text           TEXT NOT NULL,
    CONSTRAINT unique_explanation UNIQUE (text, part_of_speech)
);
-- CREATE INDEX explanations_idx1 ON explanations('text');
--:: 3. Connect words to their explanations/definitions
-- Linking table between words and explanations (many-to-many).
--   word_id        : references words(id); required.
--   explanation_id : references explanations(id); required.
--   explicit       : optional boolean flag; readers in this file treat NULL as FALSE.
-- A given (word, explanation) pair can be stored only once (unique_association).
CREATE TABLE associations (
    word_id        INTEGER NOT NULL,
    explanation_id INTEGER NOT NULL,
    explicit       BOOLEAN NULL,
    CONSTRAINT unique_association UNIQUE (word_id, explanation_id),
    FOREIGN KEY (word_id) REFERENCES words(id),
    FOREIGN KEY (explanation_id) REFERENCES explanations(id)
);
---------------------------------------
--:: 4. FTS
---------------------------------------
-- Full-text index over individual (word, definition) pairs.
CREATE VIRTUAL TABLE dictionary_ USING fts5 (
    word_id,
    word,
    definition,
    explanation_id
);
-- Full-text index shaped like words_agg: one word with its aggregated definitions.
CREATE VIRTUAL TABLE dictionary_words_agg USING fts5 (
    id,
    word,
    definitions,
    explanation_ids
);
-- Full-text index shaped like explanations_agg: one explanation with its matching words.
CREATE VIRTUAL TABLE dictionary_explanations_agg USING fts5 (
    explanation_id,
    explanation,
    matches,
    word_ids,
    words
);
---------------------------------------
--:: 5. Dictionary
---------------------------------------
-- Flat view of every (word, explanation) pair linked through associations.
-- Words without any association do not appear: the join to explanations
-- requires a matching association row, so both joins are inner joins.
CREATE VIEW dictionary AS
SELECT
    w.id             AS id,
    w.text           AS word,
    e.pos_tag        AS pos_tag,
    e.part_of_speech AS part_of_speech,
    e.text           AS explanation,
    e.id             AS explanation_id
FROM words AS w
INNER JOIN associations AS a
    ON a.word_id = w.id
INNER JOIN explanations AS e
    ON e.id = a.explanation_id
;
-- One row per word id, with that word's definitions aggregated as JSON.
--   definitions     : JSON array of objects, one per joined (word, explanation) row.
--   explanation_ids : JSON array of the distinct explanation ids in the group.
-- Only pairs that also exist in the dictionary_ FTS table contribute, because
-- of the inner join on dictionary_.
-- NOTE(review): the joins to words and penn_part_of_speech_tags contribute no
-- output columns; they are kept so the view's behaviour is unchanged.
-- NOTE(review): ORDER BY uses pos_tag, which is not part of GROUP BY, so the
-- value comes from whichever row of the group SQLite picks.
CREATE VIEW words_agg AS
SELECT
    entry.id   AS id,
    entry.word AS word,
    json_group_array(
        json_object(
            'word_id',            peer.id,
            'explanation_id',     peer.explanation_id,
            'explanation',        peer.explanation,
            'explicit',           COALESCE(link.explicit, FALSE),
            'part_of_speech',     expl.part_of_speech,
            'part_of_speech_tag', expl.pos_tag
        )
    ) AS definitions,
    json_group_array(DISTINCT peer.explanation_id) AS explanation_ids
FROM dictionary AS entry
INNER JOIN dictionary AS peer
    ON entry.id = peer.id
   AND peer.explanation_id = entry.explanation_id
INNER JOIN words AS w
    ON w.id = entry.id
LEFT JOIN penn_part_of_speech_tags AS t
    ON t.tag = entry.pos_tag
INNER JOIN associations AS link
    ON link.word_id = peer.id
   AND link.explanation_id = peer.explanation_id
INNER JOIN explanations AS expl
    ON expl.id = link.explanation_id
   AND expl.id = peer.explanation_id
INNER JOIN dictionary_
    ON dictionary_.word_id = peer.id
   AND dictionary_.explanation_id = peer.explanation_id
GROUP BY
    peer.id
ORDER BY
    entry.word,
    entry.pos_tag
;
-- One row per explanation id, with the words linked to it aggregated as JSON.
--   matches  : JSON array of objects, one per joined (word, explanation) row.
--   word_ids : JSON array of the distinct word ids in the group.
--   words    : JSON array of the distinct word texts in the group.
-- Only pairs that also exist in the dictionary_ FTS table contribute, because
-- of the inner join on dictionary_.
-- NOTE(review): the joins to words and penn_part_of_speech_tags contribute no
-- output columns; they are kept so the view's behaviour is unchanged.
-- NOTE(review): ORDER BY uses word and pos_tag, which are not part of
-- GROUP BY, so their values come from whichever row of the group SQLite picks.
CREATE VIEW explanations_agg AS
SELECT
    entry.explanation_id AS explanation_id,
    entry.explanation    AS explanation,
    json_group_array(
        json_object(
            'word_id',            peer.id,
            'word',               peer.word,
            'explicit',           COALESCE(link.explicit, FALSE),
            'part_of_speech',     expl.part_of_speech,
            'part_of_speech_tag', expl.pos_tag
        )
    ) AS matches,
    json_group_array(DISTINCT peer.id)   AS word_ids,
    json_group_array(DISTINCT peer.word) AS words
FROM dictionary AS entry
INNER JOIN dictionary AS peer
    ON entry.id = peer.id
   AND peer.explanation_id = entry.explanation_id
INNER JOIN explanations AS expl
    ON expl.id = entry.explanation_id
INNER JOIN words AS w
    ON w.id = entry.id
LEFT JOIN penn_part_of_speech_tags AS t
    ON t.tag = entry.pos_tag
INNER JOIN associations AS link
    ON link.word_id = peer.id
   AND link.explanation_id = peer.explanation_id
INNER JOIN dictionary_
    ON dictionary_.word_id = peer.id
   AND dictionary_.explanation_id = peer.explanation_id
GROUP BY
    peer.explanation_id
ORDER BY
    entry.word,
    entry.pos_tag
;
-- Makes the dictionary view writable. An INSERT on the view supplies
-- NEW.word, NEW.explanation, NEW.part_of_speech and NEW.pos_tag, and is
-- turned into three idempotent steps: ensure the word exists, ensure the
-- explanation exists, then link the two.
--
-- part_of_speech is nullable, so it is compared with IS rather than =.
-- With = a NULL part_of_speech never matched: no association was created,
-- and every repeated insert added another copy of the explanation because
-- the unique constraint treats NULLs as distinct.
CREATE TRIGGER new_definition_trigger INSTEAD OF INSERT ON dictionary
BEGIN
    /* Creates a new word entry if one does not yet exist */
    INSERT INTO words(text)
    VALUES (NEW.word)
    ON CONFLICT (text) DO NOTHING
    ;
    /* Creates a new explanation if one does not yet exist.
       The NOT EXISTS guard covers a NULL part_of_speech, which the unique
       constraint alone cannot deduplicate. */
    INSERT INTO explanations(text, part_of_speech, pos_tag)
    SELECT NEW.explanation, NEW.part_of_speech, NEW.pos_tag
    WHERE NOT EXISTS (
        SELECT 1
        FROM explanations AS existing
        WHERE existing.text = NEW.explanation
          AND existing.part_of_speech IS NEW.part_of_speech
    )
    ON CONFLICT (text, part_of_speech) DO NOTHING
    ;
    /* Links the word with its explanation */
    INSERT INTO associations(word_id, explanation_id)
    SELECT w.id, e.id
    FROM words AS w
    INNER JOIN explanations AS e
        ON e.text = NEW.explanation
       AND e.part_of_speech IS NEW.part_of_speech
    WHERE w.text = NEW.word
    ON CONFLICT (word_id, explanation_id) DO NOTHING
    ;
END
;
-- Keeps the dictionary_ FTS table in step with associations: every new
-- association row copies its (word, explanation) pair, as seen through the
-- dictionary view, into the full-text index.
CREATE TRIGGER new_dictionary_fts_trigger AFTER INSERT ON associations
BEGIN
    /* Makes the word and explanation searchable via FTS */
    INSERT INTO dictionary_(word_id, word, definition, explanation_id)
    SELECT
        d.id,
        d.word,
        d.explanation,
        d.explanation_id
    FROM dictionary AS d
    WHERE d.id = NEW.word_id
      AND d.explanation_id = NEW.explanation_id
    ;
    /*
    Disabled: population of the aggregate FTS tables.

    -- Groups words with all relevant definitions and makes results searchable via FTS
    INSERT INTO dictionary_words_agg(id, word, definitions, explanation_ids)
    SELECT id, word, definitions, explanation_ids
    FROM words_agg
    WHERE id = NEW.word_id
    -- Checks if `explanation_id` is in JSON array
    AND EXISTS (
    SELECT 1
    FROM JSON_EACH(explanation_ids)
    EACH WHERE each.value = NEW.explanation_id
    )
    ;
    -- Groups explanations with all matching words and makes results searchable via FTS
    INSERT INTO dictionary_explanations_agg(explanation_id, explanation, matches, word_ids)
    SELECT explanation_id, explanation, matches, word_ids
    FROM explanations_agg
    WHERE explanation_id = NEW.explanation_id
    -- Checks if `word_id` is in JSON array
    AND EXISTS (
    SELECT 1
    FROM JSON_EACH(word_ids)
    EACH WHERE each.value = NEW.word_id
    )
    ;
    */
END
;
-------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------
---------------------------------------
--:: 6. Translations
---------------------------------------
-- CREATE TABLE pt_transalations (
-- word_id INTEGER NOT NULL REFERENCES words(id),
-- text VARCHAR(100) NOT NULL,
-- CONSTRAINT unique_pt_translation UNIQUE (word_id, text)
-- );
-- CREATE TABLE fr_transalations (
-- word_id INTEGER NOT NULL REFERENCES words(id),
-- text VARCHAR(100) NOT NULL,
-- CONSTRAINT unique_fr_translation UNIQUE (word_id, text)
-- );
---------------------------------------
--:: 7. Saved Queries
---------------------------------------
-- CREATE TABLE saved_queries (
-- id INTEGER PRIMARY KEY,
-- name VARCHAR(50) NOT NULL,
-- user_id VARCHAR(38),
-- q TEXT NOT NULL,
-- CONSTRAINT unique_query_name_per_user UNIQUE (name, user_id)
-- );
---------------------------------------
--:: 8. Examples
---------------------------------------
-- CREATE TABLE examples (
-- association...
-- -- id INTEGER PRIMARY KEY,
-- word_id INTEGER NOT NULL REFERENCES words(id),
-- text TEXT NOT NULL UNIQUE
-- );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment