Last active
April 16, 2020 17:47
-
-
Save UrsaDK/bdf771d89b39e1873e6c7c5c818dde7f to your computer and use it in GitHub Desktop.
Replace musicbrainz_unaccent with pgsql's unaccent extension
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * MusicBrainz (http://musicbrainz.org) comes with a custom unaccent pgsql extension (musicbrainz_unaccent).
 * However, the AWS PGSQL service does not include this extension and it cannot be easily added.
 *
 * The following code duplicates musicbrainz_unaccent functionality using the built-in unaccent extension.
 */
-- Enable the unaccent extension that the statements below rely on.
-- IF NOT EXISTS keeps the script re-runnable when the extension is already installed.
CREATE EXTENSION IF NOT EXISTS unaccent;
/**
 * musicbrainz_unaccent(txt text) -> text
 *
 * Drop-in wrapper exposing the name musicbrainz_unaccent on top of the
 * unaccent() function provided by the unaccent extension.
 *
 * Parameters:
 *   txt - text to strip of accents.
 * Returns:
 *   The result of unaccent(txt); NULL when txt is NULL (the function is STRICT).
 *
 * NOTE(review): the function is declared IMMUTABLE as in the original
 * definition, while PostgreSQL declares unaccent() itself as STABLE.
 * Presumably IMMUTABLE is needed so the wrapper can be used in index
 * expressions - confirm against the callers before changing it.
 */
CREATE OR REPLACE FUNCTION musicbrainz_unaccent(txt text) RETURNS text AS $$
BEGIN
    RETURN unaccent(txt);
END;
$$ LANGUAGE plpgsql IMMUTABLE STRICT;
/**
 * Text search dictionary registered under the name musicbrainz_unaccentdict.
 * It is built from the unaccent template and loads the rules file named
 * 'unaccent'.
 */
CREATE TEXT SEARCH DICTIONARY musicbrainz_unaccentdict (
    TEMPLATE = unaccent,
    RULES = 'unaccent'
);
/**
 * Tests for the musicbrainz_unaccent extension.
 * (The following code is not required to use the extension)
 */
-- Smoke tests: every statement below is expected to return a single true value.
-- The column alias names the case being checked.

-- Accented Latin characters are replaced by their unaccented counterparts.
SELECT musicbrainz_unaccent('Hôtel') = 'Hotel' AS strips_single_accent;
SELECT musicbrainz_unaccent('ľščťžýáí') = 'lsctzyai' AS strips_multiple_accents;

-- NOTE(review): this expects dash characters to pass through unchanged, which
-- presumably depends on the unaccent rules file installed on the server -
-- confirm on the target PostgreSQL version.
SELECT musicbrainz_unaccent('foo—bar‒baz') = 'foo—bar‒baz' AS keeps_dashes;

-- Plain ASCII, empty and NULL inputs.
SELECT musicbrainz_unaccent('nonunicode') = 'nonunicode' AS keeps_ascii;
SELECT musicbrainz_unaccent('') = '' AS keeps_empty_string;
SELECT musicbrainz_unaccent(NULL) IS NULL AS null_in_null_out;

-- Long input: 65536 accented characters.
SELECT musicbrainz_unaccent(repeat('ä', 65536)) = repeat('a', 65536) AS handles_long_input;

-- The dictionary is expected to produce the same result as the function, as a single lexeme.
SELECT ts_lexize('musicbrainz_unaccentdict', 'ľščťžýáí foo—bar‒baz nonunicode')
    = E'{"lsctzyai foo—bar‒baz nonunicode"}' AS dictionary_matches_function;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment