Skip to content

Instantly share code, notes, and snippets.

@chx
Last active July 23, 2023 04:49
Show Gist options
  • Save chx/ce1d45398996bbadcaf0bd65a61f5902 to your computer and use it in GitHub Desktop.
Save chx/ce1d45398996bbadcaf0bd65a61f5902 to your computer and use it in GitHub Desktop.
<?php
// Generates map.php: a compact PHP short-array literal mapping every letter in
// U+0080..U+2000 that changes when its nonspacing marks are removed to its
// stripped form.
//
// Note that \Transliterator is not used as a transliterator here. See
// https://unicode-org.github.io/icu/userguide/transforms/general/#overview
// on why it was originally named Transliterator but now does a lot more.
// The rules decompose (NFD), remove nonspacing marks ([:Mn:]) and recompose (NFC).
$transformation = \Transliterator::createFromRules(':: NFD; :: [:Mn:] Remove; :: NFC;');
if ($transformation === null) {
    // createFromRules() reports failure by returning null; stop here with the
    // ICU error instead of failing later inside array_map().
    throw new \RuntimeException('Unable to create the ICU transform: ' . intl_get_error_message());
}
// Every code point in the range that matches \pL, as a UTF-8 string.
$letters = preg_grep('/\pL/u', array_map('utf8', range(0x80, 0x2000)));
// Key each letter by itself so the original survives the transformation below.
$letters = array_combine($letters, $letters);
$transformed = array_map([$transformation, 'transliterate'], $letters);
// Keep only the entries whose transformed value differs from the original.
$map = array_diff_assoc($transformed, $letters);
print count($map);
// Squeeze the var_export() output into a single-line short-array literal.
// NOTE(review): this strips every space and rewrites every ')' in the export,
// so it is only safe while no key or value contains a space or parenthesis.
$search = [' => ', 'array (', ')', ' ', "\n"];
$replace = ['=>', '[', ']', '', ''];
if (file_put_contents("map.php", str_replace($search, $replace, var_export($map, TRUE))) === false) {
    // Without this check a failed write would go unnoticed after the count
    // has already been printed.
    throw new \RuntimeException('Unable to write map.php');
}
/**
 * Encodes a Unicode code point as a UTF-8 byte sequence.
 *
 * @param int $num
 *   The code point to encode.
 *
 * @return string
 *   The one- to four-byte UTF-8 encoding of $num, or an empty string when
 *   $num is not a Unicode scalar value (negative, a UTF-16 surrogate in
 *   U+D800..U+DFFF, or above U+10FFFF).
 */
function utf8($num)
{
    // Surrogates and values beyond U+10FFFF have no well-formed UTF-8 form,
    // and a negative value would otherwise be passed straight to chr().
    if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF)) {
        return '';
    }
    if ($num <= 0x7F) {
        // ASCII: a single byte.
        return chr($num);
    }
    if ($num <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        return chr(0xC0 | ($num >> 6))
            . chr(0x80 | ($num & 0x3F));
    }
    if ($num <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($num >> 12))
            . chr(0x80 | (($num >> 6) & 0x3F))
            . chr(0x80 | ($num & 0x3F));
    }
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($num >> 18))
        . chr(0x80 | (($num >> 12) & 0x3F))
        . chr(0x80 | (($num >> 6) & 0x3F))
        . chr(0x80 | ($num & 0x3F));
}
<?php
namespace core\lib\Drupal\Component\Transliteration;
class RemoveDiacritics {
// phpcs:ignore
protected const MAP = ['À'=>'A','Á'=>'A','Â'=>'A','Ã'=>'A','Ä'=>'A','Å'=>'A','Ç'=>'C','È'=>'E','É'=>'E','Ê'=>'E','Ë'=>'E','Ì'=>'I','Í'=>'I','Î'=>'I','Ï'=>'I','Ñ'=>'N','Ò'=>'O','Ó'=>'O','Ô'=>'O','Õ'=>'O','Ö'=>'O','Ù'=>'U','Ú'=>'U','Û'=>'U','Ü'=>'U','Ý'=>'Y','à'=>'a','á'=>'a','â'=>'a','ã'=>'a','ä'=>'a','å'=>'a','ç'=>'c','è'=>'e','é'=>'e','ê'=>'e','ë'=>'e','ì'=>'i','í'=>'i','î'=>'i','ï'=>'i','ñ'=>'n','ò'=>'o','ó'=>'o','ô'=>'o','õ'=>'o','ö'=>'o','ù'=>'u','ú'=>'u','û'=>'u','ü'=>'u','ý'=>'y','ÿ'=>'y','Ā'=>'A','ā'=>'a','Ă'=>'A','ă'=>'a','Ą'=>'A','ą'=>'a','Ć'=>'C','ć'=>'c','Ĉ'=>'C','ĉ'=>'c','Ċ'=>'C','ċ'=>'c','Č'=>'C','č'=>'c','Ď'=>'D','ď'=>'d','Ē'=>'E','ē'=>'e','Ĕ'=>'E','ĕ'=>'e','Ė'=>'E','ė'=>'e','Ę'=>'E','ę'=>'e','Ě'=>'E','ě'=>'e','Ĝ'=>'G','ĝ'=>'g','Ğ'=>'G','ğ'=>'g','Ġ'=>'G','ġ'=>'g','Ģ'=>'G','ģ'=>'g','Ĥ'=>'H','ĥ'=>'h','Ĩ'=>'I','ĩ'=>'i','Ī'=>'I','ī'=>'i','Ĭ'=>'I','ĭ'=>'i','Į'=>'I','į'=>'i','İ'=>'I','Ĵ'=>'J','ĵ'=>'j','Ķ'=>'K','ķ'=>'k','Ĺ'=>'L','ĺ'=>'l','Ļ'=>'L','ļ'=>'l','Ľ'=>'L','ľ'=>'l','Ń'=>'N','ń'=>'n','Ņ'=>'N','ņ'=>'n','Ň'=>'N','ň'=>'n','Ō'=>'O','ō'=>'o','Ŏ'=>'O','ŏ'=>'o','Ő'=>'O','ő'=>'o','Ŕ'=>'R','ŕ'=>'r','Ŗ'=>'R','ŗ'=>'r','Ř'=>'R','ř'=>'r','Ś'=>'S','ś'=>'s','Ŝ'=>'S','ŝ'=>'s','Ş'=>'S','ş'=>'s','Š'=>'S','š'=>'s','Ţ'=>'T','ţ'=>'t','Ť'=>'T','ť'=>'t','Ũ'=>'U','ũ'=>'u','Ū'=>'U','ū'=>'u','Ŭ'=>'U','ŭ'=>'u','Ů'=>'U','ů'=>'u','Ű'=>'U','ű'=>'u','Ų'=>'U','ų'=>'u','Ŵ'=>'W','ŵ'=>'w','Ŷ'=>'Y','ŷ'=>'y','Ÿ'=>'Y','Ź'=>'Z','ź'=>'z','Ż'=>'Z','ż'=>'z','Ž'=>'Z','ž'=>'z','Ơ'=>'O','ơ'=>'o','Ư'=>'U','ư'=>'u','Ǎ'=>'A','ǎ'=>'a','Ǐ'=>'I','ǐ'=>'i','Ǒ'=>'O','ǒ'=>'o','Ǔ'=>'U','ǔ'=>'u','Ǖ'=>'U','ǖ'=>'u','Ǘ'=>'U','ǘ'=>'u','Ǚ'=>'U','ǚ'=>'u','Ǜ'=>'U','ǜ'=>'u','Ǟ'=>'A','ǟ'=>'a','Ǡ'=>'A','ǡ'=>'a','Ǣ'=>'Æ','ǣ'=>'æ','Ǧ'=>'G','ǧ'=>'g','Ǩ'=>'K','ǩ'=>'k','Ǫ'=>'O','ǫ'=>'o','Ǭ'=>'O','ǭ'=>'o','Ǯ'=>'Ʒ','ǯ'=>'ʒ','ǰ'=>'j','Ǵ'=>'G','ǵ'=>'g','Ǹ'=>'N','ǹ'=>'n','Ǻ'=>'A','ǻ'=>'a','Ǽ'=>'Æ','ǽ'=>'æ','Ǿ'=>'Ø','ǿ'=>'ø','Ȁ'=>'A','ȁ'=>'a','Ȃ'=>'A','ȃ'=>'a','Ȅ'=>'E','ȅ'=>'e','Ȇ'=>'E','ȇ'=>'e','Ȉ'=>'I','ȉ'=>'i','Ȋ'=>'I','ȋ'=>'
i','Ȍ'=>'O','ȍ'=>'o','Ȏ'=>'O','ȏ'=>'o','Ȑ'=>'R','ȑ'=>'r','Ȓ'=>'R','ȓ'=>'r','Ȕ'=>'U','ȕ'=>'u','Ȗ'=>'U','ȗ'=>'u','Ș'=>'S','ș'=>'s','Ț'=>'T','ț'=>'t','Ȟ'=>'H','ȟ'=>'h','Ȧ'=>'A','ȧ'=>'a','Ȩ'=>'E','ȩ'=>'e','Ȫ'=>'O','ȫ'=>'o','Ȭ'=>'O','ȭ'=>'o','Ȯ'=>'O','ȯ'=>'o','Ȱ'=>'O','ȱ'=>'o','Ȳ'=>'Y','ȳ'=>'y','ʹ'=>'ʹ','Ά'=>'Α','Έ'=>'Ε','Ή'=>'Η','Ί'=>'Ι','Ό'=>'Ο','Ύ'=>'Υ','Ώ'=>'Ω','ΐ'=>'ι','Ϊ'=>'Ι','Ϋ'=>'Υ','ά'=>'α','έ'=>'ε','ή'=>'η','ί'=>'ι','ΰ'=>'υ','ϊ'=>'ι','ϋ'=>'υ','ό'=>'ο','ύ'=>'υ','ώ'=>'ω','ϓ'=>'ϒ','ϔ'=>'ϒ','Ѐ'=>'Е','Ё'=>'Е','Ѓ'=>'Г','Ї'=>'І','Ќ'=>'К','Ѝ'=>'И','Ў'=>'У','Й'=>'И','й'=>'и','ѐ'=>'е','ё'=>'е','ѓ'=>'г','ї'=>'і','ќ'=>'к','ѝ'=>'и','ў'=>'у','Ѷ'=>'Ѵ','ѷ'=>'ѵ','Ӂ'=>'Ж','ӂ'=>'ж','Ӑ'=>'А','ӑ'=>'а','Ӓ'=>'А','ӓ'=>'а','Ӗ'=>'Е','ӗ'=>'е','Ӛ'=>'Ә','ӛ'=>'ә','Ӝ'=>'Ж','ӝ'=>'ж','Ӟ'=>'З','ӟ'=>'з','Ӣ'=>'И','ӣ'=>'и','Ӥ'=>'И','ӥ'=>'и','Ӧ'=>'О','ӧ'=>'о','Ӫ'=>'Ө','ӫ'=>'ө','Ӭ'=>'Э','ӭ'=>'э','Ӯ'=>'У','ӯ'=>'у','Ӱ'=>'У','ӱ'=>'у','Ӳ'=>'У','ӳ'=>'у','Ӵ'=>'Ч','ӵ'=>'ч','Ӹ'=>'Ы','ӹ'=>'ы','آ'=>'ا','أ'=>'ا','ؤ'=>'و','إ'=>'ا','ئ'=>'ي','ۀ'=>'ە','ۂ'=>'ہ','ۓ'=>'ے',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'',''=>'A',''=>'a',''=>'B',''=>'b',''=>'B',''=>'b',''=>'B',''=>'b',''=>'C',''=>'c',''=>'D',''=>'d',''=>'D',''=>'d',''=>'D',''=>'d',''=>'D',''=>'d',''=>'D',''=>'d',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'F',''=>'f',''=>'G',''=>'g',''=>'H',''=>'h',''=>'H',''=>'h',''=>'H',''=>'h',''=>'H',''=>'h',''=>'H',''=>'h',''=>'I',''=>'i',''=>'I',''=>'i',''=>'K',''=>'k',''=>'K',''=>'k',''=>'K',''=>'k',''=>'L',''=>'l',''=>'L',''=>'l',''=>'L',''=>'l',''=>'L',''=>'l',''=>'M','ḿ'=>'m',''=>'M',''=>'m',''=>'M',''=>'m',''=>'N',''=>'n',''=>'N',''=>'n',''=>'N',''=>'n',''=>'N',''=>'n',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'P',''=>'p',''=>'P',''=>'p',''=>'R',''=>'r',''=>'R',''=>'r',''=>'R',''=>'r',''=>'R
',''=>'r',''=>'S',''=>'s',''=>'S',''=>'s',''=>'S',''=>'s',''=>'S',''=>'s',''=>'S',''=>'s',''=>'T',''=>'t',''=>'T',''=>'t',''=>'T',''=>'t',''=>'T',''=>'t',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'V',''=>'v',''=>'V','ṿ'=>'v',''=>'W',''=>'w',''=>'W',''=>'w',''=>'W',''=>'w',''=>'W',''=>'w',''=>'W',''=>'w',''=>'X',''=>'x',''=>'X',''=>'x',''=>'Y',''=>'y',''=>'Z',''=>'z',''=>'Z',''=>'z',''=>'Z',''=>'z',''=>'h',''=>'t',''=>'w',''=>'y',''=>'ſ',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'A',''=>'a',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E','ế'=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'E',''=>'e',''=>'I',''=>'i',''=>'I',''=>'i',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'O',''=>'o',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'U',''=>'u',''=>'Y',''=>'y',''=>'Y',''=>'y',''=>'Y',''=>'y',''=>'Y',''=>'y',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'ε',''=>'ε',''=>'ε',''=>'ε',''=>'ε',''=>'ε',''=>'Ε',''=>'Ε',''=>'Ε',''=>'Ε',''=>'Ε',''=>'Ε',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'Ι',''=>'Ι',''=>'Ι',''=>'Ι',''=>'Ι',''=>'Ι',''=>'Ι','Ἷ'=>'Ι',''=>'ο',''=>'ο',''=>'ο',''=>'ο',''=>'ο',''=>'ο',''=>'Ο',''=>'Ο',''=>'Ο',''=>'Ο',''=>'Ο',''=>'Ο',''=>'υ',''=>'υ',''=>'υ',''=>'υ',''=>'υ',''=>'υ',''=>'υ',''=>'υ',''=>'Υ',''=>'Υ',''=>'Υ',''=>'Υ',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=
>'Ω',''=>'α',''=>'α',''=>'ε',''=>'ε',''=>'η',''=>'η',''=>'ι',''=>'ι',''=>'ο',''=>'ο',''=>'υ',''=>'υ',''=>'ω',''=>'ω',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'Η',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'Ω',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'Α',''=>'ι',''=>'η',''=>'η',''=>'η',''=>'η',''=>'η',''=>'Ε',''=>'Ε',''=>'Η',''=>'Η',''=>'Η',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'ι',''=>'Ι',''=>'Ι',''=>'Ι',''=>'Ι',''=>'υ',''=>'υ',''=>'υ',''=>'υ',''=>'ρ',''=>'ρ',''=>'υ',''=>'υ',''=>'Υ',''=>'Υ',''=>'Υ',''=>'Υ',''=>'Ρ',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'ω',''=>'Ο',''=>'Ο',''=>'Ω',''=>'Ω',''=>'Ω'];
/**
 * Replaces every occurrence of a MAP key in a string with its mapped value.
 *
 * The table is read through late static binding, so a subclass that
 * redefines MAP changes the replacements applied here.
 *
 * @param string $string
 *   The text to process.
 *
 * @return string
 *   The result of strtr() applied to $string with the MAP pairs.
 */
public function removeDiacritics($string) {
  $replacements = static::MAP;
  return strtr($string, $replacements);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment