Skip to content

Instantly share code, notes, and snippets.

@evaisse
Created August 18, 2009 07:43
Show Gist options
  • Save evaisse/169594 to your computer and use it in GitHub Desktop.
Save evaisse/169594 to your computer and use it in GitHub Desktop.
PHP function to remove accents from the input string. An example string like `ÀØėÿᾜὨζὅБю` will be translated to `AOeyIOzoBY`.
<?php # -*- coding: utf-8 -*-
// function remove_accents()
/**
 * Remove accents from the input string by transliterating accented Latin,
 * Greek (including polytonic), Cyrillic and Georgian letters to a single
 * unaccented ASCII letter. An example string like `ÀØėÿᾜὨζὅБю`
 * will be translated to `AOeyIOzoBY`. More complete than :
 *   strtr( (string)$str,
 *          "ÀÁÂÃÄÅàáâãäåÒÓÔÕÖØòóôõöøÈÉÊËèéêëÇçÌÍÎÏìíîïÙÚÛÜùúûüÿÑñ",
 *          "aaaaaaaaaaaaooooooooooooeeeeeeeecciiiiiiiiuuuuuuuuynn" );
 *
 * Characters that have no entry in the table are left untouched.
 *
 * @param mixed     $str  input string (cast to string; null becomes '')
 * @param bool|null $utf8 true  : $str is already UTF-8 (default);
 *                        false : $str is ISO-8859-1 and is converted first;
 *                        null  : detect whether $str is valid UTF-8 and
 *                                treat it as ISO-8859-1 when it is not
 * @return string input string without accents, always a string
 */
function remove_accents( $str, $utf8=true )
{
    $str = (string)$str;

    if( is_null($utf8) ) {
        $utf8 = _remove_accents_is_utf8($str);
    }
    if( !$utf8 ) {
        $str = _remove_accents_latin1_to_utf8($str);
    }

    // Single pass over the input; keys are whole UTF-8 characters and the
    // replacements are ASCII, so no replacement can be matched again.
    return strtr($str, _remove_accents_map());
}

/**
 * Tell whether $str is a well-formed UTF-8 byte sequence.
 *
 * @param string $str
 * @return bool
 */
function _remove_accents_is_utf8( $str )
{
    if( function_exists('mb_check_encoding') ) {
        return mb_check_encoding($str, 'UTF-8');
    }
    // With the "u" modifier PCRE validates the subject as UTF-8 and the
    // call fails (returns false) on malformed input.
    return preg_match('//u', $str) === 1;
}

/**
 * Convert an ISO-8859-1 string to UTF-8 (replacement for utf8_encode(),
 * which is deprecated as of PHP 8.2).
 *
 * @param string $str
 * @return string
 */
function _remove_accents_latin1_to_utf8( $str )
{
    if( function_exists('mb_convert_encoding') ) {
        return mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
    }
    $out = '';
    $length = strlen($str);
    for( $i = 0; $i < $length; $i++ ) {
        $byte = ord($str[$i]);
        if( $byte < 0x80 ) {
            $out .= $str[$i];
        } else {
            // Latin-1 byte 0x80-0xFF is code point U+0080-U+00FF: two bytes.
            $out .= chr(0xC0 | ($byte >> 6)) . chr(0x80 | ($byte & 0x3F));
        }
    }
    return $out;
}

/**
 * Encode one code point in the range U+0800..U+FFFF as a 3-byte UTF-8 string.
 *
 * @param int $cp
 * @return string
 */
function _remove_accents_utf8_bmp( $cp )
{
    return chr(0xE0 | ($cp >> 12))
         . chr(0x80 | (($cp >> 6) & 0x3F))
         . chr(0x80 | ($cp & 0x3F));
}

/**
 * Build (once) and return the transliteration table used by remove_accents().
 *
 * @return array map of UTF-8 character => ASCII replacement letter
 */
function _remove_accents_map()
{
    static $map = null;
    if( $map !== null ) {
        return $map;
    }

    $map = array(
        // Written as byte escapes so the keys are unambiguously the single
        // code points U+0132 (IJ ligature) and U+0133 (ij ligature) and can
        // never match the plain ASCII letter pairs "IJ" / "ij".
        "\xC4\xB2" => 'I', 'Ö' => 'O','Œ' => 'O','Ü' => 'U','ä' => 'a','æ' => 'a',
        "\xC4\xB3" => 'i','ö' => 'o','œ' => 'o','ü' => 'u','ß' => 's','ſ' => 's',
        'À' => 'A','Á' => 'A','Â' => 'A','Ã' => 'A','Ä' => 'A','Å' => 'A',
        'Æ' => 'A','Ā' => 'A','Ą' => 'A','Ă' => 'A','Ç' => 'C','Ć' => 'C',
        'Č' => 'C','Ĉ' => 'C','Ċ' => 'C','Ď' => 'D','Đ' => 'D','È' => 'E',
        'É' => 'E','Ê' => 'E','Ë' => 'E','Ē' => 'E','Ę' => 'E','Ě' => 'E',
        'Ĕ' => 'E','Ė' => 'E','Ĝ' => 'G','Ğ' => 'G','Ġ' => 'G','Ģ' => 'G',
        'Ĥ' => 'H','Ħ' => 'H','Ì' => 'I','Í' => 'I','Î' => 'I','Ï' => 'I',
        'Ī' => 'I','Ĩ' => 'I','Ĭ' => 'I','Į' => 'I','İ' => 'I','Ĵ' => 'J',
        // L with caron / acute / cedilla / middle dot (U+013F) map to 'L',
        // consistent with their lowercase forms mapping to 'l'.
        'Ķ' => 'K','Ľ' => 'L','Ĺ' => 'L','Ļ' => 'L',"\xC4\xBF" => 'L','Ł' => 'L',
        'Ñ' => 'N','Ń' => 'N','Ň' => 'N','Ņ' => 'N','Ŋ' => 'N','Ò' => 'O',
        'Ó' => 'O','Ô' => 'O','Õ' => 'O','Ø' => 'O','Ō' => 'O','Ő' => 'O',
        'Ŏ' => 'O','Ŕ' => 'R','Ř' => 'R','Ŗ' => 'R','Ś' => 'S','Ş' => 'S',
        'Ŝ' => 'S','Ș' => 'S','Š' => 'S','Ť' => 'T','Ţ' => 'T','Ŧ' => 'T',
        'Ț' => 'T','Ù' => 'U','Ú' => 'U','Û' => 'U','Ū' => 'U','Ů' => 'U',
        'Ű' => 'U','Ŭ' => 'U','Ũ' => 'U','Ų' => 'U','Ŵ' => 'W','Ŷ' => 'Y',
        'Ÿ' => 'Y','Ý' => 'Y','Ź' => 'Z','Ż' => 'Z','Ž' => 'Z','à' => 'a',
        'á' => 'a','â' => 'a','ã' => 'a','ā' => 'a','ą' => 'a','ă' => 'a',
        'å' => 'a','ç' => 'c','ć' => 'c','č' => 'c','ĉ' => 'c','ċ' => 'c',
        'ď' => 'd','đ' => 'd','è' => 'e','é' => 'e','ê' => 'e','ë' => 'e',
        'ē' => 'e','ę' => 'e','ě' => 'e','ĕ' => 'e','ė' => 'e','ƒ' => 'f',
        'ĝ' => 'g','ğ' => 'g','ġ' => 'g','ģ' => 'g','ĥ' => 'h','ħ' => 'h',
        'ì' => 'i','í' => 'i','î' => 'i','ï' => 'i','ī' => 'i','ĩ' => 'i',
        'ĭ' => 'i','į' => 'i','ı' => 'i','ĵ' => 'j','ķ' => 'k','ĸ' => 'k',
        // "\xC5\x80" is U+0140 (l with middle dot).
        'ł' => 'l','ľ' => 'l','ĺ' => 'l','ļ' => 'l',"\xC5\x80" => 'l','ñ' => 'n',
        // "\xC5\x89" is U+0149 (n preceded by apostrophe).
        'ń' => 'n','ň' => 'n','ņ' => 'n',"\xC5\x89" => 'n','ŋ' => 'n','ò' => 'o',
        'ó' => 'o','ô' => 'o','õ' => 'o','ø' => 'o','ō' => 'o','ő' => 'o',
        'ŏ' => 'o','ŕ' => 'r','ř' => 'r','ŗ' => 'r','ś' => 's','š' => 's',
        // Lowercase counterparts of Ş Ŝ Ș Ţ Ŧ Ț, which are mapped above.
        'ş' => 's','ŝ' => 's','ș' => 's','ţ' => 't','ŧ' => 't','ț' => 't',
        'ť' => 't','ù' => 'u','ú' => 'u','û' => 'u','ū' => 'u','ů' => 'u',
        'ű' => 'u','ŭ' => 'u','ũ' => 'u','ų' => 'u','ŵ' => 'w','ÿ' => 'y',
        'ý' => 'y','ŷ' => 'y','ż' => 'z','ź' => 'z','ž' => 'z',
        // Monotonic Greek, uppercase.
        'Α' => 'A','Ά' => 'A','Β' => 'B','Γ' => 'G','Δ' => 'D','Ε' => 'E',
        'Έ' => 'E','Ζ' => 'Z','Η' => 'I','Ή' => 'I','Θ' => 'T','Ι' => 'I',
        'Ί' => 'I','Ϊ' => 'I','Κ' => 'K','Λ' => 'L','Μ' => 'M','Ν' => 'N',
        'Ξ' => 'K','Ο' => 'O','Ό' => 'O','Π' => 'P','Ρ' => 'R','Σ' => 'S',
        'Τ' => 'T','Υ' => 'Y','Ύ' => 'Y','Ϋ' => 'Y','Φ' => 'F','Χ' => 'X',
        'Ψ' => 'P','Ω' => 'O','Ώ' => 'O',
        // Monotonic Greek, lowercase.
        'α' => 'a','ά' => 'a','β' => 'b','γ' => 'g','δ' => 'd','ε' => 'e',
        'έ' => 'e','ζ' => 'z','η' => 'i','ή' => 'i','θ' => 't','ι' => 'i',
        'ί' => 'i','ϊ' => 'i','ΐ' => 'i','κ' => 'k','λ' => 'l','μ' => 'm',
        'ν' => 'n','ξ' => 'k','ο' => 'o','ό' => 'o','π' => 'p','ρ' => 'r',
        'σ' => 's','ς' => 's','τ' => 't','υ' => 'y','ύ' => 'y','ϋ' => 'y',
        'ΰ' => 'y','φ' => 'f','χ' => 'x','ψ' => 'p','ω' => 'o','ώ' => 'o',
        // Cyrillic, uppercase.
        'А' => 'A','Б' => 'B','В' => 'V','Г' => 'G','Д' => 'D','Е' => 'E',
        'Ё' => 'E','Ж' => 'Z','З' => 'Z','И' => 'I','Й' => 'I','К' => 'K',
        'Л' => 'L','М' => 'M','Н' => 'N','О' => 'O','П' => 'P','Р' => 'R',
        'С' => 'S','Т' => 'T','У' => 'U','Ф' => 'F','Х' => 'K','Ц' => 'T',
        'Ч' => 'C','Ш' => 'S','Щ' => 'S','Ы' => 'Y','Э' => 'E','Ю' => 'Y',
        'Я' => 'Y',
        // Cyrillic, lowercase.
        // NOTE(review): lowercase Cyrillic maps to UPPERCASE Latin. This is
        // kept because the documented example (`ю` -> `Y`) shows it; confirm
        // with callers before switching to lowercase output.
        'а' => 'A','б' => 'B','в' => 'V','г' => 'G','д' => 'D','е' => 'E',
        'ё' => 'E','ж' => 'Z','з' => 'Z','и' => 'I','й' => 'I','к' => 'K',
        'л' => 'L','м' => 'M','н' => 'N','о' => 'O','п' => 'P','р' => 'R',
        'с' => 'S','т' => 'T','у' => 'U','ф' => 'F','х' => 'K','ц' => 'T',
        'ч' => 'C','ш' => 'S','щ' => 'S','ы' => 'Y','э' => 'E','ю' => 'Y',
        'я' => 'Y',
        // Icelandic eth and thorn.
        'ð' => 'd','Ð' => 'D','þ' => 't','Þ' => 'T',
    );

    // Polytonic Greek (Unicode "Greek Extended" block), as inclusive
    // code point ranges => replacement letter. Built from code points so
    // the keys cannot be damaged by a re-encoding of this file: in the
    // reviewed copy these keys had all degraded to empty strings.
    // NOTE(review): reconstructed from the number and order of the
    // surviving replacement letters; the oxia forms (canonically equal to
    // the tonos forms above) are included as well.
    $greek_extended = array(
        // capital alpha
        array(0x1F08, 0x1F0F, 'A'), array(0x1F88, 0x1F8F, 'A'), array(0x1FB8, 0x1FBC, 'A'),
        // capital epsilon
        array(0x1F18, 0x1F1D, 'E'), array(0x1FC8, 0x1FC9, 'E'),
        // capital eta
        array(0x1F28, 0x1F2F, 'I'), array(0x1F98, 0x1F9F, 'I'), array(0x1FCA, 0x1FCC, 'I'),
        // capital iota
        array(0x1F38, 0x1F3F, 'I'), array(0x1FD8, 0x1FDB, 'I'),
        // capital omicron
        array(0x1F48, 0x1F4D, 'O'), array(0x1FF8, 0x1FF9, 'O'),
        // capital rho
        array(0x1FEC, 0x1FEC, 'R'),
        // capital upsilon (only the odd code points of 1F59-1F5F are assigned)
        array(0x1F59, 0x1F59, 'Y'), array(0x1F5B, 0x1F5B, 'Y'),
        array(0x1F5D, 0x1F5D, 'Y'), array(0x1F5F, 0x1F5F, 'Y'), array(0x1FE8, 0x1FEB, 'Y'),
        // capital omega
        array(0x1F68, 0x1F6F, 'O'), array(0x1FA8, 0x1FAF, 'O'), array(0x1FFA, 0x1FFC, 'O'),
        // small alpha (U+1FB5 is unassigned)
        array(0x1F00, 0x1F07, 'a'), array(0x1F80, 0x1F87, 'a'), array(0x1F70, 0x1F71, 'a'),
        array(0x1FB0, 0x1FB4, 'a'), array(0x1FB6, 0x1FB7, 'a'),
        // small epsilon
        array(0x1F10, 0x1F15, 'e'), array(0x1F72, 0x1F73, 'e'),
        // small eta (U+1FC5 is unassigned)
        array(0x1F20, 0x1F27, 'i'), array(0x1F90, 0x1F97, 'i'), array(0x1F74, 0x1F75, 'i'),
        array(0x1FC2, 0x1FC4, 'i'), array(0x1FC6, 0x1FC7, 'i'),
        // small iota (U+1FD4-U+1FD5 are unassigned)
        array(0x1F30, 0x1F37, 'i'), array(0x1F76, 0x1F77, 'i'),
        array(0x1FD0, 0x1FD3, 'i'), array(0x1FD6, 0x1FD7, 'i'),
        // small omicron
        array(0x1F40, 0x1F45, 'o'), array(0x1F78, 0x1F79, 'o'),
        // small rho
        array(0x1FE4, 0x1FE5, 'r'),
        // small upsilon
        array(0x1F50, 0x1F57, 'y'), array(0x1F7A, 0x1F7B, 'y'),
        array(0x1FE0, 0x1FE3, 'y'), array(0x1FE6, 0x1FE7, 'y'),
        // small omega (U+1FF5 is unassigned)
        array(0x1F60, 0x1F67, 'o'), array(0x1FA0, 0x1FA7, 'o'), array(0x1F7C, 0x1F7D, 'o'),
        array(0x1FF2, 0x1FF4, 'o'), array(0x1FF6, 0x1FF7, 'o'),
    );
    foreach( $greek_extended as $range ) {
        for( $cp = $range[0]; $cp <= $range[1]; $cp++ ) {
            $map[_remove_accents_utf8_bmp($cp)] = $range[2];
        }
    }

    // Georgian Mkhedruli letters U+10D0..U+10F0, in alphabet order; the
    // n-th character of this string is the replacement for U+10D0 + n.
    // NOTE(review): keys reconstructed the same way as the Greek ones above.
    $georgian = 'abgdevztiklmnopzrstupkgqsctdtckjh';
    $count = strlen($georgian);
    for( $i = 0; $i < $count; $i++ ) {
        $map[_remove_accents_utf8_bmp(0x10D0 + $i)] = $georgian[$i];
    }

    return $map;
}
//- remove_accents()
@FrantataCZ
Copy link

thx

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment