Last active
November 21, 2022 23:49
-
-
Save fawazahmed0/4da9561308dce1780aa625a8ee3d0e06 to your computer and use it in GitHub Desktop.
Remove Arabic diacritics & other things for searching
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions/Unicode_Property_Escapes
// https://unicode.org/reports/tr18/#General_Category_Property
// https://unicode.org/reports/tr24/#Script
// https://tc39.es/ecma262/multipage/text-processing.html#table-nonbinary-unicode-properties
// Note: \p{Symbol} also matches the pipe character (|), among others.
/**
 * Reduce text to a diacritic-free form suitable for search matching.
 *
 * Steps:
 *  1. Normalize to NFD so precomposed letters are split into a base letter
 *     plus combining marks (e.g. U+0624 becomes U+0648 + U+0654).
 *  2. Remove every code point that is a Diacritic, a Mark (combining
 *     character), an Extender (e.g. tatweel U+0640) or a Bidi_Control
 *     (e.g. U+200E / U+200F).
 *  3. Map alef wasla (U+0671) to plain alef (U+0627); it has no canonical
 *     decomposition, so the steps above leave it untouched.
 *
 * @param {string} text - Input text (Arabic or any other script).
 * @returns {string} The text with diacritics and direction controls removed.
 */
function removeArabicDiacritics(text) {
  return text
    .normalize("NFD")
    .replace(/\p{Diacritic}|\p{Mark}|\p{Extender}|\p{Bidi_Control}/gu, "")
    // U+0671 ARABIC LETTER ALEF WASLA -> U+0627 ARABIC LETTER ALEF
    .replaceAll("\u0671", "\u0627");
}

// Sample input: a fully vocalized Arabic sentence.
let str = "ٱلَّذِينَ يُؤۡمِنُونَ بِٱلۡغَيۡبِ وَيُقِيمُونَ ٱلصَّلَوٰةَ وَمِمَّا رَزَقۡنَٰهُمۡ يُنفِقُونَ";
str = removeArabicDiacritics(str);
console.log(str);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment