Created
March 11, 2024 22:22
-
-
Save codejockie/10dc34c94e6fedd4477bc9072ef61024 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// REFERENCE UNICODE TABLES: | |
// http://www.rikai.com/library/kanjitables/kanji_codes.unicode.shtml | |
// http://www.tamasoft.co.jp/en/general-info/unicode.html | |
// | |
// TEST EDITOR: | |
// http://www.gethifi.com/tools/regex | |
// | |
// UNICODE RANGE: DESCRIPTION | |
// | |
// 3000-303F: punctuation | |
// 3040-309F: hiragana | |
// 30A0-30FF: katakana | |
// FF00-FFEF: Full-width roman + half-width katakana | |
// 4E00-9FAF: Common and uncommon kanji | |
// | |
// Non-Japanese punctuation/formatting characters commonly used in Japanese text | |
// 2605-2606: Stars | |
// 2190-2195: Arrows | |
// 203B: Reference mark (※, "komejirushi"; used like an asterisk)
// Matches one character from any of the Unicode ranges listed in the table
// above (CJK punctuation, hiragana, katakana, full-/half-width forms, kanji,
// stars, arrows, reference mark).
//
// The regex is intentionally NOT global: with the `g` flag,
// RegExp.prototype.test() remembers its position in `lastIndex`, so calling
// test() repeatedly on the same regex can report "no match" for a string that
// does contain Japanese characters. A presence check needs no `g` flag.
var regex = /[\u3000-\u303F\u3040-\u309F\u30A0-\u30FF\uFF00-\uFFEF\u4E00-\u9FAF\u2605\u2606\u2190-\u2195\u203B]/;
var input = "input string";
if (regex.test(input)) {
  console.log("Japanese characters found");
} else {
  console.log("No Japanese characters");
}
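// Side note: the range 0x4E00-0x9FAF also contains Chinese-only characters.
// The code below sorts that range with a Japanese collator; this is intended to
// put the 6355 standard (JIS X 0208) Japanese kanji first, but the resulting
// order depends on the collation data shipped with the JavaScript runtime.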
// Build one single-character string per code point in the kanji range
// 0x4E00-0x9FAF. The upper bound is inclusive so that U+9FAF itself is part of
// the list (a strict `<` would silently drop the last character of the range).
for (var i = 0x4e00, acc = []; i <= 0x9faf; i++) {
  acc.push(String.fromCharCode(i));
}

// Sort with the Japanese locale's collation rules. Array.prototype.sort works
// in place, so `sortedChars` and `acc` refer to the same (now sorted) array.
// NOTE(review): the slices below assume the ja-JP collation orders the
// JIS X 0208 kanji ahead of all other ideographs; that depends on the
// collation data available in the runtime -- confirm on the target platform.
var sortedChars = acc.sort(new Intl.Collator("ja-JP").compare);
var level1Kanji = sortedChars.slice(0, 2965); // JIS X 0208 - Level 1 Kanji (2965 characters)
var level2Kanji = sortedChars.slice(2965, 6355); // JIS X 0208 - Level 2 Kanji (3390 characters)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment