Last active
May 10, 2017 23:54
-
-
Save miken32/5e6785b2d2bd0bf13bbb2d910df373a1 to your computer and use it in GitHub Desktop.
Verify if all characters of a string belong to a given Unicode character block
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* BlockCheck 1.0 | |
* | |
* Copyright (C) 2017 Michael Newton | |
* | |
* This program is free software: you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation, either version 3 of the License, or | |
* (at your option) any later version. | |
* | |
* This program is distributed in the hope that it will be useful, | |
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
* GNU General Public License for more details. | |
* | |
* You should have received a copy of the GNU General Public License | |
* along with this program. If not, see <http://www.gnu.org/licenses/>. | |
* | |
* @package BlockCheck | |
* @author Michael Newton <miken32> | |
* @copyright 2017 Michael Newton | |
* @license https://opensource.org/licenses/GPL-3.0 GPLv3 | |
* @version 1.0 | |
* @see http://stackoverflow.com/a/43010755/1255289 | |
*/ | |
namespace Miken32; | |
/**
 * BlockCheck
 *
 * A utility class to check whether or not every character of a string
 * belongs to one or more Unicode character blocks.
 *
 * Can be used statically or as an instance. Sample usage:
 * ```
 * <?php
 * use Miken32\BlockCheck as BC;
 *
 * // Static, single block: all three symbols lie in U+1F700..U+1F77F,
 * // so this prints "Valid characters".
 * if (BC::isValid("🜈🝮🝤", BC::ALCHEMICAL_SYMBOLS)) {
 *     echo "Valid characters\n";
 * } else {
 *     echo "Invalid characters\n";
 * }
 *
 * // Instance, block given by constant name: "é" (U+00E9) is outside
 * // Basic Latin, so this prints "Invalid characters".
 * $myChecker = new BC("BASIC_LATIN");
 * $test = "Meet me at the café.";
 * if ($myChecker->check($test)) {
 *     echo "Valid characters\n";
 * } else {
 *     echo "Invalid characters\n";
 * }
 *
 * // After adding the Latin-1 Supplement block the same string is
 * // accepted, so this prints "Valid characters".
 * $myChecker->addBlock(BC::LATIN_1_SUPPLEMENT);
 * if ($myChecker->check($test)) {
 *     echo "Valid characters\n";
 * } else {
 *     echo "Invalid characters\n";
 * }
 * ```
 */
class BlockCheck
{
    // Unicode character blocks as inclusive [first, last] code point pairs.
    const ADLAM = [0x1E900, 0x1E95F];
    const AEGEAN_NUMBERS = [0x10100, 0x1013F];
    const AHOM = [0x11700, 0x1173F];
    const ALCHEMICAL_SYMBOLS = [0x1F700, 0x1F77F];
    const ALPHABETIC_PRESENTATION_FORMS = [0xFB00, 0xFB4F];
    const ANATOLIAN_HIEROGLYPHS = [0x14400, 0x1467F];
    const ANCIENT_GREEK_MUSICAL_NOTATION = [0x1D200, 0x1D24F];
    const ANCIENT_GREEK_NUMBERS = [0x10140, 0x1018F];
    const ANCIENT_SYMBOLS = [0x10190, 0x101CF];
    const ARABIC = [0x0600, 0x06FF];
    const ARABIC_EXTENDED_A = [0x08A0, 0x08FF];
    const ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = [0x1EE00, 0x1EEFF];
    const ARABIC_PRESENTATION_FORMS_A = [0xFB50, 0xFDFF];
    const ARABIC_PRESENTATION_FORMS_B = [0xFE70, 0xFEFF];
    const ARABIC_SUPPLEMENT = [0x0750, 0x077F];
    const ARMENIAN = [0x0530, 0x058F];
    const ARROWS = [0x2190, 0x21FF];
    const AVESTAN = [0x10B00, 0x10B3F];
    const BALINESE = [0x1B00, 0x1B7F];
    const BAMUM = [0xA6A0, 0xA6FF];
    const BAMUM_SUPPLEMENT = [0x16800, 0x16A3F];
    const BASIC_LATIN = [0x0000, 0x007F];
    const BASSA_VAH = [0x16AD0, 0x16AFF];
    const BATAK = [0x1BC0, 0x1BFF];
    const BENGALI = [0x0980, 0x09FF];
    const BHAIKSUKI = [0x11C00, 0x11C6F];
    const BLOCK_ELEMENTS = [0x2580, 0x259F];
    const BOPOMOFO = [0x3100, 0x312F];
    const BOPOMOFO_EXTENDED = [0x31A0, 0x31BF];
    const BOX_DRAWING = [0x2500, 0x257F];
    const BRAHMI = [0x11000, 0x1107F];
    const BRAILLE_PATTERNS = [0x2800, 0x28FF];
    const BUGINESE = [0x1A00, 0x1A1F];
    const BUHID = [0x1740, 0x175F];
    const BYZANTINE_MUSICAL_SYMBOLS = [0x1D000, 0x1D0FF];
    const CARIAN = [0x102A0, 0x102DF];
    const CAUCASIAN_ALBANIAN = [0x10530, 0x1056F];
    const CHAKMA = [0x11100, 0x1114F];
    const CHAM = [0xAA00, 0xAA5F];
    const CHEROKEE = [0x13A0, 0x13FF];
    const CHEROKEE_SUPPLEMENT = [0xAB70, 0xABBF];
    const CJK_COMPATIBILITY = [0x3300, 0x33FF];
    const CJK_COMPATIBILITY_FORMS = [0xFE30, 0xFE4F];
    const CJK_COMPATIBILITY_IDEOGRAPHS = [0xF900, 0xFAFF];
    const CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = [0x2F800, 0x2FA1F];
    const CJK_RADICALS_SUPPLEMENT = [0x2E80, 0x2EFF];
    const CJK_STROKES = [0x31C0, 0x31EF];
    const CJK_SYMBOLS_AND_PUNCTUATION = [0x3000, 0x303F];
    const CJK_UNIFIED_IDEOGRAPHS = [0x4E00, 0x9FFF];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = [0x3400, 0x4DBF];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = [0x20000, 0x2A6DF];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = [0x2A700, 0x2B73F];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = [0x2B740, 0x2B81F];
    const CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = [0x2B820, 0x2CEAF];
    const COMBINING_DIACRITICAL_MARKS = [0x0300, 0x036F];
    const COMBINING_DIACRITICAL_MARKS_EXTENDED = [0x1AB0, 0x1AFF];
    const COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = [0x20D0, 0x20FF];
    const COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = [0x1DC0, 0x1DFF];
    const COMBINING_HALF_MARKS = [0xFE20, 0xFE2F];
    const COMMON_INDIC_NUMBER_FORMS = [0xA830, 0xA83F];
    const CONTROL_PICTURES = [0x2400, 0x243F];
    const COPTIC = [0x2C80, 0x2CFF];
    const COPTIC_EPACT_NUMBERS = [0x102E0, 0x102FF];
    const COUNTING_ROD_NUMERALS = [0x1D360, 0x1D37F];
    const CUNEIFORM = [0x12000, 0x123FF];
    const CUNEIFORM_NUMBERS_AND_PUNCTUATION = [0x12400, 0x1247F];
    const CURRENCY_SYMBOLS = [0x20A0, 0x20CF];
    const CYPRIOT_SYLLABARY = [0x10800, 0x1083F];
    const CYRILLIC = [0x0400, 0x04FF];
    const CYRILLIC_EXTENDED_A = [0x2DE0, 0x2DFF];
    const CYRILLIC_EXTENDED_B = [0xA640, 0xA69F];
    const CYRILLIC_EXTENDED_C = [0x1C80, 0x1C8F];
    const CYRILLIC_SUPPLEMENT = [0x0500, 0x052F];
    const DESERET = [0x10400, 0x1044F];
    const DEVANAGARI = [0x0900, 0x097F];
    const DEVANAGARI_EXTENDED = [0xA8E0, 0xA8FF];
    const DINGBATS = [0x2700, 0x27BF];
    const DOMINO_TILES = [0x1F030, 0x1F09F];
    const DUPLOYAN = [0x1BC00, 0x1BC9F];
    const EARLY_DYNASTIC_CUNEIFORM = [0x12480, 0x1254F];
    const EGYPTIAN_HIEROGLYPHS = [0x13000, 0x1342F];
    const ELBASAN = [0x10500, 0x1052F];
    const EMOTICONS = [0x1F600, 0x1F64F];
    const ENCLOSED_ALPHANUMERICS = [0x2460, 0x24FF];
    const ENCLOSED_ALPHANUMERIC_SUPPLEMENT = [0x1F100, 0x1F1FF];
    const ENCLOSED_CJK_LETTERS_AND_MONTHS = [0x3200, 0x32FF];
    const ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = [0x1F200, 0x1F2FF];
    const ETHIOPIC = [0x1200, 0x137F];
    const ETHIOPIC_EXTENDED = [0x2D80, 0x2DDF];
    const ETHIOPIC_EXTENDED_A = [0xAB00, 0xAB2F];
    const ETHIOPIC_SUPPLEMENT = [0x1380, 0x139F];
    const GENERAL_PUNCTUATION = [0x2000, 0x206F];
    const GEOMETRIC_SHAPES = [0x25A0, 0x25FF];
    const GEOMETRIC_SHAPES_EXTENDED = [0x1F780, 0x1F7FF];
    const GEORGIAN = [0x10A0, 0x10FF];
    const GEORGIAN_SUPPLEMENT = [0x2D00, 0x2D2F];
    const GLAGOLITIC = [0x2C00, 0x2C5F];
    const GLAGOLITIC_SUPPLEMENT = [0x1E000, 0x1E02F];
    const GOTHIC = [0x10330, 0x1034F];
    const GRANTHA = [0x11300, 0x1137F];
    const GREEK_AND_COPTIC = [0x0370, 0x03FF];
    const GREEK_EXTENDED = [0x1F00, 0x1FFF];
    const GUJARATI = [0x0A80, 0x0AFF];
    const GURMUKHI = [0x0A00, 0x0A7F];
    const HALFWIDTH_AND_FULLWIDTH_FORMS = [0xFF00, 0xFFEF];
    const HANGUL_COMPATIBILITY_JAMO = [0x3130, 0x318F];
    const HANGUL_JAMO = [0x1100, 0x11FF];
    const HANGUL_JAMO_EXTENDED_A = [0xA960, 0xA97F];
    const HANGUL_JAMO_EXTENDED_B = [0xD7B0, 0xD7FF];
    const HANGUL_SYLLABLES = [0xAC00, 0xD7AF];
    const HANUNOO = [0x1720, 0x173F];
    const HATRAN = [0x108E0, 0x108FF];
    const HEBREW = [0x0590, 0x05FF];
    const HIGH_PRIVATE_USE_SURROGATES = [0xDB80, 0xDBFF];
    const HIGH_SURROGATES = [0xD800, 0xDB7F];
    const HIRAGANA = [0x3040, 0x309F];
    const IDEOGRAPHIC_DESCRIPTION_CHARACTERS = [0x2FF0, 0x2FFF];
    const IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = [0x16FE0, 0x16FFF];
    const IMPERIAL_ARAMAIC = [0x10840, 0x1085F];
    const INSCRIPTIONAL_PAHLAVI = [0x10B60, 0x10B7F];
    const INSCRIPTIONAL_PARTHIAN = [0x10B40, 0x10B5F];
    const IPA_EXTENSIONS = [0x0250, 0x02AF];
    const JAVANESE = [0xA980, 0xA9DF];
    const KAITHI = [0x11080, 0x110CF];
    const KANA_SUPPLEMENT = [0x1B000, 0x1B0FF];
    const KANBUN = [0x3190, 0x319F];
    const KANGXI_RADICALS = [0x2F00, 0x2FDF];
    const KANNADA = [0x0C80, 0x0CFF];
    const KATAKANA = [0x30A0, 0x30FF];
    const KATAKANA_PHONETIC_EXTENSIONS = [0x31F0, 0x31FF];
    const KAYAH_LI = [0xA900, 0xA92F];
    const KHAROSHTHI = [0x10A00, 0x10A5F];
    const KHMER = [0x1780, 0x17FF];
    const KHMER_SYMBOLS = [0x19E0, 0x19FF];
    const KHOJKI = [0x11200, 0x1124F];
    const KHUDAWADI = [0x112B0, 0x112FF];
    const LAO = [0x0E80, 0x0EFF];
    const LATIN_1_SUPPLEMENT = [0x0080, 0x00FF];
    const LATIN_EXTENDED_A = [0x0100, 0x017F];
    const LATIN_EXTENDED_ADDITIONAL = [0x1E00, 0x1EFF];
    const LATIN_EXTENDED_B = [0x0180, 0x024F];
    const LATIN_EXTENDED_C = [0x2C60, 0x2C7F];
    const LATIN_EXTENDED_D = [0xA720, 0xA7FF];
    const LATIN_EXTENDED_E = [0xAB30, 0xAB6F];
    const LEPCHA = [0x1C00, 0x1C4F];
    const LETTERLIKE_SYMBOLS = [0x2100, 0x214F];
    const LIMBU = [0x1900, 0x194F];
    const LINEAR_A = [0x10600, 0x1077F];
    const LINEAR_B_IDEOGRAMS = [0x10080, 0x100FF];
    const LINEAR_B_SYLLABARY = [0x10000, 0x1007F];
    const LISU = [0xA4D0, 0xA4FF];
    const LOW_SURROGATES = [0xDC00, 0xDFFF];
    const LYCIAN = [0x10280, 0x1029F];
    const LYDIAN = [0x10920, 0x1093F];
    const MAHAJANI = [0x11150, 0x1117F];
    const MAHJONG_TILES = [0x1F000, 0x1F02F];
    const MALAYALAM = [0x0D00, 0x0D7F];
    const MANDAIC = [0x0840, 0x085F];
    const MANICHAEAN = [0x10AC0, 0x10AFF];
    const MARCHEN = [0x11C70, 0x11CBF];
    const MATHEMATICAL_ALPHANUMERIC_SYMBOLS = [0x1D400, 0x1D7FF];
    const MATHEMATICAL_OPERATORS = [0x2200, 0x22FF];
    const MEETEI_MAYEK = [0xABC0, 0xABFF];
    const MEETEI_MAYEK_EXTENSIONS = [0xAAE0, 0xAAFF];
    const MENDE_KIKAKUI = [0x1E800, 0x1E8DF];
    const MEROITIC_CURSIVE = [0x109A0, 0x109FF];
    const MEROITIC_HIEROGLYPHS = [0x10980, 0x1099F];
    const MIAO = [0x16F00, 0x16F9F];
    const MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = [0x27C0, 0x27EF];
    const MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = [0x2980, 0x29FF];
    const MISCELLANEOUS_SYMBOLS = [0x2600, 0x26FF];
    const MISCELLANEOUS_SYMBOLS_AND_ARROWS = [0x2B00, 0x2BFF];
    const MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = [0x1F300, 0x1F5FF];
    const MISCELLANEOUS_TECHNICAL = [0x2300, 0x23FF];
    const MODI = [0x11600, 0x1165F];
    const MODIFIER_TONE_LETTERS = [0xA700, 0xA71F];
    const MONGOLIAN = [0x1800, 0x18AF];
    const MONGOLIAN_SUPPLEMENT = [0x11660, 0x1167F];
    const MRO = [0x16A40, 0x16A6F];
    const MULTANI = [0x11280, 0x112AF];
    const MUSICAL_SYMBOLS = [0x1D100, 0x1D1FF];
    const MYANMAR = [0x1000, 0x109F];
    const MYANMAR_EXTENDED_A = [0xAA60, 0xAA7F];
    const MYANMAR_EXTENDED_B = [0xA9E0, 0xA9FF];
    const NABATAEAN = [0x10880, 0x108AF];
    const NEWA = [0x11400, 0x1147F];
    const NEW_TAI_LUE = [0x1980, 0x19DF];
    const NKO = [0x07C0, 0x07FF];
    const NUMBER_FORMS = [0x2150, 0x218F];
    const OGHAM = [0x1680, 0x169F];
    const OLD_HUNGARIAN = [0x10C80, 0x10CFF];
    const OLD_ITALIC = [0x10300, 0x1032F];
    const OLD_NORTH_ARABIAN = [0x10A80, 0x10A9F];
    const OLD_PERMIC = [0x10350, 0x1037F];
    const OLD_PERSIAN = [0x103A0, 0x103DF];
    const OLD_SOUTH_ARABIAN = [0x10A60, 0x10A7F];
    const OLD_TURKIC = [0x10C00, 0x10C4F];
    const OL_CHIKI = [0x1C50, 0x1C7F];
    const OPTICAL_CHARACTER_RECOGNITION = [0x2440, 0x245F];
    const ORIYA = [0x0B00, 0x0B7F];
    const ORNAMENTAL_DINGBATS = [0x1F650, 0x1F67F];
    const OSAGE = [0x104B0, 0x104FF];
    const OSMANYA = [0x10480, 0x104AF];
    const PAHAWH_HMONG = [0x16B00, 0x16B8F];
    const PALMYRENE = [0x10860, 0x1087F];
    const PAU_CIN_HAU = [0x11AC0, 0x11AFF];
    const PHAGS_PA = [0xA840, 0xA87F];
    const PHAISTOS_DISC = [0x101D0, 0x101FF];
    const PHOENICIAN = [0x10900, 0x1091F];
    const PHONETIC_EXTENSIONS = [0x1D00, 0x1D7F];
    const PHONETIC_EXTENSIONS_SUPPLEMENT = [0x1D80, 0x1DBF];
    const PLAYING_CARDS = [0x1F0A0, 0x1F0FF];
    const PRIVATE_USE_AREA = [0xE000, 0xF8FF];
    const PSALTER_PAHLAVI = [0x10B80, 0x10BAF];
    const REJANG = [0xA930, 0xA95F];
    const RUMI_NUMERAL_SYMBOLS = [0x10E60, 0x10E7F];
    const RUNIC = [0x16A0, 0x16FF];
    const SAMARITAN = [0x0800, 0x083F];
    const SAURASHTRA = [0xA880, 0xA8DF];
    const SHARADA = [0x11180, 0x111DF];
    const SHAVIAN = [0x10450, 0x1047F];
    const SHORTHAND_FORMAT_CONTROLS = [0x1BCA0, 0x1BCAF];
    const SIDDHAM = [0x11580, 0x115FF];
    const SINHALA = [0x0D80, 0x0DFF];
    const SINHALA_ARCHAIC_NUMBERS = [0x111E0, 0x111FF];
    const SMALL_FORM_VARIANTS = [0xFE50, 0xFE6F];
    const SORA_SOMPENG = [0x110D0, 0x110FF];
    const SPACING_MODIFIER_LETTERS = [0x02B0, 0x02FF];
    const SPECIALS = [0xFFF0, 0xFFFF];
    const SUNDANESE = [0x1B80, 0x1BBF];
    const SUNDANESE_SUPPLEMENT = [0x1CC0, 0x1CCF];
    const SUPERSCRIPTS_AND_SUBSCRIPTS = [0x2070, 0x209F];
    const SUPPLEMENTAL_ARROWS_A = [0x27F0, 0x27FF];
    const SUPPLEMENTAL_ARROWS_B = [0x2900, 0x297F];
    const SUPPLEMENTAL_ARROWS_C = [0x1F800, 0x1F8FF];
    const SUPPLEMENTAL_MATHEMATICAL_OPERATORS = [0x2A00, 0x2AFF];
    const SUPPLEMENTAL_PUNCTUATION = [0x2E00, 0x2E7F];
    const SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = [0x1F900, 0x1F9FF];
    const SUPPLEMENTARY_PRIVATE_USE_AREA_A = [0xF0000, 0xFFFFF];
    const SUPPLEMENTARY_PRIVATE_USE_AREA_B = [0x100000, 0x10FFFF];
    const SUTTON_SIGNWRITING = [0x1D800, 0x1DAAF];
    const SYLOTI_NAGRI = [0xA800, 0xA82F];
    const SYRIAC = [0x0700, 0x074F];
    const TAGALOG = [0x1700, 0x171F];
    const TAGBANWA = [0x1760, 0x177F];
    const TAGS = [0xE0000, 0xE007F];
    const TAI_LE = [0x1950, 0x197F];
    const TAI_THAM = [0x1A20, 0x1AAF];
    const TAI_VIET = [0xAA80, 0xAADF];
    const TAI_XUAN_JING_SYMBOLS = [0x1D300, 0x1D35F];
    const TAKRI = [0x11680, 0x116CF];
    const TAMIL = [0x0B80, 0x0BFF];
    const TANGUT = [0x17000, 0x187FF];
    const TANGUT_COMPONENTS = [0x18800, 0x18AFF];
    const TELUGU = [0x0C00, 0x0C7F];
    const THAANA = [0x0780, 0x07BF];
    const THAI = [0x0E00, 0x0E7F];
    const TIBETAN = [0x0F00, 0x0FFF];
    const TIFINAGH = [0x2D30, 0x2D7F];
    const TIRHUTA = [0x11480, 0x114DF];
    const TRANSPORT_AND_MAP_SYMBOLS = [0x1F680, 0x1F6FF];
    const UGARITIC = [0x10380, 0x1039F];
    const UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = [0x1400, 0x167F];
    const UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = [0x18B0, 0x18FF];
    const VAI = [0xA500, 0xA63F];
    const VARIATION_SELECTORS = [0xFE00, 0xFE0F];
    const VARIATION_SELECTORS_SUPPLEMENT = [0xE0100, 0xE01EF];
    const VEDIC_EXTENSIONS = [0x1CD0, 0x1CFF];
    const VERTICAL_FORMS = [0xFE10, 0xFE1F];
    const WARANG_CITI = [0x118A0, 0x118FF];
    const YIJING_HEXAGRAM_SYMBOLS = [0x4DC0, 0x4DFF];
    const YI_RADICALS = [0xA490, 0xA4CF];
    const YI_SYLLABLES = [0xA000, 0xA48F];

    /** @var int[][] $blocks List of [first, last] code point pairs to check against */
    private $blocks = [];

    /**
     * Object constructor
     *
     * Can be passed zero or more of the following:
     *  * a constant defining a Unicode character block
     *  * a string containing the name of a constant
     *  * an array containing start and end code points in numeric form
     *
     * @param string|int[] ...$blocks The block(s) to check against
     * @throws \Exception if an invalid block is passed
     */
    public function __construct(...$blocks)
    {
        foreach ($blocks as $block) {
            $this->addBlock($block);
        }
    }

    /**
     * Adds a block to the existing list
     *
     * @param string|int[] $block The block to add (see constructor for details)
     * @return void
     * @throws \Exception if an invalid block is passed
     */
    public function addBlock($block)
    {
        $this->blocks[] = self::resolveBlock($block);
    }

    /**
     * Checks if the given string is composed only of characters in the defined block(s)
     *
     * An empty string is always considered valid. A non-empty string checked
     * against an instance with no blocks is never valid.
     *
     * @param string $string The string to check
     * @return boolean
     */
    public function check($string)
    {
        if (self::isBlank($string)) {
            return true;
        }

        return self::matchesBlocks($string, $this->blocks);
    }

    /**
     * Static method to check if the string is composed only of characters in a block
     *
     * An empty string is always considered valid; in that case the block
     * argument is not inspected at all.
     *
     * @param string $string The string to check
     * @param string|int[] $block The block to check against (see constructor for details)
     * @return boolean
     * @throws \Exception if an invalid block is passed
     */
    public static function isValid($string, $block)
    {
        if (self::isBlank($string)) {
            return true;
        }

        return self::matchesBlocks($string, [self::resolveBlock($block)]);
    }

    /**
     * Tells whether the input holds no characters at all.
     *
     * Deliberately not empty(): the string "0" is a real character and
     * must be tested against the block(s) like any other.
     *
     * @param mixed $string The value to inspect
     * @return boolean
     */
    private static function isBlank($string)
    {
        return $string === '' || $string === null || $string === false;
    }

    /**
     * Turns a block argument into a validated [first, last] pair.
     *
     * @param string|int[] $block A constant name or a two-element numeric array
     * @return int[] The inclusive start and end code points
     * @throws \Exception if the argument does not describe a usable range
     */
    private static function resolveBlock($block)
    {
        // __CLASS__ rather than "self::" keeps the lookup unambiguous
        if (is_string($block) && defined(__CLASS__ . "::$block")) {
            $block = constant(__CLASS__ . "::$block");
        }

        if (
            !is_array($block)
            || !isset($block[0], $block[1])
            || !is_numeric($block[0])
            || !is_numeric($block[1])
        ) {
            throw new \Exception("Bad character range passed!");
        }

        $start = (int)$block[0];
        $end = (int)$block[1];

        // 0x10FFFF is the last code point of the Unicode code space
        if ($start < 0 || $end > 0x10FFFF || $end < $start) {
            throw new \Exception("Bad character range passed!");
        }

        return [$start, $end];
    }

    /**
     * Tests a non-empty string against a list of validated blocks.
     *
     * @param string $string The string to check
     * @param int[][] $blocks List of [first, last] code point pairs
     * @return boolean
     */
    private static function matchesBlocks($string, array $blocks)
    {
        $class = '';
        foreach ($blocks as $block) {
            list($start, $end) = $block;
            // PCRE refuses surrogate code points (U+D800..U+DFFF) in UTF mode,
            // and valid UTF-8 can never contain them, so cut them out of the
            // range instead of letting the whole pattern fail to compile.
            $pieces = [
                [$start, min($end, 0xD7FF)],
                [max($start, 0xE000), $end],
            ];
            foreach ($pieces as $piece) {
                if ($piece[0] <= $piece[1]) {
                    $class .= sprintf('\x{%x}-\x{%x}', $piece[0], $piece[1]);
                }
            }
        }

        // Nothing to match against: no character can be valid
        if ($class === '') {
            return false;
        }

        // \A and \z anchor to the true string boundaries; a plain "$" would
        // also match just before a trailing newline and let it slip through.
        return preg_match('/\A[' . $class . ']*\z/u', (string)$string) === 1;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment