Skip to content

Instantly share code, notes, and snippets.

@oozoofrog
Last active December 30, 2025 06:29
Show Gist options
  • Select an option

  • Save oozoofrog/07d7eec63750c4992b09e2709f180497 to your computer and use it in GitHub Desktop.

Select an option

Save oozoofrog/07d7eec63750c4992b09e2709f180497 to your computer and use it in GitHub Desktop.
한글 유니코드 다루기
import Cocoa
// Sample mixed-script input (Hangul syllables, a compatibility jamo, Latin letters and digits).
// NOTE(review): not referenced by any other visible code - confirm whether it is still needed.
var str = "궉토abcd스234꾹타ㅎ하후훼의"
extension Collection {
    /// A new array holding this collection's elements in iteration order.
    var toArray: [Element] {
        let copied = [Element](self)
        return copied
    }
}
/// A value that can be turned into a `Unicode.Scalar`.
protocol UnicodeScalarCreatable {
    /// The `Unicode.Scalar` produced from this value.
    var toUnicodeScalar: Unicode.Scalar { get }
}
extension UInt32: UnicodeScalarCreatable {
    /// The scalar with this code point, or U+0000 when the value is not a
    /// valid Unicode scalar value.
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            // Invalid code points fall back to NUL.
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}
extension UInt16: UnicodeScalarCreatable {
    /// The scalar with this code point, or U+0000 when the value is not a
    /// valid Unicode scalar value.
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            // Invalid code points fall back to NUL.
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}
extension Int: UnicodeScalarCreatable {
    /// The scalar with this code point, or U+0000 when the value is not a
    /// valid Unicode scalar value.
    var toUnicodeScalar: Unicode.Scalar {
        guard let scalar = Unicode.Scalar(self) else {
            // Invalid code points fall back to NUL.
            return Unicode.Scalar(UInt8(0))
        }
        return scalar
    }
}
/// Lets `Unicode.Scalar` be stepped by code-point offsets, which in turn makes
/// `ClosedRange<Unicode.Scalar>` usable as a collection.
extension Unicode.Scalar: Strideable {
    public typealias Stride = Int32

    /// Signed code-point difference from `self` to `other`.
    ///
    /// Scalar values never exceed 0x10FFFF, so both conversions to `Int32` are exact.
    public func distance(to other: Unicode.Scalar) -> Stride {
        return Int32(other.value) - Int32(value)
    }

    /// The scalar `n` code points away from `self`.
    ///
    /// Returns `self` unchanged when the target is not a valid scalar value
    /// (below zero, above 0x10FFFF, or inside the surrogate range).
    public func advanced(by n: Int32) -> Unicode.Scalar {
        // Do the arithmetic in a signed, wider type: converting a negative
        // stride to UInt32 would trap.
        let target = Int64(value) + Int64(n)
        guard let raw = UInt32(exactly: target), let scalar = Unicode.Scalar(raw) else {
            return self
        }
        return scalar
    }

    /// This scalar wrapped in a `Character`.
    var toCharacter: Character { return Character(self) }
}
/// Classifies Unicode scalars, characters and strings as Hangul, and converts
/// conjoining (NFD) jamo into Hangul Compatibility Jamo.
class KoreanUnicode {
    /// Conjoining initial consonants (NFD), U+1100...U+1112.
    /// ᄀ, ᄁ, ᄂ, ᄃ, ᄄ, ᄅ, ᄆ, ᄇ, ᄈ, ᄉ, ᄊ, ᄋ, ᄌ, ᄍ, ᄎ, ᄏ, ᄐ, ᄑ, ᄒ
    let initialConsonant: ClosedRange<Unicode.Scalar> = "\u{1100}"..."\u{1112}"

    /// Conjoining medial vowels (NFD), U+1161...U+1175.
    /// ᅡ, ᅢ, ᅣ, ᅤ, ᅥ, ᅦ, ᅧ, ᅨ, ᅩ, ᅪ, ᅫ, ᅬ, ᅭ, ᅮ, ᅯ, ᅰ, ᅱ, ᅲ, ᅳ, ᅴ, ᅵ
    let medial: ClosedRange<Unicode.Scalar> = "\u{1161}"..."\u{1175}"

    /// Conjoining final consonants (NFD), U+11A8...U+11C2.
    /// Later code points also carry linguistic meaning, but they were judged
    /// unlikely to be needed by this project, so the range stops here.
    /// ᆨ, ᆩ, ᆪ, ᆫ, ᆬ, ᆭ, ᆮ, ᆯ, ᆰ, ᆱ, ᆲ, ᆳ, ᆴ, ᆵ, ᆶ, ᆷ, ᆸ, ᆹ, ᆺ, ᆻ, ᆼ, ᆽ, ᆾ, ᆿ, ᇀ, ᇁ, ᇂ
    let finalConsonant: ClosedRange<Unicode.Scalar> = "\u{11A8}"..."\u{11C2}"

    /// Hangul Compatibility Jamo, U+3131...U+3163.
    let koreanCompatibilityJamo: ClosedRange<Unicode.Scalar> = "\u{3131}"..."\u{3163}"

    /// Precomposed (NFC) Hangul syllables, U+AC00...U+D7A3.
    let koreanOfNFC: ClosedRange<Unicode.Scalar> = "\u{AC00}"..."\u{D7A3}"

    /// The five Hangul ranges, in the order they appear in `korean`.
    private var hangulRanges: [ClosedRange<Unicode.Scalar>] {
        return [initialConsonant, medial, finalConsonant, koreanCompatibilityJamo, koreanOfNFC]
    }

    /// Every scalar of the NFD jamo, compatibility jamo and NFC syllable ranges,
    /// concatenated in that order. Built on first access.
    private(set) lazy var korean: [Unicode.Scalar] = hangulRanges.flatMap { range -> [Unicode.Scalar] in
        return (range.lowerBound.value...range.upperBound.value).compactMap { Unicode.Scalar($0) }
    }

    /// Conjoining initial consonant -> compatibility jamo.
    private(set) lazy var compatibilityInitialConsonantConvertTable: [Character: Character] =
        KoreanUnicode.makeConvertTable(
            from: initialConsonant,
            to: ["\u{3131}", "\u{3132}", "\u{3134}", "\u{3137}", "\u{3138}", "\u{3139}", "\u{3141}",
                 "\u{3142}", "\u{3143}", "\u{3145}", "\u{3146}", "\u{3147}", "\u{3148}", "\u{3149}",
                 "\u{314A}", "\u{314B}", "\u{314C}", "\u{314D}", "\u{314E}"])

    /// Conjoining medial vowel -> compatibility jamo.
    private(set) lazy var compatibilityMedialConvertTable: [Character: Character] =
        KoreanUnicode.makeConvertTable(
            from: medial,
            to: ["\u{314F}", "\u{3150}", "\u{3151}", "\u{3152}", "\u{3153}", "\u{3154}", "\u{3155}",
                 "\u{3156}", "\u{3157}", "\u{3158}", "\u{3159}", "\u{315A}", "\u{315B}", "\u{315C}",
                 "\u{315D}", "\u{315E}", "\u{315F}", "\u{3160}", "\u{3161}", "\u{3162}", "\u{3163}"])

    /// Conjoining final consonant -> compatibility jamo.
    private(set) lazy var compatibilityFinalConsonantConvertTable: [Character: Character] =
        KoreanUnicode.makeConvertTable(
            from: finalConsonant,
            to: ["\u{3131}", "\u{3132}", "\u{3133}", "\u{3134}", "\u{3135}", "\u{3136}", "\u{3137}",
                 "\u{3139}", "\u{313A}", "\u{313B}", "\u{313C}", "\u{313D}", "\u{313E}", "\u{313F}",
                 "\u{3140}", "\u{3141}", "\u{3142}", "\u{3144}", "\u{3145}", "\u{3146}", "\u{3147}",
                 "\u{3148}", "\u{314A}", "\u{314B}", "\u{314C}", "\u{314D}", "\u{314E}"])

    /// Pairs each scalar of `conjoining` (ascending) with the scalar at the same
    /// position in `compatibility` and returns the lookup table.
    private static func makeConvertTable(
        from conjoining: ClosedRange<Unicode.Scalar>,
        to compatibility: [Unicode.Scalar]
    ) -> [Character: Character] {
        let sources = (conjoining.lowerBound.value...conjoining.upperBound.value)
            .compactMap { Unicode.Scalar($0) }
        // A length mismatch would silently shift every mapping, so fail loudly.
        precondition(sources.count == compatibility.count, "jamo conversion table is misaligned")
        let pairs = zip(sources, compatibility).map { pair in
            (Character(pair.0), Character(pair.1))
        }
        return Dictionary(uniqueKeysWithValues: pairs)
    }

    /// Whether `unicodeScalar` is a conjoining initial consonant.
    func isInitialConsonant(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return initialConsonant.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` is a conjoining medial vowel.
    func isMedial(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return medial.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` is a conjoining final consonant.
    func isFinalConsonant(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return finalConsonant.contains(unicodeScalar)
    }

    /// Whether `unicodeScalar` lies in any of the Hangul ranges.
    ///
    /// Checks the five ranges directly instead of searching the `korean` array.
    func isKorean(_ unicodeScalar: Unicode.Scalar) -> Bool {
        return hangulRanges.contains { $0.contains(unicodeScalar) }
    }

    /// Whether every scalar that makes up `character` is Hangul.
    func isKorean(_ character: Character) -> Bool {
        return character.unicodeScalars.allSatisfy { isKorean($0) }
    }

    /// Returns `true` when the string consists of Hangul only, `false` otherwise.
    ///
    /// The check runs on the compatibility-decomposed (NFKD) form; an empty
    /// string yields `true`.
    func isKorean(_ string: String) -> Bool {
        return string
            .decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .allSatisfy { isKorean($0) }
    }

    /// Returns `true` when the string contains at least one Hangul scalar,
    /// `false` when it contains none.
    func hasKorean(_ string: String) -> Bool {
        return string
            .decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .contains { isKorean($0) }
    }

    /// Converts a single conjoining jamo into its compatibility jamo.
    ///
    /// The initial, medial and final tables are consulted in that order;
    /// characters found in none of them are returned unchanged.
    func koreanCompatibilityJamoCharacterFromNFDCharacter(_ character: Character) -> Character {
        return compatibilityInitialConsonantConvertTable[character]
            ?? compatibilityMedialConvertTable[character]
            ?? compatibilityFinalConsonantConvertTable[character]
            ?? character
    }
}
// Shared `KoreanUnicode` instance used by the `String` helpers and the demo code below.
let korean = KoreanUnicode()
/// Hangul helpers backed by the shared `korean` instance.
extension String {
    /// `true` when `korean.isKorean(_:)` accepts the whole string as Hangul.
    var isKorean: Bool {
        return korean.isKorean(self)
    }

    /// `true` when `korean.hasKorean(_:)` finds Hangul in the string.
    var hasKorean: Bool {
        return korean.hasKorean(self)
    }

    /// The string with every non-Hangul character removed.
    var koreanOnly: String {
        return filter { korean.isKorean($0) }
    }

    /// The conjoining initial consonants of the compatibility-decomposed (NFKD)
    /// string, in order of appearance.
    var koreanInitialConsonantOnly: String {
        var initials = String.UnicodeScalarView()
        for scalar in decomposedStringWithCompatibilityMapping.unicodeScalars
        where korean.isInitialConsonant(scalar) {
            initials.append(scalar)
        }
        return String(initials)
    }

    /// The scalars of the compatibility-decomposed (NFKD) string.
    func decomposedUnicodeScalars() -> [Unicode.Scalar] {
        return Array(decomposedStringWithCompatibilityMapping.unicodeScalars)
    }

    /// The compatibility-decomposed (NFKD) string.
    func decomposed() -> String {
        // Splitting into scalars and re-joining them reproduces the same scalar
        // sequence, so the decomposed string can be returned directly.
        return decomposedStringWithCompatibilityMapping
    }

    /// The string with every conjoining jamo replaced by its compatibility jamo.
    ///
    /// The conversion runs per Unicode scalar: a decomposed syllable's jamo join
    /// into a single `Character`, which would never match the single-jamo
    /// conversion tables. (The misspelled name is kept for existing callers.)
    var toKoreanCompatiblityJamo: String {
        return decomposedStringWithCompatibilityMapping
            .unicodeScalars
            .map { String(korean.koreanCompatibilityJamoCharacterFromNFDCharacter(Character($0))) }
            .joined()
    }
}
// Demo: decompose a mixed-script string and inspect it scalar by scalar.
let a: String = "안녕하세요.뿡뿡뿡helloおはよう宜しくね脳"
let b = a.decomposed()

// One `Character` per Unicode scalar, before and after decomposition.
print(a.unicodeScalars.map { Character($0) })
print(b.unicodeScalars.map { Character($0) })

// Compare the original string with its decomposed form.
print(a == b)

// Convert each decomposed scalar to compatibility jamo and print the result as one string.
print(b.unicodeScalars.reduce(into: "") { converted, scalar in
    converted.append(korean.koreanCompatibilityJamoCharacterFromNFDCharacter(Character(scalar)))
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment