Skip to content

Instantly share code, notes, and snippets.

@wotjd
Last active November 25, 2024 08:57
Show Gist options
  • Save wotjd/61738b60ed6afd825774db77071aa5ca to your computer and use it in GitHub Desktop.
Save wotjd/61738b60ed6afd825774db77071aa5ca to your computer and use it in GitHub Desktop.
poor man's hangul fuzzy search implementation
import Foundation
struct FuzzySearch {
struct Match {
let weight: Double
let indexes: [String.Index]
let string: String
}
struct Option: OptionSet {
let rawValue: Int
static let caseInsensitive = Option(rawValue: 1 << 0)
// ignore diacritic difference (for example, "naïve" and "naive").
// static let diacriticInsensitive = Option(rawValue: 1 << 1)
// compatibility mapping for example, ® and r
// static let compatibilityMapping
// static let partialHangulMatch = Option(rawValue: 1 << 7)
static let hangulChosungMatch = Option(rawValue: 1 << 8)
}
enum Scale {
case byMaxDistance
case byTotalDistance
}
private let chosungDict = "ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ".enumerated().reduce(into: [Character: Character]()) { dict, enumerated in
let pair = Character(Unicode.Scalar(0x1100 + enumerated.offset)!)
dict[enumerated.element] = pair
dict[pair] = enumerated.element
}
var matchOption: Option = [.caseInsensitive, .hangulChosungMatch]
var scale = Scale.byMaxDistance
var countsFirstMatchDistance = false
private func calculateWeight(
stringCount: Int,
firstMatchDistance: Int,
matchCount: Int,
totalDistance: Int
) -> Double {
// Double((stringCount - firstMatchDistance) * stringCount + matchCount) / Double(stringCount * stringCount) + 1 / Double(totalDistance + 2)
1 / Double(totalDistance + 2)
}
func callAsFunction(
keyword: String,
from string: String
) -> Match? {
guard keyword.count <= string.count && !keyword.isEmpty else { return nil }
var (keyword, string) = if matchOption.contains(.caseInsensitive) {
(keyword.lowercased(), string.lowercased())
} else {
(keyword, string)
}
var indexes = [String.Index]()
var distance = 0
func findIndex(of character: Character, from index: String.Index) -> String.Index? {
string[index..<string.endIndex].firstIndex {
if $0 == character {
true
} else if
matchOption.contains(.hangulChosungMatch),
let anotherChosung = chosungDict[character]
{
String($0).decomposedStringWithCanonicalMapping.contains {
[anotherChosung, character]
.compactMap(\.unicodeScalars.first)
.contains($0.unicodeScalars.first)
}
} else {
false
}
}
}
for character in keyword {
let lastIndex = 0 < indexes.count ? indexes[indexes.count - 1] : nil
let cursorIndex = lastIndex.map(string.index(after:)) ?? string.startIndex
guard
cursorIndex < string.endIndex,
let index = findIndex(of: character, from: cursorIndex)
else { return nil }
indexes.append(index)
guard lastIndex != nil || countsFirstMatchDistance else { continue }
let currentDistance = string.distance(from: cursorIndex, to: index)
distance = if scale == .byMaxDistance {
max(distance, currentDistance)
} else {
distance + currentDistance
}
}
return Match(
weight:
calculateWeight(
stringCount: string.count,
firstMatchDistance: string.distance(from: string.startIndex, to: indexes[0]),
matchCount: indexes.count,
totalDistance: distance
),
indexes: indexes,
string: string
)
}
func callAsFunction(
keyword: String,
from strings: [String]
) -> [Match] {
strings.compactMap { callAsFunction(keyword: keyword, from: $0) }
}
}
// usage
FuzzySearch()(keyword: "ㄷㄹㅈ", from: "다람쥐 쳇바퀴 어쩌구")
FuzzySearch()(keyword: "도군", from: ["강원도 고성군", "경기도 용인시", "충청남도 금산군", "서울특별시"])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment