Last active
August 4, 2020 23:45
-
-
Save harlanhaskins/4331dc15961ab4a71c1356a2ed569fe0 to your computer and use it in GitHub Desktop.
Swift String Scanner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
/// `StringScanner` is a fast scanner for Strings and String-like objects.
/// It's used to extract structured bits from unstructured strings, while
/// avoiding making extra copies of string bits until absolutely necessary.
/// You can build Scanners over Substrings, allowing you to scan
/// parts of strings and use smaller, more specialized scanners to extract bits
/// of that String without needing to reuse another scanner.
///
/// The scanner keeps a cursor (`index`) into `input`. Scanning and skipping
/// methods move the cursor forward; scanned results are returned as
/// subsequences of the original input.
public struct StringScanner<Input: StringProtocol> {
  /// The string or substring being scanned.
  let input: Input

  /// The cursor: the index of the next character to be consumed.
  var index: Input.Index

  /// Creates a new scanner over the provided input, with the cursor at the
  /// start of the input.
  /// - Parameter input: The string or substring to scan.
  public init(_ input: Input) {
    self.input = input
    self.index = input.startIndex
  }

  /// The character under the cursor, or `nil` once the cursor has reached the
  /// end of the input.
  var currentChar: Character? {
    return index < input.endIndex ? input[index] : nil
  }

  /// Moves the cursor forward by one character. Does nothing past the end:
  /// the limit keeps the cursor from moving beyond `input.endIndex`.
  mutating func advance() {
    _ = input.formIndex(&index, offsetBy: 1, limitedBy: input.endIndex)
  }

  /// Whether this scanner still has input left to consume, i.e. the cursor
  /// has not yet reached the end of the input string.
  public var hasInput: Bool {
    return index < input.endIndex
  }

  /// Attempts to scan an integer from the current position in the input string.
  ///
  /// If the cursor is not pointing to an integer, or the run of digits cannot
  /// be represented as an `Int` (for example because it overflows, or because
  /// it uses digits that `Int` does not parse), this function returns `nil`
  /// and leaves the cursor where it was.
  public mutating func scanInt() -> Int? {
    let start = index
    let digits = scan(.decimalDigits)
    guard let value = Int(digits) else {
      // Nothing usable was scanned: do not consume the digits.
      index = start
      return nil
    }
    return value
  }

  /// Scans multiple integers separated by the provided separator set.
  /// It stops scanning as soon as the first non-integer character is found
  /// that is not in the separator set.
  ///
  /// Digit runs that cannot be converted to an `Int` (empty runs, values that
  /// overflow `Int`, digits `Int` does not parse) are omitted from the result
  /// instead of trapping.
  ///
  /// - Parameter separator: The character set that separates each integer
  ///                        in the input.
  /// - Returns: All integers from the current cursor position, separated by
  ///            the separator character.
  public mutating func scanInts(separatedBy separator: CharacterSet) -> [Int] {
    let rawInts = scan(.decimalDigits, separatedBy: separator)
    return rawInts.compactMap { Int($0) }
  }

  /// Scans the provided characters in the provided character set, each time
  /// skipping characters in a separator character set.
  /// You can use this to extract similar data separated by a given separator.
  ///
  /// The first entry is scanned unconditionally, so it is an empty
  /// subsequence when the cursor does not start on a member of `chars`.
  ///
  /// - Parameters:
  ///   - chars: The set of characters you're trying to keep.
  ///   - separator: The set of separator characters between each entry
  ///                you're scanning.
  /// - Returns: All substrings that matched the initial character set with the
  ///            `separator` set between them.
  public mutating func scan(
    _ chars: CharacterSet,
    separatedBy separator: CharacterSet
  ) -> [Input.SubSequence] {
    var results = [Input.SubSequence]()
    repeat {
      results.append(scan(chars))
      // No separator after this entry means the list has ended.
      guard isAt(separator) else {
        break
      }
      skip(separator)
    } while isAt(chars)
    return results
  }

  /// All remaining input that has yet to be consumed. Useful for debugging.
  public var remainingInput: Input.SubSequence {
    return input[index...]
  }

  /// Determines if the scanner is currently pointing to a member of a character
  /// set. A character counts as a member only if every one of its Unicode
  /// scalars is contained in the set. Returns `false` at the end of input.
  /// - Parameter chars: The character set you're testing the current character
  ///                    against.
  public func isAt(_ chars: CharacterSet) -> Bool {
    guard let c = currentChar else { return false }
    return c.unicodeScalars.allSatisfy { chars.contains($0) }
  }

  /// Scans and saves all characters up to, but not including, the first
  /// character that is contained within the provided character set.
  ///
  /// - Parameter chars: The character set that signals the end of the scanned
  ///                    region.
  /// - Returns: The sequence of characters up to, but not including, the first
  ///            character that appears in the provided character set. If no
  ///            such character is found, this is the rest of the input.
  public mutating func scanUpTo(_ chars: CharacterSet) -> Input.SubSequence {
    let start = index
    skip(to: chars)
    return input[start..<index]
  }

  /// Scans and saves all characters that are contained within the provided
  /// character set.
  ///
  /// - Parameter chars: The character set that each scanned character must
  ///                    belong to.
  /// - Returns: The sequence of characters that all are contained within the
  ///            character set, starting at the current character. Empty if the
  ///            current character is not in the set.
  public mutating func scan(_ chars: CharacterSet) -> Input.SubSequence {
    let start = index
    skip(chars)
    return input[start..<index]
  }

  /// Scans the exact string or substring provided if it appears at the
  /// scanner's cursor, and returns the matching part of the input.
  ///
  /// - Parameter string: The string to match.
  /// - Returns: The subsequence of the input that matched `string`, or `nil`
  ///            if the input at the cursor does not match. On `nil` the
  ///            cursor is left unchanged.
  public mutating func scan<Str: StringProtocol>(
    _ string: Str
  ) -> Input.SubSequence? {
    let start = index
    guard skip(string) else { return nil }
    return input[start..<index]
  }

  /// Skips the exact string or substring provided, if it is currently at the
  /// Scanner's cursor.
  ///
  /// The match is all-or-nothing: if the input differs from `string`, or ends
  /// before all of `string` has been matched, the cursor is restored to where
  /// it started.
  ///
  /// - Parameter string: The string to skip.
  /// - Returns: `true` if the whole string was matched and skipped, `false`
  ///            otherwise.
  @discardableResult
  public mutating func skip<Str: StringProtocol>(_ string: Str) -> Bool {
    let start = index
    for expected in string {
      // `currentChar` is nil at end of input, which also fails this check.
      guard currentChar == expected else {
        index = start
        return false
      }
      advance()
    }
    return true
  }

  /// Skips all characters up to, but not including, the first character in the
  /// provided character set. Stops at the end of input if no such character
  /// is found.
  ///
  /// - Parameter chars: The character set that signals the end of the skipped
  ///                    region.
  public mutating func skip(to chars: CharacterSet) {
    while currentChar != nil && !isAt(chars) {
      advance()
    }
  }

  /// Skips all characters in the provided character set.
  ///
  /// - Parameter chars: The character set that should be skipped.
  public mutating func skip(_ chars: CharacterSet) {
    while isAt(chars) {
      advance()
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment