Last active
March 25, 2021 08:41
-
-
Save Jimmy-Prime/fbe5004203d8d0787804dcbf0bf18a82 to your computer and use it in GitHub Desktop.
serialize json from scratch
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Foundation | |
// Valid JSON formats:
// 1. null
// 2. String
// 3. Number
// 4. { "key": JSON, "key": JSON, ... }
// 5. [JSON, JSON, ...]
/// The mode the byte scanner is in; it selects which handler receives the next byte.
enum Status {
    // Value scanning: before a value starts, or inside a string / number value.
    case startOfJSON, inString, inNumber

    // Object keys: waiting for the opening quote, inside the key, or after its closing quote
    // (waiting for the `:` separator).
    case waitForKey, inKey, endKey

    // A value has just ended: a `,` or a closing bracket is expected next.
    case endObject

    // The outermost value is complete.
    case done
}
/// One entry of the nesting stack kept while scanning.
enum Path {
    /// An open container: `[` pushes `array`, `{` pushes `object`.
    case array, object

    /// An object key whose value has not been completed yet; carries the decoded key text.
    case key(String)
}
/// Snapshot of the scanner: current mode, nesting stack and cursor positions.
struct State {
    /// Mode that decides which handler processes the next byte.
    var status = Status.startOfJSON
    /// Stack of open containers and pending keys, outermost first.
    var paths = [Path]()
    /// Offset of the first byte of the string, key or number currently being read.
    var bufferStartIndex = 0
    /// Set after a backslash inside a string or key so the following byte is skipped.
    var escapeNext = false
    /// Offset of the byte currently being processed.
    var index = 0
}
/// ASCII constants and character classes used when scanning JSON bytes.
extension UInt8 {
    // Structural characters.
    static var leftCurlyBracket: UInt8 { UInt8(ascii: "{") }
    static var rightCurlyBracket: UInt8 { UInt8(ascii: "}") }
    static var leftSquareBracket: UInt8 { UInt8(ascii: "[") }
    static var rightSquareBracket: UInt8 { UInt8(ascii: "]") }
    static var comma: UInt8 { UInt8(ascii: ",") }
    static var quotation: UInt8 { UInt8(ascii: "\"") }
    static var colon: UInt8 { UInt8(ascii: ":") }

    // Insignificant whitespace. JSON allows exactly these four bytes between tokens.
    static var tab: UInt8 { UInt8(ascii: "\t") }
    static var newline: UInt8 { UInt8(ascii: "\n") }
    static var carriageReturn: UInt8 { UInt8(ascii: "\r") }
    static var space: UInt8 { UInt8(ascii: " ") }

    /// The backslash that introduces an escape sequence inside a string.
    static var escapeControl: UInt8 { UInt8(ascii: "\\") }

    // Letters of the `null` literal.
    static var n: UInt8 { UInt8(ascii: "n") }
    static var u: UInt8 { UInt8(ascii: "u") }
    static var l: UInt8 { UInt8(ascii: "l") }

    /// Whether the byte is an ASCII digit `0`...`9`.
    var isDecimal: Bool {
        (UInt8(ascii: "0")...UInt8(ascii: "9")).contains(self)
    }

    /// Whether the byte is JSON whitespace: space, tab, line feed or carriage return.
    ///
    /// Carriage return is included so that input with CRLF line endings is accepted.
    var isWhiteSpace: Bool {
        switch self {
        case .space, .tab, .newline, .carriageReturn:
            return true
        default:
            return false
        }
    }
}
/// Error thrown when the input is not well-formed.
struct IllForm: Error {
    /// The byte that could not be accepted; `0` when the input ended prematurely.
    let byte: UInt8
    /// Scanner state at the moment the error was raised.
    let state: State

    init(byte: UInt8, state: State) {
        self.byte = byte
        self.state = state
    }
}
/// Byte-by-byte scanner that checks whether `data` holds one well-formed UTF-8 JSON value.
///
/// The scanner produces no output; `serialize()` either returns normally or throws `IllForm`.
/// Accepted values are `null`, `true`, `false`, strings, numbers, objects and arrays
/// (including empty ones), optionally surrounded by whitespace.
class Serializer {
    /// The raw bytes being scanned.
    let data: Data
    /// Current scanner state. `state.index` is an offset from the start of `data`.
    var state = State()
    /// True while the most recent non-whitespace byte was `[` or `{`. This is the only
    /// position where a closing bracket may appear without a preceding value (empty
    /// container); after a `,` it stays false so trailing commas are still rejected.
    private var containerJustOpened = false

    private static let minus = UInt8(ascii: "-")
    private static let plus = UInt8(ascii: "+")
    private static let decimalPoint = UInt8(ascii: ".")
    private static let zero = UInt8(ascii: "0")
    private static let lowerE = UInt8(ascii: "e")
    private static let upperE = UInt8(ascii: "E")
    private static let lowerT = UInt8(ascii: "t")
    private static let lowerF = UInt8(ascii: "f")

    init(data: Data) {
        self.data = data
    }

    /// Scans the whole input.
    /// - Throws: `IllForm` carrying the offending byte (or `0` at end of input) and the
    ///   scanner state when the input is not a single well-formed JSON value.
    func serialize() throws {
        while state.index < data.count {
            let byte = readByte(at: state.index)
            switch state.status {
            case .startOfJSON:
                try handleStartOfJSON(byte)
            case .inString:
                try handleInString(byte)
            case .inNumber:
                try handleInNumber(byte)
            case .waitForKey:
                try handleWaitForKey(byte)
            case .inKey:
                try handleInKey(byte)
            case .endKey:
                try handleEndKey(byte)
            case .endObject:
                try handleEndObject(byte)
            case .done:
                // Only insignificant whitespace may follow the top-level value.
                guard byte.isWhiteSpace else {
                    throw IllForm(byte: byte, state: state)
                }
            }
            state.index += 1
        }
        // A number has no closing delimiter, so one that runs up to the end of the input
        // has to be completed here.
        if case .inNumber = state.status {
            try finishNumber(terminator: 0)
        }
        guard case .done = state.status else {
            throw IllForm(byte: 0, state: state)
        }
    }

    /// Handles the first significant byte of a value.
    func handleStartOfJSON(_ byte: UInt8) throws {
        switch byte {
        case .leftCurlyBracket:
            // start of object
            state.status = .waitForKey
            state.paths.append(.object)
            containerJustOpened = true
        case .leftSquareBracket:
            // start of array; the status stays `.startOfJSON` for the first element
            state.paths.append(.array)
            containerJustOpened = true
        case .rightSquareBracket:
            // `]` is legal in value position only when it closes an empty array
            guard containerJustOpened, case .array? = state.paths.last else {
                throw IllForm(byte: byte, state: state)
            }
            state.paths.removeLast()
            finishValue()
        case .quotation:
            // start of string; the buffer begins after the opening quote
            containerJustOpened = false
            state.status = .inString
            state.bufferStartIndex = state.index + 1
        case _ where byte.isDecimal || byte == Serializer.minus:
            // start of number
            containerJustOpened = false
            state.status = .inNumber
            state.bufferStartIndex = state.index
        case _ where byte.isWhiteSpace:
            // ignore
            break
        case .n:
            try consumeLiteral("null", firstByte: byte)
        case Serializer.lowerT:
            try consumeLiteral("true", firstByte: byte)
        case Serializer.lowerF:
            try consumeLiteral("false", firstByte: byte)
        default:
            throw IllForm(byte: byte, state: state)
        }
    }

    /// Handles a byte inside a string value; an unescaped `"` ends the string.
    func handleInString(_ byte: UInt8) throws {
        if state.escapeNext {
            // The byte right after a backslash never terminates the string.
            state.escapeNext = false
            return
        }
        switch byte {
        case .quotation:
            // end of string: the collected bytes must be valid UTF-8
            let contents = slice(from: state.bufferStartIndex, to: state.index)
            guard String(bytes: contents, encoding: .utf8) != nil else {
                throw IllForm(byte: byte, state: state)
            }
            state.bufferStartIndex = 0
            finishValue()
        case .escapeControl:
            state.escapeNext = true
        default:
            break
        }
    }

    /// Handles a byte while a number is being read. The first byte that cannot belong to a
    /// number ends it; that byte is then processed as whatever follows a completed value.
    func handleInNumber(_ byte: UInt8) throws {
        if Serializer.isNumberByte(byte) {
            return
        }
        try finishNumber(terminator: byte)
        if case .done = state.status {
            // The number was the top-level value: only whitespace may follow it.
            guard byte.isWhiteSpace else {
                throw IllForm(byte: byte, state: state)
            }
        } else {
            try handleEndObject(byte)
        }
    }

    /// Handles a byte where an object key is expected.
    func handleWaitForKey(_ byte: UInt8) throws {
        switch byte {
        case .quotation:
            // start of key; the buffer begins after the opening quote
            containerJustOpened = false
            state.status = .inKey
            state.bufferStartIndex = state.index + 1
        case .rightCurlyBracket:
            // `}` is legal in key position only when it closes an empty object
            guard containerJustOpened, case .object? = state.paths.last else {
                throw IllForm(byte: byte, state: state)
            }
            state.paths.removeLast()
            finishValue()
        case _ where byte.isWhiteSpace:
            // ignore
            break
        default:
            throw IllForm(byte: byte, state: state)
        }
    }

    /// Handles a byte inside an object key; an unescaped `"` ends the key and pushes it
    /// onto the path stack.
    func handleInKey(_ byte: UInt8) throws {
        if state.escapeNext {
            state.escapeNext = false
            return
        }
        switch byte {
        case .quotation:
            // end of key
            let contents = slice(from: state.bufferStartIndex, to: state.index)
            guard let key = String(bytes: contents, encoding: .utf8) else {
                throw IllForm(byte: byte, state: state)
            }
            state.status = .endKey
            state.paths.append(.key(key))
            state.bufferStartIndex = 0
        case .escapeControl:
            state.escapeNext = true
        default:
            break
        }
    }

    /// Handles a byte after a key: only whitespace and the `:` separator are accepted.
    func handleEndKey(_ byte: UInt8) throws {
        switch byte {
        case .colon:
            // the value that follows is itself a JSON value
            state.status = .startOfJSON
        case _ where byte.isWhiteSpace:
            // ignore
            break
        default:
            throw IllForm(byte: byte, state: state)
        }
    }

    /// Handles a byte after a completed value inside a container: whitespace, a `,`
    /// introducing the next element, or the bracket closing the enclosing container.
    func handleEndObject(_ byte: UInt8) throws {
        switch byte {
        case _ where byte.isWhiteSpace:
            // ignore
            break
        case .comma:
            guard let lastPath = state.paths.last else {
                throw IllForm(byte: byte, state: state)
            }
            switch lastPath {
            case .array:
                state.status = .startOfJSON
            case .object:
                state.status = .waitForKey
            case .key:
                throw IllForm(byte: byte, state: state)
            }
        case .rightCurlyBracket:
            // end of the enclosing object
            guard case .object? = state.paths.popLast() else {
                throw IllForm(byte: byte, state: state)
            }
            finishValue()
        case .rightSquareBracket:
            // end of the enclosing array
            guard case .array? = state.paths.popLast() else {
                throw IllForm(byte: byte, state: state)
            }
            finishValue()
        default:
            throw IllForm(byte: byte, state: state)
        }
        if state.paths.isEmpty {
            state.status = .done
        }
    }

    /// Removes the pending key from the path stack once its value has been completed.
    func popLastKeyIfNeeded() {
        if case .key? = state.paths.last {
            state.paths.removeLast()
        }
    }

    // MARK: - Private helpers

    /// Reads the byte at `offset` from the start of `data`; works for `Data` slices whose
    /// `startIndex` is not zero.
    private func readByte(at offset: Int) -> UInt8 {
        data[data.startIndex + offset]
    }

    /// Returns the bytes in the half-open offset range `start ..< end`.
    private func slice(from start: Int, to end: Int) -> Data {
        data[(data.startIndex + start) ..< (data.startIndex + end)]
    }

    /// Records that a value is complete: drops its pending key and moves to `.done` when
    /// nothing is left open, otherwise to `.endObject`.
    private func finishValue() {
        containerJustOpened = false
        popLastKeyIfNeeded()
        state.status = state.paths.isEmpty ? .done : .endObject
    }

    /// Matches the keyword `literal` starting at the current index and completes the value.
    private func consumeLiteral(_ literal: String, firstByte: UInt8) throws {
        let expected = Array(literal.utf8)
        let end = state.index + expected.count
        guard end <= data.count,
              slice(from: state.index, to: end).elementsEqual(expected) else {
            throw IllForm(byte: firstByte, state: state)
        }
        // Park the index on the literal's last byte; the main loop steps past it.
        state.index = end - 1
        finishValue()
    }

    /// Validates the buffered number text and completes the value.
    private func finishNumber(terminator: UInt8) throws {
        let text = slice(from: state.bufferStartIndex, to: state.index)
        guard Serializer.isValidNumber(text) else {
            throw IllForm(byte: terminator, state: state)
        }
        state.bufferStartIndex = 0
        finishValue()
    }

    /// Whether `byte` may appear somewhere inside a JSON number.
    private static func isNumberByte(_ byte: UInt8) -> Bool {
        byte.isDecimal
            || byte == minus || byte == plus
            || byte == decimalPoint
            || byte == lowerE || byte == upperE
    }

    /// Checks `text` against the JSON number grammar:
    /// `-? (0 | [1-9][0-9]*) (. [0-9]+)? ([eE] [+-]? [0-9]+)?`
    private static func isValidNumber(_ text: Data) -> Bool {
        let bytes = [UInt8](text)
        var position = 0

        // Consumes a run of digits and reports how many were read.
        func skipDigits() -> Int {
            let start = position
            while position < bytes.count, bytes[position].isDecimal {
                position += 1
            }
            return position - start
        }

        if position < bytes.count, bytes[position] == minus {
            position += 1
        }

        // Integer part: at least one digit, and no leading zero on multi-digit integers.
        let integerStart = position
        let integerDigits = skipDigits()
        guard integerDigits > 0 else { return false }
        if integerDigits > 1, bytes[integerStart] == zero { return false }

        // Optional fraction.
        if position < bytes.count, bytes[position] == decimalPoint {
            position += 1
            guard skipDigits() > 0 else { return false }
        }

        // Optional exponent.
        if position < bytes.count, bytes[position] == lowerE || bytes[position] == upperE {
            position += 1
            if position < bytes.count, bytes[position] == plus || bytes[position] == minus {
                position += 1
            }
            guard skipDigits() > 0 else { return false }
        }

        return position == bytes.count
    }
}
// Sample document exercising nested objects, arrays, strings, numbers and null.
let sampleJSON = """
{
"id": 100,
"name": "jimmylee",
"version": [0, 0, 1],
"avatar": null,
"prop": {
"start": "a",
"end": "b",
"favorite": [1, 2, 3],
"pref": {
"zone": "GMT",
"format": "yyyyMMMdd"
}
}
}
"""

// Scan the sample; an ill-formed document surfaces as an uncaught `IllForm` error.
try Serializer(data: Data(sampleJSON.utf8)).serialize()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment