Last active
May 28, 2025 04:21
-
-
Save yawaramin/75f0a35ece0cc7cf208d6c04e6e82bb9 to your computer and use it in GitHub Desktop.
OCaml Angstrom parser for JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Angstrom | |
let ( <?> ) p l = | |
let* idx = pos in | |
let* remaining = available in | |
let remaining = min remaining 25 in | |
let* s = peek_string remaining in | |
p <?> Printf.sprintf "\n@%d: expected: %s, got: %s" idx l s | |
let delim ~start ~item ~sep ~stop = | |
let* _ = start in | |
fix (fun m -> | |
(sep *> stop *> commit *> fail "trailing delimiter" <?> "item") | |
<|> (stop *> return []) | |
<|> (let+ i = item and+ _ = stop in [i]) | |
<|> (let+ i = item and+ _ = sep and+ rest = m in i :: rest)) | |
let parse p s = match parse_string ~consume:Consume.All p s with | |
| Ok v -> v | |
| Error s -> | |
prerr_endline s; | |
failwith "Parse error" | |
let ws = skip_while (function | |
| '\x20' | '\x0a' | '\x0d' | '\x09' -> true | |
| _ -> false) <?> "whitespace" | |
let comma = ws *> (string "," <?> "comma") <* ws | |
let colon = ws *> (string ":" <?> "colon") <* ws | |
let lbrace = ws *> (string "{" <?> "left brace") <* ws | |
let rbrace = ws *> (string "}" <?> "right brace") <* ws | |
let lbrack = ws *> (string "[" <?> "left bracket") <* ws | |
let rbrack = ws *> (string "]" <?> "right bracket") <* ws | |
let lquo = ws *> (char '"' <?> "left quote") | |
let rquo = (char '"' <?> "right quote") <* ws | |
let true_ = (ws *> (string "true" <?> "true") <* ws) *> return `True | |
let false_ = (ws *> (string "false" <?> "false") <* ws) *> return `False | |
let null_ = (ws *> (string "null" <?> "null") <* ws) *> return `Null | |
let number = | |
(let num = take_while1 (function | |
| '0' .. '9' -> true | |
| _ -> false) | |
in | |
let+ _ = ws | |
and+ whole_part = num | |
and+ frac_part = option "" (char '.' *> num) | |
and+ _ = ws in | |
`Number (float_of_string (whole_part ^ "." ^ frac_part))) <?> "number" | |
let string_ = | |
lquo *> | |
fix (fun s -> | |
choice [ | |
(let+ _ = string {|\\|} and+ rest = s in {|\|} ^ rest); | |
(let+ _ = string {|\"|} and+ rest = s in {|"|} ^ rest); | |
char '"' *> ws *> return ""; | |
(let+ init = take_till (function '"' | '\\' -> true | _ -> false) | |
and+ rest = s in | |
init ^ rest); | |
]) <?> "string" | |
let json = fix (fun json -> | |
let array = | |
(let+ items = delim ~start:lbrack ~item:json ~sep:comma ~stop:rbrack in | |
`Array items) <?> "array" | |
and object_ = | |
(let kv = | |
(let+ _ = ws | |
and+ k = string_ <?> "key" | |
and+ _ = ws | |
and+ _ = colon | |
and+ _ = ws | |
and+ v = json | |
and+ _ = ws in | |
k, v) <?> "key-value pair" | |
in | |
let+ items = delim ~start:lbrace ~item:kv ~sep:comma ~stop:rbrace in | |
`Object items) <?> "object" | |
in | |
let* _ = ws in | |
let* ch = peek_char in | |
match ch with | |
| Some 'n' -> null_ | |
| Some 't' -> true_ | |
| Some 'f' -> false_ | |
| Some '"' -> let+ s = string_ in `String s | |
| Some '0' .. '9' -> number | |
| Some '[' -> array | |
| Some '{' -> object_ | |
| _ -> fail "invalid" <?> "JSON" | |
) <* end_of_input <?> "JSON" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment