Skip to content

Instantly share code, notes, and snippets.

@yawaramin
Last active May 28, 2025 04:21
Show Gist options
  • Save yawaramin/75f0a35ece0cc7cf208d6c04e6e82bb9 to your computer and use it in GitHub Desktop.
Save yawaramin/75f0a35ece0cc7cf208d6c04e6e82bb9 to your computer and use it in GitHub Desktop.
OCaml Angstrom parser for JSON
open Angstrom
let ( <?> ) p l =
let* idx = pos in
let* remaining = available in
let remaining = min remaining 25 in
let* s = peek_string remaining in
p <?> Printf.sprintf "\n@%d: expected: %s, got: %s" idx l s
let delim ~start ~item ~sep ~stop =
let* _ = start in
fix (fun m ->
(sep *> stop *> commit *> fail "trailing delimiter" <?> "item")
<|> (stop *> return [])
<|> (let+ i = item and+ _ = stop in [i])
<|> (let+ i = item and+ _ = sep and+ rest = m in i :: rest))
let parse p s = match parse_string ~consume:Consume.All p s with
| Ok v -> v
| Error s ->
prerr_endline s;
failwith "Parse error"
let ws = skip_while (function
| '\x20' | '\x0a' | '\x0d' | '\x09' -> true
| _ -> false) <?> "whitespace"
let comma = ws *> (string "," <?> "comma") <* ws
let colon = ws *> (string ":" <?> "colon") <* ws
let lbrace = ws *> (string "{" <?> "left brace") <* ws
let rbrace = ws *> (string "}" <?> "right brace") <* ws
let lbrack = ws *> (string "[" <?> "left bracket") <* ws
let rbrack = ws *> (string "]" <?> "right bracket") <* ws
let lquo = ws *> (char '"' <?> "left quote")
let rquo = (char '"' <?> "right quote") <* ws
let true_ = (ws *> (string "true" <?> "true") <* ws) *> return `True
let false_ = (ws *> (string "false" <?> "false") <* ws) *> return `False
let null_ = (ws *> (string "null" <?> "null") <* ws) *> return `Null
let number =
(let num = take_while1 (function
| '0' .. '9' -> true
| _ -> false)
in
let+ _ = ws
and+ whole_part = num
and+ frac_part = option "" (char '.' *> num)
and+ _ = ws in
`Number (float_of_string (whole_part ^ "." ^ frac_part))) <?> "number"
let string_ =
lquo *>
fix (fun s ->
choice [
(let+ _ = string {|\\|} and+ rest = s in {|\|} ^ rest);
(let+ _ = string {|\"|} and+ rest = s in {|"|} ^ rest);
char '"' *> ws *> return "";
(let+ init = take_till (function '"' | '\\' -> true | _ -> false)
and+ rest = s in
init ^ rest);
]) <?> "string"
let json = fix (fun json ->
let array =
(let+ items = delim ~start:lbrack ~item:json ~sep:comma ~stop:rbrack in
`Array items) <?> "array"
and object_ =
(let kv =
(let+ _ = ws
and+ k = string_ <?> "key"
and+ _ = ws
and+ _ = colon
and+ _ = ws
and+ v = json
and+ _ = ws in
k, v) <?> "key-value pair"
in
let+ items = delim ~start:lbrace ~item:kv ~sep:comma ~stop:rbrace in
`Object items) <?> "object"
in
let* _ = ws in
let* ch = peek_char in
match ch with
| Some 'n' -> null_
| Some 't' -> true_
| Some 'f' -> false_
| Some '"' -> let+ s = string_ in `String s
| Some '0' .. '9' -> number
| Some '[' -> array
| Some '{' -> object_
| _ -> fail "invalid" <?> "JSON"
) <* end_of_input <?> "JSON"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment