MonoidMusician · October 8, 2025 14:05
diff --git a/ParseMachine.hs b/ParseMachine.hs
 -- A parsing machine inspired by “Comonads as Spaces” by Phil Freeman
 -- (https://blog.functorial.com/posts/2016-08-07-Comonads-As-Spaces.html)
 {-# LANGUAGE ScopedTypeVariables, OverloadedStrings, DerivingVia, GeneralizedNewtypeDeriving, LambdaCase, BlockArguments, DataKinds #-}
 module ParseMachine where

 import Prelude hiding (filter, lex)
 import qualified Data.Map as Map
 import Control.Applicative (Alternative(..), asum)
 import Control.Monad (join, void)
 import Data.Function ((&))
 import Data.Void (absurd)
 import Witherable (Filterable (mapMaybe, filter))
 import Data.Maybe (isNothing)
 import Data.Char (isControl)
 import GHC.Generics (Generic)
 import Data.Foldable (for_)

 --------------------------------------------------------------------------------
 -- Tutorial / introduction                                                    --
 --------------------------------------------------------------------------------

 -- A monadic parser encoded as an infinite lazy tree, which branches on a token
 -- classification `ix`, receives token values of type `tok` along the way, and
 -- ultimately returns a result `r` or fails.
 --
 -- The basic idea going on here: at each state (position) in the parse machine
 -- we can accept a token from the map (where tokens are indexed by `ix`: the
 -- category of token you want to accept, before you receive the actual data
 -- via `tok`).
 --
 -- This does not include any backtracking: there is only one possible walk
 -- through the `ProtoParseMachine` for a given input, and it is a _linear_ walk,
 -- with strictly increasing depth. The real `ParseMachine` below adds
 -- backtracking (which requires a bit more complexity than you would expect),
 -- `Coyoneda` (to handle maps/binds more efficiently), and fallbacks for
 -- when `ix` fails to match the current token: there is no reason for the `Map`
 -- to be exhaustive, it just makes it more efficient, theoretically.
 data ProtoParseMachine ix tok r = ProtoParseMachine
  { acceptP :: Map.Map ix (tok -> ProtoParseMachine ix tok r)
  , finishP :: Maybe r -- only called on EOF in this proto parser, unlike
                       -- below where it is used for stopping early for staging
  } deriving (Functor)

 protoParse :: Ord ix => (tok -> ix) -> ProtoParseMachine ix tok r -> [tok] -> Maybe r
 protoParse _        (ProtoParseMachine { finishP }) [] = finishP
 protoParse classify (ProtoParseMachine { acceptP }) (tok : toks) = do
  -- We classify the token by `ix`, which we look up in the map
  Map.lookup (classify tok) acceptP
    -- Then we let it receive the actual `tok` value, and continue onwards
    >>= \receive -> protoParse classify (receive tok) toks

 instance Ord ix => Alternative (ProtoParseMachine ix tok) where
  empty = ProtoParseMachine Map.empty Nothing
  -- Merge the tokens to parse, and the EOF behaviors
  ProtoParseMachine steps1 final1 <|> ProtoParseMachine steps2 final2 =
    ProtoParseMachine (Map.union steps1 steps2) (final1 <|> final2)

 -- `>>=` is interesting because it accumulates the tokens to expect before
 -- it runs (in the case of monadic parsing, this is generally interleaved
 -- with actual parsing thanks to laziness)
 instance Ord ix => Monad (ProtoParseMachine ix tok) where
  ProtoParseMachine steps final >>= cont =
    -- If a step succeeds, it binds the continuation... eventually
    ProtoParseMachine ((fmap . fmap) (>>= cont) steps) Nothing
      -- Otherwise we are allowed to start the continuation on the fallback
      <|> maybe empty cont final

 instance Ord ix => Applicative (ProtoParseMachine ix tok) where
  pure = ProtoParseMachine Map.empty . Just
  mf <*> ma = mf >>= (<$> ma)


 --------------------------------------------------------------------------------
 -- Full parser type and parser interpreter                                    --
 --------------------------------------------------------------------------------


 -- The full parser, with backtracking and a catchall case after the `Map`.
 --
 -- It proceeds by gathering indexed tokens into a `Map ix`, one layer of
 -- backtracking at a time, and then matching it on the stream. If it fails,
 -- backtracking proceeds at the next `MatchTrack`. Finally, when the stream
 -- is exhausted, the `Maybe r` at the end of the tunnel is returned.
 --
 -- Interestingly this statically asserts part of the backtracking behavior of
 -- `Parsec`: for a parser with singleton `MatchTrack`s, there is no backtracking
 -- since each map only has one behavior for the `ix` that it sees.
 --
 -- I think using the `Coyoneda` trick to factor out the implicit state should
 -- allow finite applicative parsers (for context-free grammars) to be finite data
 -- structures in memory. Otherwise it is just lazy for absolutely no reason, and
 -- repeated usages of the same parser instance would leak memory.
 data ParseMachine ix tok r = ParseStep
  { accept ::
    -- `MatchTrack` gives backtracking behavior by allowing several `Map`s`
    -- to propose the tokens that should match (without backtracking, the
    -- first result for a token wins, via left-biased `Map.union`)
   !( MatchTrack
      -- We include a catchall, to match any single token
      ( WithCatchall
        -- The `Map`s index what _kind_ of tokens to accept and contain
        -- continuations which receive the particular _value_ of the token
        (Coyoneda (Map.Map ix))
        -- The continuation is staged so that the first stage can include
        -- backtracking to the next `Map` in the `MatchTrack`, while the second
        -- stage commits locally to the parse (notwithstanding `try`s further back)
        (tok -> ParseMachine ix tok (ParseMachine ix tok r))
    ) )
  , finish ::
    -- Finally there is a pure value: either a result for EOF, hopefully, or,
    -- for intermediate steps, a fallback that is used to call the next continuation
    Maybe r
  }
  deriving (Functor)
 -- A monoid (with suggestive names) that overlaps elements by one when appending
 data MatchTrack m = NoMatch | MatchTrack m (MatchTrack m)
  deriving (Functor)
 type ParseMatchTrack ix tok r = MatchTrack
  ( WithCatchall
    (Coyoneda (Map.Map ix))
    (tok -> ParseMachine ix tok (ParseMachine ix tok r))
  )

 classified :: (tok -> ix) -> [tok] -> [(ix, tok)]
 classified classifier = fmap \(!tok) -> (classifier tok, tok)

 -- Basic parsing through the above type (obviously it would be good to add state
 -- and errors and so on: pass down the currently expected tokens during
 -- backtracking, for example)
 parse' :: Ord ix => ParseMachine ix tok r -> [(ix, tok)] -> Maybe (r, [(ix, tok)])
 parse' (ParseStep _ mr) [] = (,[]) <$> mr                     -- EOF, expected or not
 parse' (ParseStep NoMatch mr) remaining = (,remaining) <$> mr -- Ran out of tokens
 -- Specialize this case to avoid leaking tokens when no backtracking is possible
 parse' (ParseStep (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) Nothing) ((!ix, tok) : continue) | hasNoMatches more =
  case fromMap <$> Map.lookup ix ixMap <|> catchall of
    Just receive -> parse' (join (receive tok)) continue
    Nothing -> Nothing -- Unexpected token
 parse' (ParseStep (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) mr) backtrack@((!ix, tok) : continue) =
  case fromMap <$> Map.lookup ix ixMap <|> catchall of
    -- Here we try to shallowly parse the matching case, up to the next bind that
    -- is not covered by the current `try`
    Just receive | Just (committed, remaining) <- parse' (receive tok) continue ->
      -- Now we are hopefully back on a happy path with no backtracking for a while
      parse' committed remaining
    -- If not, we have to backtrack
    _ -> parse' (ParseStep more mr) backtrack

 -- Ensure that the parser reached EOF before returning the result
 parse :: Ord ix => ParseMachine ix tok a -> [(ix, tok)] -> Maybe a
 parse parser toks = case parse' parser toks of
  Just (r, []) -> Just r
  _ -> Nothing

 -- Specialize it to `Char`s
 type StringParseMachine = ParseMachine Char Char
 stringParse' :: StringParseMachine r -> String -> Maybe (r, String)
 stringParse' m = (fmap . fmap . fmap) snd . parse' m . map (join (,))

 stringParse :: StringParseMachine r -> String -> Maybe r
 stringParse m = parse m . map (join (,))


 --------------------------------------------------------------------------------
 -- Basic combinators                                                          --
 --------------------------------------------------------------------------------


 -- The `try` combinator extends the `MatchTrack` with one additional `Map`
 -- to allow backtracking: that forces the next `<|>` to start anew instead of
 -- trying to add to the existing matches
 --
 -- `try . try = try`
 try :: ParseMachine ix tok r -> ParseMachine ix tok r
 try (ParseStep matchtrack mr) = ParseStep (extend $ compress matchtrack) mr where
  -- Extend with a fresh `MatchTrack`
  extend NoMatch = MatchTrack (WithCatchall (Coyoneda absurd Map.empty) Nothing) NoMatch
  extend (MatchTrack possibilities more) =
    MatchTrack possibilities (extend more)
  -- Compress both stages into the first stage, so they trigger backtracking:
  -- further binds will go on the fresh second stage and not trigger backtracking
  compress = fmap . fmap . fmap $ _pure . (`tryBind` id) -- `pure . tryJoin`

 -- The opposite of `try`: if it consumes one token, it fails hard instead of
 -- backtracking into the next alternative. (Could still be caught in higher
 -- backtracking, of course.)
 --
 -- They aren't exactly inverses: `commit . try . commit = commit`, since
 -- `commit` is a little more destructive: it undoes all `try`s
 commit :: ParseMachine ix tok r -> ParseMachine ix tok r
 commit (ParseStep matchtrack mr) = ParseStep (trim $ compress matchtrack) mr where
  -- Extend with a fresh `MatchTrack`
  trim possibilities | hasNoMatches possibilities = NoMatch
  trim (MatchTrack possibilities more) = MatchTrack possibilities (trim more)
  trim NoMatch = NoMatch
  -- Compress both stages into the *second* stage, so they *do not* trigger
  -- backtracking
  compress = fmap . fmap . fmap $ (`tryBind` id) . _pure -- `tryJoin . pure`

 -- Accepts a single kind of token and calls the continuation with its value
 token :: ix -> ParseMachine ix tok tok
 token ix = ParseStep
  (MatchTrack
    (WithCatchall step Nothing)
    NoMatch
  ) Nothing where
  step = Coyoneda
    ((_pure . _pure) .)
    (Map.singleton ix id)

 -- Note: this consumes the token like `token` does, preventing backtracking
 -- without an explicit `try`.
 anyToken :: ParseMachine ix tok tok
 anyToken = ParseStep
  (MatchTrack
    (WithCatchall
      (Coyoneda absurd Map.empty)
      (Just (_pure . _pure))
    )
    NoMatch
  ) Nothing

 satisfies :: (tok -> Bool) -> ParseMachine ix tok tok
 satisfies p = try $ filter p anyToken


 --------------------------------------------------------------------------------
 -- Instances / implementation                                                 --
 --------------------------------------------------------------------------------


 -- First we implement `Monoid (MatchTrack m)`
 instance Monoid m => Monoid (MatchTrack m) where
  mempty = NoMatch
 -- This appends the lists together with *one* element of overlap
 instance Semigroup m => Semigroup (MatchTrack m) where
  NoMatch <> r = r
  l <> NoMatch = l
  MatchTrack trySome NoMatch <> MatchTrack more remaining =
    MatchTrack (trySome <> more) remaining
  MatchTrack priority trailing <> r =
    MatchTrack priority (trailing <> r)

 -- Now we get to the parser instances

 instance Ord ix => Alternative (ParseMachine ix tok) where
  empty = ParseStep NoMatch Nothing
  -- If there is a catchall already, do nothing
  l@(ParseStep _ (Just _)) <|> _ = l
  -- Otherwise append the steps, with one element of overlap as discussed above,
  -- to avoid needless backtracking
  ParseStep steps1 Nothing <|> ParseStep steps2 final =
    ParseStep (steps1 <> steps2) final

 instance Ord ix => Applicative (ParseMachine ix tok) where
  pure = ParseStep NoMatch . Just
  mf <*> ma = mf >>= (<$> ma) -- this might need better sharing?

 -- `>>=` is interesting because it accumulates the tokens to expect before
 -- it runs (in the case of monadic parsing, this is generally interleaved
 -- with actual parsing thanks to laziness)
 instance Ord ix => Monad (ParseMachine ix tok) where
  ParseStep steps final >>= cont =
    -- If a step succeeds, it binds the continuation... eventually.
    -- The last `fmap` is to nestle it in the nested parser
    -- `ParseMachine ix tok (ParseMachine ix tok r)`, so that it sequences
    -- after the alternatives in `(try x <|> y) >>= f`, instead of distributing
    -- inside as `(try (x >>= f) <|> (y >>= f))`.
    ParseStep ((fmap . fmap . fmap . fmap) (>>= cont) steps) Nothing
      -- Otherwise we are allowed to start the continuation on the fallback
      <|> maybe empty cont final

 -- `tryAlt l r = try l <|> r` which does not need `Ord ix`
 tryAlt :: ParseMachine ix tok r -> ParseMachine ix tok r -> ParseMachine ix tok r
 tryAlt (ParseStep NoMatch final1) (ParseStep steps2 final2) =
    ParseStep steps2 (final1 <|> final2)
 tryAlt (ParseStep (MatchTrack step steps) final) r =
  let ParseStep steps' final' = tryAlt (ParseStep steps final) r in
  ParseStep (MatchTrack step steps') final'

 -- Avoid `Ord ix` for different semantics: `tryBind m f = try $ m >>= f`
 tryBind :: ParseMachine ix tok i -> (i -> ParseMachine ix tok r) -> ParseMachine ix tok r
 tryBind (ParseStep steps final) cont =
  ParseStep ((fmap . fmap . fmap) (\m -> _pure $ tryBind m id `tryBind` cont) steps) Nothing
    `tryAlt` maybe (ParseStep NoMatch Nothing) cont final

 -- Avoids an `Ord ix` constraint
 _pure :: r -> ParseMachine ix tok r
 _pure = ParseStep NoMatch . Just


 -- Does not call `try` (that would violate `mapMaybe pure = id`)
 instance Filterable (ParseMachine ix tok) where
  mapMaybe p (ParseStep steps final) = ParseStep steps' (mapMaybe p final) where
    steps' = (fmap . fmap . fmap . fmap) (mapMaybe p) steps
  filter p (ParseStep steps final) = ParseStep steps' (filter p final) where
    steps' = (fmap . fmap . fmap . fmap) (filter p) steps

 --------------------------------------------------------------------------------
 -- Usage as a lexer (tokenizer): prefer longest matches, more backtracking    --
 --------------------------------------------------------------------------------

 -- Measure the depth down the parse machine tree (that is, the number of tokens
 -- it consumed)
 depth :: (Int -> i -> o) -> ParseMachine ix tok i -> ParseMachine ix tok o
 depth = go 0 where
  -- We need bounded polymorphic recursion to handle the staged continuations
  go :: forall ix tok x y. Int -> (Int -> x -> y) -> ParseMachine ix tok x -> ParseMachine ix tok y
  go !i f (ParseStep steps finish) = ParseStep
    ((fmap . fmap . fmap) (go (i+1) \j -> go j f) steps)
    (f i <$> finish)

 withDepth :: ParseMachine ix tok r -> ParseMachine ix tok (Int, r)
 withDepth = depth (,)

 -- Return the tokens parsed during the parser
 withSource :: ParseMachine ix tok r -> ParseMachine ix tok ([tok], r)
 withSource = fmap (\(acc, r) -> (reverse acc, r)) . go [] (,) where
  -- We need bounded polymorphic recursion to handle the staged continuations
  go :: forall ix tok x y. [tok] -> ([tok] -> x -> y) -> ParseMachine ix tok x -> ParseMachine ix tok y
  go !acc f (ParseStep steps finish) = ParseStep
    ((fmap . fmap) (\cont tok -> go (tok : acc) (\j -> go j f) (cont tok)) steps)
    -- We can't reverse here, because it is called many times
    -- during staging and such, just reverse once at the top level
    (f acc <$> finish)

 readSource :: forall r ix void. Read r => ParseMachine ix Char void -> ParseMachine ix Char r
 readSource = fmap (\(src, _) -> read src) . withSource

 newtype Lexer ix tok out r = Lexer (ParseMachine ix tok ([out], r))
  deriving (Functor)

 emit :: out -> Lexer ix tok out ()
 emit out = Lexer (_pure ([out], ()))

 instance Ord ix => Alternative (Lexer ix tok out) where
  empty = Lexer empty
  -- More backtracking: use `tryAlt l r = try l <|> r` instead of the derived `<|>`
  Lexer l <|> Lexer r = Lexer (l `tryAlt` r)

 instance Ord ix => Applicative (Lexer ix tok out) where
  pure = Lexer . pure . ([],)
  mf <*> ma = mf >>= (<$> ma)

 instance Ord ix => Monad (Lexer ix tok out) where
  -- Nothing special(? use `tryAlt`?), just accumulate
  Lexer mi >>= f = Lexer do
    (!acc, i) <- mi
    let Lexer mr = f i
    (\(!more, r) -> (acc ++ more, r)) <$> mr

 lex' :: forall ix tok out r. Ord ix => Lexer ix tok out r -> [(ix, tok)] -> Maybe (([out], r), [(ix, tok)])
 lex' (Lexer (ParseStep _ mr)) [] = (,[]) <$> mr
 lex' (Lexer (ParseStep NoMatch mr)) remaining = (,remaining) <$> mr
 -- Specialize this case to avoid leaking tokens when no backtracking is possible
 lex' (Lexer (ParseStep (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) Nothing)) ((!ix, tok) : continue) | hasNoMatches more =
  case fromMap <$> Map.lookup ix ixMap <|> catchall of
    Just receive -> lex' (Lexer (join (receive tok))) continue
    Nothing -> Nothing -- Unexpected token
 lex' (Lexer (ParseStep shallow mr)) backtrack@((!ix, tok) : continue) =
    scanning Nothing shallow
  where
  -- Apply *all* of the backtracking and take the longest result, before the
  -- next bind. Then commit to it and run whichever bind continuation was chosen.
  scanning ::
    Maybe (Int, (ParseMachine ix tok ([out], r), [(ix, tok)])) ->
    ParseMatchTrack ix tok ([out], r) -> Maybe (([out], r), [(ix, tok)])
  scanning !maybeLongest (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) =
    case fromMap <$> Map.lookup ix ixMap <|> catchall of
      Just receive | Just ((!parsedDepth, committed), remaining) <- parse' (withDepth (receive tok)) continue ->
        scanning (longer maybeLongest (parsedDepth, (committed, remaining))) more
      _ -> scanning maybeLongest more
  scanning Nothing NoMatch = (,backtrack) <$> mr
  scanning (Just (_parsedDepth, (committed, remaining))) NoMatch =
    lex' (Lexer committed) remaining

  longer :: forall d. Maybe (Int, d) -> (Int, d) -> Maybe (Int, d)
  longer Nothing d = Just d
  longer (Just (l1, d1)) (l2, d2) =
    if l1 >= l2 then Just (l1, d1) else Just (l2, d2)

 lex :: forall ix tok out r. Ord ix => Lexer ix tok out r -> [(ix, tok)] -> Maybe ([out], r)
 lex lexer input = case lex' lexer input of
  Just (r, []) -> Just r
  _ -> Nothing

 lexAndParse ::
  forall ix1 ix2 tok1 tok2 r.
    Ord ix1 => Ord ix2 =>
  (tok1 -> ix1, tok2 -> ix2) -> Lexer ix1 tok1 tok2 (ParseMachine ix2 tok2 r) -> [tok1] -> Maybe r
 lexAndParse (classify1, classify2) lexer input = do
  (tokens, parser) <- lex lexer $ classified classify1 input
  parse parser $ classified classify2 tokens

 --------------------------------------------------------------------------------
 -- Extra helpers                                                              --
 --------------------------------------------------------------------------------

 -- Map every index of `ParseMachine` (destroys sharing)
 fullMap ::
  Ord ix' => (ix -> ix') -> (tok' -> tok) -> (i -> o) ->
  ParseMachine ix tok i -> ParseMachine ix' tok' o
 fullMap f g h (ParseStep possibilities final) =
  ParseStep
    (fmap mapContinuation . mapCoyoneda <$> possibilities)
    (h <$> final)
  where
  mapCoyoneda (WithCatchall (Coyoneda x y) z) =
    WithCatchall (Coyoneda x (Map.mapKeys f y)) z
  mapContinuation z = fullMap f g (fullMap f g h) . z . g

 noSharing :: Ord ix => ParseMachine ix tok r -> ParseMachine ix tok r
 noSharing = fullMap id id id

 hasNoMatches :: MatchTrack (WithCatchall (Coyoneda (Map.Map k)) a) -> Bool
 hasNoMatches (MatchTrack (WithCatchall (Coyoneda _ ixMap) catchall) more) =
  Map.null ixMap && isNothing catchall && hasNoMatches more
 hasNoMatches NoMatch = True

 --------------------------------------------------------------------------------
 -- Boring helper types                                                        --
 --------------------------------------------------------------------------------

 data WithCatchall f a = WithCatchall !(f a) (Maybe a)
  deriving (Functor)

 instance (Monoid (f a)) => Monoid (WithCatchall f a) where
  mempty = WithCatchall mempty Nothing
 instance (Semigroup (f a)) => Semigroup (WithCatchall f a) where
  WithCatchall l (Just catchall) <> WithCatchall r _ =
    WithCatchall (l <> r) (Just catchall)
  WithCatchall l Nothing <> WithCatchall r catchall =
    WithCatchall (l <> r) catchall


 data Coyoneda f a = forall b. Coyoneda (b -> a) !(f b)

 instance Functor (Coyoneda f) where
  fmap f (Coyoneda g v) = Coyoneda (f . g) v

 -- Note: this means that applicatives really need to be finite, to fit in
 -- finite space! because we can share the `Map`s across several invocations of
 -- `fmap` via `Coyoneda`, but we cannot share them across infinite chained
 -- `try <|>`. See this blog post for an explanation of how that would allow some
 -- context sensitive languages to be parsed by otherwise applicative parsers:
 -- https://byorgey.wordpress.com/2012/01/05/parsing-context-sensitive-languages-with-applicative/
 instance Ord ix => Semigroup (Coyoneda (Map.Map ix) v) where
  Coyoneda _ l <> r | Map.null l = r
  l <> Coyoneda _ r | Map.null r = l
  Coyoneda f l <> Coyoneda g r = Coyoneda (either f g)
    ((Left <$> l) `Map.union` (Right <$> r))




 --------------------------------------------------------------------------------
 -- EXAMPLES: JSON                                                             --
 --------------------------------------------------------------------------------

 type JSONLexer = Lexer Char Char JTok
 type JSONL r = ParseMachine Char Char ([JTok], r)
 type JSONParser = ParseMachine JCls JTok

 data JTok = TLBrace | TRBrace | TLBracket | TRBracket | TComma | TColon | TTrue | TFalse | TNull | TString String | TNumber !Double
  deriving (Eq, Ord, Generic, Show)
 data JCls =  LBrace |  RBrace |  LBracket |  RBracket |  Comma |  Colon | CTrue | CFalse |  Null |  String        |  Number
  deriving (Eq, Ord, Generic, Show)
 data JSON = JString String | JNumber !Double | JArray [JSON] | JObject [(String, JSON)] | JBool Bool | JNull
  deriving (Eq, Ord, Generic, Show)

 parseJSON :: String -> Maybe JSON
 parseJSON = lexAndParse (id, jsonClassify) (jsonParser <$ jsonLexer)

 -- I'm disappointed this isn't turned into a tag coercion...
 jsonClassify :: JTok -> JCls
 jsonClassify = \case
  TLBrace -> LBrace
  TRBrace -> RBrace
  TLBracket -> LBracket
  TRBracket -> RBracket
  TComma -> Comma
  TColon -> Colon
  TTrue -> CTrue
  TFalse -> CFalse
  TNull -> Null
  TString _ -> String
  TNumber _ -> Number

 jsonParser :: JSONParser JSON
 jsonParser = asum
  [ token String & mapMaybe \case { TString v -> Just (JString v); _ -> Nothing }
  , token Number & mapMaybe \case { TNumber v -> Just (JNumber v); _ -> Nothing }
  , JObject <$> do token LBrace   *> separated member     <* token RBrace
  , JArray  <$> do token LBracket *> separated jsonParser <* token RBracket
  , JBool <$> (True <$ token CTrue <|> False <$ token CFalse)
  , JNull <$ token Null
  ]
  where
  comma = token Comma
  separated :: forall r. JSONParser r -> JSONParser [r]
  separated p = liftA2 (:) p (many (comma *> p)) <|> pure []

  member = (\k _ v -> (k, v))
    <$> do token String & mapMaybe \case { TString v -> Just v; _ -> Nothing }
    <*> do token Colon
    <*> do jsonParser

 jsonLexer :: JSONLexer ()
 jsonLexer =
  Lexer $ ws *> do
    mconcat <$> many (jsonTok <* ws)
  where
  ws :: JSONL ()
  ws = ([],) <$> void do many (token ' ' <|> token '\n' <|> token '\r' <|> token '\t')

  jsonTok = symbols <|> datas

  tokMap :: forall ix tok r. Ord ix => [(ix, r)] -> ParseMachine ix tok r
  tokMap = foldr (\(chr, val) more -> val <$ token chr <|> more) empty

  -- A keyword is a series of tokens
  keyword :: String -> JTok -> JSONL ()
  keyword [] val = pure ([val], ())
  keyword (c : cs) val = token c *> keyword cs val

  symbols :: JSONL ()
  symbols = foldr (\(str, val) more -> keyword str val <|> more) empty symbolTable
  symbolTable =
    [ ("{", TLBrace)
    , ("}", TRBrace)
    , ("[", TLBracket)
    , ("]", TRBracket)
    , (",", TComma)
    , (":", TColon)
    , ("true", TTrue)
    , ("false", TFalse)
    , ("null", TNull)
    ]

  datas = string <|> number

  string :: JSONL ()
  string = (\chars -> ([TString chars], ())) <$> do
    token '"' *> charList
  -- TODO: fix the fact that `token '"'` needs to be in front?
  charList = [] <$ token '"' <|> (:) <$> char <*> charList
  char = (token '\\' *> escape)
    <|> satisfies (not . isControl)
  escape = escapes <|> token 'u' *> do
    (\m n o p -> toEnum (base 16 [m,n,o,p]))
      <$> hex <*> hex <*> hex <*> hex
  escapes = tokMap
    [ ('"', '"')
    , ('\\', '\\')
    , ('/', '/')
    , ('b', '\b')
    , ('f', '\f')
    , ('n', '\n')
    , ('r', '\r')
    , ('t', '\t')
    ]
  base :: Int -> [Int] -> Int
  base b = fst . foldr (\d (!acc, !pl) -> (d*pl + acc, pl*b)) (0, 1)

  hex = tokMap $ zip ['0'..'9'] [0..9] <> zip ['a'..'f'] [10..15] <> zip ['A'..'F'] [10..15]

  digit = tokMap $ (,()) <$> ['0'..'9']

  number = (\num -> ([TNumber num], ())) <$> readSource do
    let t = void . token
    let opt p = p <|> pure ()
    t '-' <|> pure ()
    t '0' <|> void (some digit)
    opt (t '.' <* some digit)
    opt do (t 'e' <|> t 'E') <* opt (t '+' <|> t '-') <* some digit


 main :: IO ()
 main = do
  let
    examples =
      [ "null"
      , "{}"
      , "[true]"
      , "\"\""
      , "\"x\""
      , "\"xy\""
      , "\"\\\"\""
      , "\"a\\u0304\""
      , "\"string\""
      , "0"
      , "[0,1,2]"
      , "[0 ,1, 2]"
      , "45"
      , " 45.2  "
      , "-0.34e12"
      , "{\"x\":true}"
      ]
  for_ examples \s -> do
    print s
    print $ lex jsonLexer $ join (,) <$> s
    print $ parseJSON s
No results found