Skip to content

Instantly share code, notes, and snippets.

@MonoidMusician
Last active October 8, 2025 14:05
Show Gist options
  • Select an option

  • Save MonoidMusician/b8ce75f012a4008f7904cde941a7fcba to your computer and use it in GitHub Desktop.

Select an option

Save MonoidMusician/b8ce75f012a4008f7904cde941a7fcba to your computer and use it in GitHub Desktop.
A cute little comonadic-inspired parsing machine
-- A parsing machine inspired by “Comonads as Spaces” by Phil Freeman
-- (https://blog.functorial.com/posts/2016-08-07-Comonads-As-Spaces.html)
{-# LANGUAGE ScopedTypeVariables, OverloadedStrings, DerivingVia, GeneralizedNewtypeDeriving, LambdaCase, BlockArguments, DataKinds #-}
module ParseMachine where
import Prelude hiding (filter, lex)
import qualified Data.Map as Map
import Control.Applicative (Alternative(..), asum)
import Control.Monad (join, void)
import Data.Function ((&))
import Data.Void (absurd)
import Witherable (Filterable (mapMaybe, filter))
import Data.Maybe (isNothing)
import Data.Char (isControl)
import GHC.Generics (Generic)
import Data.Foldable (for_)
--------------------------------------------------------------------------------
-- Tutorial / introduction --
--------------------------------------------------------------------------------
-- A monadic parser encoded as an infinite lazy tree, which branches on a token
-- classification `ix`, receives token values of type `tok` along the way, and
-- ultimately returns a result `r` or fails.
--
-- The basic idea going on here: at each state (position) in the parse machine
-- we can accept a token from the map (where tokens are indexed by `ix`: the
-- category of token you want to accept, before you receive the actual data
-- via `tok`).
--
-- This does not include any backtracking: there is only one possible walk
-- through the `ProtoParseMachine` for a given input, and it is a _linear_ walk,
-- with strictly increasing depth. The real `ParseMachine` below adds
-- backtracking (which requires a bit more complexity than you would expect),
-- `Coyoneda` (to handle maps/binds more efficiently), and fallbacks for
-- when `ix` fails to match the current token: there is no reason for the `Map`
-- to be exhaustive, it just makes it more efficient, theoretically.
-- | The prototype parser state: a branching structure indexed by the token
-- class `ix`, where the chosen branch then receives the concrete token `tok`.
data ProtoParseMachine ix tok r = ProtoParseMachine
  { acceptP :: Map.Map ix (tok -> ProtoParseMachine ix tok r)
    -- ^ Continuations keyed by the *class* of acceptable next token; the
    -- matching continuation receives the token's actual value
  , finishP :: Maybe r -- only called on EOF in this proto parser, unlike
    -- below where it is used for stopping early for staging
  } deriving (Functor)
-- | Run the prototype machine over a token stream: classify each token, look
-- up the continuation for that class, and feed it the token's value.
protoParse :: Ord ix => (tok -> ix) -> ProtoParseMachine ix tok r -> [tok] -> Maybe r
protoParse _ machine [] = finishP machine
protoParse classify machine (next : rest) =
  case Map.lookup (classify next) (acceptP machine) of
    -- No branch for this token class: the parse fails
    Nothing -> Nothing
    -- Hand the actual token value to the continuation and keep going
    Just receive -> protoParse classify (receive next) rest
instance Ord ix => Alternative (ProtoParseMachine ix tok) where
  empty = ProtoParseMachine Map.empty Nothing
  -- Merge the token tables and the EOF behaviors, both left-biased
  l <|> r = ProtoParseMachine
    (acceptP l `Map.union` acceptP r)
    (finishP l <|> finishP r)
-- `>>=` is interesting because it accumulates the tokens to expect before
-- it runs (in the case of monadic parsing, this is generally interleaved
-- with actual parsing thanks to laziness)
instance Ord ix => Monad (ProtoParseMachine ix tok) where
  ProtoParseMachine steps final >>= cont =
    -- If a step succeeds, it binds the continuation... eventually
    -- (the two `fmap`s reach under the `Map` and the `tok ->` function)
    ProtoParseMachine ((fmap . fmap) (>>= cont) steps) Nothing
      -- Otherwise we are allowed to start the continuation on the fallback;
      -- note the left side's `finishP` is `Nothing`, so `<|>` keeps the
      -- continuation's EOF behavior
      <|> maybe empty cont final
instance Ord ix => Applicative (ProtoParseMachine ix tok) where
  -- A pure value accepts no tokens; it only succeeds at EOF
  pure r = ProtoParseMachine Map.empty (Just r)
  -- Defined via `>>=` so behavior matches the Monad instance
  mf <*> ma = mf >>= \f -> f <$> ma
--------------------------------------------------------------------------------
-- Full parser type and parser interpreter --
--------------------------------------------------------------------------------
-- The full parser, with backtracking and a catchall case after the `Map`.
--
-- It proceeds by gathering indexed tokens into a `Map ix`, one layer of
-- backtracking at a time, and then matching it on the stream. If it fails,
-- backtracking proceeds at the next `MatchTrack`. Finally, when the stream
-- is exhausted, the `Maybe r` at the end of the tunnel is returned.
--
-- Interestingly this statically asserts part of the backtracking behavior of
-- `Parsec`: for a parser with singleton `MatchTrack`s, there is no backtracking
-- since each map only has one behavior for the `ix` that it sees.
--
-- I think using the `Coyoneda` trick to factor out the implicit state should
-- allow finite applicative parsers (for context-free grammars) to be finite data
-- structures in memory. Otherwise it is just lazy for absolutely no reason, and
-- repeated usages of the same parser instance would leak memory.
data ParseMachine ix tok r = ParseStep
  { accept ::
      -- `MatchTrack` gives backtracking behavior by allowing several `Map`s`
      -- to propose the tokens that should match (without backtracking, the
      -- first result for a token wins, via left-biased `Map.union`)
      !( MatchTrack
        -- We include a catchall, to match any single token
        ( WithCatchall
          -- The `Map`s index what _kind_ of tokens to accept and contain
          -- continuations which receive the particular _value_ of the token
          (Coyoneda (Map.Map ix))
          -- The continuation is staged so that the first stage can include
          -- backtracking to the next `Map` in the `MatchTrack`, while the second
          -- stage commits locally to the parse (notwithstanding `try`s further back)
          (tok -> ParseMachine ix tok (ParseMachine ix tok r))
        ) )
  , finish ::
      -- Finally there is a pure value: either a result for EOF, hopefully, or,
      -- for intermediate steps, a fallback that is used to call the next continuation
      Maybe r
  }
  deriving (Functor)
-- A monoid (with suggestive names) that overlaps elements by one when appending
-- (structurally a plain cons-list: `NoMatch` is nil, `MatchTrack` is cons)
data MatchTrack m = NoMatch | MatchTrack m (MatchTrack m)
  deriving (Functor)
-- The concrete `MatchTrack` payload used in `ParseMachine`'s `accept` field
-- (see the field comments on `ParseMachine` above)
type ParseMatchTrack ix tok r = MatchTrack
  ( WithCatchall
      (Coyoneda (Map.Map ix))
      (tok -> ParseMachine ix tok (ParseMachine ix tok r))
  )
-- | Pair each token with its classification, forcing every token as it is
-- tagged (matching the original's bang pattern).
classified :: (tok -> ix) -> [tok] -> [(ix, tok)]
classified classifier = map tag where
  tag !tok = (classifier tok, tok)
-- Basic parsing through the above type (obviously it would be good to add state
-- and errors and so on: pass down the currently expected tokens during
-- backtracking, for example)
--
-- Returns the result plus the unconsumed remainder of the stream.
parse' :: Ord ix => ParseMachine ix tok r -> [(ix, tok)] -> Maybe (r, [(ix, tok)])
parse' (ParseStep _ mr) [] = (,[]) <$> mr -- EOF, expected or not
parse' (ParseStep NoMatch mr) remaining = (,remaining) <$> mr -- Ran out of tokens
-- Specialize this case to avoid leaking tokens when no backtracking is possible
parse' (ParseStep (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) Nothing) ((!ix, tok) : continue) | hasNoMatches more =
  case fromMap <$> Map.lookup ix ixMap <|> catchall of
    -- `join` commits both stages at once: there is nothing to backtrack to
    Just receive -> parse' (join (receive tok)) continue
    Nothing -> Nothing -- Unexpected token
parse' (ParseStep (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) mr) backtrack@((!ix, tok) : continue) =
  case fromMap <$> Map.lookup ix ixMap <|> catchall of
    -- Here we try to shallowly parse the matching case, up to the next bind that
    -- is not covered by the current `try`
    Just receive | Just (committed, remaining) <- parse' (receive tok) continue ->
      -- Now we are hopefully back on a happy path with no backtracking for a while
      parse' committed remaining
    -- If not, we have to backtrack: retry the *whole* remaining input against
    -- the next entry in the `MatchTrack`
    _ -> parse' (ParseStep more mr) backtrack
-- | Run the parser and require that the entire input stream was consumed.
parse :: Ord ix => ParseMachine ix tok a -> [(ix, tok)] -> Maybe a
parse parser toks =
  case parse' parser toks of
    Just (result, leftover) | null leftover -> Just result
    _ -> Nothing
-- Specialize to `Char` streams: each character serves as its own classification
type StringParseMachine = ParseMachine Char Char
-- | Parse a `String`, returning the result and the unconsumed suffix.
stringParse' :: StringParseMachine r -> String -> Maybe (r, String)
stringParse' machine input =
  case parse' machine [(c, c) | c <- input] of
    Nothing -> Nothing
    Just (r, leftover) -> Just (r, map snd leftover)
-- | Parse a `String`, requiring all input to be consumed.
stringParse :: StringParseMachine r -> String -> Maybe r
stringParse machine input = parse machine [(c, c) | c <- input]
--------------------------------------------------------------------------------
-- Basic combinators --
--------------------------------------------------------------------------------
-- The `try` combinator extends the `MatchTrack` with one additional `Map`
-- to allow backtracking: that forces the next `<|>` to start anew instead of
-- trying to add to the existing matches
--
-- `try . try = try`
try :: ParseMachine ix tok r -> ParseMachine ix tok r
try (ParseStep matchtrack mr) = ParseStep (extend $ compress matchtrack) mr where
  -- Extend with a fresh (empty) `MatchTrack` entry at the very end, so a
  -- later `<|>` merges into the fresh map instead of the committed ones
  extend NoMatch = MatchTrack (WithCatchall (Coyoneda absurd Map.empty) Nothing) NoMatch
  extend (MatchTrack possibilities more) =
    MatchTrack possibilities (extend more)
  -- Compress both stages into the first stage, so they trigger backtracking:
  -- further binds will go on the fresh second stage and not trigger backtracking
  compress = fmap . fmap . fmap $ _pure . (`tryBind` id) -- `pure . tryJoin`
-- The opposite of `try`: if it consumes one token, it fails hard instead of
-- backtracking into the next alternative. (Could still be caught in higher
-- backtracking, of course.)
--
-- They aren't exactly inverses: `commit . try . commit = commit`, since
-- `commit` is a little more destructive: it undoes all `try`s
commit :: ParseMachine ix tok r -> ParseMachine ix tok r
commit (ParseStep matchtrack mr) = ParseStep (trim $ compress matchtrack) mr where
  -- Drop the remainder of the track once nothing in it could match anyway
  -- (this is where empty `try` entries added by `try` get removed)
  trim possibilities | hasNoMatches possibilities = NoMatch
  trim (MatchTrack possibilities more) = MatchTrack possibilities (trim more)
  trim NoMatch = NoMatch
  -- Compress both stages into the *second* stage, so they *do not* trigger
  -- backtracking
  compress = fmap . fmap . fmap $ (`tryBind` id) . _pure -- `tryJoin . pure`
-- Accepts a single kind of token and calls the continuation with its value
token :: ix -> ParseMachine ix tok tok
token ix = ParseStep
  (MatchTrack
    (WithCatchall step Nothing) -- no catchall: only class `ix` matches
    NoMatch -- a single entry: no backtracking alternatives
  ) Nothing where
  -- The received token is staged twice (`_pure . _pure`) to produce the
  -- `ParseMachine (ParseMachine tok)` shape: match first, commit second
  step = Coyoneda
    ((_pure . _pure) .)
    (Map.singleton ix id)
-- Note: this consumes the token like `token` does, preventing backtracking
-- without an explicit `try`.
anyToken :: ParseMachine ix tok tok
anyToken = ParseStep
  (MatchTrack
    (WithCatchall
      -- The map is empty; only the catchall fires, for any token at all
      (Coyoneda absurd Map.empty)
      -- Stage the received token twice, exactly as `token` does
      (Just (_pure . _pure))
    )
    NoMatch
  ) Nothing
-- | Accept any single token for which the predicate holds; wrapped in `try`
-- so a failed predicate backtracks instead of failing hard.
satisfies :: (tok -> Bool) -> ParseMachine ix tok tok
satisfies predicate = try (filter predicate anyToken)
--------------------------------------------------------------------------------
-- Instances / implementation --
--------------------------------------------------------------------------------
-- First we implement `Monoid (MatchTrack m)`
instance Semigroup m => Monoid (MatchTrack m) where
  -- Only `mempty` is defined here, and the `Semigroup (MatchTrack m)`
  -- superclass needs just `Semigroup m`, so the previous `Monoid m`
  -- constraint was stronger than necessary (relaxed for generality;
  -- every former use site still satisfies the weaker constraint)
  mempty = NoMatch
-- This appends the lists together with *one* element of overlap
instance Semigroup m => Semigroup (MatchTrack m) where
  NoMatch <> r = r
  l <> NoMatch = l
  -- The *last* element of the left track is merged with the *first* element
  -- of the right track: this is the "one element of overlap"
  MatchTrack trySome NoMatch <> MatchTrack more remaining =
    MatchTrack (trySome <> more) remaining
  -- Otherwise keep walking down the left track to find its last element
  MatchTrack priority trailing <> r =
    MatchTrack priority (trailing <> r)
-- Now we get to the parser instances
instance Ord ix => Alternative (ParseMachine ix tok) where
  empty = ParseStep NoMatch Nothing
  -- If there is a catchall already, do nothing: a `Just` result means the
  -- left side can already succeed without consuming input, so the right
  -- alternative is unreachable
  l@(ParseStep _ (Just _)) <|> _ = l
  -- Otherwise append the steps, with one element of overlap as discussed above,
  -- to avoid needless backtracking
  ParseStep steps1 Nothing <|> ParseStep steps2 final =
    ParseStep (steps1 <> steps2) final
instance Ord ix => Applicative (ParseMachine ix tok) where
  -- A pure value consumes nothing; it is only the `finish` result
  pure r = ParseStep NoMatch (Just r)
  mf <*> ma = mf >>= \f -> f <$> ma -- this might need better sharing?
-- `>>=` is interesting because it accumulates the tokens to expect before
-- it runs (in the case of monadic parsing, this is generally interleaved
-- with actual parsing thanks to laziness)
instance Ord ix => Monad (ParseMachine ix tok) where
  ParseStep steps final >>= cont =
    -- If a step succeeds, it binds the continuation... eventually.
    -- The last `fmap` is to nestle it in the nested parser
    -- `ParseMachine ix tok (ParseMachine ix tok r)`, so that it sequences
    -- after the alternatives in `(try x <|> y) >>= f`, instead of distributing
    -- inside as `(try (x >>= f) <|> (y >>= f))`.
    ParseStep ((fmap . fmap . fmap . fmap) (>>= cont) steps) Nothing
      -- Otherwise we are allowed to start the continuation on the fallback
      <|> maybe empty cont final
-- `tryAlt l r = try l <|> r` which does not need `Ord ix`
tryAlt :: ParseMachine ix tok r -> ParseMachine ix tok r -> ParseMachine ix tok r
-- Left track is exhausted: take the right side's steps, preferring the left
-- side's pure result if it has one
tryAlt (ParseStep NoMatch final1) (ParseStep steps2 final2) =
  ParseStep steps2 (final1 <|> final2)
-- Otherwise walk to the end of the left `MatchTrack` and append the right
-- side there, so it only runs after all left alternatives are exhausted
tryAlt (ParseStep (MatchTrack step steps) final) r =
  let ParseStep steps' final' = tryAlt (ParseStep steps final) r in
  ParseStep (MatchTrack step steps') final'
-- Avoid `Ord ix` for different semantics: `tryBind m f = try $ m >>= f`
tryBind :: ParseMachine ix tok i -> (i -> ParseMachine ix tok r) -> ParseMachine ix tok r
tryBind (ParseStep steps final) cont =
  -- Collapse each staged continuation into its first stage (keeping it
  -- backtrackable) before binding `cont`, then re-stage with `_pure`
  ParseStep ((fmap . fmap . fmap) (\m -> _pure $ tryBind m id `tryBind` cont) steps) Nothing
    -- `tryAlt` (rather than `<|>`) keeps this free of `Ord ix`
    `tryAlt` maybe (ParseStep NoMatch Nothing) cont final
-- | Like `pure`, but avoids the `Ord ix` constraint of the class method.
_pure :: r -> ParseMachine ix tok r
_pure r = ParseStep NoMatch (Just r)
-- Does not call `try` (that would violate `mapMaybe pure = id`)
instance Filterable (ParseMachine ix tok) where
  mapMaybe sel (ParseStep track fin) =
    ParseStep
      -- Four `fmap`s: through `MatchTrack`, `WithCatchall`, the `tok ->`
      -- function, and the first parser stage, filtering the staged machine
      ((fmap . fmap . fmap . fmap) (mapMaybe sel) track)
      (mapMaybe sel fin)
  filter keep (ParseStep track fin) =
    ParseStep
      ((fmap . fmap . fmap . fmap) (filter keep) track)
      (filter keep fin)
--------------------------------------------------------------------------------
-- Usage as a lexer (tokenizer): prefer longest matches, more backtracking --
--------------------------------------------------------------------------------
-- Measure the depth down the parse machine tree (that is, the number of tokens
-- it consumed)
depth :: (Int -> i -> o) -> ParseMachine ix tok i -> ParseMachine ix tok o
depth = go 0 where
  -- We need bounded polymorphic recursion to handle the staged continuations
  go :: forall ix tok x y. Int -> (Int -> x -> y) -> ParseMachine ix tok x -> ParseMachine ix tok y
  go !i f (ParseStep steps finish) = ParseStep
    -- Only three `fmap`s: we stop at the first parser stage, count one token
    -- for it, and thread the running count `j` into counting the second stage
    ((fmap . fmap . fmap) (go (i+1) \j -> go j f) steps)
    (f i <$> finish)
-- | Pair the parse result with the number of tokens it consumed.
withDepth :: ParseMachine ix tok r -> ParseMachine ix tok (Int, r)
withDepth machine = depth (\n r -> (n, r)) machine
-- Return the tokens parsed during the parser
withSource :: ParseMachine ix tok r -> ParseMachine ix tok ([tok], r)
withSource = fmap (\(acc, r) -> (reverse acc, r)) . go [] (,) where
  -- We need bounded polymorphic recursion to handle the staged continuations
  go :: forall ix tok x y. [tok] -> ([tok] -> x -> y) -> ParseMachine ix tok x -> ParseMachine ix tok y
  -- Only two `fmap`s here, so we can see the `tok` itself before it enters
  -- the continuation; tokens accumulate in reverse (cons is O(1))
  go !acc f (ParseStep steps finish) = ParseStep
    ((fmap . fmap) (\cont tok -> go (tok : acc) (\j -> go j f) (cont tok)) steps)
    -- We can't reverse here, because it is called many times
    -- during staging and such, just reverse once at the top level
    (f acc <$> finish)
-- | Discard the parser's own result and `read` the exact characters that it
-- consumed instead.
readSource :: forall r ix void. Read r => ParseMachine ix Char void -> ParseMachine ix Char r
readSource machine = read . fst <$> withSource machine
-- | A lexer is a parser that additionally accumulates a list of output
-- tokens `out` alongside its result.
newtype Lexer ix tok out r = Lexer (ParseMachine ix tok ([out], r))
  deriving (Functor)
-- | Emit one output token without consuming any input.
emit :: out -> Lexer ix tok out ()
emit output = Lexer (_pure ([output], ()))
instance Ord ix => Alternative (Lexer ix tok out) where
  empty = Lexer empty
  -- More backtracking than the raw parser: `tryAlt l r = try l <|> r`,
  -- instead of the derived `<|>`
  Lexer l <|> Lexer r = Lexer (tryAlt l r)
instance Ord ix => Applicative (Lexer ix tok out) where
  -- A pure value emits no output tokens
  pure r = Lexer (pure ([], r))
  mf <*> ma = mf >>= \f -> f <$> ma
instance Ord ix => Monad (Lexer ix tok out) where
  -- Nothing special(? use `tryAlt`?): sequence the underlying parsers and
  -- concatenate the emitted outputs, forcing each accumulator pair
  Lexer mi >>= f = Lexer $
    mi >>= \(!before, i) ->
      case f i of
        Lexer mr -> fmap (\(!after, r) -> (before ++ after, r)) mr
-- Run a lexer over a classified stream, returning emitted outputs, the
-- result, and the unconsumed remainder. Unlike `parse'`, this explores
-- *every* backtracking alternative and commits to the longest match.
lex' :: forall ix tok out r. Ord ix => Lexer ix tok out r -> [(ix, tok)] -> Maybe (([out], r), [(ix, tok)])
lex' (Lexer (ParseStep _ mr)) [] = (,[]) <$> mr -- EOF
lex' (Lexer (ParseStep NoMatch mr)) remaining = (,remaining) <$> mr
-- Specialize this case to avoid leaking tokens when no backtracking is possible
lex' (Lexer (ParseStep (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) Nothing)) ((!ix, tok) : continue) | hasNoMatches more =
  case fromMap <$> Map.lookup ix ixMap <|> catchall of
    Just receive -> lex' (Lexer (join (receive tok))) continue
    Nothing -> Nothing -- Unexpected token
lex' (Lexer (ParseStep shallow mr)) backtrack@((!ix, tok) : continue) =
  scanning Nothing shallow
  where
  -- Apply *all* of the backtracking and take the longest result, before the
  -- next bind. Then commit to it and run whichever bind continuation was chosen.
  scanning ::
    Maybe (Int, (ParseMachine ix tok ([out], r), [(ix, tok)])) ->
    ParseMatchTrack ix tok ([out], r) -> Maybe (([out], r), [(ix, tok)])
  scanning !maybeLongest (MatchTrack (WithCatchall (Coyoneda fromMap ixMap) catchall) more) =
    case fromMap <$> Map.lookup ix ixMap <|> catchall of
      -- `withDepth` measures how many tokens this alternative consumed
      Just receive | Just ((!parsedDepth, committed), remaining) <- parse' (withDepth (receive tok)) continue ->
        scanning (longer maybeLongest (parsedDepth, (committed, remaining))) more
      _ -> scanning maybeLongest more
  -- Nothing matched at all: fall back to the pure result, if any
  scanning Nothing NoMatch = (,backtrack) <$> mr
  -- Commit to the longest match and continue lexing from its remainder
  scanning (Just (_parsedDepth, (committed, remaining))) NoMatch =
    lex' (Lexer committed) remaining
  -- Keep whichever candidate consumed more; earlier alternatives win ties
  longer :: forall d. Maybe (Int, d) -> (Int, d) -> Maybe (Int, d)
  longer Nothing d = Just d
  longer (Just (l1, d1)) (l2, d2) =
    if l1 >= l2 then Just (l1, d1) else Just (l2, d2)
-- | Run the lexer and require that it consumed the whole input.
lex :: forall ix tok out r. Ord ix => Lexer ix tok out r -> [(ix, tok)] -> Maybe ([out], r)
lex lexer input =
  case lex' lexer input of
    Just (result, leftover) | null leftover -> Just result
    _ -> Nothing
-- | Two-stage pipeline: lex the raw `tok1` stream into `tok2` tokens, then
-- feed those to the parser that the lexer's result selects.
lexAndParse ::
  forall ix1 ix2 tok1 tok2 r.
  Ord ix1 => Ord ix2 =>
  (tok1 -> ix1, tok2 -> ix2) -> Lexer ix1 tok1 tok2 (ParseMachine ix2 tok2 r) -> [tok1] -> Maybe r
lexAndParse (classify1, classify2) lexer input =
  lex lexer (classified classify1 input)
    >>= \(tokens, parser) -> parse parser (classified classify2 tokens)
--------------------------------------------------------------------------------
-- Extra helpers --
--------------------------------------------------------------------------------
-- Map every index of `ParseMachine` (destroys sharing)
fullMap ::
  Ord ix' => (ix -> ix') -> (tok' -> tok) -> (i -> o) ->
  ParseMachine ix tok i -> ParseMachine ix' tok' o
fullMap f g h (ParseStep possibilities final) =
  ParseStep
    (fmap mapContinuation . mapCoyoneda <$> possibilities)
    (h <$> final)
  where
  -- Re-key the `Map` (the `Coyoneda` payload function is untouched)
  mapCoyoneda (WithCatchall (Coyoneda x y) z) =
    WithCatchall (Coyoneda x (Map.mapKeys f y)) z
  -- Contravariant in `tok` (pre-compose `g`), and `fullMap` must recurse
  -- into *both* stages of the staged continuation
  mapContinuation z = fullMap f g (fullMap f g h) . z . g
-- | Rebuild the machine without changing any behavior, destroying sharing.
noSharing :: Ord ix => ParseMachine ix tok r -> ParseMachine ix tok r
noSharing machine = fullMap id id id machine
-- | True when no token could be accepted anywhere along the track: every
-- entry has an empty map and no catchall.
hasNoMatches :: MatchTrack (WithCatchall (Coyoneda (Map.Map k)) a) -> Bool
hasNoMatches NoMatch = True
hasNoMatches (MatchTrack (WithCatchall (Coyoneda _ table) fallback) rest) =
  Map.null table && isNothing fallback && hasNoMatches rest
--------------------------------------------------------------------------------
-- Boring helper types --
--------------------------------------------------------------------------------
-- | A functorial container paired with an optional catchall value, used when
-- nothing in the container matches.
data WithCatchall f a = WithCatchall !(f a) (Maybe a)
  deriving (Functor)
instance (Monoid (f a)) => Monoid (WithCatchall f a) where
  mempty = WithCatchall mempty Nothing
-- Merge the containers; the leftmost catchall, if any, wins
instance (Semigroup (f a)) => Semigroup (WithCatchall f a) where
  WithCatchall l cl <> WithCatchall r cr =
    -- `<|>` on `Maybe` is exactly the left-biased choice of catchalls
    WithCatchall (l <> r) (cl <|> cr)
-- | The co-Yoneda trick: a container `f b` stored with a pending mapping
-- `b -> a`, so `fmap` only composes functions instead of traversing `f`.
data Coyoneda f a = forall b. Coyoneda (b -> a) !(f b)
instance Functor (Coyoneda f) where
  fmap f (Coyoneda g v) = Coyoneda (f . g) v
-- Note: this means that applicatives really need to be finite, to fit in
-- finite space! because we can share the `Map`s across several invocations of
-- `fmap` via `Coyoneda`, but we cannot share them across infinite chained
-- `try <|>`. See this blog post for an explanation of how that would allow some
-- context sensitive languages to be parsed by otherwise applicative parsers:
-- https://byorgey.wordpress.com/2012/01/05/parsing-context-sensitive-languages-with-applicative/
instance Ord ix => Semigroup (Coyoneda (Map.Map ix) v) where
  -- Short-circuit on an empty side to preserve sharing of the other side
  Coyoneda _ l <> r | Map.null l = r
  l <> Coyoneda _ r | Map.null r = l
  -- Tag each entry with its origin so `either` dispatches back to the
  -- correct pending mapping; `Map.union` stays left-biased
  Coyoneda f l <> Coyoneda g r = Coyoneda (either f g)
    ((Left <$> l) `Map.union` (Right <$> r))
--------------------------------------------------------------------------------
-- EXAMPLES: JSON --
--------------------------------------------------------------------------------
-- The JSON lexer reads raw characters and emits `JTok`s
type JSONLexer = Lexer Char Char JTok
-- Internal lexer fragments: a character parser that also carries `JTok` output
type JSONL r = ParseMachine Char Char ([JTok], r)
-- The JSON parser consumes `JTok`s classified by `JCls`
type JSONParser = ParseMachine JCls JTok
-- Lexer output tokens
data JTok = TLBrace | TRBrace | TLBracket | TRBracket | TComma | TColon | TTrue | TFalse | TNull | TString String | TNumber !Double
  deriving (Eq, Ord, Generic, Show)
-- Token classifications: `JTok` with the payloads erased
data JCls = LBrace | RBrace | LBracket | RBracket | Comma | Colon | CTrue | CFalse | Null | String | Number
  deriving (Eq, Ord, Generic, Show)
-- The JSON syntax tree
data JSON = JString String | JNumber !Double | JArray [JSON] | JObject [(String, JSON)] | JBool Bool | JNull
  deriving (Eq, Ord, Generic, Show)
-- | End-to-end JSON parsing: lex the characters into `JTok`s, then run the
-- JSON parser over the token stream.
parseJSON :: String -> Maybe JSON
parseJSON = lexAndParse (id, jsonClassify) (fmap (const jsonParser) jsonLexer)
-- I'm disappointed this isn't turned into a tag coercion...
jsonClassify :: JTok -> JCls
jsonClassify tok = case tok of
  TLBrace -> LBrace
  TRBrace -> RBrace
  TLBracket -> LBracket
  TRBracket -> RBracket
  TComma -> Comma
  TColon -> Colon
  TTrue -> CTrue
  TFalse -> CFalse
  TNull -> Null
  TString _ -> String
  TNumber _ -> Number
-- The JSON grammar: try each production in order
jsonParser :: JSONParser JSON
jsonParser = asum
  [ token String & mapMaybe \case { TString v -> Just (JString v); _ -> Nothing }
  , token Number & mapMaybe \case { TNumber v -> Just (JNumber v); _ -> Nothing }
  , JObject <$> do token LBrace *> separated member <* token RBrace
  , JArray <$> do token LBracket *> separated jsonParser <* token RBracket
  , JBool <$> (True <$ token CTrue <|> False <$ token CFalse)
  , JNull <$ token Null
  ]
  where
  comma = token Comma
  -- Zero or more `p`, comma-separated (no trailing comma)
  separated :: forall r. JSONParser r -> JSONParser [r]
  separated p = liftA2 (:) p (many (comma *> p)) <|> pure []
  -- One object member: string key, colon, value
  member = (\k _ v -> (k, v))
    <$> do token String & mapMaybe \case { TString v -> Just v; _ -> Nothing }
    <*> do token Colon
    <*> do jsonParser
-- The JSON lexer: whitespace, then whitespace-separated tokens
jsonLexer :: JSONLexer ()
jsonLexer =
  Lexer $ ws *> do
    mconcat <$> many (jsonTok <* ws)
  where
  -- Skip a run of JSON whitespace, emitting no tokens
  ws :: JSONL ()
  ws = ([],) <$> void do many (token ' ' <|> token '\n' <|> token '\r' <|> token '\t')
  jsonTok = symbols <|> datas
  -- Single-character lookup table; the first matching entry wins
  tokMap :: forall ix tok r. Ord ix => [(ix, r)] -> ParseMachine ix tok r
  tokMap = foldr (\(chr, val) more -> val <$ token chr <|> more) empty
  -- A keyword is a series of tokens
  keyword :: String -> JTok -> JSONL ()
  keyword [] val = pure ([val], ())
  keyword (c : cs) val = token c *> keyword cs val
  symbols :: JSONL ()
  symbols = foldr (\(str, val) more -> keyword str val <|> more) empty symbolTable
  symbolTable =
    [ ("{", TLBrace)
    , ("}", TRBrace)
    , ("[", TLBracket)
    , ("]", TRBracket)
    , (",", TComma)
    , (":", TColon)
    , ("true", TTrue)
    , ("false", TFalse)
    , ("null", TNull)
    ]
  datas = string <|> number
  -- A string literal, with escape sequences
  string :: JSONL ()
  string = (\chars -> ([TString chars], ())) <$> do
    token '"' *> charList
  -- TODO: fix the fact that `token '"'` needs to be in front?
  charList = [] <$ token '"' <|> (:) <$> char <*> charList
  char = (token '\\' *> escape)
    <|> satisfies (not . isControl)
  -- After a backslash: a named escape or a `\uXXXX` code point
  escape = escapes <|> token 'u' *> do
    (\m n o p -> toEnum (base 16 [m,n,o,p]))
      <$> hex <*> hex <*> hex <*> hex
  escapes = tokMap
    [ ('"', '"')
    , ('\\', '\\')
    , ('/', '/')
    , ('b', '\b')
    , ('f', '\f')
    , ('n', '\n')
    , ('r', '\r')
    , ('t', '\t')
    ]
  -- Combine digits in the given base, most significant digit first
  base :: Int -> [Int] -> Int
  base b = fst . foldr (\d (!acc, !pl) -> (d*pl + acc, pl*b)) (0, 1)
  hex = tokMap $ zip ['0'..'9'] [0..9] <> zip ['a'..'f'] [10..15] <> zip ['A'..'F'] [10..15]
  digit = tokMap $ (,()) <$> ['0'..'9']
  -- Match the JSON number grammar, then `read` the matched characters as a
  -- `Double` via `readSource`
  number = (\num -> ([TNumber num], ())) <$> readSource do
    let t = void . token
    let opt p = p <|> pure ()
    t '-' <|> pure ()
    t '0' <|> void (some digit)
    opt (t '.' <* some digit)
    opt do (t 'e' <|> t 'E') <* opt (t '+' <|> t '-') <* some digit
-- | Exercise the lexer and parser on a handful of sample documents, printing
-- each input, its token stream, and its parsed value.
main :: IO ()
main = do
  let samples =
        [ "null"
        , "{}"
        , "[true]"
        , "\"\""
        , "\"x\""
        , "\"xy\""
        , "\"\\\"\""
        , "\"a\\u0304\""
        , "\"string\""
        , "0"
        , "[0,1,2]"
        , "[0 ,1, 2]"
        , "45"
        , " 45.2 "
        , "-0.34e12"
        , "{\"x\":true}"
        ]
  for_ samples $ \s -> do
    print s
    -- Characters classify as themselves for the lexer
    print (lex jsonLexer (map (\c -> (c, c)) s))
    print (parseJSON s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment