Last active
August 29, 2015 13:57
-
-
Save mmhelloworld/9613242 to your computer and use it in GitHub Desktop.
Test ":load" command on REPL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- Small parser, inspired by Parsec, but much less versatile | |
module testfrege.data.NanoParsec where | |
import Data.List | |
import Prelude.PreludeBase (StringJ) | |
import Data.Monoid | |
import Test.QuickCheck as Q() | |
{-- | |
A stream of tokens. | |
-} | |
class Stream s where | |
{-- split the input stream in head and tail -} | |
uncons :: s a -> Maybe (a, s a) | |
--- give the length of the stream | |
slength :: s a -> Int | |
--- lazily convert to list | |
slist :: s a -> [a] | |
slist s | |
| Just (a, b) <- uncons s = a : slist b | |
| otherwise = [] | |
--- check if empty | |
snull :: s a -> Bool | |
snull = maybe true (const false) . uncons | |
instance Stream [] where | |
uncons [] = Nothing | |
uncons (h:t) = Just (h, t) | |
slength = length | |
instance Stream StringJ where | |
uncons x | |
| null x = Nothing | |
| otherwise = Just (x.polymorphicElemAt 0, strtail x 1) | |
slength = length | |
instance Stream StringIterator | |
abstract data StringIterator a = SI { !string :: StringJ a, !at :: Int, !limit :: Int } | |
where | |
--- make iterator from 'String' | |
from str = SI str 0 str.length | |
--- length | |
slength :: StringIterator a -> Int | |
slength s = s.limit - s.at | |
--- is it null? | |
snull SI{at, limit} = at >= limit | |
--- deconstruct | |
uncons SI{string,at,limit} | |
| at >= limit = Nothing | |
| otherwise = case (string.polymorphicElemAt at; SI string (at+1) limit) of | |
!result -> Just result | |
instance Stream ArrayIterator | |
abstract data ArrayIterator a = AI { !array :: JArray a, !at :: Int, !limit :: Int } | |
where | |
--- make iterator from 'JArray' | |
from str = AI str 0 str.length | |
--- length | |
slength :: ArrayIterator a -> Int | |
slength s = s.limit - s.at | |
--- is it null? | |
snull AI{at, limit} = at >= limit | |
--- deconstruct | |
uncons AI{array,at,limit} | |
| at >= limit = Nothing | |
| otherwise = case (JArray.elemAt array at; AI array (at+1) limit) of | |
!result -> Just result | |
{-- | |
Parser type | |
-} | |
data Parser s t r = Parser { | |
run :: s t -> Either (s t, String) (s t, r) | |
} where | |
--- fail with given error message | |
failure s = Parser (\ts -> Left (ts, s)) | |
instance MonadAlt Parser s t where | |
--- generic failure | |
pzero = Parser.failure "parse failed" | |
--- generic success | |
return a = Parser $ (\ts -> Right (ts, a)) | |
{-- | |
> p >>= (r -> q) | |
> p >> q | |
If @p@ succeeds, the overall result is that of @q@ | |
Otherwise, the overall result is failure. | |
Could be read as _p followed by q_. | |
-} | |
Parser p >>= f = Parser next | |
where | |
next i = case p i of | |
Right (i', r) = case f r of | |
Parser q = q i' | |
Left err = Left err | |
{-- | |
> p <|> q | |
The result is that of the first parser, if it succeeds, | |
otherwise that of the second one. Note that @q@ is | |
run on the same input as @p@, even if @p@ already consumed something. | |
> expect 'c' <|> expect 'd' | |
would succeed if the input starts with 'c' or 'd'. | |
-} | |
Parser p <|> Parser q = Parser plus | |
where | |
plus i = case p i of | |
Left _ = q i | |
right = right | |
{-- | |
> p <+> q | |
The result is that of the first parser, if it succeeds, | |
otherwise that of the second one, who is applied to the | |
input left by _p_ | |
-} | |
Parser p <+> Parser q = Parser plus | |
where | |
plus i = case p i of | |
Left (rest, _) = q rest | |
right = right | |
--- run a 'Parser' on some input and report | |
parseTest Parser{run} s = case run s of | |
Left xs = do | |
print "Parse failed: " | |
println (reporterror xs) | |
Right (rest, result) = do | |
println "Parse succeeded!" | |
println result | |
unless (snull rest) do | |
println (reporterror (rest, "Warning! Tokens left")) | |
-- ---------------------- error reporting ------------------------- | |
reporterror (ts,msg) = msg ++ ", found: " | |
++ fold (++) "" (map display (take 8 $ slist ts)) | |
{-- | |
Causes the error message to be _msg_ when _p_ fails. | |
The error will be reported at the position where _p_ was attempted, | |
not necessarily at the position where _p_ failed: | |
> Parser.run (letter >> digit) "a?" | |
> unexpected token, found ? | |
> Parser.run (label "id expected" $ letter >> digit) "a?" | |
> id expected, found a? | |
-} | |
label msg p = p <|> Parser.failure msg | |
infix 14 `<?>` | |
{-- | |
> p <?> msg | |
Causes the error message to be _msg_ when _p_ fails. | |
The error will be reported at the position where _p_ failed. | |
-} | |
p <?> msg = p <+> Parser.failure msg | |
-- ---------------------- simple parsers -------------------------- | |
{-- | |
> expect t | |
This parser succeeds if the input is not empty and the head | |
of the input equals _t_. | |
-} | |
expect c = Parser exp | |
where | |
exp ts = case uncons ts of | |
Just (h, t) | |
| h == c = Right (t, h) | |
nothing = Left (ts, (show c) ++ " expected") | |
--- The 'eos' parser suceeds if the input is empty. | |
eos = Parser noinput | |
where | |
noinput ts = case uncons ts of | |
Just _ = Left (ts, "end of input expected") | |
sonst = Right (ts, ()) | |
--- > satisfy property | |
--- Suceeds if there is a next token _t_ and _property_ _t_ is true. | |
satisfy p = Parser sat | |
where | |
sat ts = case uncons ts of | |
Just (h, t) | p h = Right (t, h) | |
nothing = Left (ts, "unexpected token") | |
--- > pany | |
--- Fails if and only if 'eos' succeeds, otherwise returns the next token. | |
pany = satisfy (const true) | |
--- > symbol p | |
--- Is the same parser as _p_, but skips spaces afterwards | |
symbol p = p <* spaces | |
-- ------------------------------------------- character classification | |
space = satisfy (Char.isWhitespace :: Char -> Bool) | |
digit = satisfy (Char.isDigit :: Char -> Bool) | |
letter = satisfy (Char.isLetter :: Char -> Bool) | |
uppercase = satisfy (Char.isUpperCase :: Char -> Bool) | |
spaces = skip space | |
-- ---------------------------------------- special parsers for strings | |
string s = Parser it | |
where | |
it xs = if xs.startsWith s | |
then Right (strtail xs (length s), s) | |
else Left (xs, "expected '" ++ display s ++ "'") | |
--- This parser succeeds if the pattern matches the beginning of the string. | |
--- For efficiency reasons, the pattern should start with @^@ | |
match r = Parser it | |
where | |
it xs = case m.find of | |
Just y | xs.startsWith y.match -> Right (strtail xs (y.match.length), y) | |
nothing -> Left (xs, "expected to match ´" ++ display r ++ "´") | |
where m = Regex.matcher r xs | |
-- -------------------------------------------------- common tokens | |
ident = symbol . label "identifier expected" $ do | |
l <- letter | |
fmap (l:) (many (letter <|> digit)) | |
-- -------------------------------------------------- parser repetition | |
--- The optional parser always succeeds and returns its result wrapped in 'Maybe' | |
optional :: Parser s t r -> Parser s t (Maybe r) | |
optional p = p >>= return . Just <|> return Nothing | |
--- > many p | |
--- Collects as many _p_ as possible and returns the results in a list. | |
--- Suceeds also when _p_ fails the first time, in that case the result is an empty list. | |
--- Must not be applied to a parser that always succeeds! | |
many p = do | |
a <- optional p | |
case a of | |
Nothing -> return [] | |
Just a -> fmap (a:) (many p) | |
--- > some p | |
--- Like 'many', except there must be at least one _p_ | |
--- Must not be applied to a parser that always succeeds! | |
some p = p >>= (\x -> fmap (x:) (many p)) | |
many1 = some | |
--- > skip p | |
--- equivalent to | |
--- > many p >> return () | |
--- but faster, because it does not build up lists. | |
--- Must not be applied to a parser that always succeeds! | |
skip p = forever (p <|> pzero) <+> return () | |
--- > manyWhile p | |
--- Collect tokens as long as they satisfy _p_ | |
manyWhile p = many (satisfy p) | |
--- > skipWhile p | |
--- Skip tokens as long as they staisfy p | |
skipWhile p = skip (satisfy p) | |
-- ------------------------------------------------ parser combinators | |
look (Parser prefix) (Parser p1) (Parser p2) = Parser it | |
where | |
it inp = case prefix inp of | |
Right _ -> p1 inp | |
Left _ -> p2 inp | |
select :: [(Parser s t b, Parser s t a)] -> Parser s t a -> Parser s t a | |
select xs y = foldr (\(p1,p2)\end -> look p1 p2 end) y xs | |
--- > choice ps | |
--- Tries the parsers in the list from left to right, until success. | |
choice = fold (<|>) pzero | |
--- > count n p | |
--- Applies _p_ _n_ times and returns a list of the results | |
count n p = replicateM n p | |
--- > between left right p | |
--- Parses _left_, then _p_ and finally _right_ and returns the result of _p_ | |
between left right p = left *> (p <* right) | |
--- > option v p | |
--- Applies _p_, and returns _v_ when it fails. | |
--- Always succeeds. | |
option v p = p <|> Parser.return v | |
--- > p `sepBy1` q | |
--- Parses p and many q followed by p | |
sepBy1 p q = p >>= (\r -> fmap (r:) (many (q *> p))) | |
--- > p `sepBy` q | |
--- Like 'sepBy1', but allows zero _p_ elements | |
sepBy p q = p `sepBy1` q <|> Parser.return [] | |
--- > p `endBy` q | |
--- Parses zero or more occurrences of _p_ separated and ended by _q_ | |
endBy p q = many (p <* q) | |
--- > p `endBy1` q | |
--- Parses one or more occurrences of _p_ separated and ended by _q_ | |
endBy1 p q = some (p <* q) | |
--- > p `sepEndBy` q | |
--- Parses zero or more occurences of _p_, separated and optionally ended by _q_ | |
sepEndBy p q = p `sepBy` q <* optional q | |
--- > p `sepEndBy1` q | |
--- Parses one or more occurences of _p_, separated and optionally ended by _q_ | |
sepEndBy1 p q = p `sepBy1` q <* optional q | |
-- ------------------------------------------------ Parser properties | |
ascii = fmap chr (Q.choose (ord ' ', 126)) | |
inputs = Q.listOf ascii | |
parsers = Q.elements [ | |
("match no char", satisfy (const false)), -- fail | |
("match any char", pany), | |
("match even char", satisfy (even . ord)), -- fail 50% | |
("letter", letter), | |
("digit", digit), | |
("space", space), | |
-- ("eos", eos >> return (chr 0)), | |
("letter letter", letter >> letter), | |
("digit letter", digit >> letter), | |
("letter any letter", letter >> pany >> letter) | |
] | |
-- avoid deriving Show for Parser | |
allParsers prop = parsers >>= (\(s,x) -> | |
Q.printTestCase s (prop x)) | |
--- p always suceeds | |
succeeds p = Q.forAll inputs (\xs -> | |
either (const false) (const true) (Parser.run p xs)) | |
--- p always fails | |
fails p = Q.forAll inputs (\xs -> | |
either (const true) (const false) (Parser.run p xs)) | |
--- p and q are the same parsers | |
same p q = Q.forAll inputs (\xs -> | |
Parser.run p xs == Parser.run q xs) | |
--- p succeeds if and only if q succeeds | |
agree p q = Q.forAll inputs (\xs -> | |
case Parser.run p xs of | |
Left _ -> either (const true) (const false) (Parser.run q xs) | |
Right _ -> either (const false) (const true) (Parser.run q xs) | |
) | |
--- p succeeds if and only if q fails | |
disagree p q = Q.forAll inputs (\xs -> | |
case Parser.run p xs of | |
Right _ -> either (const true) (const false) (Parser.run q xs) | |
Left _ -> either (const false) (const true) (Parser.run q xs) | |
) | |
--- p and q consume the same input | |
consumeTheSame p q = Q.forAll inputs (\xs -> | |
fmap fst (Parser.run p xs) | |
== fmap fst (Parser.run q xs)) | |
--- p consumes no input on failure | |
consumesNotOnFailure p = Q.forAll inputs (\xs -> | |
case Parser.run p xs of | |
Left (ts, _) -> Q.label "parser failed" (ts == xs) | |
Right _ -> Q.label "parser success" true | |
) | |
--- 'return' _a_ always succeeds | |
prop_return = succeeds (return 'a') | |
--- 'pzero' fails | |
prop_pzero_1 = fails pzero | |
--- 'pzero' consumes not | |
prop_pzero_2 = consumesNotOnFailure pzero | |
--- 'Parser.failure' fails | |
prop_failure = fails (Parser.failure "x") | |
--- @any@ and @eos@ disagree | |
prop_any_eos = pany `disagree` eos | |
--- 'optional' always succeeds | |
prop_optional = allParsers (\p -> succeeds (optional p)) | |
--- 'option' always succeeds | |
prop_option = allParsers (\p -> succeeds (option 'a' p)) | |
--- 'many' always succeeds | |
prop_many = allParsers (\p -> succeeds (many p)) | |
--- 'skip' always succeeds | |
prop_skip = allParsers (\p -> succeeds (skip p)) | |
--- 'many' and 'skip' consume the same amount of tokens | |
prop_skip_many_consume_the_same = allParsers (\p -> | |
skip p `consumeTheSame` many p) | |
--- 'many' and 'skip' agree | |
prop_skip_many_agree = allParsers (\p -> | |
skip p `agree` many p) | |
--- 'skip' is 'many' followed by return () | |
prop_skip_fast_many = allParsers (\p -> | |
skip p `same` (many p >> return ())) | |
--- > p <|> pzero | |
--- consumes nothing on failure of _p_ | |
prop_alt_pzero_no_consume = allParsers (\p -> | |
consumesNotOnFailure (p <|> pzero)) | |
--- @p@ and @p <|> pzero@ agree | |
prop_p_agrees_p_or_pzero = allParsers (\p -> | |
p `agree` (p <|> pzero)) | |
--- @pzero <|> p@ is the same as @p@ | |
prop_p_or_zero_same_p = allParsers (\p -> | |
(pzero <|> p) `same` p) | |
--- @choice []@ is the same as @pzero@ | |
prop_choice_0 = (choice [] `asTypeOf` pany) `same` (pzero) | |
--- @choice [p]@ is the same as @p@ | |
prop_choice_1 = allParsers (\p -> | |
choice [p] `same` p) | |
--- @choice [p,q]@ is the same as @p <|> q@ | |
prop_choice_2 = allParsers (\p -> | |
allParsers (\q -> | |
choice [p,q] `same` (p <|> q))) | |
--- @count 0@ is the same as @return []@ | |
prop_count_0 = allParsers (\p -> | |
count 0 p `same` return []) | |
--- @count 0@ is the same as @p@ | |
prop_count_1 = allParsers (\p -> | |
count 1 p `same` fmap return p) | |
private main = do | |
parseTest (skip $ letter >> satisfy (const true) >> letter) ['d', 'W', '^'] | |
parseTest (many $ letter >> satisfy (const true) >> letter) ['d', 'W', '^'] | |
parseTest (fmap _.match $ match ´foo´) "xfoo" | |
parseTest (fmap _.match $ match ´(?i)(x?)foo´) "XfOo" | |
parseTest (ident `sepBy` expect ',') "a,b,c" | |
parseTest (letter `sepBy` expect ',') "" | |
parseTest (letter `sepBy1` expect ',') "a,,b,c" | |
parseTest (between (expect '[') (expect ']') $ ident `sepEndBy` expect ',') "[,]" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment