Last active
April 28, 2025 15:30
-
-
Save MikuroXina/8c5977fca082fb745396e78f3bb3609b to your computer and use it in GitHub Desktop.
An implementation of parser combinator with Haskell.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{-# LANGUAGE LambdaCase #-} | |
{-# OPTIONS_GHC -Wno-name-shadowing #-} | |
module Lib ( | |
(+++), | |
chainL, | |
chainR, | |
char, | |
delimited, | |
integer, | |
item, | |
many0, | |
many1, | |
opt, | |
parse, | |
Parser, | |
preceded, | |
sat, | |
separatedBy0, | |
separatedBy1, | |
separatedPair, | |
spaces0, | |
spaces1, | |
string, | |
tag, | |
terminated, | |
trimEnd, | |
trimStart, | |
) where | |
import Control.Applicative | |
import qualified Data.Bifunctor | |
import Data.Char | |
newtype Parser a = Parser (String -> [(a, String)]) | |
parse :: Parser a -> String -> [(a, String)] | |
parse (Parser f) = f | |
instance Functor Parser where | |
fmap f p = Parser (map (Data.Bifunctor.first f) . parse p) | |
instance Applicative Parser where | |
pure a = Parser (\cs -> [(a, cs)]) | |
f <*> p = | |
Parser | |
( \cs -> | |
concat [parse (fmap fn p) cs' | (fn, cs') <- parse f cs] | |
) | |
instance Monad Parser where | |
p >>= f = | |
Parser | |
( \cs -> | |
concat [parse (f a) cs' | (a, cs') <- parse p cs] | |
) | |
instance Alternative Parser where | |
empty = Parser (const []) | |
p <|> q = Parser (\cs -> parse p cs <|> parse q cs) | |
-- | Uses the first if it matches, otherwise uses the second. | |
(+++) :: Parser a -> Parser a -> Parser a | |
p +++ q = | |
Parser | |
( \cs -> case parse (p <|> q) cs of | |
[] -> [] | |
(x : _) -> [x] | |
) | |
-- | Matches the first character or fails if it is empty. | |
item :: Parser Char | |
item = | |
Parser | |
( \case | |
"" -> [] | |
(c : cs) -> [(c, cs)] | |
) | |
-- | Matches a character if it satisfies the predicate. | |
sat :: (Char -> Bool) -> Parser Char | |
sat p = do | |
c <- item | |
if p c | |
then | |
return c | |
else | |
empty | |
-- | Matches a matching character. | |
char :: Char -> Parser Char | |
char c = sat (c ==) | |
-- | Matches a matching string. | |
string :: String -> Parser String | |
string "" = return "" | |
string (c : cs) = do | |
_ <- char c | |
_ <- string cs | |
return (c : cs) | |
-- | Matches by the parser but returns `Nothing` when it failed. | |
opt :: Parser a -> Parser (Maybe a) | |
opt p = (Just <$> p) +++ return Nothing | |
-- | Matches by the preceding first and discards it, then applies the second. | |
preceded :: Parser b -> Parser a -> Parser a | |
preceded pre p = do | |
_ <- pre | |
p | |
-- | Matches the first only if the terminating second matched. | |
terminated :: Parser a -> Parser b -> Parser a | |
terminated p post = do | |
ret <- p | |
_ <- post | |
return ret | |
-- | Matches the second parser only if the enclosing first and third matched. | |
delimited :: Parser b -> Parser a -> Parser c -> Parser a | |
delimited pre p post = preceded pre $ terminated p post | |
-- | Matches by the first and third parsers separated by the second. | |
separatedPair :: Parser a -> Parser c -> Parser b -> Parser (a, b) | |
separatedPair lhs sep rhs = do | |
left <- lhs | |
_ <- sep | |
right <- rhs | |
return (left, right) | |
-- | Matches repeated items. | |
many0 :: Parser a -> Parser [a] | |
many0 p = many1 p +++ return [] | |
-- | Matches repeated items at least one. | |
many1 :: Parser a -> Parser [a] | |
many1 p = do | |
x <- p | |
xs <- many p | |
return (x : xs) | |
-- | Matches items separated by matching character of `sep`. | |
separatedBy0 :: Parser a -> Parser b -> Parser [a] | |
p `separatedBy0` sep = (p `separatedBy1` sep) +++ return [] | |
-- | Matches items separated by matching character of `sep` at least once. | |
separatedBy1 :: Parser a -> Parser b -> Parser [a] | |
p `separatedBy1` sep = do | |
x <- p | |
xs <- many $ do | |
_ <- sep | |
p | |
return (x : xs) | |
-- | Matches left associated operator. | |
chainL :: Parser a -> Parser (a -> a -> a) -> Parser a | |
chainL p op = do | |
x <- p | |
rest x | |
where | |
rest x = | |
( do | |
f <- op | |
y <- p | |
rest $ f x y | |
) | |
+++ return x | |
-- | Matches right associated operator. | |
chainR :: Parser a -> Parser (a -> a -> a) -> Parser a | |
chainR p op = | |
( do | |
x <- p | |
f <- op | |
y <- chainR p op | |
return $ f x y | |
) | |
+++ p | |
-- | Matches whitespace characters. | |
spaces0 :: Parser String | |
spaces0 = many0 $ sat isSpace | |
-- | Matches whitespace characters at least one. | |
spaces1 :: Parser String | |
spaces1 = many1 $ sat isSpace | |
-- | Matches a symbolic token. | |
tag :: String -> Parser String | |
tag = trimEnd . string | |
-- | Drops leading whitespace characters. | |
trimStart :: Parser a -> String -> [(a, String)] | |
trimStart p = parse $ do | |
_ <- spaces0 | |
p | |
-- | Drops trailing whitespace characters. | |
trimEnd :: Parser a -> Parser a | |
trimEnd p = do | |
ret <- p | |
_ <- spaces0 | |
return ret | |
-- | Matches decimal digits and parse into and integer. | |
integer :: Parser Int | |
integer = do | |
digitStr <- trimEnd $ many1 $ sat isDigit | |
return $ convert $ reverse digitStr | |
where | |
convert :: String -> Int | |
convert [] = 0 | |
convert (x : xs) = | |
(ord x - ord '0') + 10 * convert xs |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Main (main) where | |
import Lib | |
expr :: Parser Int | |
expr = term `chainL` addOp | |
term :: Parser Int | |
term = factor `chainL` mulOp | |
factor :: Parser Int | |
factor = integer +++ delimited (tag "(") expr (tag ")") | |
addOp :: Parser (Int -> Int -> Int) | |
addOp = ((+) <$ tag "+") +++ ((-) <$ tag "-") | |
mulOp :: Parser (Int -> Int -> Int) | |
mulOp = ((*) <$ tag "*") +++ (div <$ tag "/") | |
main :: IO () | |
main = print $ parse expr "4 * (11 + 2) - 14 / 7" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment