Skip to content

Instantly share code, notes, and snippets.

@MikuroXina
Last active April 28, 2025 15:30
Show Gist options
  • Save MikuroXina/8c5977fca082fb745396e78f3bb3609b to your computer and use it in GitHub Desktop.
Save MikuroXina/8c5977fca082fb745396e78f3bb3609b to your computer and use it in GitHub Desktop.
An implementation of parser combinator with Haskell.
{-# LANGUAGE LambdaCase #-}
{-# OPTIONS_GHC -Wno-name-shadowing #-}
module Lib (
(+++),
chainL,
chainR,
char,
delimited,
integer,
item,
many0,
many1,
opt,
parse,
Parser,
preceded,
sat,
separatedBy0,
separatedBy1,
separatedPair,
spaces0,
spaces1,
string,
tag,
terminated,
trimEnd,
trimStart,
) where
import Control.Applicative
import qualified Data.Bifunctor
import Data.Char
newtype Parser a = Parser (String -> [(a, String)])
parse :: Parser a -> String -> [(a, String)]
parse (Parser f) = f
instance Functor Parser where
fmap f p = Parser (map (Data.Bifunctor.first f) . parse p)
instance Applicative Parser where
pure a = Parser (\cs -> [(a, cs)])
f <*> p =
Parser
( \cs ->
concat [parse (fmap fn p) cs' | (fn, cs') <- parse f cs]
)
instance Monad Parser where
p >>= f =
Parser
( \cs ->
concat [parse (f a) cs' | (a, cs') <- parse p cs]
)
instance Alternative Parser where
empty = Parser (const [])
p <|> q = Parser (\cs -> parse p cs <|> parse q cs)
-- | Uses the first if it matches, otherwise uses the second.
(+++) :: Parser a -> Parser a -> Parser a
p +++ q =
Parser
( \cs -> case parse (p <|> q) cs of
[] -> []
(x : _) -> [x]
)
-- | Matches the first character or fails if it is empty.
item :: Parser Char
item =
Parser
( \case
"" -> []
(c : cs) -> [(c, cs)]
)
-- | Matches a character if it satisfies the predicate.
sat :: (Char -> Bool) -> Parser Char
sat p = do
c <- item
if p c
then
return c
else
empty
-- | Matches a matching character.
char :: Char -> Parser Char
char c = sat (c ==)
-- | Matches a matching string.
string :: String -> Parser String
string "" = return ""
string (c : cs) = do
_ <- char c
_ <- string cs
return (c : cs)
-- | Matches by the parser but returns `Nothing` when it failed.
opt :: Parser a -> Parser (Maybe a)
opt p = (Just <$> p) +++ return Nothing
-- | Matches by the preceding first and discards it, then applies the second.
preceded :: Parser b -> Parser a -> Parser a
preceded pre p = do
_ <- pre
p
-- | Matches the first only if the terminating second matched.
terminated :: Parser a -> Parser b -> Parser a
terminated p post = do
ret <- p
_ <- post
return ret
-- | Matches the second parser only if the enclosing first and third matched.
delimited :: Parser b -> Parser a -> Parser c -> Parser a
delimited pre p post = preceded pre $ terminated p post
-- | Matches by the first and third parsers separated by the second.
separatedPair :: Parser a -> Parser c -> Parser b -> Parser (a, b)
separatedPair lhs sep rhs = do
left <- lhs
_ <- sep
right <- rhs
return (left, right)
-- | Matches repeated items.
many0 :: Parser a -> Parser [a]
many0 p = many1 p +++ return []
-- | Matches repeated items at least one.
many1 :: Parser a -> Parser [a]
many1 p = do
x <- p
xs <- many p
return (x : xs)
-- | Matches items separated by matching character of `sep`.
separatedBy0 :: Parser a -> Parser b -> Parser [a]
p `separatedBy0` sep = (p `separatedBy1` sep) +++ return []
-- | Matches items separated by matching character of `sep` at least once.
separatedBy1 :: Parser a -> Parser b -> Parser [a]
p `separatedBy1` sep = do
x <- p
xs <- many $ do
_ <- sep
p
return (x : xs)
-- | Matches left associated operator.
chainL :: Parser a -> Parser (a -> a -> a) -> Parser a
chainL p op = do
x <- p
rest x
where
rest x =
( do
f <- op
y <- p
rest $ f x y
)
+++ return x
-- | Matches right associated operator.
chainR :: Parser a -> Parser (a -> a -> a) -> Parser a
chainR p op =
( do
x <- p
f <- op
y <- chainR p op
return $ f x y
)
+++ p
-- | Matches whitespace characters.
spaces0 :: Parser String
spaces0 = many0 $ sat isSpace
-- | Matches whitespace characters at least one.
spaces1 :: Parser String
spaces1 = many1 $ sat isSpace
-- | Matches a symbolic token.
tag :: String -> Parser String
tag = trimEnd . string
-- | Drops leading whitespace characters.
trimStart :: Parser a -> String -> [(a, String)]
trimStart p = parse $ do
_ <- spaces0
p
-- | Drops trailing whitespace characters.
trimEnd :: Parser a -> Parser a
trimEnd p = do
ret <- p
_ <- spaces0
return ret
-- | Matches decimal digits and parse into and integer.
integer :: Parser Int
integer = do
digitStr <- trimEnd $ many1 $ sat isDigit
return $ convert $ reverse digitStr
where
convert :: String -> Int
convert [] = 0
convert (x : xs) =
(ord x - ord '0') + 10 * convert xs
module Main (main) where
import Lib
expr :: Parser Int
expr = term `chainL` addOp
term :: Parser Int
term = factor `chainL` mulOp
factor :: Parser Int
factor = integer +++ delimited (tag "(") expr (tag ")")
addOp :: Parser (Int -> Int -> Int)
addOp = ((+) <$ tag "+") +++ ((-) <$ tag "-")
mulOp :: Parser (Int -> Int -> Int)
mulOp = ((*) <$ tag "*") +++ (div <$ tag "/")
main :: IO ()
main = print $ parse expr "4 * (11 + 2) - 14 / 7"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment