Skip to content

Instantly share code, notes, and snippets.

@matthewbauer
Last active July 20, 2024 20:24
Show Gist options
  • Save matthewbauer/6bd1cc57bd32860b36b0e0c949310eb2 to your computer and use it in GitHub Desktop.
Save matthewbauer/6bd1cc57bd32860b36b0e0c949310eb2 to your computer and use it in GitHub Desktop.
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE OverloadedStrings #-}
module Text.Megaparsec.ParseNumWordsEn (parseNumWordsEn, showNumWordsEn, test) where
import Control.Exception hiding (try)
import Control.Monad
import Data.CaseInsensitive (FoldCase)
import Data.Int
import Data.List (foldl')
import Data.Maybe
import Data.String
import Data.Text (Text)
import Data.Void
import Data.Word
import Prelude
import System.Random
import Text.Megaparsec
import Text.Megaparsec.Byte
type MyToken s = (Stream s, FoldCase (Tokens s), IsString (Tokens s))
-- | List of names to use for 10^3n parsing and the value of n.
ions :: [(String, Word8)]
ions = [ ("thousand", 1)
, ("million", 2)
, ("billion", 3)
, ("trillion", 4)
, ("quadrillion", 5)
, ("quintillion", 6)
]
-- | Parser for the names of numbers in English.
-- For example, "five hundred fifty five" would become 555.
--
-- This supports:
--
-- - supports input of Text, ByteString, and String
-- - can parse up to 10^21-1 numbers (one sextillion minus one)
-- - can output any instance of Fractional (which is any rational number)
-- - can also parse phone number style like "nine oh two one oh" or "five five five five five five five"
-- - can parse "negative" prefix
parseNumWordsEn :: (MyToken s, Fractional a) => Parsec Void s a
parseNumWordsEn = do
negative <- fmap isJust <$> optional $ string' "negative"
skipFormatting
number <- allDigits <|> foldl' goion l1000r ions
eof
pure $ if negative then -number else number
where
-- | Skip any formatting like whitespace or dashes or commas.
skipFormatting :: MyToken s => Parsec Void s ()
skipFormatting = skipMany (string " " <|> string "-" <|> string ",") <?> "formatting"
-- | Match 1-9 whole numbers.
posDigits :: MyToken s => Parsec Void s Word8
posDigits = (1 <$ string' "one")
<|> (2 <$ string' "two")
<|> (3 <$ string' "three")
<|> (4 <$ string' "four")
<|> (5 <$ string' "five")
<|> (6 <$ string' "six")
<|> (7 <$ string' "seven")
<|> (8 <$ string' "eight")
<|> (9 <$ string' "nine")
-- | Match zero, including some unusual uses like "oh" and "none".
zero :: MyToken s => Parsec Void s Word8
zero = 0 <$ (string' "zero" <|> string' "oh" <|> string' "none" <|> string' "no" <|> string' "nil" <|> string "nothing") <?> "zero"
-- | Match 0-9 whole numbers.
l10 :: MyToken s => Parsec Void s Word8
l10 = zero <|> posDigits
-- | Match 10-19 whole numbers.
teens :: MyToken s => Parsec Void s Word8
teens = (10 <$ string' "ten")
<|> (11 <$ string' "eleven")
<|> (12 <$ string' "twelve")
<|> (13 <$ string' "thirteen")
<|> (14 <$ string' "fourteen")
<|> (15 <$ string' "fifteen")
<|> (16 <$ string' "sixteen")
<|> (17 <$ string' "seventeen")
<|> (18 <$ string' "eighteen")
<|> (19 <$ string' "nineteen")
-- | Match <20 non-negative whole numbers.
l20 :: MyToken s => Parsec Void s Word8
l20 = teens <|> l10
-- | Match 20-99 prefixes like "twenty".
tys :: MyToken s => Parsec Void s Word8
tys = (20 <$ string' "twenty")
<|> (30 <$ string' "thirty")
<|> (40 <$ string' "forty")
<|> (50 <$ string' "fifty")
<|> (60 <$ string' "sixty")
<|> (70 <$ string' "seventy")
<|> (80 <$ string' "eighty")
<|> (90 <$ string' "ninety")
-- | Match 20-99 whole numbers.
tens :: MyToken s => Parsec Void s Word8
tens = do
prefix <- tys
skipFormatting
mSuffix <- optional posDigits
pure $ prefix + fromMaybe 0 mSuffix
-- | Match <100 non-negative whole numbers.
l100 :: MyToken s => Parsec Void s Word8
l100 = tens <|> l20
-- | Match 100-999 whole numbers.
hundreds :: MyToken s => Parsec Void s Word16
hundreds = do
prefix <- try $ do
prefix <- fromIntegral <$> l100 -- could be l10, but want to support "nineteen hundred twenty two"
skipFormatting
_ <- string' "hundred"
pure prefix
skipFormatting
_ <- optional $ do
_ <- string' "and"
skipFormatting
mSuffix <- optional $ fromIntegral <$> l100
pure $ prefix * 100 + fromMaybe 0 mSuffix
-- | Match decimal place.
fractional :: (MyToken s, Fractional a) => Parsec Void s a
fractional = do
_ <- string' "point"
skipFormatting
xs <- some $ do
x <- fromIntegral <$> l10
skipFormatting
pure x
pure $ foldl' (\a (b, n) -> a + b / (10 ^ n)) 0 (zip xs ([1..] :: [Integer]))
-- | Match <1000 non-negative rational numbers.
l1000r :: (MyToken s, Fractional a) => Parsec Void s a
l1000r = do
number <- fromIntegral <$> hundreds <|> fromIntegral <$> l100
skipFormatting
mFractional <- optional fractional
pure $ number + fromMaybe 0 mFractional
-- | Match >= 10^(3(n-1)) and < 10^3n rational numbers.
l1000nr :: (MyToken s, Fractional a) => Parsec Void s a -> String -> Word8 -> Parsec Void s a
l1000nr pSmaller name n = do
prefix <- try $ do
prefix <- l1000r
skipFormatting
_ <- string' (fromString name)
pure prefix
skipFormatting
_ <- optional $ do
_ <- string' "and"
skipFormatting
mSuffix <- optional $ do
suffix <- pSmaller
skipFormatting
pure suffix
mFractional <- optional fractional
pure $ prefix * 10 ^ (3 * n) + fromMaybe 0 mSuffix + fromMaybe 0 mFractional
-- | Process one iteration of "ion" suffix.
goion a (name, n) = (l1000nr a name n <?> (name <> " cardinal")) <|> a
-- | Parser for all digits words like "four oh five".
allDigits = do
xs <- try $ do
x1 <- l10
skipFormatting
xs <- some $ do -- need at least two l10’s, otherwise we’d take from normal numbers
x <- l10
skipFormatting
pure x
pure $ fromIntegral <$> (x1 : xs)
mFractional <- optional fractional
pure $ foldl' (\a (b, n) -> a + b * (10 ^ n)) 0 (zip (reverse xs) ([0..] :: [Integer])) + fromMaybe 0 mFractional
-- | Convert provided number into English words.
--
-- So 101 becomes "one hundred one".
--
-- This supports:
-- - numbers up to 10^21-1 (one sextillion minus one)
-- - can create input of Text, ByteString, and String
-- - supports negative numbers
showNumWordsEn :: (RealFrac a, Show a, IsString b, Monoid b) => a -> b
showNumWordsEn = go
where
l20 :: IsString a => Word8 -> a
l20 0 = "zero"
l20 1 = "one"
l20 2 = "two"
l20 3 = "three"
l20 4 = "four"
l20 5 = "five"
l20 6 = "six"
l20 7 = "seven"
l20 8 = "eight"
l20 9 = "nine"
l20 10 = "ten"
l20 11 = "eleven"
l20 12 = "twelve"
l20 13 = "thirteen"
l20 14 = "fourteen"
l20 15 = "fifteen"
l20 16 = "sixteen"
l20 17 = "seventeen"
l20 18 = "eighteen"
l20 19 = "nineteen"
l20 n = error $ "l20: unexpected number >=20: " <> show n
fractional :: (RealFrac a, Show a, IsString b, Monoid b) => a -> b
fractional n | n < 0 = error $ "fractional: unexpected negative number: " <> show n
fractional n | n >= 1 = error $ "fractional: unexpected number >=1: " <> show n
fractional n =
let (x, y) = properFraction (n * 10)
in l20 x <> if y == 0 then mempty else " " <> fractional y
tys :: IsString a => Word8 -> a
tys 2 = "twenty"
tys 3 = "thirty"
tys 4 = "forty"
tys 5 = "fifty"
tys 6 = "sixty"
tys 7 = "seventy"
tys 8 = "eighty"
tys 9 = "ninety"
tys x = error $ "tys: unexpected number less than 2 or greater than 9: " <> show x
l100 :: (IsString a, Monoid a) => Word8 -> a
l100 n | n < 20 = l20 n
l100 n | n < 100 =
let (x, y) = n `quotRem` 10
in tys x <> (if y == 0 then mempty else "-" <> l20 y)
l100 x = error $ "l100: unexpected number >= 100: " <> show x
l1000 :: (IsString a, Monoid a) => Word16 -> a
l1000 n | n < 100 = l100 (fromIntegral n)
l1000 n | n < 1000 =
let (x, y) = n `quotRem` 100
in l20 (fromIntegral x) <> " hundred" <> if y == 0 then mempty else " " <> l100 (fromIntegral y)
l1000 x = error $ "l10000: unexpected number >= 1000: " <> show x
l1000n :: (IsString a, Monoid a) => [(String, Word8)] -> Integer -> a
l1000n _ n | n < 0 = error $ "l1000n: unexpected negative number: " <> show n
l1000n _ n | n < 1000 = l1000 (fromIntegral n)
l1000n ((name, e) : xs) n | n < 10 ^ (3 * (e + 1)) =
let (x, y) = n `quotRem` (10 ^ (3 * e))
in if x == 0
then l1000n xs y
else l1000 (fromIntegral x) <> " " <> fromString name <> (if y == 0 then mempty else " and " <> l1000n xs y)
l1000n ((_, e) : _) x = error $ "l1000n: unexpected number >= 10^" <> show (3 * (e + 1)) <> ": " <> show x
l1000n _ x = error $ "l1000n: unexpected number: " <> show x
go n | n < 0 = "negative " <> go (-n)
go n = let (x, y) = properFraction n
in l1000n (reverse ions) x <> (if y == 0 then mempty else " point " <> fractional y)
------------------------------------------------------------------------------------------------------------
data TestError = TestError { testExpected :: Double, testGot :: Double}
deriving stock Show
instance Exception TestError
test :: IO ()
test = do
let testParse :: Text -> Double -> IO ()
testParse input expected = do
putStrLn $ "testing " <> show input
case parse parseNumWordsEn "" input of
Right res | res == expected -> do
putStrLn $ " => got " <> show res
pure ()
Right got -> throwIO $ TestError expected got
Left err -> do
putStrLn $ errorBundlePretty err
throwIO err
testParse "none" 0
testParse "nineteen" 19
testParse "twenty" 20
testParse "ninetynine" 99
testParse "fiftyfive" 55
testParse "one-hundred" 100
testParse "one hundred fifty-five" 155
testParse "fifty-five" 55
testParse "one thousand two hundred forty four" 1244
testParse "fifty five point five" 55.5
testParse "four million five hundred twentytwo" 4_000_522.0
testParse "seventy six" 76
testParse "nineteen hundred forty-four" 1944
testParse "Seventy Thousand and one" 70_001
testParse "Negative five" (-5)
testParse "Five Billion And Five Hundred And Fifty Five Million And Five Hundred And Fifty Five Thousand And Five Hundred And Fifty Five" 5_555_555_555
testParse "Ten point five five five" 10.555
testParse "four oh five" 405
testParse "five five five" 555
testParse "five five" 55
testParse "five point five five five five" 5.555_5
testParse "nineteen hundred twenty two" 1922
testParse "nine oh two one oh" 90_210
testParse "one hundred and one" 101
testParse "five point five million" 5_500_000
testParse "five point five million point five" 5_500_000.5
replicateM_ 10 $ do
-- Testing with Random Double doesn’t seem to work because we get rounding issues.
n <- fromIntegral <$> (randomIO :: IO Int32)
putStrLn $ "roundtrip " <> show n
let str :: Text = showNumWordsEn n
putStrLn $ " => showNumWordsEn " <> show n <> " = " <> show str
case parse parseNumWordsEn "" str of
Right res -> do
putStrLn $ " => parseNumWordsEn " <> show str <> " = " <> show res
when (n /= res) $ throwIO $ TestError n res
Left err -> do
putStrLn $ errorBundlePretty err
throwIO err
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment