Skip to content

Instantly share code, notes, and snippets.

@brandonchinn178
Last active May 19, 2026 17:23
Show Gist options
  • Select an option

  • Save brandonchinn178/ded11b09b6ca36a5f319203e6e95c3bc to your computer and use it in GitHub Desktop.

Select an option

Save brandonchinn178/ded11b09b6ca36a5f319203e6e95c3bc to your computer and use it in GitHub Desktop.
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
module Main where
import Criterion.Main
import Data.Array.Byte
import Data.Char (ord)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Internal as TI
import GHC.Exts
import GHC.IO (IO (..))
-- ─── Shared payload ───────────────────────────────────────────────────────────
--
-- "user:42 score=9999 tag=haskell ratio=314"
-- ─── 1. ShowS ─────────────────────────────────────────────────────────────────
-- Hand-rolled so we don't pay Show Int's overhead.
-- Digits are accumulated in the correct order via the continuation stack.
intShowS :: Int -> ShowS
intShowS n
| n < 0 = ('-' :) . go (-n)
| n == 0 = ('0' :)
| otherwise = go n
where
go 0 s = s
go k s = go (k `div` 10) (toEnum (ord '0' + k `mod` 10) : s)
buildShowS :: Int -> String
buildShowS n =
( showString "user:"
. intShowS 42
. showString " score="
. intShowS n
. showString " tag="
. showString "haskell"
. showString " ratio="
. intShowS 314
) ""
-- ─── 2. MutableByteArray# → String ───────────────────────────────────────────
--
-- Allocate a MutableByteArray#, poke Word8s (via writeWord8Array#),
-- freeze it, then build a [Char] from it.
-- ASCII only: each byte maps 1-1 to a Char.
type Offset = Int
unboxInt :: Int -> Int#
unboxInt (I# i) = i
-- Write a Char's ASCII byte into the array.
-- writeWord8Array# wants a Word# (or Word8# on GHC >= 9.2).
-- We unpack the Char to a Char#, coerce through char2Int#/int2Word#.
writeChar :: MutableByteArray# RealWorld -> Offset -> Char -> IO Offset
writeChar buf off (C# c#) = IO $ \s0 ->
case writeWord8Array# buf (unboxInt off) (wordToWord8# (int2Word# (ord# c#))) s0 of
s1 -> (# s1, off + 1 #)
-- Write an ASCII String.
writeStr :: MutableByteArray# RealWorld -> Offset -> String -> IO Offset
writeStr buf = go
where
go off [] = return off
go off (c:cs) = writeChar buf off c >>= \off' -> go off' cs
-- Write a decimal Int (ASCII digits).
writeInt :: MutableByteArray# RealWorld -> Offset -> Int -> IO Offset
writeInt buf off n
| n < 0 = writeChar buf off '-' >>= \o -> writeNat buf o (-n)
| n == 0 = writeChar buf off '0'
| otherwise = writeNat buf off n
-- Digits written most-significant first via continuation recursion.
writeNat :: MutableByteArray# RealWorld -> Offset -> Int -> IO Offset
writeNat buf off0 n = go n off0
where
go 0 off = return off
go k off = do
off' <- go (k `div` 10) off
writeChar buf off' (toEnum (ord '0' + k `mod` 10))
-- Freeze and read back as [Char].
-- indexWord8Array# returns Word# (or Word8#); we go Word# -> Int# -> Char#.
byteArrayToString :: MutableByteArray# RealWorld -> Int -> IO String
byteArrayToString buf len = IO $ \s0 ->
case unsafeFreezeByteArray# buf s0 of
(# s1, arr #) ->
let chars = [ C# (chr# (word2Int# (word8ToWord# (indexWord8Array# arr (unboxInt i)))))
| i <- [0 .. len - 1] ]
in (# s1, chars #)
maxBuf :: Int
maxBuf = 128
-- Thread state by using the IO monad normally; only drop to primops at the
-- allocation boundary and the freeze boundary.
buildAddrString :: Int -> IO String
buildAddrString n = IO $ \s0 ->
case newByteArray# (unboxInt maxBuf) s0 of
(# s1, buf #) -> unIO (go buf) s1
where
go buf = do
off0 <- writeStr buf 0 "user:"
off1 <- writeInt buf off0 42
off2 <- writeStr buf off1 " score="
off3 <- writeInt buf off2 n
off4 <- writeStr buf off3 " tag="
off5 <- writeStr buf off4 "haskell"
off6 <- writeStr buf off5 " ratio="
off7 <- writeInt buf off6 314
byteArrayToString buf off7
unIO :: IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (IO f) = f
-- ─── 3. MutableByteArray# → Text (memcpy / zero-extra-copy path) ─────────────
--
-- Same byte-writing as above, but instead of building [Char] we freeze the
-- MutableByteArray# and pass it straight to Text's internal constructor.
--
-- Requires text >= 2.0 where Text is backed by a UTF-8 ByteArray#:
-- data Text = Text !ByteArray !Int{- byte offset -} !Int{- byte length -}
--
-- We wrap the frozen array in the newtype ByteArray from GHC.Exts and call
-- TI.Text directly — no per-character allocation, just one freeze + one
-- pointer registration.
byteArrayToText :: MutableByteArray# RealWorld -> Int -> IO T.Text
byteArrayToText buf len = IO $ \s0 ->
case unsafeFreezeByteArray# buf s0 of
(# s1, arr #) ->
let t = TI.Text (ByteArray arr) 0 len
in (# s1, t #)
buildAddrText :: Int -> IO T.Text
buildAddrText n = IO $ \s0 ->
case newByteArray# (unboxInt maxBuf) s0 of
(# s1, buf #) -> unIO (go buf) s1
where
go buf = do
off0 <- writeStr buf 0 "user:"
off1 <- writeInt buf off0 42
off2 <- writeStr buf off1 " score="
off3 <- writeInt buf off2 n
off4 <- writeStr buf off3 " tag="
off5 <- writeStr buf off4 "haskell"
off6 <- writeStr buf off5 " ratio="
off7 <- writeInt buf off6 314
byteArrayToText buf off7
-- ─── 4. Text.Builder → Text ───────────────────────────────────────────────────
--
-- Data.Text.Lazy.Builder accumulates chunks and materialises lazily.
-- toStrict collapses the result into a single strict Text.
intBuilder :: Int -> TB.Builder
intBuilder n
| n < 0 = TB.singleton '-' <> go (-n) mempty
| n == 0 = TB.singleton '0'
| otherwise = go n mempty
where
go 0 acc = acc
go k acc = go (k `div` 10)
(TB.singleton (toEnum (ord '0' + k `mod` 10)) <> acc)
buildTextBuilder :: Int -> T.Text
buildTextBuilder n = TL.toStrict . TB.toLazyText $
TB.fromString "user:"
<> intBuilder 42
<> TB.fromString " score="
<> intBuilder n
<> TB.fromString " tag="
<> TB.fromString "haskell"
<> TB.fromString " ratio="
<> intBuilder 314
-- ─── 5. ShowS -> String -> Text ───────────────────────────────────────────────
buildShowSText :: Int -> T.Text
buildShowSText = fromString . buildShowS
-- ─── Main ─────────────────────────────────────────────────────────────────────
main :: IO ()
main = do
as <- buildAddrString 99
at <- buildAddrText 99
let tb = buildTextBuilder 99
ss = buildShowS 99
sst = buildShowSText 99
putStrLn $ "ShowS : " ++ ss
putStrLn $ "Addr→String : " ++ as
putStrLn $ "Addr→Text : " ++ T.unpack at
putStrLn $ "TextBuilder : " ++ T.unpack tb
putStrLn $ "ShowS->Text : " ++ T.unpack sst
putStrLn $ "All match : " ++ show (ss == as && T.pack ss == at && T.pack ss == tb && T.pack ss == sst)
putStrLn ""
defaultMain
[ bench "ShowS -> String" $ nf buildShowS 99
, bench "Addr# -> String" $ nfIO (buildAddrString 99)
, bench "Addr# -> Text" $ nfIO (buildAddrText 99)
, bench "Text.Builder -> Text" $ nf buildTextBuilder 99
, bench "ShowS -> String -> Text" $ nf buildShowSText 99
]
ShowS : user:42 score=99 tag=haskell ratio=314
Addr→String : user:42 score=99 tag=haskell ratio=314
Addr→Text : user:42 score=99 tag=haskell ratio=314
TextBuilder : user:42 score=99 tag=haskell ratio=314
ShowS->Text : user:42 score=99 tag=haskell ratio=314
All match : True
benchmarking ShowS -> String
time 120.7 ns (120.3 ns .. 121.3 ns)
1.000 R² (0.999 R² .. 1.000 R²)
mean 121.3 ns (120.8 ns .. 122.6 ns)
std dev 2.366 ns (1.097 ns .. 4.109 ns)
variance introduced by outliers: 26% (moderately inflated)
benchmarking Addr# -> String
time 416.8 ns (415.1 ns .. 419.4 ns)
0.999 R² (0.999 R² .. 1.000 R²)
mean 419.0 ns (416.1 ns .. 431.5 ns)
std dev 15.74 ns (5.357 ns .. 36.03 ns)
variance introduced by outliers: 54% (severely inflated)
benchmarking Addr# -> Text
time 153.7 ns (152.9 ns .. 154.7 ns)
1.000 R² (0.999 R² .. 1.000 R²)
mean 153.6 ns (153.1 ns .. 155.1 ns)
std dev 2.541 ns (1.142 ns .. 5.230 ns)
variance introduced by outliers: 20% (moderately inflated)
benchmarking Text.Builder -> Text
time 256.5 ns (254.3 ns .. 258.8 ns)
0.999 R² (0.999 R² .. 1.000 R²)
mean 256.7 ns (254.6 ns .. 260.6 ns)
std dev 9.403 ns (5.493 ns .. 16.51 ns)
variance introduced by outliers: 54% (severely inflated)
benchmarking ShowS -> String -> Text
time 194.1 ns (192.3 ns .. 196.7 ns)
0.999 R² (0.999 R² .. 1.000 R²)
mean 193.4 ns (192.5 ns .. 194.8 ns)
std dev 3.707 ns (2.370 ns .. 5.338 ns)
variance introduced by outliers: 25% (moderately inflated)
@noughtmare
Copy link
Copy Markdown

I'll continue the discussion here:

I get quite different results if I make all the "variables" real variables, e.g.:

buildShowS :: (Int,Int,String,Int) -> String
{-# OPAQUE buildShowS #-}
buildShowS (u,s,t,r) =
    ( showString "user:"
    . intShowS u
    . showString " score="
    . intShowS s
    . showString " tag="
    . showString t
    . showString " ratio="
    . intShowS r
    ) ""
All
  ShowS -> String:             OK
    225  ns ±  14 ns
  Text.Builder -> Text:        OK
    308  ns ±  27 ns
  Text.Builder.Linear -> Text: OK
    108  ns ± 6.8 ns
  ShowS -> String -> Text:     OK
    326  ns ±  21 ns
Full code:
{- cabal:
build-depends: base, text, tasty-bench, text-builder-linear
-}

{-# OPTIONS_GHC -ddump-simpl -ddump-stg-final -dsuppress-all -dno-suppress-type-signatures -dno-typeable-binds -ddump-to-file #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TypeApplications #-}

module Main(main) where

import Test.Tasty.Bench
import Data.Array.Byte
import Data.Char (ord)
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Internal as TI
import GHC.Exts
import GHC.IO (IO (..))
import qualified Data.Text.Builder.Linear as BL
import Data.String (fromString)

-- ─── Shared payload ───────────────────────────────────────────────────────────
--
-- "user:42 score=9999 tag=haskell ratio=314"

-- ─── 1. ShowS ─────────────────────────────────────────────────────────────────

-- Hand-rolled so we don't pay Show Int's overhead.
-- Digits are accumulated in the correct order via the continuation stack.
intShowS :: Int -> ShowS
intShowS n
  | n < 0     = ('-' :) . go (-n)
  | n == 0    = ('0' :)
  | otherwise = go n
  where
    go 0 s = s
    go k s = go (k `div` 10) (toEnum (ord '0' + k `mod` 10) : s)

buildShowS :: (Int,Int,String,Int) -> String
{-# OPAQUE buildShowS #-}
buildShowS (u,s,t,r) =
    ( showString "user:"
    . intShowS u
    . showString " score="
    . intShowS s
    . showString " tag="
    . showString t
    . showString " ratio="
    . intShowS r
    ) ""

-- ─── 2. MutableByteArray# → String ───────────────────────────────────────────
--
-- Allocate a MutableByteArray#, poke Word8s (via writeWord8Array#),
-- freeze it, then build a [Char] from it.
-- ASCII only: each byte maps 1-1 to a Char.

type Offset = Int

unboxInt :: Int -> Int#
unboxInt (I# i) = i

-- Write a Char's ASCII byte into the array.
-- writeWord8Array# wants a Word# (or Word8# on GHC >= 9.2).
-- We unpack the Char to a Char#, coerce through char2Int#/int2Word#.
writeChar :: MutableByteArray# RealWorld -> Offset -> Char -> IO Offset
writeChar !buf !off (C# c#) = IO $ \s0 ->
    case writeWord8Array# buf (unboxInt off) (wordToWord8# (int2Word# (ord# c#))) s0 of
      s1 -> (# s1, off + 1 #)

-- Write an ASCII String.
writeStr :: MutableByteArray# RealWorld -> Offset -> String -> IO Offset
writeStr buf = go
  where
    go off []     = return off
    go off (c:cs) = writeChar buf off c >>= \off' -> go off' cs

-- Write a decimal Int (ASCII digits).
writeInt :: MutableByteArray# RealWorld -> Offset -> Int -> IO Offset
writeInt !buf off n
  | n < 0     = writeChar buf off '-' >>= \o -> writeNat buf o (-n)
  | n == 0    = writeChar buf off '0'
  | otherwise = writeNat buf off n

-- Digits written most-significant first via continuation recursion.
writeNat :: MutableByteArray# RealWorld -> Offset -> Int -> IO Offset
writeNat !buf off0 n = go n off0
  where
    go 0 !off = return off
    go k off = do
        off' <- go (k `div` 10) off
        writeChar buf off' (toEnum (ord '0' + k `mod` 10))

-- Freeze and read back as [Char].
-- indexWord8Array# returns Word# (or Word8#); we go Word# -> Int# -> Char#.
byteArrayToString :: MutableByteArray# RealWorld -> Int -> IO String
byteArrayToString buf len = IO $ \s0 ->
    case unsafeFreezeByteArray# buf s0 of
      (# s1, arr #) ->
        let chars = [ C# (chr# (word2Int# (word8ToWord# (indexWord8Array# arr (unboxInt i)))))
                    | i <- [0 .. len - 1] ]
        in (# s1, chars #)

maxBuf :: Int
maxBuf = 128

-- Thread state by using the IO monad normally; only drop to primops at the
-- allocation boundary and the freeze boundary.
buildAddrString :: Int -> IO String
{-# OPAQUE buildAddrString #-}
buildAddrString n = IO $ \s0 ->
    case newByteArray# (unboxInt maxBuf) s0 of
      (# s1, buf #) -> unIO (go buf) s1
  where
    go buf = do
        off0 <- writeStr  buf 0    "user:"
        off1 <- writeInt  buf off0 42
        off2 <- writeStr  buf off1 " score="
        off3 <- writeInt  buf off2 n
        off4 <- writeStr  buf off3 " tag="
        off5 <- writeStr  buf off4 "haskell"
        off6 <- writeStr  buf off5 " ratio="
        off7 <- writeInt  buf off6 314
        byteArrayToString buf off7

unIO :: IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (IO f) = f

-- ─── 3. MutableByteArray# → Text (memcpy / zero-extra-copy path) ─────────────
--
-- Same byte-writing as above, but instead of building [Char] we freeze the
-- MutableByteArray# and pass it straight to Text's internal constructor.
--
-- Requires text >= 2.0 where Text is backed by a UTF-8 ByteArray#:
--   data Text = Text  !ByteArray  !Int{- byte offset -}  !Int{- byte length -}
--
-- We wrap the frozen array in the newtype ByteArray from GHC.Exts and call
-- TI.Text directly — no per-character allocation, just one freeze + one
-- pointer registration.

byteArrayToText :: MutableByteArray# RealWorld -> Int -> IO T.Text
byteArrayToText buf len = IO $ \s0 ->
    case unsafeFreezeByteArray# buf s0 of
      (# s1, arr #) ->
        let t = TI.Text (ByteArray arr) 0 len
        in (# s1, t #)

buildAddrText :: Int -> IO T.Text
{-# OPAQUE buildAddrText #-}
buildAddrText n = IO $ \s0 ->
    case newByteArray# (unboxInt maxBuf) s0 of
      (# s1, buf #) -> unIO (go buf) s1
  where
    go buf = do
        off0 <- writeStr buf 0    "user:"
        off1 <- writeInt buf off0 42
        off2 <- writeStr buf off1 " score="
        off3 <- writeInt buf off2 n
        off4 <- writeStr buf off3 " tag="
        off5 <- writeStr buf off4 "haskell"
        off6 <- writeStr buf off5 " ratio="
        off7 <- writeInt buf off6 314
        byteArrayToText buf off7

-- ─── 4. Text.Builder → Text ───────────────────────────────────────────────────
--
-- Data.Text.Lazy.Builder accumulates chunks and materialises lazily.
-- toStrict collapses the result into a single strict Text.

intBuilder :: Int -> TB.Builder
intBuilder n
  | n < 0     = TB.singleton '-' <> go (-n) mempty
  | n == 0    = TB.singleton '0'
  | otherwise = go n mempty
  where
    go 0 acc = acc
    go k acc = go (k `div` 10)
                  (TB.singleton (toEnum (ord '0' + k `mod` 10)) <> acc)

buildTextBuilder :: (Int,Int,String,Int) -> T.Text
{-# OPAQUE buildTextBuilder #-}
buildTextBuilder (u,s,t,r) = TL.toStrict . TB.toLazyText $
       TB.fromString "user:"
    <> intBuilder u
    <> TB.fromString " score="
    <> intBuilder s
    <> TB.fromString " tag="
    <> TB.fromString t
    <> TB.fromString " ratio="
    <> intBuilder r

buildTextBuilderLinear :: (Int,Int,String,Int) -> T.Text
{-# OPAQUE buildTextBuilderLinear #-}
buildTextBuilderLinear (u,s,t,r) = BL.runBuilder $
       "user:"
    <> BL.fromDec @Int u
    <> " score="
    <> BL.fromDec s
    <> " tag="
    <> fromString t
    <> " ratio="
    <> BL.fromDec @Int r

-- ─── 5. ShowS -> String -> Text ───────────────────────────────────────────────

buildShowSText :: (Int,Int,String,Int) -> T.Text
{-# OPAQUE buildShowSText #-}
buildShowSText = fromString . buildShowS

-- ─── Main ─────────────────────────────────────────────────────────────────────

inputs = (42,99,"haskell",314)

main :: IO ()
main = do
--    as  <- buildAddrString inputs
--    at  <- buildAddrText inputs
    let tb = buildTextBuilder inputs
        tbl = buildTextBuilderLinear inputs
        ss = buildShowS inputs
        sst = buildShowSText inputs
    putStrLn $ "ShowS       : " ++ ss
    -- putStrLn $ "Addr→String : " ++ as
    -- putStrLn $ "Addr→Text   : " ++ T.unpack at
    putStrLn $ "TextBuilder : " ++ T.unpack tb
    putStrLn $ "Text.Builder.Linear : " ++ T.unpack tbl
    putStrLn $ "ShowS->Text : " ++ T.unpack sst
    putStrLn $ "All match   : " ++ show ({- ss == as && T.pack ss == at && -} T.pack ss == tb && T.pack ss == tbl && T.pack ss == sst)
    putStrLn ""

    defaultMain
      [ bench "ShowS -> String"         $ nf    buildShowS       inputs
      -- , bench "Addr# -> String"         $ nfIO (buildAddrString  99)
      -- , bench "Addr# -> Text"           $ nfIO (buildAddrText    99)
      , bench "Text.Builder -> Text"    $ nf    buildTextBuilder inputs
      , bench "Text.Builder.Linear -> Text"    $ nf    buildTextBuilderLinear inputs
      , bench "ShowS -> String -> Text" $ nf    buildShowSText   inputs
      ]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment