Skip to content

use environments to lazily initialize benchmarks #204

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 30 additions & 38 deletions benchmarks/haskell/Benchmarks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module Main
( main
) where

import Criterion.Main (Benchmark, defaultMain, bgroup)
import Criterion.Main (defaultMain, bgroup, env)
import System.FilePath ((</>))
import System.IO (IOMode (WriteMode), openFile, hSetEncoding, utf8)

Expand All @@ -32,50 +32,42 @@ import qualified Benchmarks.Programs.StripTags as Programs.StripTags
import qualified Benchmarks.Programs.Throughput as Programs.Throughput

main :: IO ()
main = benchmarks >>= defaultMain

benchmarks :: IO [Benchmark]
benchmarks = do
main = do
sink <- openFile "/dev/null" WriteMode
hSetEncoding sink utf8

-- Traditional benchmarks
bs <- sequence
defaultMain
[ Builder.benchmark
, Concat.benchmark
, DecodeUtf8.benchmark "html" (tf "libya-chinese.html")
, DecodeUtf8.benchmark "xml" (tf "yiwiki.xml")
, DecodeUtf8.benchmark "ascii" (tf "ascii.txt")
, DecodeUtf8.benchmark "russian" (tf "russian.txt")
, DecodeUtf8.benchmark "japanese" (tf "japanese.txt")
, env (DecodeUtf8.initEnv (tf "libya-chinese.html")) (DecodeUtf8.benchmark "html")
, env (DecodeUtf8.initEnv (tf "yiwiki.xml")) (DecodeUtf8.benchmark "xml")
, env (DecodeUtf8.initEnv (tf "ascii.txt")) (DecodeUtf8.benchmark "ascii")
, env (DecodeUtf8.initEnv (tf "russian.txt")) (DecodeUtf8.benchmark "russian")
, env (DecodeUtf8.initEnv (tf "japanese.txt")) (DecodeUtf8.benchmark "japanese")
, EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
, Equality.benchmark (tf "japanese.txt")
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
, FileRead.benchmark (tf "russian.txt")
, FoldLines.benchmark (tf "russian.txt")
, Mul.benchmark
, Pure.benchmark "tiny" (tf "tiny.txt")
, Pure.benchmark "ascii" (tf "ascii-small.txt")
-- , Pure.benchmark "france" (tf "france.html")
, Pure.benchmark "russian" (tf "russian-small.txt")
, Pure.benchmark "japanese" (tf "japanese.txt")
, ReadNumbers.benchmark (tf "numbers.txt")
, Replace.benchmark (tf "russian.txt") "принимая" "своем"
, Search.benchmark (tf "russian.txt") "принимая"
, Stream.benchmark (tf "russian.txt")
, WordFrequencies.benchmark (tf "russian.txt")
, env Mul.initEnv Mul.benchmark
, env (Pure.initEnv (tf "tiny.txt")) (Pure.benchmark "tiny")
, env (Pure.initEnv (tf "ascii-small.txt")) (Pure.benchmark "ascii-small")
, env (Pure.initEnv (tf "ascii.txt")) (Pure.benchmark "ascii")
, env (Pure.initEnv (tf "english.txt")) (Pure.benchmark "english")
, env (Pure.initEnv (tf "russian-small.txt")) (Pure.benchmark "russian")
, env (Pure.initEnv (tf "japanese.txt")) (Pure.benchmark "japanese")
, env (ReadNumbers.initEnv (tf "numbers.txt")) ReadNumbers.benchmark
, env (Replace.initEnv (tf "russian.txt")) (Replace.benchmark "принимая" "своем")
, env (Search.initEnv (tf "russian.txt")) (Search.benchmark "принимая")
, env (Stream.initEnv (tf "russian.txt")) Stream.benchmark
, env (WordFrequencies.initEnv (tf "russian.txt")) WordFrequencies.benchmark
, bgroup "Programs"
[ Programs.BigTable.benchmark sink
, Programs.Cut.benchmark (tf "russian.txt") sink 20 40
, Programs.Fold.benchmark (tf "russian.txt") sink
, Programs.Sort.benchmark (tf "russian.txt") sink
, Programs.StripTags.benchmark (tf "yiwiki.xml") sink
, Programs.Throughput.benchmark (tf "russian.txt") sink
]
]

-- Program-like benchmarks
ps <- bgroup "Programs" `fmap` sequence
[ Programs.BigTable.benchmark sink
, Programs.Cut.benchmark (tf "russian.txt") sink 20 40
, Programs.Fold.benchmark (tf "russian.txt") sink
, Programs.Sort.benchmark (tf "russian.txt") sink
, Programs.StripTags.benchmark (tf "yiwiki.xml") sink
, Programs.Throughput.benchmark (tf "russian.txt") sink
]

return $ bs ++ [ps]
where
where
-- Location of a test file
tf = ("../tests/text-test-data" </>)
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Builder.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ import qualified Data.Text.Lazy.Builder as LTB
import qualified Data.Text.Lazy.Builder.Int as Int
import Data.Int (Int64)

benchmark :: IO Benchmark
benchmark = return $ bgroup "Builder"
benchmark :: Benchmark
benchmark = bgroup "Builder"
[ bgroup "Comparison"
[ bench "LazyText" $ nf
(LT.length . LTB.toLazyText . mconcat . map LTB.fromText) texts
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Concat.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ import Control.Monad.Trans.Writer
import Criterion (Benchmark, bgroup, bench, whnf)
import Data.Text as T

benchmark :: IO Benchmark
benchmark = return $ bgroup "Concat"
benchmark :: Benchmark
benchmark = bgroup "Concat"
[ bench "append" $ whnf (append4 "Text 1" "Text 2" "Text 3") "Text 4"
, bench "concat" $ whnf (concat4 "Text 1" "Text 2" "Text 3") "Text 4"
, bench "write" $ whnf (write4 "Text 1" "Text 2" "Text 3") "Text 4"
Expand Down
15 changes: 11 additions & 4 deletions benchmarks/haskell/Benchmarks/DecodeUtf8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
-- The latter are used for testing stream fusion.
--
module Benchmarks.DecodeUtf8
( benchmark
( initEnv
, benchmark
) where

import Foreign.C.Types
Expand All @@ -34,18 +35,24 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: String -> FilePath -> IO Benchmark
benchmark kind fp = do
type Env = (B.ByteString, BL.ByteString)

initEnv :: FilePath -> IO Env
initEnv fp = do
bs <- B.readFile fp
lbs <- BL.readFile fp
return (bs, lbs)

benchmark :: String -> Env -> Benchmark
benchmark kind ~(bs, lbs) =
let bench name = C.bench (name ++ "+" ++ kind)
decodeStream (Chunk b0 bs0) = case T.streamDecodeUtf8 b0 of
T.Some t0 _ f0 -> t0 : go f0 bs0
where go f (Chunk b bs1) = case f b of
T.Some t1 _ f1 -> t1 : go f1 bs1
go _ _ = []
decodeStream _ = []
return $ bgroup "DecodeUtf8"
in bgroup "DecodeUtf8"
[ bench "Strict" $ nf T.decodeUtf8 bs
, bench "Stream" $ nf decodeStream lbs
, bench "IConv" $ whnfIO $ iconv bs
Expand Down
6 changes: 3 additions & 3 deletions benchmarks/haskell/Benchmarks/EncodeUtf8.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: String -> IO Benchmark
benchmark string = do
return $ bgroup "EncodeUtf8"
benchmark :: String -> Benchmark
benchmark string =
bgroup "EncodeUtf8"
[ bench "Text" $ whnf (B.length . T.encodeUtf8) text
, bench "LazyText" $ whnf (BL.length . TL.encodeUtf8) lazyText
]
Expand Down
17 changes: 11 additions & 6 deletions benchmarks/haskell/Benchmarks/Equality.hs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
-- * Comparison of strings (Eq instance)
--
module Benchmarks.Equality
( benchmark
( initEnv
, benchmark
) where

import Criterion (Benchmark, bgroup, bench, whnf)
Expand All @@ -17,8 +18,10 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: FilePath -> IO Benchmark
benchmark fp = do
type Env = (T.Text, TL.Text, B.ByteString, BL.ByteString, BL.ByteString, String)

initEnv :: FilePath -> IO Env
initEnv fp = do
b <- B.readFile fp
bl1 <- BL.readFile fp
-- A lazy bytestring is a list of chunks. When we do not explicitly create two
Expand All @@ -27,9 +30,11 @@ benchmark fp = do
-- we read the lazy bytestring twice here.
bl2 <- BL.readFile fp
l <- readFile fp
let t = T.decodeUtf8 b
tl = TL.decodeUtf8 bl1
return $ bgroup "Equality"
return (T.decodeUtf8 b, TL.decodeUtf8 bl1, b, bl1, bl2, l)

benchmark :: Env -> Benchmark
benchmark ~(t, tl, b, bl1, bl2, l) =
bgroup "Equality"
[ bench "Text" $ whnf (== T.init t `T.snoc` '\xfffd') t
, bench "LazyText" $ whnf (== TL.init tl `TL.snoc` '\xfffd') tl
, bench "ByteString" $ whnf (== B.init b `B.snoc` '\xfffd') b
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/FileRead.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Encoding as LT
import qualified Data.Text.Lazy.IO as LT

benchmark :: FilePath -> IO Benchmark
benchmark p = return $ bgroup "FileRead"
benchmark :: FilePath -> Benchmark
benchmark p = bgroup "FileRead"
[ bench "String" $ whnfIO $ length <$> readFile p
, bench "ByteString" $ whnfIO $ SB.length <$> SB.readFile p
, bench "LazyByteString" $ whnfIO $ LB.length <$> LB.readFile p
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/FoldLines.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ import qualified Data.ByteString as B
import qualified Data.Text as T
import qualified Data.Text.IO as T

benchmark :: FilePath -> IO Benchmark
benchmark fp = return $ bgroup "ReadLines"
benchmark :: FilePath -> Benchmark
benchmark fp = bgroup "ReadLines"
[ bench "Text" $ withHandle $ foldLinesT (\n _ -> n + 1) (0 :: Int)
, bench "ByteString" $ withHandle $ foldLinesB (\n _ -> n + 1) (0 :: Int)
]
Expand Down
30 changes: 19 additions & 11 deletions benchmarks/haskell/Benchmarks/Mul.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
module Benchmarks.Mul (benchmark) where
module Benchmarks.Mul
( initEnv
, benchmark
) where

import Control.Exception (evaluate)
import Criterion.Main
Expand All @@ -12,16 +15,21 @@ oldMul m n
| m <= maxBound `quot` n = m * n
| otherwise = error "overflow"

benchmark :: IO Benchmark
benchmark = do
_ <- evaluate testVector32
_ <- evaluate testVector64
return $ bgroup "Mul" [
bench "oldMul" $ whnf (U.map (uncurry oldMul)) testVector64
, bench "mul64" $ whnf (U.map (uncurry mul64)) testVector64
, bench "*64" $ whnf (U.map (uncurry (*))) testVector64
, bench "mul32" $ whnf (U.map (uncurry mul32)) testVector32
, bench "*32" $ whnf (U.map (uncurry (*))) testVector32
type Env = (U.Vector (Int32,Int32), U.Vector (Int64,Int64))

initEnv :: IO Env
initEnv = do
x <- evaluate testVector32
y <- evaluate testVector64
return (x, y)

benchmark :: Env -> Benchmark
benchmark ~(tv32, tv64) = bgroup "Mul"
[ bench "oldMul" $ whnf (U.map (uncurry oldMul)) tv64
, bench "mul64" $ whnf (U.map (uncurry mul64)) tv64
, bench "*64" $ whnf (U.map (uncurry (*))) tv64
, bench "mul32" $ whnf (U.map (uncurry mul32)) tv32
, bench "*32" $ whnf (U.map (uncurry (*))) tv32
]

testVector64 :: U.Vector (Int64,Int64)
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Programs/BigTable.hs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ import Data.Text.Lazy.IO (hPutStr)
import System.IO (Handle)
import qualified Data.Text as T

benchmark :: Handle -> IO Benchmark
benchmark sink = return $ bench "BigTable" $ whnfIO $ do
benchmark :: Handle -> Benchmark
benchmark sink = bench "BigTable" $ whnfIO $ do
hPutStr sink "Content-Type: text/html\n\n<table>"
hPutStr sink . toLazyText . makeTable =<< rows
hPutStr sink "</table>"
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Programs/Cut.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import qualified Data.Text.Lazy.IO as TL

benchmark :: FilePath -> Handle -> Int -> Int -> IO Benchmark
benchmark p sink from to = return $ bgroup "Cut"
benchmark :: FilePath -> Handle -> Int -> Int -> Benchmark
benchmark p sink from to = bgroup "Cut"
[ bench' "String" string
, bench' "ByteString" byteString
, bench' "LazyByteString" lazyByteString
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Programs/Fold.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TL

benchmark :: FilePath -> Handle -> IO Benchmark
benchmark i o = return $
benchmark :: FilePath -> Handle -> Benchmark
benchmark i o =
bench "Fold" $ whnfIO $ T.readFile i >>= TL.hPutStr o . fold 80

-- | We represent a paragraph by a word list
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Programs/Sort.hs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ import qualified Data.Text.Lazy.Builder as TLB
import qualified Data.Text.Lazy.Encoding as TL
import qualified Data.Text.Lazy.IO as TL

benchmark :: FilePath -> Handle -> IO Benchmark
benchmark i o = return $ bgroup "Sort"
benchmark :: FilePath -> Handle -> Benchmark
benchmark i o = bgroup "Sort"
[ bench "String" $ whnfIO $ readFile i >>= hPutStr o . string
, bench "ByteString" $ whnfIO $ B.readFile i >>= B.hPutStr o . byteString
, bench "LazyByteString" $ whnfIO $
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Programs/StripTags.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.IO as T

benchmark :: FilePath -> Handle -> IO Benchmark
benchmark i o = return $ bgroup "StripTags"
benchmark :: FilePath -> Handle -> Benchmark
benchmark i o = bgroup "StripTags"
[ bench "String" $ whnfIO $ readFile i >>= hPutStr o . string
, bench "ByteString" $ whnfIO $ B.readFile i >>= B.hPutStr o . byteString
, bench "Text" $ whnfIO $ T.readFile i >>= T.hPutStr o . text
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/haskell/Benchmarks/Programs/Throughput.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ import qualified Data.Text.IO as T
import qualified Data.Text.Lazy.Encoding as TL
import qualified Data.Text.Lazy.IO as TL

benchmark :: FilePath -> Handle -> IO Benchmark
benchmark fp sink = return $ bgroup "Throughput"
benchmark :: FilePath -> Handle -> Benchmark
benchmark fp sink = bgroup "Throughput"
[ bench "String" $ whnfIO $ readFile fp >>= hPutStr sink
, bench "ByteString" $ whnfIO $ B.readFile fp >>= B.hPutStr sink
, bench "LazyByteString" $ whnfIO $ BL.readFile fp >>= BL.hPutStr sink
Expand Down
40 changes: 36 additions & 4 deletions benchmarks/haskell/Benchmarks/Pure.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
-- * Most pure functions defined the string types
--
{-# LANGUAGE BangPatterns, CPP, GADTs, MagicHash #-}
{-# LANGUAGE DeriveAnyClass, DeriveGeneric, RecordWildCards #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Benchmarks.Pure
( benchmark
( initEnv
, benchmark
) where

import Control.DeepSeq (NFData (..))
Expand All @@ -16,6 +18,8 @@ import Criterion (Benchmark, bgroup, bench, nf)
import Data.Char (toLower, toUpper)
import Data.Monoid (mappend, mempty)
import GHC.Base (Char (..), Int (..), chr#, ord#, (+#))
import GHC.Generics (Generic)
import GHC.Int (Int64)
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.UTF8 as UTF8
Expand All @@ -26,8 +30,32 @@ import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Builder as TB
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: String -> FilePath -> IO Benchmark
benchmark kind fp = do
data Env = Env
{ bsa :: !BS.ByteString
, ta :: !T.Text
, tb :: !T.Text
, tla :: !TL.Text
, tlb :: !TL.Text
, bsb :: !BS.ByteString
, bla :: !BL.ByteString
, blb :: !BL.ByteString
, sa :: !String
, sb :: !String
, bsa_len :: !Int
, ta_len :: !Int
, bla_len :: !Int64
, tla_len :: !Int64
, sa_len :: !Int
, bsl :: [BS.ByteString]
, bll :: [BL.ByteString]
, tl :: [T.Text]
, tll :: [TL.Text]
, sl :: [String]
} deriving (Generic, NFData)


initEnv :: FilePath -> IO Env
initEnv fp = do
-- Evaluate stuff before actually running the benchmark, we don't want to
-- count it here.

Expand Down Expand Up @@ -63,7 +91,11 @@ benchmark kind fp = do
tll <- evaluate $ TL.lines tla
sl <- evaluate $ L.lines sa

return $ bgroup "Pure"
return Env{..}

benchmark :: String -> Env -> Benchmark
benchmark kind ~Env{..} =
bgroup "Pure"
[ bgroup "append"
[ benchT $ nf (T.append tb) ta
, benchTL $ nf (TL.append tlb) tla
Expand Down
Loading