diff --git a/benchmarks/haskell/Benchmarks.hs b/benchmarks/haskell/Benchmarks.hs index 7b3aa17c..33e6a025 100644 --- a/benchmarks/haskell/Benchmarks.hs +++ b/benchmarks/haskell/Benchmarks.hs @@ -20,6 +20,7 @@ import qualified Benchmarks.EncodeUtf8 as EncodeUtf8 import qualified Benchmarks.Equality as Equality import qualified Benchmarks.FileRead as FileRead import qualified Benchmarks.FoldLines as FoldLines +import qualified Benchmarks.Micro as Micro import qualified Benchmarks.Multilang as Multilang import qualified Benchmarks.Pure as Pure import qualified Benchmarks.ReadNumbers as ReadNumbers @@ -61,6 +62,7 @@ main = do defaultMain [ Builder.benchmark , Concat.benchmark + , Micro.benchmark , bgroup "DecodeUtf8" [ env (DecodeUtf8.initEnv (tf "libya-chinese.html")) (DecodeUtf8.benchmark "html") , env (DecodeUtf8.initEnv (tf "yiwiki.xml")) (DecodeUtf8.benchmark "xml") diff --git a/benchmarks/haskell/Benchmarks/Micro.hs b/benchmarks/haskell/Benchmarks/Micro.hs new file mode 100644 index 00000000..5462393a --- /dev/null +++ b/benchmarks/haskell/Benchmarks/Micro.hs @@ -0,0 +1,22 @@ +-- | Benchmarks on artificial data. + +module Benchmarks.Micro (benchmark) where + +import qualified Data.Text.Lazy as TL +import qualified Data.Text as T +import Test.Tasty.Bench (Benchmark, bgroup, bench, nf) + +benchmark :: Benchmark +benchmark = bgroup "Micro" + [ -- Accessing i-th element should take O(i) time. + -- The 2k case should run in 2x the time of the 1k case. + bgroup "Lazy.inits" + [ bench "last 1k" $ nf (last . TL.inits) (chunks 1000) + , bench "last 2k" $ nf (last . TL.inits) (chunks 2000) + , bench "map-take1 1k" $ nf (map (TL.take 1) . TL.inits) (chunks 1000) + , bench "map-take1 2k" $ nf (map (TL.take 1) . TL.inits) (chunks 2000) + ] + ] + +chunks :: Int -> TL.Text +chunks n = TL.fromChunks (replicate n (T.pack "a")) diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 41e6885d..b93c37f9 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -1443,10 +1443,18 @@ inits = (NE.toList P.$!) . initsNE -- -- @since 2.1.2 initsNE :: Text -> NonEmpty Text -initsNE = (Empty NE.:|) . inits' - where inits' Empty = [] - inits' (Chunk t ts) = L.map (\t' -> Chunk t' Empty) (NE.tail (T.initsNE t)) - ++ L.map (Chunk t) (inits' ts) +initsNE ts0 = Empty NE.:| inits' 0 ts0 + where + inits' :: Int64 -- Number of previous chunks i + -> Text -- The remainder after dropping i chunks from ts0 + -> [Text] -- Prefixes longer than the first i chunks of ts0. + inits' _ Empty = [] + inits' i (Chunk t ts) = L.map (takeChunks i ts0) (NE.tail (T.initsNE t)) + ++ inits' (i + 1) ts + +takeChunks :: Int64 -> Text -> T.Text -> Text +takeChunks !i (Chunk t ts) lastChunk | i > 0 = Chunk t (takeChunks (i - 1) ts lastChunk) +takeChunks _ _ lastChunk = Chunk lastChunk Empty -- | /O(n)/ Return all final segments of the given 'Text', longest -- first. diff --git a/text.cabal b/text.cabal index af868d22..9b10c97c 100644 --- a/text.cabal +++ b/text.cabal @@ -333,6 +333,7 @@ benchmark text-benchmarks Benchmarks.Equality Benchmarks.FileRead Benchmarks.FoldLines + Benchmarks.Micro Benchmarks.Multilang Benchmarks.Programs.BigTable Benchmarks.Programs.Cut