Skip to content

Commit

Permalink
Add encodingToStrictByteString
Browse files Browse the repository at this point in the history
This runs Builder to produce Strict ByteString directly,
by making a mutable buffer and growing it exponentially.

This might be good or bad, better or worse than
LBS.toStrict . encodingToLazyByteString.
Latter allocates many small chunks, and copies once;
encodingToStrictByteString makes a buffer exponentially,
but copies data everytime.
  • Loading branch information
phadej committed Jan 20, 2023
1 parent 105fe14 commit f3ad3d7
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 4 deletions.
5 changes: 3 additions & 2 deletions aeson.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ library
Data.Aeson.Encoding.Builder
Data.Aeson.Internal.ByteString
Data.Aeson.Internal.Functions
Data.Aeson.Internal.StrictBuilder
Data.Aeson.Internal.Text
Data.Aeson.Internal.TH
Data.Aeson.Parser.Time
Expand All @@ -99,10 +100,10 @@ library
, bytestring >=0.10.8.1 && <0.12
, containers >=0.5.7.1 && <0.7
, deepseq >=1.4.2.0 && <1.5
, exceptions >=0.10.4 && <0.11
, ghc-prim >=0.5.0.0 && <0.10
, template-haskell >=2.11.0.0 && <2.20
, text >=1.2.3.0 && <1.3 || >=2.0 && <2.1
, exceptions >=0.10.4 && <0.11
, text >=1.2.3.0 && <1.3 || >=2.0 && <2.1
, time >=1.6.0.1 && <1.13

-- Compat
Expand Down
1 change: 1 addition & 0 deletions benchmarks/aeson-benchmarks.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ library
Data.Aeson.Internal
Data.Aeson.Internal.ByteString
Data.Aeson.Internal.Functions
Data.Aeson.Internal.StrictBuilder
Data.Aeson.Internal.Text
Data.Aeson.Internal.TH
Data.Aeson.Internal.Time
Expand Down
1 change: 1 addition & 0 deletions src/Data/Aeson/Encoding.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ module Data.Aeson.Encoding
Encoding
, Encoding'
, encodingToLazyByteString
, encodingToStrictByteString
, fromEncoding
, unsafeToEncoding
, Series
Expand Down
14 changes: 14 additions & 0 deletions src/Data/Aeson/Encoding/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module Data.Aeson.Encoding.Internal
Encoding' (..)
, Encoding
, encodingToLazyByteString
, encodingToStrictByteString
, unsafeToEncoding
, retagEncoding
, Series (..)
Expand Down Expand Up @@ -65,6 +66,7 @@ module Data.Aeson.Encoding.Internal
import Prelude.Compat

import Data.Aeson.Types.Internal (Value, Key)
import Data.Aeson.Internal.StrictBuilder (toStrictByteString)
import Data.ByteString.Builder (Builder, char7, toLazyByteString)
import Data.ByteString.Short (ShortByteString)
import qualified Data.Aeson.Key as Key
Expand All @@ -77,6 +79,7 @@ import Data.Time.Calendar.Quarter.Compat (Quarter)
import Data.Typeable (Typeable)
import Data.Word (Word8, Word16, Word32, Word64)
import qualified Data.Aeson.Encoding.Builder as EB
import qualified Data.ByteString as BS
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text.Lazy as LT
Expand All @@ -101,10 +104,21 @@ type Encoding = Encoding' Value
unsafeToEncoding :: Builder -> Encoding' a
unsafeToEncoding = Encoding

-- | Convert 'Encoding' to /lazy/ 'BSL.ByteString'.
encodingToLazyByteString :: Encoding' a -> BSL.ByteString
encodingToLazyByteString = toLazyByteString . fromEncoding
{-# INLINE encodingToLazyByteString #-}

-- | Convert 'Encoding' to /strict/ 'BS.ByteString'.
--
-- This might or might not be more efficient than @'BSL.toStrict' . 'encodingToLazyByteString'@
--
-- @since 2.1.2.0
--
encodingToStrictByteString :: Encoding' a -> BS.ByteString
encodingToStrictByteString = toStrictByteString . fromEncoding
{-# INLINE encodingToStrictByteString #-}

retagEncoding :: Encoding' a -> Encoding' b
retagEncoding = Encoding . fromEncoding

Expand Down
77 changes: 77 additions & 0 deletions src/Data/Aeson/Internal/StrictBuilder.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
module Data.Aeson.Internal.StrictBuilder (
toStrictByteString,
toStrictByteStringIO,
) where

import Data.ByteString.Builder.Internal (BufferRange (BufferRange), BuildStep, Builder, fillWithBuildStep, runBuilder)
import Data.ByteString.Internal (ByteString (..))
import Data.Word (Word8)
import GHC.Exts (Addr#, Ptr (..), minusAddr#, plusAddr#)
import GHC.Exts (Int (I#), Int#, orI#, (+#))
import GHC.Exts (MutableByteArray#, RealWorld, newPinnedByteArray#, resizeMutableByteArray#, shrinkMutableByteArray#)
import GHC.ForeignPtr (ForeignPtr (ForeignPtr), ForeignPtrContents (PlainPtr))
import GHC.IO (IO (IO), unIO, unsafePerformIO)

#if MIN_VERSION_base(4,16,0)
import GHC.Exts (mutableByteArrayContents#)
#else
import GHC.Exts (byteArrayContents#, unsafeCoerce#)

mutableByteArrayContents# :: MutableByteArray# s -> Addr#
mutableByteArrayContents# mba = byteArrayContents# (unsafeCoerce# mba)
#endif

toStrictByteString :: Builder -> ByteString
toStrictByteString b = unsafePerformIO (toStrictByteStringIO b)
{-# NOINLINE toStrictByteString #-}

toStrictByteStringIO :: Builder -> IO ByteString
toStrictByteStringIO b = IO $ \s ->
case newPinnedByteArray# 4096# s of
(# s', mba #) -> case mutableByteArrayContents# mba of
start -> unIO (toStrictByteStringWorker mba 4096# start start (plusAddr# start 4096#) (runBuilder b)) s'

-- Progressively double the buffer size if it's reported to be full.
-- (convertion to lazy bytestring allocates new buffer chunks).
toStrictByteStringWorker
:: MutableByteArray# RealWorld -- ^ the buffer bytearray
-> Int# -- ^ size of the bytearray
-> Addr# -- ^ beginning of the bytearray
-> Addr# -- ^ current write position
-> Addr# -- ^ end of the bytearray
-> BuildStep ()
-> IO ByteString
toStrictByteStringWorker mba size start begin end !curr =
fillWithBuildStep curr kDone kFull kChunk (BufferRange (Ptr begin) (Ptr end))
where
kDone :: Ptr Word8 -> () -> IO ByteString
kDone (Ptr pos) _ = IO $ \s1 ->
case minusAddr# pos start of { len ->
case shrinkMutableByteArray# mba len s1 of { s2 ->
#if MIN_VERSION_bytestring(0,11,0)
(# s2 , BS (ForeignPtr start (PlainPtr mba)) (I# len) #)
#else
(# s2 , PS (ForeignPtr start (PlainPtr mba)) 0 (I# len) #)
#endif
}}

kFull :: Ptr Word8 -> Int -> BuildStep () -> IO ByteString
kFull (Ptr pos) (I# nsize) next = IO $ \s1 ->
-- orI# is an approximation of max
case size +# orI# size nsize of { size' ->
case resizeMutableByteArray# mba size' s1 of { (# s2, mba' #) ->
case mutableByteArrayContents# mba' of { start' ->
unIO (toStrictByteStringWorker mba' size' start' (plusAddr# start' (minusAddr# pos start)) (plusAddr# start' size') next) s2
}}}

kChunk :: Ptr Word8 -> ByteString -> BuildStep () -> IO ByteString
#if MIN_VERSION_bytestring(0,11,0)
kChunk (Ptr pos) (BS _ 0) next = toStrictByteStringWorker mba size start pos end next
#else
kChunk (Ptr pos) (PS _ _ 0) next = toStrictByteStringWorker mba size start pos end next
#endif
kChunk _ _ _ = fail "TODO: non-empty chunk"
16 changes: 14 additions & 2 deletions tests/PropUtils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module PropUtils (module PropUtils) where
import Prelude.Compat

import Data.Aeson (eitherDecode, encode)
import Data.Aeson.Encoding (encodingToLazyByteString)
import Data.Aeson.Encoding (encodingToLazyByteString, encodingToStrictByteString)
import Data.Aeson.Internal (IResult(..), formatError, ifromJSON, iparse)
import qualified Data.Aeson.Internal as I
import Data.Aeson.Parser (value)
Expand All @@ -25,6 +25,7 @@ import Test.QuickCheck (Arbitrary(..), Property, Testable, (===), (.&&.), counte
import Types
import Text.Read (readMaybe)
import qualified Data.Attoparsec.Lazy as L
import qualified Data.Attoparsec.ByteString as S
import qualified Data.ByteString.Lazy.Char8 as L
import qualified Data.Map as Map
import qualified Data.Text as T
Expand Down Expand Up @@ -66,6 +67,14 @@ roundTripEnc eq _ i =
L.Done _ (IError path err) -> failure "fromJSON" (formatError path err) i
L.Fail _ _ err -> failure "parse" err i

roundTripStrictEnc :: (FromJSON a, ToJSON a, Show a) =>
(a -> a -> Property) -> a -> a -> Property
roundTripStrictEnc eq _ i =
case fmap ifromJSON . S.parseOnly value . encodingToStrictByteString . toEncoding $ i of
Right (ISuccess v) -> v `eq` i
Right (IError path err) -> failure "fromJSON" (formatError path err) i
Left err -> failure "parse" err i

roundTripNoEnc :: (FromJSON a, ToJSON a, Show a) =>
(a -> a -> Property) -> a -> a -> Property
roundTripNoEnc eq _ i =
Expand All @@ -74,7 +83,10 @@ roundTripNoEnc eq _ i =
(IError path err) -> failure "fromJSON" (formatError path err) i

roundTripEq :: (Eq a, FromJSON a, ToJSON a, Show a) => a -> a -> Property
roundTripEq x y = roundTripEnc (===) x y .&&. roundTripNoEnc (===) x y
roundTripEq x y =
roundTripEnc (===) x y .&&.
roundTripStrictEnc (===) x y .&&.
roundTripNoEnc (===) x y

roundtripReadShow :: Value -> Property
roundtripReadShow v = readMaybe (show v) === Just v
Expand Down

0 comments on commit f3ad3d7

Please sign in to comment.