From 590495fd7170f1e8a3172c3f9869242260280894 Mon Sep 17 00:00:00 2001 From: Oleg Grenrus Date: Fri, 20 Jan 2023 16:16:52 +0200 Subject: [PATCH] Add encodingToStrictByteString This runs Builder to produce Strict ByteString directly, by making a mutable buffer and growing it exponentially. This might be good or bad, better or worse than LBS.toStrict . encodingToLazyByteString. Latter allocates many small chunks, and copies once; encodingToStrictByteString makes a buffer exponentially, but copies data everytime. --- aeson.cabal | 1 + src/Data/Aeson/Encoding.hs | 1 + src/Data/Aeson/Encoding/Internal.hs | 14 +++++ src/Data/Aeson/Internal/StrictBuilder.hs | 77 ++++++++++++++++++++++++ tests/PropUtils.hs | 16 ++++- 5 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 src/Data/Aeson/Internal/StrictBuilder.hs diff --git a/aeson.cabal b/aeson.cabal index a3412df77..3b84eaf7b 100644 --- a/aeson.cabal +++ b/aeson.cabal @@ -80,6 +80,7 @@ library other-modules: Data.Aeson.Encoding.Builder Data.Aeson.Internal.ByteString + Data.Aeson.Internal.StrictBuilder Data.Aeson.Internal.Functions Data.Aeson.Internal.Text Data.Aeson.Internal.TH diff --git a/src/Data/Aeson/Encoding.hs b/src/Data/Aeson/Encoding.hs index e1d17d892..5a0ce0433 100644 --- a/src/Data/Aeson/Encoding.hs +++ b/src/Data/Aeson/Encoding.hs @@ -11,6 +11,7 @@ module Data.Aeson.Encoding Encoding , Encoding' , encodingToLazyByteString + , encodingToStrictByteString , fromEncoding , unsafeToEncoding , Series diff --git a/src/Data/Aeson/Encoding/Internal.hs b/src/Data/Aeson/Encoding/Internal.hs index 04a6039d7..8ba752685 100644 --- a/src/Data/Aeson/Encoding/Internal.hs +++ b/src/Data/Aeson/Encoding/Internal.hs @@ -10,6 +10,7 @@ module Data.Aeson.Encoding.Internal Encoding' (..) , Encoding , encodingToLazyByteString + , encodingToStrictByteString , unsafeToEncoding , retagEncoding , Series (..) @@ -65,6 +66,7 @@ module Data.Aeson.Encoding.Internal import Prelude.Compat import Data.Aeson.Types.Internal (Value, Key) +import Data.Aeson.Internal.StrictBuilder (toStrictByteString) import Data.ByteString.Builder (Builder, char7, toLazyByteString) import Data.ByteString.Short (ShortByteString) import qualified Data.Aeson.Key as Key @@ -77,6 +79,7 @@ import Data.Time.Calendar.Quarter.Compat (Quarter) import Data.Typeable (Typeable) import Data.Word (Word8, Word16, Word32, Word64) import qualified Data.Aeson.Encoding.Builder as EB +import qualified Data.ByteString as BS import qualified Data.ByteString.Builder as B import qualified Data.ByteString.Lazy as BSL import qualified Data.Text.Lazy as LT @@ -101,10 +104,21 @@ type Encoding = Encoding' Value unsafeToEncoding :: Builder -> Encoding' a unsafeToEncoding = Encoding +-- | Convert 'Encoding' to /lazy/ 'BSL.ByteString'. encodingToLazyByteString :: Encoding' a -> BSL.ByteString encodingToLazyByteString = toLazyByteString . fromEncoding {-# INLINE encodingToLazyByteString #-} +-- | Convert 'Encoding' to /strict/ 'BS.ByteString'. +-- +-- This might or might not be more efficient than @'BSL.toStrict' . 'encodingToLazyByteString'@ +-- +-- @since 2.1.2.0 +-- +encodingToStrictByteString :: Encoding' a -> BS.ByteString +encodingToStrictByteString = toStrictByteString . fromEncoding +{-# INLINE encodingToStrictByteString #-} + retagEncoding :: Encoding' a -> Encoding' b retagEncoding = Encoding . fromEncoding diff --git a/src/Data/Aeson/Internal/StrictBuilder.hs b/src/Data/Aeson/Internal/StrictBuilder.hs new file mode 100644 index 000000000..0935c4292 --- /dev/null +++ b/src/Data/Aeson/Internal/StrictBuilder.hs @@ -0,0 +1,77 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} +module Data.Aeson.Internal.StrictBuilder ( + toStrictByteString, + toStrictByteStringIO, +) where + +import Data.ByteString.Builder.Internal (BufferRange (BufferRange), BuildStep, Builder, fillWithBuildStep, runBuilder) +import Data.ByteString.Internal (ByteString (..)) +import Data.Word (Word8) +import GHC.Exts (Addr#, Ptr (..), minusAddr#, plusAddr#) +import GHC.Exts (Int (I#), Int#, orI#, (+#)) +import GHC.Exts (MutableByteArray#, RealWorld, newPinnedByteArray#, resizeMutableByteArray#, shrinkMutableByteArray#) +import GHC.ForeignPtr (ForeignPtr (ForeignPtr), ForeignPtrContents (PlainPtr)) +import GHC.IO (IO (IO), unIO, unsafePerformIO) + +#if MIN_VERSION_base(4,16,0) +import GHC.Exts (mutableByteArrayContents#) +#else +import GHC.Exts (byteArrayContents#, unsafeCoerce#) + +mutableByteArrayContents# :: MutableByteArray# s -> Addr# +mutableByteArrayContents# mba = byteArrayContents# (unsafeCoerce# mba) +#endif + +toStrictByteString :: Builder -> ByteString +toStrictByteString b = unsafePerformIO (toStrictByteStringIO b) +{-# NOINLINE toStrictByteString #-} + +toStrictByteStringIO :: Builder -> IO ByteString +toStrictByteStringIO b = IO $ \s -> + case newPinnedByteArray# 4096# s of + (# s', mba #) -> case mutableByteArrayContents# mba of + start -> unIO (toStrictByteStringWorker mba 4096# start start (plusAddr# start 4096#) (runBuilder b)) s' + +-- Progressively double the buffer size if it's reported to be full. +-- (convertion to lazy bytestring allocates new buffer chunks). +toStrictByteStringWorker + :: MutableByteArray# RealWorld -- ^ the buffer bytearray + -> Int# -- ^ size of the bytearray + -> Addr# -- ^ beginning of the bytearray + -> Addr# -- ^ current write position + -> Addr# -- ^ end of the bytearray + -> BuildStep () + -> IO ByteString +toStrictByteStringWorker mba size start begin end !curr = + fillWithBuildStep curr kDone kFull kChunk (BufferRange (Ptr begin) (Ptr end)) + where + kDone :: Ptr Word8 -> () -> IO ByteString + kDone (Ptr pos) _ = IO $ \s1 -> + case minusAddr# pos start of { len -> + case shrinkMutableByteArray# mba len s1 of { s2 -> +#if MIN_VERSION_bytestring(0,11,0) + (# s2 , BS (ForeignPtr start (PlainPtr mba)) (I# len) #) +#else + (# s2 , PS (ForeignPtr start (PlainPtr mba)) 0 (I# len) #) +#endif + }} + + kFull :: Ptr Word8 -> Int -> BuildStep () -> IO ByteString + kFull (Ptr pos) (I# nsize) next = IO $ \s1 -> + -- orI# is an approximation of max + case size +# orI# size nsize of { size' -> + case resizeMutableByteArray# mba size' s1 of { (# s2, mba' #) -> + case mutableByteArrayContents# mba' of { start' -> + unIO (toStrictByteStringWorker mba' size' start' (plusAddr# start' (minusAddr# pos start)) (plusAddr# start' size') next) s2 + }}} + + kChunk :: Ptr Word8 -> ByteString -> BuildStep () -> IO ByteString +#if MIN_VERSION_bytestring(0,11,0) + kChunk (Ptr pos) (BS _ 0) next = toStrictByteStringWorker mba size start pos end next +#else + kChunk (Ptr pos) (PS _ _ 0) next = toStrictByteStringWorker mba size start pos end next +#endif + kChunk _ _ _ = fail "TODO: non-empty chunk" diff --git a/tests/PropUtils.hs b/tests/PropUtils.hs index 4883e379b..ff8d48687 100644 --- a/tests/PropUtils.hs +++ b/tests/PropUtils.hs @@ -7,7 +7,7 @@ module PropUtils (module PropUtils) where import Prelude.Compat import Data.Aeson (eitherDecode, encode) -import Data.Aeson.Encoding (encodingToLazyByteString) +import Data.Aeson.Encoding (encodingToLazyByteString, encodingToStrictByteString) import Data.Aeson.Internal (IResult(..), formatError, ifromJSON, iparse) import qualified Data.Aeson.Internal as I import Data.Aeson.Parser (value) @@ -25,6 +25,7 @@ import Test.QuickCheck (Arbitrary(..), Property, Testable, (===), (.&&.), counte import Types import Text.Read (readMaybe) import qualified Data.Attoparsec.Lazy as L +import qualified Data.Attoparsec.ByteString as S import qualified Data.ByteString.Lazy.Char8 as L import qualified Data.Map as Map import qualified Data.Text as T @@ -66,6 +67,14 @@ roundTripEnc eq _ i = L.Done _ (IError path err) -> failure "fromJSON" (formatError path err) i L.Fail _ _ err -> failure "parse" err i +roundTripStrictEnc :: (FromJSON a, ToJSON a, Show a) => + (a -> a -> Property) -> a -> a -> Property +roundTripStrictEnc eq _ i = + case fmap ifromJSON . S.parseOnly value . encodingToStrictByteString . toEncoding $ i of + Right (ISuccess v) -> v `eq` i + Right (IError path err) -> failure "fromJSON" (formatError path err) i + Left err -> failure "parse" err i + roundTripNoEnc :: (FromJSON a, ToJSON a, Show a) => (a -> a -> Property) -> a -> a -> Property roundTripNoEnc eq _ i = @@ -74,7 +83,10 @@ roundTripNoEnc eq _ i = (IError path err) -> failure "fromJSON" (formatError path err) i roundTripEq :: (Eq a, FromJSON a, ToJSON a, Show a) => a -> a -> Property -roundTripEq x y = roundTripEnc (===) x y .&&. roundTripNoEnc (===) x y +roundTripEq x y = + roundTripEnc (===) x y .&&. + roundTripStrictEnc (===) x y .&&. + roundTripNoEnc (===) x y roundtripReadShow :: Value -> Property roundtripReadShow v = readMaybe (show v) === Just v