From c6967db6b9b2fa24128c2baa03bdbe8e173d491a Mon Sep 17 00:00:00 2001 From: Rodrigo Mesquita Date: Tue, 11 Jun 2024 17:45:31 +0100 Subject: [PATCH] Improve deserialisation performance Use the lower-level array-construction primitives to avoid intermediate allocations and perform much better at deserialisation. On Cabal (which uses tar for the hackage index), we observed: * Deserialisation of IntTries go from 1.5s to 200ms, with 10GB of allocations going down to roughly 600MB. * StringTable deserialization go from 700ms to 50ms, with 4GB of allocations going down to 80MB. --- Codec/Archive/Tar/Index/IntTrie.hs | 21 ++++++------ Codec/Archive/Tar/Index/Internal.hs | 13 ++------ Codec/Archive/Tar/Index/StringTable.hs | 44 ++++++++++---------------- Codec/Archive/Tar/Index/Utils.hs | 41 ++++++++++++++++++++++++ tar.cabal | 3 +- 5 files changed, 71 insertions(+), 51 deletions(-) create mode 100644 Codec/Archive/Tar/Index/Utils.hs diff --git a/Codec/Archive/Tar/Index/IntTrie.hs b/Codec/Archive/Tar/Index/IntTrie.hs index 103cd0e..a214a15 100644 --- a/Codec/Archive/Tar/Index/IntTrie.hs +++ b/Codec/Archive/Tar/Index/IntTrie.hs @@ -1,5 +1,7 @@ {-# LANGUAGE CPP, BangPatterns, PatternGuards #-} {-# LANGUAGE DeriveDataTypeable, ScopedTypeVariables #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE UnboxedTuples #-} {-# OPTIONS_HADDOCK hide #-} module Codec.Archive.Tar.Index.IntTrie ( @@ -55,6 +57,9 @@ import Data.IntMap.Strict (IntMap) import Data.List hiding (lookup, insert) import Data.Function (on) +import GHC.IO + +import Codec.Archive.Tar.Index.Utils -- | A compact mapping from sequences of nats to nats. -- @@ -338,19 +343,11 @@ deserialise bs , let lenArr = readWord32BE bs 0 lenTotal = 4 + 4 * fromIntegral lenArr , BS.length bs >= 4 + 4 * fromIntegral lenArr - , let !arr = A.array (0, lenArr-1) - [ (i, readWord32BE bs off) - | (i, off) <- zip [0..lenArr-1] [4,8 .. lenTotal - 4] ] - !bs' = BS.drop lenTotal bs - = Just (IntTrie arr, bs') + , let !bs_without_len = BS.unsafeDrop 4 bs + !bs_remaining = BS.unsafeDrop lenTotal bs + !arr = beToLe lenArr bs_without_len + = Just (IntTrie arr, bs_remaining) | otherwise = Nothing -readWord32BE :: BS.ByteString -> Int -> Word32 -readWord32BE bs i = - assert (i >= 0 && i+3 <= BS.length bs - 1) $ - fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 - + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 - + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 - + fromIntegral (BS.unsafeIndex bs (i + 3)) diff --git a/Codec/Archive/Tar/Index/Internal.hs b/Codec/Archive/Tar/Index/Internal.hs index 215bef8..35c1a1d 100644 --- a/Codec/Archive/Tar/Index/Internal.hs +++ b/Codec/Archive/Tar/Index/Internal.hs @@ -63,6 +63,7 @@ import Codec.Archive.Tar.Read as Tar import qualified Codec.Archive.Tar.Index.StringTable as StringTable import Codec.Archive.Tar.Index.StringTable (StringTable, StringTableBuilder) import qualified Codec.Archive.Tar.Index.IntTrie as IntTrie +import Codec.Archive.Tar.Index.Utils (readWord32BE) import Codec.Archive.Tar.Index.IntTrie (IntTrie, IntTrieBuilder) import Codec.Archive.Tar.PackAscii @@ -497,27 +498,19 @@ deserialise bs | let ver = readWord32BE bs 0 , ver == 1 = do let !finalOffset = readWord32BE bs 4 - (stringTable, bs') <- StringTable.deserialiseV1 (BS.drop 8 bs) + (stringTable, bs') <- StringTable.deserialiseV1 (BS.unsafeDrop 8 bs) (intTrie, bs'') <- IntTrie.deserialise bs' return (TarIndex stringTable intTrie finalOffset, bs'') | let ver = readWord32BE bs 0 , ver == 2 = do let !finalOffset = readWord32BE bs 4 - (stringTable, bs') <- StringTable.deserialiseV2 (BS.drop 8 bs) + (stringTable, bs') <- StringTable.deserialiseV2 (BS.unsafeDrop 8 bs) (intTrie, bs'') <- IntTrie.deserialise bs' return (TarIndex stringTable intTrie finalOffset, bs'') | otherwise = Nothing -readWord32BE :: BS.ByteString -> Int -> Word32 -readWord32BE bs i = - assert (i >= 0 && i+3 <= BS.length bs - 1) $ - fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 - + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 - + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 - + fromIntegral (BS.unsafeIndex bs (i + 3)) - toStrict :: LBS.ByteString -> BS.ByteString toStrict = LBS.toStrict diff --git a/Codec/Archive/Tar/Index/StringTable.hs b/Codec/Archive/Tar/Index/StringTable.hs index 6635889..57b73da 100644 --- a/Codec/Archive/Tar/Index/StringTable.hs +++ b/Codec/Archive/Tar/Index/StringTable.hs @@ -46,6 +46,9 @@ import qualified Data.ByteString.Lazy as LBS import Data.ByteString.Builder as BS import Data.ByteString.Builder.Extra as BS (byteStringCopy) +import Unsafe.Coerce (unsafeCoerce) +import Codec.Archive.Tar.Index.Utils + -- | An efficient mapping from strings to a dense set of integers. -- data StringTable id = StringTable @@ -172,7 +175,7 @@ deserialiseV1 bs lenArr = fromIntegral (readWord32BE bs 4) lenTotal= 8 + lenStrs + 4 * lenArr , BS.length bs >= lenTotal - , let strs = BS.take lenStrs (BS.drop 8 bs) + , let strs = BS.unsafeTake lenStrs (BS.unsafeDrop 8 bs) arr = A.array (0, fromIntegral lenArr - 1) [ (i, readWord32BE bs off) | (i, off) <- zip [0 .. fromIntegral lenArr - 1] @@ -200,35 +203,20 @@ deserialiseV2 bs + 4 * lenArr +(4 * (lenArr - 1)) * 2 -- offsets array is 1 longer , BS.length bs >= lenTotal - , let strs = BS.take lenStrs (BS.drop 8 bs) - offs = A.listArray (0, fromIntegral lenArr - 1) - [ readWord32BE bs off - | off <- offsets offsOff ] + , let strs = BS.unsafeTake lenStrs (BS.unsafeDrop 8 bs) + offs_bs = BS.unsafeDrop (8 + lenStrs) bs + ids_bs = BS.unsafeDrop (lenArr * 4) offs_bs + ixs_bs = BS.unsafeDrop ((lenArr - 1) * 4) ids_bs + + offs = beToLe (fromIntegral lenArr) offs_bs -- the second two arrays are 1 shorter - ids = A.listArray (0, fromIntegral lenArr - 2) - [ readInt32BE bs off - | off <- offsets idsOff ] - ixs = A.listArray (0, fromIntegral lenArr - 2) - [ readInt32BE bs off - | off <- offsets ixsOff ] - offsOff = 8 + lenStrs - idsOff = offsOff + 4 * lenArr - ixsOff = idsOff + 4 * (lenArr-1) - offsets from = [from,from+4 .. from + 4 * (lenArr - 1)] - !stringTable = StringTable strs offs ids ixs - !bs' = BS.drop lenTotal bs - = Just (stringTable, bs') + ids = beToLe (fromIntegral lenArr - 1) ids_bs + ixs = beToLe (fromIntegral lenArr - 1) ixs_bs + + !stringTable = StringTable strs (unsafeCoerce offs) (unsafeCoerce ids) (unsafeCoerce ixs) + !bs_left = BS.drop lenTotal bs + = Just (stringTable, bs_left) | otherwise = Nothing -readInt32BE :: BS.ByteString -> Int -> Int32 -readInt32BE bs i = fromIntegral (readWord32BE bs i) - -readWord32BE :: BS.ByteString -> Int -> Word32 -readWord32BE bs i = - assert (i >= 0 && i+3 <= BS.length bs - 1) $ - fromIntegral (BS.unsafeIndex bs (i + 0)) `shiftL` 24 - + fromIntegral (BS.unsafeIndex bs (i + 1)) `shiftL` 16 - + fromIntegral (BS.unsafeIndex bs (i + 2)) `shiftL` 8 - + fromIntegral (BS.unsafeIndex bs (i + 3)) diff --git a/Codec/Archive/Tar/Index/Utils.hs b/Codec/Archive/Tar/Index/Utils.hs new file mode 100644 index 0000000..1212311 --- /dev/null +++ b/Codec/Archive/Tar/Index/Utils.hs @@ -0,0 +1,41 @@ +module Codec.Archive.Tar.Index.Utils where + +import Data.ByteString as BS +import Control.Exception (assert) + +import Data.ByteString.Internal (ByteString(..), unsafeWithForeignPtr, accursedUnutterablePerformIO) +import GHC.Int (Int(..), Int32) +import GHC.Word (Word32(..), byteSwap32) +import Foreign.Storable (peek) +import GHC.Ptr (castPtr, plusPtr, Ptr) +import Data.Array.Base +import Data.Array.Base +import Data.Array.IO.Internals (unsafeFreezeIOUArray) + +-- | Construct a `UArray Word32 Word32` from a ByteString of 32bit big endian +-- words. +beToLe :: Word32 + -- ^ The total array length (the number of 32bit words in the array) + -> BS.ByteString + -- ^ The bytestring from which the UArray is constructed. + -- The content must start in the first byte! (i.e. the meta-data words + -- that shouldn't be part of the array must have been dropped already) + -> UArray Word32 Word32 +beToLe lenArr (BS fptr _) = accursedUnutterablePerformIO $ + unsafeWithForeignPtr fptr $ \ptr -> do + let ptr' = castPtr ptr :: Ptr Word32 + unsafeFreezeIOUArray =<< + newGenArray (0, lenArr - 1) (\offset -> do + byteSwap32 <$> peek (ptr' `plusPtr` (fromIntegral offset * 4))) + +readInt32BE :: BS.ByteString -> Int -> Int32 +readInt32BE bs i = fromIntegral (readWord32BE bs i) +{-# INLINE readInt32BE #-} + +readWord32BE :: BS.ByteString -> Int -> Word32 +readWord32BE (BS fptr len) i = + assert (i >= 0 && i+3 <= len - 1) $ + accursedUnutterablePerformIO $ + unsafeWithForeignPtr fptr $ \ptr -> do + byteSwap32 <$> peek (castPtr (ptr `plusPtr` i)) +{-# INLINE readWord32BE #-} diff --git a/tar.cabal b/tar.cabal index 5b7c735..43f87f7 100644 --- a/tar.cabal +++ b/tar.cabal @@ -49,7 +49,7 @@ library library tar-internal default-language: Haskell2010 build-depends: base >= 4.11 && < 5, - array < 0.6, + array >= 0.5.6 && < 0.6, bytestring >= 0.10 && < 0.13, containers >= 0.2 && < 0.8, deepseq >= 1.1 && < 1.6, @@ -75,6 +75,7 @@ library tar-internal Codec.Archive.Tar.Index.StringTable Codec.Archive.Tar.Index.IntTrie Codec.Archive.Tar.Index.Internal + Codec.Archive.Tar.Index.Utils other-extensions: BangPatterns