diff --git a/src/btreeidx.cc b/src/btreeidx.cc index 22132dc3d..5e7e27f2a 100644 --- a/src/btreeidx.cc +++ b/src/btreeidx.cc @@ -4,8 +4,7 @@ #include "btreeidx.hh" #include "folding.hh" #include "utf8.hh" -#include -#include + #include #include #include @@ -14,7 +13,6 @@ #include "wstring_qt.hh" #include "utils.hh" -#include #include "wildcard.hh" #include "globalbroadcaster.hh" @@ -36,6 +34,7 @@ BtreeIndex::BtreeIndex(): idxFile( nullptr ), rootNodeLoaded( false ) { + zstd_dctx.reset( ZSTD_createDCtx() ); } BtreeDictionary::BtreeDictionary( string const & id, vector< string > const & dictionaryFiles ): @@ -411,10 +410,15 @@ void BtreeIndex::readNode( uint32_t offset, vector< char > & out ) unsigned long decompressedLength = out.size(); - if ( uncompress( (unsigned char *)&out.front(), &decompressedLength, &compressedData.front(), compressedData.size() ) - != Z_OK - || decompressedLength != out.size() ) + const size_t size_or_err = ZSTD_decompressDCtx( zstd_dctx.get(), + out.data(), + decompressedLength, + compressedData.data(), + compressedData.size() ); + + if ( ZSTD_isError( size_or_err ) || size_or_err != out.size() ) { throw exFailedToDecompressNode(); + } } char const * BtreeIndex::findChainOffsetExactOrPrefix( @@ -758,6 +762,10 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex, size_t maxElements, uint32_t & lastLeafLinkOffset ) { + + std::unique_ptr< ZSTD_CCtx, zstd_deleter > zstd_cctx; + zstd_cctx.reset( ZSTD_createCCtx() ); + // We compress all the node data. This buffer would hold it. vector< unsigned char > uncompressedData; @@ -846,12 +854,15 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex, } // Save the result. 
- vector< unsigned char > compressedData( compressBound( uncompressedData.size() ) ); + vector< unsigned char > compressedData( ZSTD_compressBound( uncompressedData.size() ) ); - unsigned long compressedSize = compressedData.size(); + const size_t size_or_err = ZSTD_compress2( zstd_cctx.get(), + compressedData.data(), + compressedData.size(), + uncompressedData.data(), + uncompressedData.size() ); - if ( compress( &compressedData.front(), &compressedSize, &uncompressedData.front(), uncompressedData.size() ) - != Z_OK ) { + if ( ZSTD_isError( size_or_err ) ) { qFatal( "Failed to compress btree node." ); abort(); } @@ -859,8 +870,8 @@ static uint32_t buildBtreeNode( IndexedWords::const_iterator & nextIndex, uint32_t offset = file.tell(); file.write< uint32_t >( uncompressedData.size() ); - file.write< uint32_t >( compressedSize ); - file.write( &compressedData.front(), compressedSize ); + file.write< uint32_t >( size_or_err ); + file.write( &compressedData.front(), size_or_err ); if ( isLeaf ) { // A link to the next leef, which is zero and which will be updated diff --git a/src/btreeidx.hh b/src/btreeidx.hh index 77c905460..beb942ab2 100644 --- a/src/btreeidx.hh +++ b/src/btreeidx.hh @@ -14,10 +14,10 @@ #include #include -#include -#include #include +#include + /// A base for the dictionary which creates a btree index to look up /// the words. @@ -28,11 +28,25 @@ using gd::wstring; using std::vector; using std::map; +struct zstd_deleter +{ + void operator()( ZSTD_DCtx * Ctx ) const + { + ZSTD_freeDCtx( Ctx ); + } + + void operator()( ZSTD_CCtx * Ctx ) const + { + ZSTD_freeCCtx( Ctx ); + } +}; + + enum { /// This is to be bumped up each time the internal format changes. /// The value isn't used here by itself, it is supposed to be added /// to each dictionary's internal format version. 
- FormatVersion = 4 + FormatVersion = 5 }; // These exceptions which might be thrown during the index traversal @@ -139,6 +153,9 @@ protected: protected: + std::unique_ptr< ZSTD_DCtx, zstd_deleter > zstd_dctx; + + // Lifetime of 2 var below is not managed by this class. QMutex * idxFileMutex; File::Class * idxFile; diff --git a/src/chunkedstorage.cc b/src/chunkedstorage.cc index 9a33e7a4a..1087a9067 100644 --- a/src/chunkedstorage.cc +++ b/src/chunkedstorage.cc @@ -2,10 +2,8 @@ * Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */ #include "chunkedstorage.hh" -#include +#include #include -#include -#include #include namespace ChunkedStorage { @@ -19,6 +17,8 @@ Writer::Writer( File::Class & f ): chunkStarted( false ), bufferUsed( 0 ) { + zstd_cctx.reset( ZSTD_createCCtx() ); + // Create a sratchpad at the beginning of file. We use it to write chunk // table if it would fit, in order to save some seek times. @@ -64,21 +64,26 @@ void Writer::addToBlock( void const * data, size_t size ) void Writer::saveCurrentChunk() { - size_t maxCompressedSize = compressBound( bufferUsed ); + size_t maxCompressedSize = ZSTD_compressBound( bufferUsed ); if ( bufferCompressed.size() < maxCompressedSize ) bufferCompressed.resize( maxCompressedSize ); - unsigned long compressedSize = bufferCompressed.size(); - if ( compress( &bufferCompressed.front(), &compressedSize, &buffer.front(), bufferUsed ) != Z_OK ) + const size_t size_or_err = ZSTD_compress2( zstd_cctx.get(), + bufferCompressed.data(), + bufferCompressed.size(), + buffer.data(), + bufferUsed ); + if ( ZSTD_isError( size_or_err ) ) { throw exFailedToCompressChunk(); + } offsets.push_back( file.tell() ); file.write( (uint32_t)bufferUsed ); - file.write( (uint32_t)compressedSize ); - file.write( &bufferCompressed.front(), compressedSize ); + file.write( (uint32_t)size_or_err ); + file.write( &bufferCompressed.front(), size_or_err ); bufferUsed = 0; @@ -118,6 +123,8 @@ uint32_t Writer::finish() 
Reader::Reader( File::Class & f, uint32_t offset ): file( f ) { + zstd_dctx.reset( ZSTD_createDCtx() ); + file.seek( offset ); uint32_t size = file.read< uint32_t >(); @@ -165,8 +172,13 @@ char * Reader::getBlock( uint32_t address, vector< char > & chunk ) unsigned long decompressedLength = chunk.size(); - if ( uncompress( (unsigned char *)&chunk.front(), &decompressedLength, chunkDataBytes, compressedSize ) != Z_OK - || decompressedLength != chunk.size() ) { + size_t const size_or_err = ZSTD_decompressDCtx( zstd_dctx.get(), + chunk.data(), + decompressedLength, + chunkDataBytes, + compressedSize ); + + if ( ZSTD_isError( size_or_err ) || size_or_err != chunk.size() ) { throw exFailedToDecompressChunk(); } } diff --git a/src/chunkedstorage.hh b/src/chunkedstorage.hh index 76e958ed5..8d2a773a7 100644 --- a/src/chunkedstorage.hh +++ b/src/chunkedstorage.hh @@ -8,7 +8,7 @@ #include "file.hh" #include -#include +#include /// A chunked compression storage. We use this for articles' bodies. The idea /// is to store data in a separately-compressed chunks, much like in dictzip, @@ -27,6 +27,19 @@ DEF_EX( exAddressOutOfRange, "The given chunked address is out of range", Ex ) DEF_EX( exFailedToDecompressChunk, "Failed to decompress a chunk", Ex ) DEF_EX( mapFailed, "Failed to map/unmap the file", Ex ) +struct zstd_deleter +{ + void operator()( ZSTD_DCtx * Ctx ) const + { + ZSTD_freeDCtx( Ctx ); + } + + void operator()( ZSTD_CCtx * Ctx ) const + { + ZSTD_freeCCtx( Ctx ); + } +}; + /// This class writes data blocks in chunks. class Writer { @@ -66,6 +79,8 @@ private: size_t bufferUsed; void saveCurrentChunk(); + + std::unique_ptr< ZSTD_CCtx, zstd_deleter > zstd_cctx; }; /// This class reads data blocks previously written by Writer. @@ -83,6 +98,9 @@ public: /// Uses the user-provided storage to load the entire chunk, and then to /// return a pointer to the requested block inside it.
char * getBlock( uint32_t address, vector< char > & ); + +private: + std::unique_ptr< ZSTD_DCtx, zstd_deleter > zstd_dctx; }; } // namespace ChunkedStorage