diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index f56bbf2f6..ca6116b5b 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -9,6 +9,7 @@ Changes from 2.0.0-beta.3 to 2.0.0-beta.4
 
 * Added a cache for on-disk offsets. This accelerates the reading of slices
   from disk quite a lot (up to 50% with my benchmarks).
+* Zstd codec upgraded from 1.4.0 to 1.4.3.
 
 Changes from 2.0.0-beta.2 to 2.0.0-beta.3
 =========================================
diff --git a/blosc/CMakeLists.txt b/blosc/CMakeLists.txt
index 90f731199..3b8352d31 100644
--- a/blosc/CMakeLists.txt
+++ b/blosc/CMakeLists.txt
@@ -50,7 +50,7 @@ if (NOT DEACTIVATE_ZSTD)
     if (ZSTD_FOUND)
         set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZSTD_INCLUDE_DIR})
     else (ZSTD_FOUND)
-        set(ZSTD_LOCAL_DIR ${INTERNAL_LIBS}/zstd-1.4.0)
+        set(ZSTD_LOCAL_DIR ${INTERNAL_LIBS}/zstd-1.4.3)
         set(BLOSC_INCLUDE_DIRS ${BLOSC_INCLUDE_DIRS} ${ZSTD_LOCAL_DIR}
             ${ZSTD_LOCAL_DIR}/common ${ZSTD_LOCAL_DIR}/dictBuilder)
     endif (ZSTD_FOUND)
diff --git a/internal-complibs/zstd-1.4.0/BUCK b/internal-complibs/zstd-1.4.3/BUCK
similarity index 100%
rename from internal-complibs/zstd-1.4.0/BUCK
rename to internal-complibs/zstd-1.4.3/BUCK
diff --git a/internal-complibs/zstd-1.4.0/Makefile b/internal-complibs/zstd-1.4.3/Makefile
similarity index 98%
rename from internal-complibs/zstd-1.4.0/Makefile
rename to internal-complibs/zstd-1.4.3/Makefile
index 404f5b692..87a396c53 100644
--- a/internal-complibs/zstd-1.4.0/Makefile
+++ b/internal-complibs/zstd-1.4.3/Makefile
@@ -17,6 +17,7 @@ LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT))
 LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
 LIBVER := $(shell echo $(LIBVER_SCRIPT))
 VERSION?= $(LIBVER)
+CCVER := $(shell $(CC) --version)
 
 CPPFLAGS+= -I. -I./common -DXXH_NAMESPACE=ZSTD_
 ifeq ($(OS),Windows_NT)   # MinGW assumed
@@ -45,6 +46,10 @@ ZDICT_FILES := $(sort $(wildcard dictBuilder/*.c))
 ZDEPR_FILES := $(sort $(wildcard deprecated/*.c))
 ZSTD_FILES := $(ZSTDCOMMON_FILES)
 
+ifeq ($(findstring GCC,$(CCVER)),GCC)
+decompress/zstd_decompress_block.o : CFLAGS+=-fno-tree-vectorize
+endif
+
 ZSTD_LEGACY_SUPPORT ?= 5
 ZSTD_LIB_COMPRESSION ?= 1
 ZSTD_LIB_DECOMPRESSION ?= 1
diff --git a/internal-complibs/zstd-1.4.0/README.md b/internal-complibs/zstd-1.4.3/README.md
similarity index 100%
rename from internal-complibs/zstd-1.4.0/README.md
rename to internal-complibs/zstd-1.4.3/README.md
diff --git a/internal-complibs/zstd-1.4.0/common/bitstream.h b/internal-complibs/zstd-1.4.3/common/bitstream.h
similarity index 99%
rename from internal-complibs/zstd-1.4.0/common/bitstream.h
rename to internal-complibs/zstd-1.4.3/common/bitstream.h
index d955bd677..7bdb06046 100644
--- a/internal-complibs/zstd-1.4.0/common/bitstream.h
+++ b/internal-complibs/zstd-1.4.3/common/bitstream.h
@@ -57,6 +57,8 @@ extern "C" {
 =========================================*/
 #if defined(__BMI__) && defined(__GNUC__)
 #  include <immintrin.h>   /* support for bextr (experimental) */
+#elif defined(__ICCARM__)
+#  include <intrinsics.h>
 #endif
 
 #define STREAM_ACCUMULATOR_MIN_32  25
@@ -163,6 +165,8 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val)
         return (unsigned) r;
 #   elif defined(__GNUC__) && (__GNUC__ >= 3)   /* Use GCC Intrinsic */
         return 31 - __builtin_clz (val);
+#   elif defined(__ICCARM__)    /* IAR Intrinsic */
+        return 31 - __CLZ(val);
 #   else   /* Software version */
         static const unsigned DeBruijnClz[32] = { 0,  9,  1, 10, 13, 21,  2, 29,
                                                  11, 14, 16, 18, 22, 25,  3, 30,
diff --git a/internal-complibs/zstd-1.4.0/common/compiler.h b/internal-complibs/zstd-1.4.3/common/compiler.h
similarity index 93%
rename from 
internal-complibs/zstd-1.4.0/common/compiler.h rename to internal-complibs/zstd-1.4.3/common/compiler.h index 0836e3ed2..6686b837d 100644 --- a/internal-complibs/zstd-1.4.0/common/compiler.h +++ b/internal-complibs/zstd-1.4.3/common/compiler.h @@ -23,7 +23,7 @@ # define INLINE_KEYWORD #endif -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__ICCARM__) # define FORCE_INLINE_ATTR __attribute__((always_inline)) #elif defined(_MSC_VER) # define FORCE_INLINE_ATTR __forceinline @@ -65,7 +65,7 @@ #ifdef _MSC_VER # define FORCE_NOINLINE static __declspec(noinline) #else -# ifdef __GNUC__ +# if defined(__GNUC__) || defined(__ICCARM__) # define FORCE_NOINLINE static __attribute__((__noinline__)) # else # define FORCE_NOINLINE static @@ -76,7 +76,7 @@ #ifndef __has_attribute #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ #endif -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__ICCARM__) # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) #else # define TARGET_ATTRIBUTE(target) @@ -127,6 +127,13 @@ } \ } +/* vectorization */ +#if !defined(__clang__) && defined(__GNUC__) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +#else +# define DONT_VECTORIZE +#endif + /* disable warnings */ #ifdef _MSC_VER /* Visual Studio */ # include /* For Visual 2005 */ diff --git a/internal-complibs/zstd-1.4.0/common/cpu.h b/internal-complibs/zstd-1.4.3/common/cpu.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/cpu.h rename to internal-complibs/zstd-1.4.3/common/cpu.h diff --git a/internal-complibs/zstd-1.4.0/common/debug.c b/internal-complibs/zstd-1.4.3/common/debug.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/debug.c rename to internal-complibs/zstd-1.4.3/common/debug.c diff --git a/internal-complibs/zstd-1.4.0/common/debug.h b/internal-complibs/zstd-1.4.3/common/debug.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/debug.h rename to internal-complibs/zstd-1.4.3/common/debug.h diff --git a/internal-complibs/zstd-1.4.0/common/entropy_common.c b/internal-complibs/zstd-1.4.3/common/entropy_common.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/entropy_common.c rename to internal-complibs/zstd-1.4.3/common/entropy_common.c diff --git a/internal-complibs/zstd-1.4.0/common/error_private.c b/internal-complibs/zstd-1.4.3/common/error_private.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/error_private.c rename to internal-complibs/zstd-1.4.3/common/error_private.c diff --git a/internal-complibs/zstd-1.4.0/common/error_private.h b/internal-complibs/zstd-1.4.3/common/error_private.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/error_private.h rename to internal-complibs/zstd-1.4.3/common/error_private.h diff --git a/internal-complibs/zstd-1.4.0/common/fse.h b/internal-complibs/zstd-1.4.3/common/fse.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/fse.h rename to internal-complibs/zstd-1.4.3/common/fse.h diff --git a/internal-complibs/zstd-1.4.0/common/fse_decompress.c b/internal-complibs/zstd-1.4.3/common/fse_decompress.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/fse_decompress.c rename to internal-complibs/zstd-1.4.3/common/fse_decompress.c diff --git a/internal-complibs/zstd-1.4.0/common/huf.h b/internal-complibs/zstd-1.4.3/common/huf.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/huf.h rename to 
internal-complibs/zstd-1.4.3/common/huf.h diff --git a/internal-complibs/zstd-1.4.0/common/mem.h b/internal-complibs/zstd-1.4.3/common/mem.h similarity index 99% rename from internal-complibs/zstd-1.4.0/common/mem.h rename to internal-complibs/zstd-1.4.3/common/mem.h index 5da248756..c10d7f61e 100644 --- a/internal-complibs/zstd-1.4.0/common/mem.h +++ b/internal-complibs/zstd-1.4.3/common/mem.h @@ -102,7 +102,7 @@ MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (size #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) +# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif diff --git a/internal-complibs/zstd-1.4.0/common/pool.c b/internal-complibs/zstd-1.4.3/common/pool.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/pool.c rename to internal-complibs/zstd-1.4.3/common/pool.c diff --git a/internal-complibs/zstd-1.4.0/common/pool.h b/internal-complibs/zstd-1.4.3/common/pool.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/pool.h rename to internal-complibs/zstd-1.4.3/common/pool.h diff --git a/internal-complibs/zstd-1.4.0/common/threading.c b/internal-complibs/zstd-1.4.3/common/threading.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/threading.c rename to internal-complibs/zstd-1.4.3/common/threading.c diff --git a/internal-complibs/zstd-1.4.0/common/threading.h b/internal-complibs/zstd-1.4.3/common/threading.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/threading.h rename to internal-complibs/zstd-1.4.3/common/threading.h diff --git a/internal-complibs/zstd-1.4.0/common/xxhash.c b/internal-complibs/zstd-1.4.3/common/xxhash.c similarity index 99% rename from internal-complibs/zstd-1.4.0/common/xxhash.c rename to internal-complibs/zstd-1.4.3/common/xxhash.c index 30599aaae..99d245962 100644 --- a/internal-complibs/zstd-1.4.0/common/xxhash.c +++ b/internal-complibs/zstd-1.4.3/common/xxhash.c @@ -53,7 +53,8 @@ # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) # define XXH_FORCE_MEMORY_ACCESS 2 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) # define XXH_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -120,7 +121,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcp # define INLINE_KEYWORD #endif -#if defined(__GNUC__) +#if defined(__GNUC__) || defined(__ICCARM__) # define FORCE_INLINE_ATTR __attribute__((always_inline)) #elif defined(_MSC_VER) # define FORCE_INLINE_ATTR __forceinline @@ -206,7 +207,12 @@ static U64 XXH_read64(const void* memPtr) # define XXH_rotl32(x,r) _rotl(x,r) # define XXH_rotl64(x,r) _rotl64(x,r) #else +#if defined(__ICCARM__) +# 
include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) #endif diff --git a/internal-complibs/zstd-1.4.0/common/xxhash.h b/internal-complibs/zstd-1.4.3/common/xxhash.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/xxhash.h rename to internal-complibs/zstd-1.4.3/common/xxhash.h diff --git a/internal-complibs/zstd-1.4.0/common/zstd_common.c b/internal-complibs/zstd-1.4.3/common/zstd_common.c similarity index 100% rename from internal-complibs/zstd-1.4.0/common/zstd_common.c rename to internal-complibs/zstd-1.4.3/common/zstd_common.c diff --git a/internal-complibs/zstd-1.4.0/common/zstd_errors.h b/internal-complibs/zstd-1.4.3/common/zstd_errors.h similarity index 100% rename from internal-complibs/zstd-1.4.0/common/zstd_errors.h rename to internal-complibs/zstd-1.4.3/common/zstd_errors.h diff --git a/internal-complibs/zstd-1.4.0/common/zstd_internal.h b/internal-complibs/zstd-1.4.3/common/zstd_internal.h similarity index 85% rename from internal-complibs/zstd-1.4.0/common/zstd_internal.h rename to internal-complibs/zstd-1.4.3/common/zstd_internal.h index 31f756ab5..585fd6b19 100644 --- a/internal-complibs/zstd-1.4.0/common/zstd_internal.h +++ b/internal-complibs/zstd-1.4.3/common/zstd_internal.h @@ -34,7 +34,6 @@ #endif #include "xxhash.h" /* XXH_reset, update, digest */ - #if defined (__cplusplus) extern "C" { #endif @@ -57,9 +56,9 @@ extern "C" { /** * Return the specified error if the condition evaluates to true. * - * In debug modes, prints additional information. In order to do that - * (particularly, printing the conditional that failed), this can't just wrap - * RETURN_ERROR(). + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). */ #define RETURN_ERROR_IF(cond, err, ...) \ if (cond) { \ @@ -193,19 +192,72 @@ static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; * Shared functions to include for inlining *********************************************/ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } + #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } +static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } +#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } + +#define WILDCOPY_OVERLENGTH 8 +#define VECLEN 16 + +typedef enum { + ZSTD_no_overlap, + ZSTD_overlap_src_before_dst, + /* ZSTD_overlap_dst_before_src, */ +} ZSTD_overlap_e; /*! ZSTD_wildcopy() : * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ -#define WILDCOPY_OVERLENGTH 8 -MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) +MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; const BYTE* ip = (const BYTE*)src; BYTE* op = (BYTE*)dst; BYTE* const oend = op + length; - do - COPY8(op, ip) - while (op < oend); + + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); + if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { + do + COPY8(op, ip) + while (op < oend); + } + else { + if ((length & 8) == 0) + COPY8(op, ip); + do { + COPY16(op, ip); + } + while (op < oend); + } +} + +/*! 
ZSTD_wildcopy_16min() : + * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */ +MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE +void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) +{ + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + length; + + assert(length >= 8); + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { + do + COPY8(op, ip) + while (op < oend); + } + else { + if ((length & 8) == 0) + COPY8(op, ip); + do { + COPY16(op, ip); + } + while (op < oend); + } } MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ @@ -272,6 +324,8 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus return (unsigned)r; # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ return 31 - __builtin_clz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); # else /* Software version */ static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; U32 v = val; diff --git a/internal-complibs/zstd-1.4.0/compress/fse_compress.c b/internal-complibs/zstd-1.4.3/compress/fse_compress.c similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/fse_compress.c rename to internal-complibs/zstd-1.4.3/compress/fse_compress.c diff --git a/internal-complibs/zstd-1.4.0/compress/hist.c b/internal-complibs/zstd-1.4.3/compress/hist.c similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/hist.c rename to internal-complibs/zstd-1.4.3/compress/hist.c diff --git a/internal-complibs/zstd-1.4.0/compress/hist.h b/internal-complibs/zstd-1.4.3/compress/hist.h similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/hist.h rename to internal-complibs/zstd-1.4.3/compress/hist.h diff --git a/internal-complibs/zstd-1.4.0/compress/huf_compress.c b/internal-complibs/zstd-1.4.3/compress/huf_compress.c similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/huf_compress.c rename to internal-complibs/zstd-1.4.3/compress/huf_compress.c diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_compress.c b/internal-complibs/zstd-1.4.3/compress/zstd_compress.c similarity index 83% rename from internal-complibs/zstd-1.4.0/compress/zstd_compress.c rename to internal-complibs/zstd-1.4.3/compress/zstd_compress.c index 2e163c8bf..cd73db13b 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_compress.c +++ b/internal-complibs/zstd-1.4.3/compress/zstd_compress.c @@ -21,6 +21,8 @@ #define HUF_STATIC_LINKING_ONLY #include "huf.h" #include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" #include "zstd_fast.h" #include "zstd_double_fast.h" #include "zstd_lazy.h" @@ -385,6 +387,11 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) bounds.upperBound = ZSTD_lcm_uncompressed; return bounds; + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + default: { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; return boundError; @@ -392,18 +399,6 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) } } -/* 
ZSTD_cParam_withinBounds: - * @return 1 if value is within cParam bounds, - * 0 otherwise */ -static int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) -{ - ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); - if (ZSTD_isError(bounds.error)) return 0; - if (value < bounds.lowerBound) return 0; - if (value > bounds.upperBound) return 0; - return 1; -} - /* ZSTD_cParam_clampBounds: * Clamps the value into the bounded range. */ @@ -452,6 +447,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) case ZSTD_c_ldmHashRateLog: case ZSTD_c_forceAttachDict: case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: default: return 0; } @@ -497,6 +493,7 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) case ZSTD_c_ldmHashLog: case ZSTD_c_ldmMinMatch: case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: break; default: RETURN_ERROR(parameter_unsupported); @@ -671,6 +668,12 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, CCtxParams->ldmParams.hashRateLog = value; return CCtxParams->ldmParams.hashRateLog; + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } } @@ -692,13 +695,13 @@ size_t ZSTD_CCtxParams_getParameter( *value = CCtxParams->compressionLevel; break; case ZSTD_c_windowLog : - *value = CCtxParams->cParams.windowLog; + *value = (int)CCtxParams->cParams.windowLog; break; case ZSTD_c_hashLog : - *value = CCtxParams->cParams.hashLog; + *value = (int)CCtxParams->cParams.hashLog; break; case ZSTD_c_chainLog : - *value = CCtxParams->cParams.chainLog; + *value = (int)CCtxParams->cParams.chainLog; break; case ZSTD_c_searchLog : *value = CCtxParams->cParams.searchLog; @@ -773,6 +776,9 @@ size_t ZSTD_CCtxParams_getParameter( case ZSTD_c_ldmHashRateLog : *value = CCtxParams->ldmParams.hashRateLog; break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); } return 0; @@ -930,12 +936,12 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) @return : 0, or an error code if one value is beyond authorized range */ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) { - BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog); - BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog); - BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog); - BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog); - BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch); - BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength); + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); return 0; } @@ -951,7 +957,7 @@ ZSTD_clampCParams(ZSTD_compressionParameters cParams) if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ } -# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, int) +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) CLAMP(ZSTD_c_windowLog, cParams.windowLog); CLAMP(ZSTD_c_chainLog, cParams.chainLog); CLAMP(ZSTD_c_hashLog, cParams.hashLog); @@ -1282,15 +1288,14 @@ static void 
ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) } /*! ZSTD_invalidateMatchState() - * Invalidate all the matches in the match finder tables. - * Requires nextSrc and base to be set (can be NULL). + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). */ static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) { ZSTD_window_clear(&ms->window); ms->nextToUpdate = ms->window.dictLimit; - ms->nextToUpdate3 = ms->window.dictLimit; ms->loadedDictEnd = 0; ms->opt.litLengthSum = 0; /* force reset of btopt stats */ ms->dictMatchState = NULL; @@ -1327,15 +1332,17 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; +typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; + static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, const ZSTD_compressionParameters* cParams, - ZSTD_compResetPolicy_e const crp, U32 const forCCtx) + ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho) { size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); size_t const hSize = ((size_t)1) << cParams->hashLog; - U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; size_t const h3Size = ((size_t)1) << hashLog3; size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); @@ -1349,7 +1356,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, ZSTD_invalidateMatchState(ms); /* opt parser space */ - if (forCCtx && (cParams->strategy >= ZSTD_btopt)) { + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { DEBUGLOG(4, "reserving optimal parser space"); ms->opt.litFreq = (unsigned*)ptr; ms->opt.litLengthFreq = ms->opt.litFreq + (1< (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large * during at least this number of times, @@ -1388,7 +1408,7 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms, note : `params` are assumed fully validated at this stage */ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_CCtx_params params, - U64 pledgedSrcSize, + U64 const pledgedSrcSize, ZSTD_compResetPolicy_e const crp, ZSTD_buffered_policy_e const zbuff) { @@ -1400,13 +1420,21 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, if (ZSTD_equivalentParams(zc->appliedParams, params, zc->inBuffSize, zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, - zbuff, pledgedSrcSize)) { - DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)", - zc->appliedParams.cParams.windowLog, zc->blockSize); + zbuff, pledgedSrcSize) ) { + DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ - if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) + if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { + DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", + zc->appliedParams.cParams.windowLog, zc->blockSize); + if 
(ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { + /* prefer a reset, faster than a rescale */ + ZSTD_reset_matchState(&zc->blockState.matchState, + zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, + ¶ms.cParams, + crp, ZSTD_resetTarget_CCtx); + } return ZSTD_continueCCtx(zc, params, pledgedSrcSize); - } } + } } } DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); if (params.ldmParams.enableLdm) { @@ -1449,7 +1477,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); if (workSpaceTooSmall || workSpaceWasteful) { - DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB", + DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB", zc->workSpaceSize >> 10, neededSpace >> 10); @@ -1491,7 +1519,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); - ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; + ptr = ZSTD_reset_matchState(&zc->blockState.matchState, + zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, + ¶ms.cParams, + crp, ZSTD_resetTarget_CCtx); /* ldm hash table */ /* initialize bucketOffsets table later for pointer alignment */ @@ -1509,8 +1540,6 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, } assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ - ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, ¶ms.cParams, crp, /* forCCtx */ 1); - /* sequences storage */ zc->seqStore.maxNbSeq = maxNbSeq; zc->seqStore.sequencesStart = (seqDef*)ptr; @@ -1587,15 +1616,14 @@ static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, * handled in _enforceMaxDist */ } -static size_t ZSTD_resetCCtx_byAttachingCDict( - ZSTD_CCtx* cctx, - const ZSTD_CDict* cdict, - ZSTD_CCtx_params params, - U64 pledgedSrcSize, - ZSTD_buffered_policy_e zbuff) +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) { - { - const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; unsigned const windowLog = params.cParams.windowLog; assert(windowLog != 0); /* Resize working context table params for input only, since the dict @@ -1607,8 +1635,7 @@ static size_t ZSTD_resetCCtx_byAttachingCDict( assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); } - { - const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc - cdict->matchState.window.base); const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; if (cdictLen == 0) { @@ -1625,9 +1652,9 @@ static size_t ZSTD_resetCCtx_byAttachingCDict( cctx->blockState.matchState.window.base + cdictEnd; ZSTD_window_clear(&cctx->blockState.matchState.window); } + /* loadedDictEnd is expressed within the referential of the active context */ cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; - } - } + } } cctx->dictID = cdict->dictID; @@ -1681,7 +1708,6 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } @@ -1761,7 +1787,6 @@ static size_t 
ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; dstMatchState->window = srcMatchState->window; dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; - dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; } dstCCtx->dictID = srcCCtx->dictID; @@ -1831,16 +1856,15 @@ static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const /*! ZSTD_reduceIndex() : * rescale all indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue) +static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) { - ZSTD_matchState_t* const ms = &zc->blockState.matchState; - { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; + { U32 const hSize = (U32)1 << params->cParams.hashLog; ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); } - if (zc->appliedParams.cParams.strategy != ZSTD_fast) { - U32 const chainSize = (U32)1 << zc->appliedParams.cParams.chainLog; - if (zc->appliedParams.cParams.strategy == ZSTD_btlazy2) + if (params->cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); else ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); @@ -1869,155 +1893,6 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* s return ZSTD_blockHeaderSize + srcSize; } -static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - memcpy(ostart + flSize, src, srcSize); - return srcSize + flSize; -} - -static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) -{ - BYTE* const ostart = (BYTE* const)dst; - U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - - switch(flSize) - { - case 1: /* 2 - 1 - 5 */ - ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); - break; - case 2: /* 2 - 2 - 12 */ - MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); - break; - case 3: /* 2 - 2 - 20 */ - MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); - break; - default: /* not necessary : flSize is {1,2,3} */ - assert(0); - } - - ostart[flSize] = *(const BYTE*)src; - return flSize+1; -} - - -/* ZSTD_minGain() : - * minimum compression required - * to generate a compress block or a compressed literals section. - * note : use same formula for both situations */ -static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) -{ - U32 const minlog = (strat>=ZSTD_btultra) ? 
(U32)(strat) - 1 : 6; - ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); - assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); - return (srcSize >> minlog) + 2; -} - -static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, - ZSTD_hufCTables_t* nextHuf, - ZSTD_strategy strategy, int disableLiteralCompression, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - void* workspace, size_t wkspSize, - const int bmi2) -{ - size_t const minGain = ZSTD_minGain(srcSize, strategy); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE* const ostart = (BYTE*)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - - DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", - disableLiteralCompression); - - /* Prepare nextEntropy assuming reusing the existing table */ - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - - if (disableLiteralCompression) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - - /* small ? don't even attempt compression (speed opt) */ -# define COMPRESS_LITERALS_SIZE_MIN 63 - { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; - if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); - { HUF_repeat repeat = prevHuf->repeatMode; - int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; - cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) - : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, - workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); - if (repeat != HUF_repeat_none) { - /* reused the existing table */ - hType = set_repeat; - } - } - - if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize==1) { - memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } - - if (hType == set_compressed) { - /* using a newly constructed table */ - nextHuf->repeatMode = HUF_repeat_check; - } - - /* Build header */ - switch(lhSize) - { - case 3: /* 2 - 2 - 10 - 10 */ - { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); - MEM_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); - MEM_writeLE32(ostart, lhc); - break; - } - case 5: /* 2 - 2 - 18 - 18 */ - { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); - MEM_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - default: /* not possible : lhSize is {3,4,5} */ - assert(0); - } - return lhSize+cLitSize; -} - - void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) { const seqDef* const sequences = seqStorePtr->sequencesStart; @@ -2040,418 +1915,6 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) mlCodeTable[seqStorePtr->longLengthPos] = MaxML; } - -/** - * -log2(x / 256) lookup table for x in [0, 256). 
- * If x == 0: Return 0 - * Else: Return floor(-log2(x / 256) * 256) - */ -static unsigned const kInverseProbabilityLog256[256] = { - 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, - 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, - 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, - 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, - 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, - 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, - 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, - 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, - 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, - 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, - 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, - 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, - 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, - 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, - 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, - 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, - 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, - 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, - 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, - 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, - 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, - 5, 4, 2, 1, -}; - - -/** - * Returns the cost in bits of encoding the distribution described by count - * using the entropy bound. - */ -static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) -{ - unsigned cost = 0; - unsigned s; - for (s = 0; s <= max; ++s) { - unsigned norm = (unsigned)((256 * count[s]) / total); - if (count[s] != 0 && norm == 0) - norm = 1; - assert(count[s] < total); - cost += count[s] * kInverseProbabilityLog256[norm]; - } - return cost >> 8; -} - - -/** - * Returns the cost in bits of encoding the distribution in count using the - * table described by norm. The max symbol support by norm is assumed >= max. - * norm must be valid for every symbol with non-zero probability in count. - */ -static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, - unsigned const* count, unsigned const max) -{ - unsigned const shift = 8 - accuracyLog; - size_t cost = 0; - unsigned s; - assert(accuracyLog <= 8); - for (s = 0; s <= max; ++s) { - unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; - unsigned const norm256 = normAcc << shift; - assert(norm256 > 0); - assert(norm256 < 256); - cost += count[s] * kInverseProbabilityLog256[norm256]; - } - return cost >> 8; -} - - -static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { - void const* ptr = ctable; - U16 const* u16ptr = (U16 const*)ptr; - U32 const maxSymbolValue = MEM_read16(u16ptr + 1); - return maxSymbolValue; -} - - -/** - * Returns the cost in bits of encoding the distribution in count using ctable. - * Returns an error if ctable cannot represent all the symbols in count. 
- */ -static size_t ZSTD_fseBitCost( - FSE_CTable const* ctable, - unsigned const* count, - unsigned const max) -{ - unsigned const kAccuracyLog = 8; - size_t cost = 0; - unsigned s; - FSE_CState_t cstate; - FSE_initCState(&cstate, ctable); - RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, - "Repeat FSE_CTable has maxSymbolValue %u < %u", - ZSTD_getFSEMaxSymbolValue(ctable), max); - for (s = 0; s <= max; ++s) { - unsigned const tableLog = cstate.stateLog; - unsigned const badCost = (tableLog + 1) << kAccuracyLog; - unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); - if (count[s] == 0) - continue; - RETURN_ERROR_IF(bitCost >= badCost, GENERIC, - "Repeat FSE_CTable has Prob[%u] == 0", s); - cost += count[s] * bitCost; - } - return cost >> kAccuracyLog; -} - -/** - * Returns the cost in bytes of encoding the normalized count header. - * Returns an error if any of the helper functions return an error. - */ -static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, - size_t const nbSeq, unsigned const FSELog) -{ - BYTE wksp[FSE_NCOUNTBOUND]; - S16 norm[MaxSeq + 1]; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); - return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); -} - - -typedef enum { - ZSTD_defaultDisallowed = 0, - ZSTD_defaultAllowed = 1 -} ZSTD_defaultPolicy_e; - -MEM_STATIC symbolEncodingType_e -ZSTD_selectEncodingType( - FSE_repeat* repeatMode, unsigned const* count, unsigned const max, - size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, - FSE_CTable const* prevCTable, - short const* defaultNorm, U32 defaultNormLog, - ZSTD_defaultPolicy_e const isDefaultAllowed, - ZSTD_strategy const strategy) -{ - ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); - if (mostFrequent == nbSeq) { - *repeatMode = FSE_repeat_none; - if (isDefaultAllowed && nbSeq <= 2) { - /* Prefer set_basic over set_rle when there are 2 or less symbols, - * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. - * If basic encoding isn't possible, always choose RLE. - */ - DEBUGLOG(5, "Selected set_basic"); - return set_basic; - } - DEBUGLOG(5, "Selected set_rle"); - return set_rle; - } - if (strategy < ZSTD_lazy) { - if (isDefaultAllowed) { - size_t const staticFse_nbSeq_max = 1000; - size_t const mult = 10 - strategy; - size_t const baseLog = 3; - size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ - assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ - assert(mult <= 9 && mult >= 7); - if ( (*repeatMode == FSE_repeat_valid) - && (nbSeq < staticFse_nbSeq_max) ) { - DEBUGLOG(5, "Selected set_repeat"); - return set_repeat; - } - if ( (nbSeq < dynamicFse_nbSeq_min) - || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { - DEBUGLOG(5, "Selected set_basic"); - /* The format allows default tables to be repeated, but it isn't useful. - * When using simple heuristics to select encoding type, we don't want - * to confuse these tables with dictionaries. When running more careful - * analysis, we don't need to waste time checking both repeating tables - * and default tables. - */ - *repeatMode = FSE_repeat_none; - return set_basic; - } - } - } else { - size_t const basicCost = isDefaultAllowed ? 
ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); - size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); - size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); - size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); - - if (isDefaultAllowed) { - assert(!ZSTD_isError(basicCost)); - assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); - } - assert(!ZSTD_isError(NCountCost)); - assert(compressedCost < ERROR(maxCode)); - DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", - (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); - if (basicCost <= repeatCost && basicCost <= compressedCost) { - DEBUGLOG(5, "Selected set_basic"); - assert(isDefaultAllowed); - *repeatMode = FSE_repeat_none; - return set_basic; - } - if (repeatCost <= compressedCost) { - DEBUGLOG(5, "Selected set_repeat"); - assert(!ZSTD_isError(repeatCost)); - return set_repeat; - } - assert(compressedCost < basicCost && compressedCost < repeatCost); - } - DEBUGLOG(5, "Selected set_compressed"); - *repeatMode = FSE_repeat_check; - return set_compressed; -} - -MEM_STATIC size_t -ZSTD_buildCTable(void* dst, size_t dstCapacity, - FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, - unsigned* count, U32 max, - const BYTE* codeTable, size_t nbSeq, - const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, - const FSE_CTable* prevCTable, size_t prevCTableSize, - void* workspace, size_t workspaceSize) -{ - BYTE* op = (BYTE*)dst; - const BYTE* const oend = op + dstCapacity; - DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); - - switch (type) { - case set_rle: - FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); - RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); - *op = codeTable[0]; - return 1; - case set_repeat: - memcpy(nextCTable, prevCTable, prevCTableSize); - return 0; - case set_basic: - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ - return 0; - case set_compressed: { - S16 norm[MaxSeq + 1]; - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); - if (count[codeTable[nbSeq-1]] > 1) { - count[codeTable[nbSeq-1]]--; - nbSeq_1--; - } - assert(nbSeq_1 > 1); - FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); - { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - FORWARD_IF_ERROR(NCountSize); - FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); - return NCountSize; - } - } - default: assert(0); RETURN_ERROR(GENERIC); - } -} - -FORCE_INLINE_TEMPLATE size_t -ZSTD_encodeSequences_body( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - RETURN_ERROR_IF( - ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), - dstSize_tooSmall, "not enough space remaining"); - DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", - (int)(blockStream.endPtr - 
blockStream.startPtr), - (unsigned)dstCapacity); - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); - BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); - if (MEM_32bits()) BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq-1]; - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, - ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); - } - BIT_flushBits(&blockStream); - - { size_t n; - for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, - ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); - } } - - DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); - FSE_flushCState(&blockStream, &stateMatchLength); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); - FSE_flushCState(&blockStream, &stateOffsetBits); - DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); - FSE_flushCState(&blockStream, &stateLitLength); - - { size_t const streamSize = BIT_closeCStream(&blockStream); - RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); - return streamSize; - } -} - -static size_t -ZSTD_encodeSequences_default( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - - -#if DYNAMIC_BMI2 - -static TARGET_ATTRIBUTE("bmi2") size_t -ZSTD_encodeSequences_bmi2( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets) -{ - return ZSTD_encodeSequences_body(dst, dstCapacity, - 
CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - -#endif - -static size_t ZSTD_encodeSequences( - void* dst, size_t dstCapacity, - FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, - FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, - FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, - seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) -{ - DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); -#if DYNAMIC_BMI2 - if (bmi2) { - return ZSTD_encodeSequences_bmi2(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); - } -#endif - (void)bmi2; - return ZSTD_encodeSequences_default(dst, dstCapacity, - CTable_MatchLength, mlCodeTable, - CTable_OffsetBits, ofCodeTable, - CTable_LitLength, llCodeTable, - sequences, nbSeq, longOffsets); -} - static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) { switch (cctxParams->literalCompressionMode) { @@ -2492,16 +1955,16 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + dstCapacity; BYTE* op = ostart; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); BYTE* seqHead; BYTE* lastNCount = NULL; + DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<litStart; - size_t const litSize = seqStorePtr->lit - literals; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); size_t const cSize = ZSTD_compressLiterals( &prevEntropy->huf, &nextEntropy->huf, cctxParams->cParams.strategy, @@ -2524,14 +1987,16 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + assert(op <= oend); if (nbSeq==0) { /* Copy the old tables over as if we repeated them */ memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); - return op - ostart; + return (size_t)(op - ostart); } /* seqHead : flags for FSE encoding type */ seqHead = op++; + assert(op <= oend); /* convert length/distances into codes */ ZSTD_seqToCodes(seqStorePtr); @@ -2547,7 +2012,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ZSTD_defaultAllowed, strategy); assert(set_basic < set_compressed && set_rle < set_compressed); assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), workspace, wkspSize); @@ -2555,6 +2020,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (LLtype == set_compressed) lastNCount = op; op += countSize; + assert(op <= oend); } } /* build CTable for Offsets */ { unsigned max = MaxOff; @@ -2569,7 +2035,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy); assert(!(Offtype < 
set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), workspace, wkspSize); @@ -2577,6 +2043,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (Offtype == set_compressed) lastNCount = op; op += countSize; + assert(op <= oend); } } /* build CTable for MatchLengths */ { unsigned max = MaxML; @@ -2589,7 +2056,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy); assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ - { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), workspace, wkspSize); @@ -2597,12 +2064,13 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, if (MLtype == set_compressed) lastNCount = op; op += countSize; + assert(op <= oend); } } *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); { size_t const bitstreamSize = ZSTD_encodeSequences( - op, oend - op, + op, (size_t)(oend - op), CTable_MatchLength, mlCodeTable, CTable_OffsetBits, ofCodeTable, CTable_LitLength, llCodeTable, @@ -2610,6 +2078,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, longOffsets, bmi2); FORWARD_IF_ERROR(bitstreamSize); op += bitstreamSize; + assert(op <= oend); /* zstd versions <= 1.3.4 mistakenly report corruption when * FSE_readNCount() receives a buffer < 4 bytes. * Fixed by https://github.com/facebook/zstd/pull/1146. 
@@ -2628,7 +2097,7 @@ ZSTD_compressSequences_internal(seqStore_t* seqStorePtr, } DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); - return op - ostart; + return (size_t)(op - ostart); } MEM_STATIC size_t @@ -2721,30 +2190,24 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr) ssPtr->longLengthID = 0; } -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize) +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) { ZSTD_matchState_t* const ms = &zc->blockState.matchState; - size_t cSize; - DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", - (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate); + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); assert(srcSize <= ZSTD_BLOCKSIZE_MAX); - /* Assert that we have correctly flushed the ctx params into the ms's copy */ ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); - if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); - cSize = 0; - goto out; /* don't even attempt compression below a certain srcSize */ + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ } ZSTD_resetSeqStore(&(zc->seqStore)); /* required for optimal parser to read stats from dictionary */ ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* tell the optimal parser how we expect to compress literals */ ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; - /* a gap between an attached dict and the current window is not safe, * they must remain adjacent, * and when that stops being the case, the dict must be unset */ @@ -2798,6 +2261,22 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); } } + return ZSTDbss_compress; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t cSize; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } /* encode sequences and literals */ cSize = ZSTD_compressSequences(&zc->seqStore, @@ -2826,6 +2305,25 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, } +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_reduceIndex(ms, params, correction); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= 
correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + + /*! ZSTD_compress_frameChunk() : * Compress a chunk of data into one or multiple blocks. * All blocks will be terminated, all input will be consumed. @@ -2844,7 +2342,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, BYTE* const ostart = (BYTE*)dst; BYTE* op = ostart; U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; - assert(cctx->appliedParams.cParams.windowLog <= 31); + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); if (cctx->appliedParams.fParams.checksumFlag && srcSize) @@ -2859,19 +2357,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, "not enough space to store compressed block"); if (remaining < blockSize) blockSize = remaining; - if (ZSTD_window_needOverflowCorrection(ms->window, ip + blockSize)) { - U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_reduceIndex(cctx, correction); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } - ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; { size_t cSize = ZSTD_compressBlock_internal(cctx, @@ -2899,7 +2388,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, } } if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; - return op-ostart; + return (size_t)(op-ostart); } @@ -2991,6 +2480,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, cctx->pledgedSrcSizePlusOne-1, cctx->dictID); FORWARD_IF_ERROR(fhSize); + assert(fhSize <= dstCapacity); dstCapacity -= fhSize; dst = (char*)dst + fhSize; cctx->stage = ZSTDcs_ongoing; @@ -3007,18 +2497,7 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, if (!frame) { /* overflow check and correction for block mode */ - if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) { - U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); - U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src); - ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); - ZSTD_reduceIndex(cctx, correction); - if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; - else ms->nextToUpdate -= correction; - ms->loadedDictEnd = 0; - ms->dictMatchState = NULL; - } + ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); } DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", 
(unsigned)cctx->blockSize); @@ -3060,8 +2539,9 @@ size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) { - size_t const blockSizeMax = ZSTD_getBlockSize(cctx); - RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); } return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); } @@ -3074,7 +2554,7 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, const void* src, size_t srcSize, ZSTD_dictTableLoadMethod_e dtlm) { - const BYTE* const ip = (const BYTE*) src; + const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; ZSTD_window_update(&ms->window, src, srcSize); @@ -3085,32 +2565,42 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, if (srcSize <= HASH_READ_SIZE) return 0; - switch(params->cParams.strategy) - { - case ZSTD_fast: - ZSTD_fillHashTable(ms, iend, dtlm); - break; - case ZSTD_dfast: - ZSTD_fillDoubleHashTable(ms, iend, dtlm); - break; + while (iend - ip > HASH_READ_SIZE) { + size_t const remaining = (size_t)(iend - ip); + size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); + const BYTE* const ichunk = ip + chunk; - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); - break; + ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); - case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ - case ZSTD_btopt: - case ZSTD_btultra: - case ZSTD_btultra2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend); - break; + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, ichunk, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); + break; - default: - assert(0); /* not possible : not a valid strategy id */ + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (chunk >= HASH_READ_SIZE) + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + if (chunk >= HASH_READ_SIZE) + ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); + break; + + default: + assert(0); /* not possible : not a valid strategy id */ + } + + ip = ichunk; } ms->nextToUpdate = (U32)(iend - ms->window.base); @@ -3297,12 +2787,11 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, ZSTDcrp_continue, zbuff) ); - { - size_t const dictID = ZSTD_compress_insertDictionary( + { size_t const dictID = ZSTD_compress_insertDictionary( cctx->blockState.prevCBlock, &cctx->blockState.matchState, ¶ms, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace); FORWARD_IF_ERROR(dictID); - assert(dictID <= (size_t)(U32)-1); + assert(dictID <= UINT_MAX); cctx->dictID = (U32)dictID; } return 0; @@ -3555,10 +3044,10 @@ static size_t ZSTD_initCDict_internal( /* Reset the state to no dictionary */ ZSTD_reset_compressedBlockState(&cdict->cBlockState); - { void* const end = ZSTD_reset_matchState( - &cdict->matchState, - (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, - &cParams, ZSTDcrp_continue, /* forCCtx */ 0); + { void* const end 
= ZSTD_reset_matchState(&cdict->matchState, + (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, + &cParams, + ZSTDcrp_continue, ZSTD_resetTarget_CDict); assert(end == (char*)cdict->workspace + cdict->workspaceSize); (void)end; } @@ -4068,7 +3557,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, case zcss_flush: DEBUGLOG(5, "flush stage"); { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend-op, + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), zcs->outBuff + zcs->outBuffFlushedSize, toFlush); DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); @@ -4262,7 +3751,7 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ /* single thread mode : attempt to calculate remaining to flush more precisely */ { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; - size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); return toFlush; diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_compress_internal.h b/internal-complibs/zstd-1.4.3/compress/zstd_compress_internal.h similarity index 82% rename from internal-complibs/zstd-1.4.0/compress/zstd_compress_internal.h rename to internal-complibs/zstd-1.4.3/compress/zstd_compress_internal.h index cc3cbb9da..6d623cc6b 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_compress_internal.h +++ b/internal-complibs/zstd-1.4.3/compress/zstd_compress_internal.h @@ -33,13 +33,13 @@ extern "C" { ***************************************/ #define kSearchStrength 8 #define HASH_READ_SIZE 8 -#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 now means "unsorted". +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". It could be confused for a real successor at index "1", if sorted as larger than its predecessor. It's not a big deal though : candidate will just be sorted again. Additionally, candidate position 1 will be lost. But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. - The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy - Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ /*-************************************* @@ -128,21 +128,26 @@ typedef struct { BYTE const* base; /* All regular indexes relative to this position */ BYTE const* dictBase; /* extDict indexes relative to this position */ U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ + U32 lowLimit; /* below that point, no more valid data */ } ZSTD_window_t; typedef struct ZSTD_matchState_t ZSTD_matchState_t; struct ZSTD_matchState_t { ZSTD_window_t window; /* State for window round buffer management */ - U32 loadedDictEnd; /* index of end of dictionary */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ U32 nextToUpdate; /* index from which to continue table update */ - U32 nextToUpdate3; /* index from which to continue table update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ U32* hashTable; U32* hashTable3; U32* chainTable; optState_t opt; /* optimal parser state */ - const ZSTD_matchState_t * dictMatchState; + const ZSTD_matchState_t* dictMatchState; ZSTD_compressionParameters cParams; }; @@ -195,6 +200,9 @@ struct ZSTD_CCtx_params_s { int compressionLevel; int forceWindow; /* force back-references to respect limit of * 1< 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; } +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} + /*! ZSTD_storeSeq() : * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). 
@@ -324,7 +356,7 @@ MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const v /* copy Literals */ assert(seqStorePtr->maxNbLit <= 128 KB); assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); + ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap); seqStorePtr->lit += litLength; /* literal Length */ @@ -564,6 +596,9 @@ MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 /*-************************************* * Round buffer management ***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif /* Max current allowed */ #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) /* Maximum chunk size before overflow correction needs to be called again */ @@ -675,31 +710,49 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, * Updates lowLimit so that: * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd * - * This allows a simple check that index >= lowLimit to see if index is valid. - * This must be called before a block compression call, with srcEnd as the block - * source end. + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. * - * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. - * This is because dictionaries are allowed to be referenced as long as the last - * byte of the dictionary is in the window, but once they are out of range, - * they cannot be referenced. If loadedDictEndPtr is NULL, we use - * loadedDictEnd == 0. + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. * - * In normal dict mode, the dict is between lowLimit and dictLimit. In - * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary - * is below them. forceWindow and dictMatchState are therefore incompatible. + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. */ MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window, - void const* srcEnd, - U32 maxDist, - U32* loadedDictEndPtr, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, const ZSTD_matchState_t** dictMatchStatePtr) { - U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base); - U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; - DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", - (unsigned)blockEndIdx, (unsigned)maxDist); + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? 
*loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. + In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ if (blockEndIdx > maxDist + loadedDictEnd) { U32 const newLowLimit = blockEndIdx - maxDist; if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; @@ -708,11 +761,45 @@ ZSTD_window_enforceMaxDist(ZSTD_window_t* window, (unsigned)window->dictLimit, (unsigned)window->lowLimit); window->dictLimit = window->lowLimit; } - if (loadedDictEndPtr) + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. + * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); *loadedDictEndPtr = 0; - if (dictMatchStatePtr) *dictMatchStatePtr = NULL; - } + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } } /** @@ -754,6 +841,17 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, return contiguous; } +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + U32 const matchLowest = isDictionary ? 
lowestValid : withinWindow; + return matchLowest; +} + + /* debug functions */ #if (DEBUGLEVEL>=2) diff --git a/internal-complibs/zstd-1.4.3/compress/zstd_compress_literals.c b/internal-complibs/zstd-1.4.3/compress/zstd_compress_literals.c new file mode 100644 index 000000000..eb3e5a44b --- /dev/null +++ b/internal-complibs/zstd-1.4.3/compress/zstd_compress_literals.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_literals.h" + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + memcpy(ostart + flSize, src, srcSize); + return srcSize + flSize; +} + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE* const)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + return flSize+1; +} + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + const int bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", + disableLiteralCompression); + + /* Prepare nextEntropy assuming reusing the existing table */ + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) + : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, + workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + return lhSize+cLitSize; +} diff --git a/internal-complibs/zstd-1.4.3/compress/zstd_compress_literals.h b/internal-complibs/zstd-1.4.3/compress/zstd_compress_literals.h new file mode 100644 index 000000000..7adbecc0b --- /dev/null +++ b/internal-complibs/zstd-1.4.3/compress/zstd_compress_literals.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ + + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + const int bmi2); + +#endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/internal-complibs/zstd-1.4.3/compress/zstd_compress_sequences.c b/internal-complibs/zstd-1.4.3/compress/zstd_compress_sequences.c new file mode 100644 index 000000000..3c3deae08 --- /dev/null +++ b/internal-complibs/zstd-1.4.3/compress/zstd_compress_sequences.c @@ -0,0 +1,415 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_sequences.h" + +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. 
+ */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. + */ +static size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, + "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + RETURN_ERROR_IF(bitCost >= badCost, GENERIC, + "Repeat FSE_CTable has Prob[%u] == 0", s); + cost += count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. 
+ */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); + *op = codeTable[0]; + return 1; + case set_repeat: + memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + S16 norm[MaxSeq + 1]; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, 
nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL 
state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} diff --git a/internal-complibs/zstd-1.4.3/compress/zstd_compress_sequences.h b/internal-complibs/zstd-1.4.3/compress/zstd_compress_sequences.h new file mode 100644 index 000000000..f5234d94c --- /dev/null +++ b/internal-complibs/zstd-1.4.3/compress/zstd_compress_sequences.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +#include "fse.h" /* FSE_repeat, FSE_CTable */ +#include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* workspace, size_t workspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_double_fast.c b/internal-complibs/zstd-1.4.3/compress/zstd_double_fast.c similarity index 87% rename from internal-complibs/zstd-1.4.0/compress/zstd_double_fast.c rename to internal-complibs/zstd-1.4.3/compress/zstd_double_fast.c index 47faf6d64..54467cc31 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_double_fast.c +++ b/internal-complibs/zstd-1.4.3/compress/zstd_double_fast.c @@ -43,8 +43,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, /* Only load extra positions for ZSTD_dtlm_full */ if (dtlm == ZSTD_dtlm_fast) break; - } - } + } } } @@ -63,7 +62,11 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 prefixLowestIndex = ms->window.dictLimit; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowestValid = ms->window.dictLimit; + const U32 maxDistance = 1U << cParams->windowLog; + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; const BYTE* const prefixLowest = base + prefixLowestIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -95,8 +98,15 @@ size_t ZSTD_compressBlock_doubleFast_generic( dictCParams->chainLog : hBitsS; const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(lowestValid + maxDistance >= endIndex); + } + /* init */ ip += (dictAndPrefixLength == 0); if (dictMode == ZSTD_noDict) { @@ -138,7 +148,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); goto _match_stored; } @@ -147,7 +157,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); goto _match_stored; } @@ -170,8 +180,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( offset = (U32)(current - dictMatchIndexL - dictIndexDelta); while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ goto _match_found; - } - } + } } if (matchIndexS > prefixLowestIndex) { /* check prefix short match */ @@ -186,16 +195,14 @@ size_t ZSTD_compressBlock_doubleFast_generic( if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { goto _search_next_long; - } - } + } } ip += ((ip-anchor) >> kSearchStrength) + 1; continue; _search_next_long: - { - size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); U32 const matchIndexL3 = hashLong[hl3]; const BYTE* matchL3 = base + matchIndexL3; @@ -221,9 +228,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ goto _match_found; - } - } - } + } } } /* if no long +1 match, explore the short match we found */ if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { @@ -242,7 +247,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); _match_stored: /* match found */ @@ -250,11 +255,14 @@ size_t ZSTD_compressBlock_doubleFast_generic( anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } /* check immediate repcode */ if (dictMode == ZSTD_dictMatchState) { @@ -278,8 +286,7 @@ size_t ZSTD_compressBlock_doubleFast_generic( continue; } break; - } - } + } } if (dictMode == ZSTD_noDict) { while ( (ip <= ilimit) @@ -294,14 +301,15 @@ size_t ZSTD_compressBlock_doubleFast_generic( ip += rLength; anchor = ip; continue; /* faster when present ... (?) */ - } } } } + } } } + } /* while (ip < ilimit) */ /* save reps for next block */ rep[0] = offset_1 ? 
offset_1 : offsetSaved; rep[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -360,10 +368,13 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* anchor = istart; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; - const U32 prefixStartIndex = ms->window.dictLimit; const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; const BYTE* const prefixStart = base + prefixStartIndex; - const U32 dictStartIndex = ms->window.lowLimit; const BYTE* const dictBase = ms->window.dictBase; const BYTE* const dictStart = dictBase + dictStartIndex; const BYTE* const dictEnd = dictBase + prefixStartIndex; @@ -371,6 +382,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); @@ -396,7 +411,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else { if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? 
dictEnd : iend; @@ -407,7 +422,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); @@ -432,23 +447,27 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( } offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } else { ip += ((ip-anchor) >> kSearchStrength) + 1; continue; } } - /* found a match : store it */ + /* move to next sequence start */ ip += mLength; anchor = ip; if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; - hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; - hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base); - hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = current+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + /* check immediate repcode */ while (ip <= ilimit) { U32 const current2 = (U32)(ip-base); @@ -475,7 +494,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( rep[1] = offset_2; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_double_fast.h b/internal-complibs/zstd-1.4.3/compress/zstd_double_fast.h similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/zstd_double_fast.h rename to internal-complibs/zstd-1.4.3/compress/zstd_double_fast.h diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_fast.c b/internal-complibs/zstd-1.4.3/compress/zstd_fast.c similarity index 87% rename from internal-complibs/zstd-1.4.0/compress/zstd_fast.c rename to internal-complibs/zstd-1.4.3/compress/zstd_fast.c index ed997b441..59267ffbb 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_fast.c +++ b/internal-complibs/zstd-1.4.3/compress/zstd_fast.c @@ -13,7 +13,8 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, - void const* end, ZSTD_dictTableLoadMethod_e dtlm) + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32* const hashTable = ms->hashTable; @@ -41,6 +42,7 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms, } } } } } + FORCE_INLINE_TEMPLATE size_t ZSTD_compressBlock_fast_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], @@ -58,7 +60,10 @@ size_t ZSTD_compressBlock_fast_generic( const BYTE* ip0 = istart; const BYTE* ip1; const BYTE* anchor = istart; - const U32 prefixStartIndex = ms->window.dictLimit; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 
validStartIndex = ms->window.dictLimit; + const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex; const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - HASH_READ_SIZE; @@ -66,6 +71,7 @@ size_t ZSTD_compressBlock_fast_generic( U32 offsetSaved = 0; /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); ip0 += (ip0 == prefixStart); ip1 = ip0 + 1; { @@ -165,7 +171,7 @@ size_t ZSTD_compressBlock_fast_generic( rep[1] = offset_2 ? offset_2 : offsetSaved; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -222,11 +228,19 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); const U32 dictHLog = dictCParams->hashLog; - /* otherwise, we would get index underflow when translating a dict index - * into a local index */ + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no no underflow + * when translating a dict index into a local index */ assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); ip += (dictAndPrefixLength == 0); /* dictMatchState repCode checks don't currently handle repCode == 0 * disabling. */ @@ -251,7 +265,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else if ( (matchIndex <= prefixStartIndex) ) { size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); U32 const dictMatchIndex = dictHashTable[dictHash]; @@ -271,7 +285,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } else if (MEM_read32(match) != MEM_read32(ip)) { /* it's not a match, and we're not going to check the dictionary */ @@ -286,7 +300,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } /* match found */ @@ -327,7 +341,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic( rep[1] = offset_2 ? 
offset_2 : offsetSaved; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } size_t ZSTD_compressBlock_fast_dictMatchState( @@ -366,15 +380,24 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; const BYTE* anchor = istart; - const U32 dictStartIndex = ms->window.lowLimit; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; const BYTE* const dictStart = dictBase + dictStartIndex; - const U32 prefixStartIndex = ms->window.dictLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; const BYTE* const prefixStart = base + prefixStartIndex; const BYTE* const dictEnd = dictBase + prefixStartIndex; const BYTE* const iend = istart + srcSize; const BYTE* const ilimit = iend - 8; U32 offset_1=rep[0], offset_2=rep[1]; + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic"); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + /* Search Loop */ while (ip < ilimit) { /* < instead of <=, because (ip+1) */ const size_t h = ZSTD_hashPtr(ip, hlog, mls); @@ -391,10 +414,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { - const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; ip++; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); } else { if ( (matchIndex < dictStartIndex) || (MEM_read32(match) != MEM_read32(ip)) ) { @@ -402,15 +425,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( ip += ((ip-anchor) >> kSearchStrength) + stepSize; continue; } - { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; - const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; U32 offset; mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ offset = current - matchIndex; offset_2 = offset_1; offset_1 = offset; - ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); } } /* found a match : store it */ @@ -430,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; - U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; ip += repLength2; @@ -445,7 +468,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( rep[1] = offset_2; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_fast.h b/internal-complibs/zstd-1.4.3/compress/zstd_fast.h similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/zstd_fast.h rename to internal-complibs/zstd-1.4.3/compress/zstd_fast.h diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_lazy.c b/internal-complibs/zstd-1.4.3/compress/zstd_lazy.c similarity index 96% rename from internal-complibs/zstd-1.4.0/compress/zstd_lazy.c rename to internal-complibs/zstd-1.4.3/compress/zstd_lazy.c index 53f998a43..0af41724c 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_lazy.c +++ b/internal-complibs/zstd-1.4.3/compress/zstd_lazy.c @@ -83,7 +83,10 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms, U32* largerPtr = smallerPtr + 1; U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = ms->window.lowLimit; + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", current, dictLimit, windowLow); @@ -239,7 +242,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, const BYTE* const base = ms->window.base; U32 const current = (U32)(ip-base); - U32 const windowLow = ms->window.lowLimit; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); U32* const bt = ms->chainTable; U32 const btLog = cParams->chainLog - 1; @@ -490,8 +493,12 @@ size_t ZSTD_HcFindBestMatch_generic ( const U32 dictLimit = ms->window.dictLimit; const BYTE* const prefixStart = base + dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; - const U32 lowLimit = ms->window.lowLimit; const U32 current = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; const U32 minChain = current > chainSize ? 
current - chainSize : 0; U32 nbAttempts = 1U << cParams->searchLog; size_t ml=4-1; @@ -612,12 +619,14 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( /* ******************************* * Common parser - lazy strategy *********************************/ -FORCE_INLINE_TEMPLATE -size_t ZSTD_compressBlock_lazy_generic( +typedef enum { search_hashChain, search_binaryTree } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth, + const searchMethod_e searchMethod, const U32 depth, ZSTD_dictMode_e const dictMode) { const BYTE* const istart = (const BYTE*)src; @@ -633,8 +642,10 @@ size_t ZSTD_compressBlock_lazy_generic( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? - (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : - (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); + (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS + : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : + (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS + : ZSTD_HcFindBestMatch_selectMLS); U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; const ZSTD_matchState_t* const dms = ms->dictMatchState; @@ -653,7 +664,6 @@ size_t ZSTD_compressBlock_lazy_generic( /* init */ ip += (dictAndPrefixLength == 0); - ms->nextToUpdate3 = ms->nextToUpdate; if (dictMode == ZSTD_noDict) { U32 const maxRep = (U32)(ip - prefixLowest); if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; @@ -844,7 +854,7 @@ size_t ZSTD_compressBlock_lazy_generic( rep[1] = offset_2 ? 
offset_2 : savedOffset; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -852,56 +862,56 @@ size_t ZSTD_compressBlock_btlazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); } size_t ZSTD_compressBlock_lazy2( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); } size_t ZSTD_compressBlock_lazy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); } size_t ZSTD_compressBlock_greedy( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); } size_t ZSTD_compressBlock_btlazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_lazy2_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_lazy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); } size_t ZSTD_compressBlock_greedy_dictMatchState( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState); + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); } @@ -910,7 +920,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], const void* src, size_t srcSize, - const U32 searchMethod, const U32 depth) + const searchMethod_e searchMethod, const U32 depth) { const BYTE* const istart = (const BYTE*)src; const BYTE* ip = istart; @@ -928,12 +938,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( typedef size_t (*searchMax_f)( ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); - searchMax_f searchMax = 
searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; U32 offset_1 = rep[0], offset_2 = rep[1]; /* init */ - ms->nextToUpdate3 = ms->nextToUpdate; ip += (ip == prefixStart); /* Match Loop */ @@ -1070,7 +1079,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( rep[1] = offset_2; /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -1078,7 +1087,7 @@ size_t ZSTD_compressBlock_greedy_extDict( ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); } size_t ZSTD_compressBlock_lazy_extDict( @@ -1086,7 +1095,7 @@ size_t ZSTD_compressBlock_lazy_extDict( void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); } size_t ZSTD_compressBlock_lazy2_extDict( @@ -1094,7 +1103,7 @@ size_t ZSTD_compressBlock_lazy2_extDict( void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); } size_t ZSTD_compressBlock_btlazy2_extDict( @@ -1102,5 +1111,5 @@ size_t ZSTD_compressBlock_btlazy2_extDict( void const* src, size_t srcSize) { - return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2); + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); } diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_lazy.h b/internal-complibs/zstd-1.4.3/compress/zstd_lazy.h similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/zstd_lazy.h rename to internal-complibs/zstd-1.4.3/compress/zstd_lazy.h diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_ldm.c b/internal-complibs/zstd-1.4.3/compress/zstd_ldm.c similarity index 99% rename from internal-complibs/zstd-1.4.0/compress/zstd_ldm.c rename to internal-complibs/zstd-1.4.3/compress/zstd_ldm.c index 784d20f3a..3dcf86e6e 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_ldm.c +++ b/internal-complibs/zstd-1.4.3/compress/zstd_ldm.c @@ -447,7 +447,7 @@ size_t ZSTD_ldm_generateSequences( if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( - &ldmState->window, /* cycleLog */ 0, maxDist, src); + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); } /* 2. We enforce the maximum offset allowed. 
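The hunks above (zstd_fast.c, zstd_lazy.c and, further down, zstd_opt.c) all add the same guard: the lowest index the match finders may search is clamped to the configured window, except when a dictionary is attached. The diff expresses this either inline or through ZSTD_getLowestMatchIndex(). The snippet below is a minimal standalone sketch of that clamping under illustrative names (lowest_match_index, lowest_valid, window_log are not the library's identifiers); it mirrors the ternary logic visible in ZSTD_HcFindBestMatch_generic and ZSTD_insertDUBT1 rather than quoting the zstd helper verbatim.

#include <stdint.h>
typedef uint32_t U32;

/* Sketch: lowest index still searchable from position `current`.
 * - lowest_valid  : oldest index still backed by memory (ms->window.lowLimit)
 * - window_log    : cParams->windowLog, so the maximum match distance is 1 << window_log
 * - is_dictionary : non-zero when a dictionary is attached (ms->loadedDictEnd != 0)
 * Without a dictionary, candidates older than the window are excluded even if the
 * buffer still holds them; with a dictionary, the full valid range stays searchable,
 * which is why the searchers keep both bounds around. */
static U32 lowest_match_index(U32 current, U32 lowest_valid,
                              U32 window_log, int is_dictionary)
{
    U32 const max_distance  = 1U << window_log;
    U32 const within_window = (current - lowest_valid > max_distance)
                            ? current - max_distance
                            : lowest_valid;
    return is_dictionary ? lowest_valid : within_window;
}

ZSTD_compressBlock_fast_extDict_generic above applies the same idea to detect when the extDict range has been pushed entirely out of the window (prefixStartIndex == dictStartIndex) and falls back to the regular variant in that case.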
diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_ldm.h b/internal-complibs/zstd-1.4.3/compress/zstd_ldm.h similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/zstd_ldm.h rename to internal-complibs/zstd-1.4.3/compress/zstd_ldm.h diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_opt.c b/internal-complibs/zstd-1.4.3/compress/zstd_opt.c similarity index 94% rename from internal-complibs/zstd-1.4.0/compress/zstd_opt.c rename to internal-complibs/zstd-1.4.3/compress/zstd_opt.c index efb69d326..2da363f93 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstd_opt.c +++ b/internal-complibs/zstd-1.4.3/compress/zstd_opt.c @@ -255,13 +255,13 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP * to provide a cost which is directly comparable to a match ending at same position */ static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) { - if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); + if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel); /* dynamic statistics */ { U32 const llCode = ZSTD_LLcode(litLength); - int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) - + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ - - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) + + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ + - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel); #if 1 return contribution; #else @@ -278,7 +278,7 @@ static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLe const optState_t* const optPtr, int optLevel) { - int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) + int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) + ZSTD_litLengthContribution(litLength, optPtr, optLevel); return contribution; } @@ -372,13 +372,15 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) /* Update hashTable3 up to ip (excluded) Assumption : always within prefix (i.e. 
not within extDict) */ -static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* const ip) +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) { U32* const hashTable3 = ms->hashTable3; U32 const hashLog3 = ms->hashLog3; const BYTE* const base = ms->window.base; - U32 idx = ms->nextToUpdate3; - U32 const target = ms->nextToUpdate3 = (U32)(ip - base); + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); assert(hashLog3 > 0); @@ -387,6 +389,7 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, const BYTE* idx++; } + *nextToUpdate3 = target; return hashTable3[hash3]; } @@ -503,9 +506,11 @@ static U32 ZSTD_insertBt1( } } *smallerPtr = *largerPtr = 0; - if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - assert(matchEndIdx > current + 8); - return matchEndIdx - (current + 8); + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > current + 8); + return MAX(positions, matchEndIdx - (current + 8)); + } } FORCE_INLINE_TEMPLATE @@ -520,8 +525,13 @@ void ZSTD_updateTree_internal( DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", idx, target, dictMode); - while(idx < target) - idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); ms->nextToUpdate = target; } @@ -531,11 +541,12 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { FORCE_INLINE_TEMPLATE U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ ZSTD_matchState_t* ms, + U32* nextToUpdate3, const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, - U32 rep[ZSTD_REP_NUM], + const U32 rep[ZSTD_REP_NUM], U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ - ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) { @@ -556,8 +567,8 @@ U32 ZSTD_insertBtAndGetAllMatches ( U32 const dictLimit = ms->window.dictLimit; const BYTE* const dictEnd = dictBase + dictLimit; const BYTE* const prefixStart = base + dictLimit; - U32 const btLow = btMask >= current ? 0 : current - btMask; - U32 const windowLow = ms->window.lowLimit; + U32 const btLow = (btMask >= current) ? 0 : current - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); U32 const matchLow = windowLow ? 
windowLow : 1; U32* smallerPtr = bt + 2*(current&btMask); U32* largerPtr = bt + 2*(current&btMask) + 1; @@ -627,7 +638,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( /* HC3 match finder */ if ((mls == 3) /*static*/ && (bestLength < mls)) { - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); if ((matchIndex3 >= matchLow) & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { size_t mlen; @@ -653,9 +664,7 @@ U32 ZSTD_insertBtAndGetAllMatches ( (ip+mlen == iLimit) ) { /* best possible length */ ms->nextToUpdate = current+1; /* skip insertion */ return 1; - } - } - } + } } } /* no dictMatchState lookup: dicts don't have a populated HC3 table */ } @@ -663,19 +672,21 @@ U32 ZSTD_insertBtAndGetAllMatches ( while (nbCompares-- && (matchIndex >= matchLow)) { U32* const nextPtr = bt + 2*(matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ assert(current > matchIndex); if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); } else { match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); if (matchIndex+matchLength >= dictLimit) - match = base + matchIndex; /* prepare for match[matchLength] */ + match = base + matchIndex; /* prepare for match[matchLength] read */ } if (matchLength > bestLength) { @@ -760,10 +771,13 @@ U32 ZSTD_insertBtAndGetAllMatches ( FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ ZSTD_matchState_t* ms, + U32* nextToUpdate3, const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, - U32 rep[ZSTD_REP_NUM], U32 const ll0, - ZSTD_match_t* matches, U32 const lengthToBeat) + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) { const ZSTD_compressionParameters* const cParams = &ms->cParams; U32 const matchLengthSearch = cParams->minMatch; @@ -772,12 +786,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); switch(matchLengthSearch) { - case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3); + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); default : - case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4); - case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5); + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return 
ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); case 7 : - case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6); + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); } } @@ -853,6 +867,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; ZSTD_optimal_t* const opt = optStatePtr->priceTable; ZSTD_match_t* const matches = optStatePtr->matchTable; @@ -862,7 +877,6 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); assert(optLevel <= 2); - ms->nextToUpdate3 = ms->nextToUpdate; ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); ip += (ip==prefixStart); @@ -873,7 +887,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, /* find first match */ { U32 const litlen = (U32)(ip - anchor); U32 const ll0 = !litlen; - U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch); + U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); if (!nbMatches) { ip++; continue; } /* initialize opt[0] */ @@ -970,7 +984,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; U32 const previousPrice = opt[cur].price; U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); - U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch); + U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); U32 matchNb; if (!nbMatches) { DEBUGLOG(7, "rPos:%u : no match found", cur); @@ -1094,7 +1108,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, } /* while (ip < ilimit) */ /* Return the last literals size */ - return iend - anchor; + return (size_t)(iend - anchor); } @@ -1158,7 +1172,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms, ms->window.dictLimit += (U32)srcSize; ms->window.lowLimit = ms->window.dictLimit; ms->nextToUpdate = ms->window.dictLimit; - ms->nextToUpdate3 = ms->window.dictLimit; /* re-inforce weight of collected statistics */ ZSTD_upscaleStats(&ms->opt); diff --git a/internal-complibs/zstd-1.4.0/compress/zstd_opt.h b/internal-complibs/zstd-1.4.3/compress/zstd_opt.h similarity index 100% rename from internal-complibs/zstd-1.4.0/compress/zstd_opt.h rename to internal-complibs/zstd-1.4.3/compress/zstd_opt.h diff --git a/internal-complibs/zstd-1.4.0/compress/zstdmt_compress.c b/internal-complibs/zstd-1.4.3/compress/zstdmt_compress.c similarity index 99% rename from internal-complibs/zstd-1.4.0/compress/zstdmt_compress.c rename to internal-complibs/zstd-1.4.3/compress/zstdmt_compress.c index 38fbb9076..9e537b884 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstdmt_compress.c +++ b/internal-complibs/zstd-1.4.3/compress/zstdmt_compress.c @@ -1129,9 +1129,14 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; size_t const flushed = ZSTD_isError(cResult) ? 
0 : jobPtr->dstFlushed; assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); toFlush = produced - flushed; - if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { - /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. */ + if (toFlush==0) { assert(jobPtr->consumed < jobPtr->src.size); } } @@ -1148,12 +1153,16 @@ size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) { - if (params.ldmParams.enableLdm) + unsigned jobLog; + if (params.ldmParams.enableLdm) { /* In Long Range Mode, the windowLog is typically oversized. * In which case, it's preferable to determine the jobSize * based on chainLog instead. */ - return MAX(21, params.cParams.chainLog + 4); - return MAX(20, params.cParams.windowLog + 2); + jobLog = MAX(21, params.cParams.chainLog + 4); + } else { + jobLog = MAX(20, params.cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); } static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) @@ -1197,7 +1206,7 @@ static size_t ZSTDMT_computeOverlapSize(ZSTD_CCtx_params const params) ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) - overlapRLog; } - assert(0 <= ovLog && ovLog <= 30); + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); DEBUGLOG(4, "overlapLog : %i", params.overlapLog); DEBUGLOG(4, "overlap size : %i", 1 << ovLog); return (ovLog==0) ? 0 : (size_t)1 << ovLog; @@ -1391,7 +1400,7 @@ size_t ZSTDMT_initCStream_internal( FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; - if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; + if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ if (mtctx->singleBlockingThread) { @@ -1432,6 +1441,8 @@ size_t ZSTDMT_initCStream_internal( if (mtctx->targetSectionSize == 0) { mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); } + assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); + if (params.rsyncable) { /* Aim for the targetsectionSize as the average job size. */ U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); diff --git a/internal-complibs/zstd-1.4.0/compress/zstdmt_compress.h b/internal-complibs/zstd-1.4.3/compress/zstdmt_compress.h similarity index 99% rename from internal-complibs/zstd-1.4.0/compress/zstdmt_compress.h rename to internal-complibs/zstd-1.4.3/compress/zstdmt_compress.h index 12e6bcb3a..12a526087 100644 --- a/internal-complibs/zstd-1.4.0/compress/zstdmt_compress.h +++ b/internal-complibs/zstd-1.4.3/compress/zstdmt_compress.h @@ -50,6 +50,7 @@ #ifndef ZSTDMT_JOBSIZE_MIN # define ZSTDMT_JOBSIZE_MIN (1 MB) #endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? 
(512 MB) : (1024 MB)) diff --git a/internal-complibs/zstd-1.4.0/decompress/huf_decompress.c b/internal-complibs/zstd-1.4.3/decompress/huf_decompress.c similarity index 100% rename from internal-complibs/zstd-1.4.0/decompress/huf_decompress.c rename to internal-complibs/zstd-1.4.3/decompress/huf_decompress.c diff --git a/internal-complibs/zstd-1.4.0/decompress/zstd_ddict.c b/internal-complibs/zstd-1.4.3/decompress/zstd_ddict.c similarity index 100% rename from internal-complibs/zstd-1.4.0/decompress/zstd_ddict.c rename to internal-complibs/zstd-1.4.3/decompress/zstd_ddict.c diff --git a/internal-complibs/zstd-1.4.0/decompress/zstd_ddict.h b/internal-complibs/zstd-1.4.3/decompress/zstd_ddict.h similarity index 100% rename from internal-complibs/zstd-1.4.0/decompress/zstd_ddict.h rename to internal-complibs/zstd-1.4.3/decompress/zstd_ddict.h diff --git a/internal-complibs/zstd-1.4.0/decompress/zstd_decompress.c b/internal-complibs/zstd-1.4.3/decompress/zstd_decompress.c similarity index 98% rename from internal-complibs/zstd-1.4.0/decompress/zstd_decompress.c rename to internal-complibs/zstd-1.4.3/decompress/zstd_decompress.c index 675596f5a..751060b2c 100644 --- a/internal-complibs/zstd-1.4.0/decompress/zstd_decompress.c +++ b/internal-complibs/zstd-1.4.3/decompress/zstd_decompress.c @@ -360,8 +360,11 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, frameParameter_unsupported); - - return skippableHeaderSize + sizeU32; + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); + return skippableSize; + } } /** ZSTD_findDecompressedSize() : @@ -378,11 +381,10 @@ unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) - return skippableSize; - if (srcSize < skippableSize) { + if (ZSTD_isError(skippableSize)) { return ZSTD_CONTENTSIZE_ERROR; } + assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; @@ -467,6 +469,8 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); return frameSizeInfo; } else { const BYTE* ip = (const BYTE*)src; @@ -529,7 +533,6 @@ size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) return frameSizeInfo.compressedSize; } - /** ZSTD_decompressBound() : * compatible with legacy mode * `src` must point to the start of a ZSTD frame or a skippeable frame @@ -546,6 +549,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); src = (const BYTE*)src + compressedSize; srcSize -= compressedSize; bound += decompressedBound; @@ -570,9 +574,10 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) } /** ZSTD_insertBlock() : - insert `src` 
block into `dctx` history. Useful to track uncompressed blocks. */ + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); ZSTD_checkContinuity(dctx, blockStart); dctx->previousDstEnd = (const char*)blockStart + blockSize; return blockSize; @@ -738,9 +743,8 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (unsigned)magicNumber, ZSTD_MAGICNUMBER); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - if (ZSTD_isError(skippableSize)) - return skippableSize; - RETURN_ERROR_IF(srcSize < skippableSize, srcSize_wrong); + FORWARD_IF_ERROR(skippableSize); + assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; srcSize -= skippableSize; @@ -906,6 +910,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c { blockProperties_t bp; size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); dctx->expected = cBlockSize; dctx->bType = bp.blockType; dctx->rleSize = bp.origSize; @@ -950,6 +955,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR(corruption_detected); } if (ZSTD_isError(rSize)) return rSize; + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); diff --git a/internal-complibs/zstd-1.4.0/decompress/zstd_decompress_block.c b/internal-complibs/zstd-1.4.3/decompress/zstd_decompress_block.c similarity index 97% rename from internal-complibs/zstd-1.4.0/decompress/zstd_decompress_block.c rename to internal-complibs/zstd-1.4.3/decompress/zstd_decompress_block.c index a2a7eedcf..cbcfc0840 100644 --- a/internal-complibs/zstd-1.4.0/decompress/zstd_decompress_block.c +++ b/internal-complibs/zstd-1.4.3/decompress/zstd_decompress_block.c @@ -79,6 +79,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); { const BYTE* const istart = (const BYTE*) src; @@ -87,6 +88,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, switch(litEncType) { case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); /* fall-through */ @@ -116,7 +118,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* 2 - 2 - 18 - 18 */ lhSize = 5; litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + (istart[4] << 10); + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); @@ -391,7 +393,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, symbolNext[s] = 1; } else { if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; - symbolNext[s] = normalizedCounter[s]; + assert(normalizedCounter[s]>=0); + symbolNext[s] = 
(U16)normalizedCounter[s]; } } } memcpy(dt, &DTableH, sizeof(DTableH)); } @@ -505,7 +508,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+4 > iend, srcSize_wrong); /* minimum possible size */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -637,9 +640,10 @@ size_t ZSTD_execSequence(BYTE* op, if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* copy Literals */ - ZSTD_copy8(op, *litPtr); if (sequence.litLength > 8) - ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + else + ZSTD_copy8(op, *litPtr); op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ @@ -686,13 +690,13 @@ size_t ZSTD_execSequence(BYTE* op, if (oMatchEnd > oend-(16-MINMATCH)) { if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); + ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); match += oend_w - op; op = oend_w; } while (op < oMatchEnd) *op++ = *match++; } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ } return sequenceLength; } @@ -717,9 +721,11 @@ size_t ZSTD_execSequenceLong(BYTE* op, if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); /* copy Literals */ - ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ if (sequence.litLength > 8) - ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ + else + ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ + op = oLitEnd; *litPtr = iLitEnd; /* update for next sequence */ @@ -766,13 +772,13 @@ size_t ZSTD_execSequenceLong(BYTE* op, if (oMatchEnd > oend-(16-MINMATCH)) { if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); + ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); match += oend_w - op; op = oend_w; } while (op < oMatchEnd) *op++ = *match++; } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ } return sequenceLength; } @@ -889,6 +895,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) } FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, @@ -918,6 +925,11 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, 
ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { nbSeq--; { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); @@ -930,6 +942,7 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); RETURN_ERROR_IF(nbSeq, corruption_detected); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } @@ -1131,6 +1144,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, diff --git a/internal-complibs/zstd-1.4.0/decompress/zstd_decompress_block.h b/internal-complibs/zstd-1.4.3/decompress/zstd_decompress_block.h similarity index 100% rename from internal-complibs/zstd-1.4.0/decompress/zstd_decompress_block.h rename to internal-complibs/zstd-1.4.3/decompress/zstd_decompress_block.h diff --git a/internal-complibs/zstd-1.4.0/decompress/zstd_decompress_internal.h b/internal-complibs/zstd-1.4.3/decompress/zstd_decompress_internal.h similarity index 100% rename from internal-complibs/zstd-1.4.0/decompress/zstd_decompress_internal.h rename to internal-complibs/zstd-1.4.3/decompress/zstd_decompress_internal.h diff --git a/internal-complibs/zstd-1.4.0/deprecated/zbuff.h b/internal-complibs/zstd-1.4.3/deprecated/zbuff.h similarity index 100% rename from internal-complibs/zstd-1.4.0/deprecated/zbuff.h rename to internal-complibs/zstd-1.4.3/deprecated/zbuff.h diff --git a/internal-complibs/zstd-1.4.0/deprecated/zbuff_common.c b/internal-complibs/zstd-1.4.3/deprecated/zbuff_common.c similarity index 100% rename from internal-complibs/zstd-1.4.0/deprecated/zbuff_common.c rename to internal-complibs/zstd-1.4.3/deprecated/zbuff_common.c diff --git a/internal-complibs/zstd-1.4.0/deprecated/zbuff_compress.c b/internal-complibs/zstd-1.4.3/deprecated/zbuff_compress.c similarity index 100% rename from internal-complibs/zstd-1.4.0/deprecated/zbuff_compress.c rename to internal-complibs/zstd-1.4.3/deprecated/zbuff_compress.c diff --git a/internal-complibs/zstd-1.4.0/deprecated/zbuff_decompress.c b/internal-complibs/zstd-1.4.3/deprecated/zbuff_decompress.c similarity index 100% rename from internal-complibs/zstd-1.4.0/deprecated/zbuff_decompress.c rename to internal-complibs/zstd-1.4.3/deprecated/zbuff_decompress.c diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/cover.c b/internal-complibs/zstd-1.4.3/dictBuilder/cover.c similarity index 85% rename from internal-complibs/zstd-1.4.0/dictBuilder/cover.c rename to internal-complibs/zstd-1.4.3/dictBuilder/cover.c index 21464ad03..621996759 100644 --- a/internal-complibs/zstd-1.4.0/dictBuilder/cover.c +++ b/internal-complibs/zstd-1.4.3/dictBuilder/cover.c @@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) { * Prepare a context for dictionary building. 
* The context is only dependent on the parameter `d` and can used multiple * times. - * Returns 1 on success or zero on error. + * Returns 0 on success or error code on error. * The context must be destroyed with `COVER_ctx_destroy()`. */ -static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, unsigned d, double splitPoint) { const BYTE *const samples = (const BYTE *)samplesBuffer; @@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); - return 0; + return ERROR(srcSize_wrong); } /* Check if there are at least 5 training samples */ if (nbTrainSamples < 5) { DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); - return 0; + return ERROR(srcSize_wrong); } /* Check if there's testing sample */ if (nbTestSamples < 1) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); - return 0; + return ERROR(srcSize_wrong); } /* Zero the context */ memset(ctx, 0, sizeof(*ctx)); @@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); COVER_ctx_destroy(ctx); - return 0; + return ERROR(memory_allocation); } ctx->freqs = NULL; ctx->d = d; @@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); ctx->freqs = ctx->suffix; ctx->suffix = NULL; - return 1; + return 0; } void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) @@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( /* Checks */ if (!COVER_checkParameters(parameters, dictBufferCapacity)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "Cover must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( return ERROR(dstSize_tooSmall); } /* Initialize context and activeDmers */ - if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, - parameters.d, parameters.splitPoint)) { - return ERROR(GENERIC); + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } } COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); COVER_ctx_destroy(&ctx); - return ERROR(GENERIC); + return ERROR(memory_allocation); } DISPLAYLEVEL(2, "Building dictionary\n"); @@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, cctx, dst, dstCapacity, samples + offsets[i], samplesSizes[i], cdict); if (ZSTD_isError(size)) { - totalCompressedSize = ERROR(GENERIC); + totalCompressedSize = size; goto _compressCleanup; } 
totalCompressedSize += size; @@ -886,9 +889,11 @@ void COVER_best_start(COVER_best_t *best) { * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ -void COVER_best_finish(COVER_best_t *best, size_t compressedSize, - ZDICT_cover_params_t parameters, void *dict, - size_t dictSize) { +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) { + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; if (!best) { return; } @@ -914,6 +919,9 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize, } } /* Save the dictionary, parameters, and size */ + if (!dict) { + return; + } memcpy(best->dict, dict, dictSize); best->dictSize = dictSize; best->parameters = parameters; @@ -926,6 +934,111 @@ void COVER_best_finish(COVER_best_t *best, size_t compressedSize, } } +COVER_dictSelection_t COVER_dictSelectionError(size_t error) { + COVER_dictSelection_t selection = { NULL, 0, error }; + return selection; +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = customDictContent + dictContentSize; + + BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize); + BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize); + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( + largestDictbuffer, dictContentSize, customDictContent, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } + + largestDict = dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( + candidateDictBuffer, dictContentSize, 
customDictContentEnd - dictContentSize, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (totalCompressedSize <= largestCompressed * regressionTolerance) { + COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; + free(largestDictbuffer); + return selection; + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } +} + /** * Parameters for COVER_tryParameters(). */ @@ -951,6 +1064,7 @@ static void COVER_tryParameters(void *opaque) { /* Allocate space for hash table, dict, and freqs */ COVER_map_t activeDmers; BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); @@ -966,29 +1080,21 @@ static void COVER_tryParameters(void *opaque) { { const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, dictBufferCapacity, parameters); - dictBufferCapacity = ZDICT_finalizeDictionary( - dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, - parameters.zParams); - if (ZDICT_isError(dictBufferCapacity)) { - DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); + selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); goto _cleanup; } } - /* Check total compressed size */ - totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, - ctx->samples, ctx->offsets, - ctx->nbTrainSamples, ctx->nbSamples, - dict, dictBufferCapacity); - _cleanup: - COVER_best_finish(data->best, totalCompressedSize, parameters, dict, - dictBufferCapacity); + free(dict); + COVER_best_finish(data->best, parameters, selection); free(data); COVER_map_destroy(&activeDmers); - if (dict) { - free(dict); - } + COVER_dictSelectionFree(selection); if (freqs) { free(freqs); } @@ -1010,6 +1116,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); const unsigned kIterations = (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned shrinkDict = 0; /* Local variables */ const int displayLevel = parameters->zParams.notificationLevel; unsigned iteration = 1; @@ -1022,15 +1129,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect 
parameters\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "Cover must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -1054,11 +1161,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Initialize the context for this value of d */ COVER_ctx_t ctx; LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - COVER_best_destroy(&best); - POOL_free(pool); - return ERROR(GENERIC); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } } if (!warned) { COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); @@ -1075,7 +1185,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( COVER_best_destroy(&best); COVER_ctx_destroy(&ctx); POOL_free(pool); - return ERROR(GENERIC); + return ERROR(memory_allocation); } data->ctx = &ctx; data->best = &best; @@ -1085,6 +1195,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( data->parameters.d = d; data->parameters.splitPoint = splitPoint; data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; data->parameters.zParams.notificationLevel = g_displayLevel; /* Check the parameters */ if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/cover.h b/internal-complibs/zstd-1.4.3/dictBuilder/cover.h similarity index 67% rename from internal-complibs/zstd-1.4.0/dictBuilder/cover.h rename to internal-complibs/zstd-1.4.3/dictBuilder/cover.h index efb46807c..d9e0636a6 100644 --- a/internal-complibs/zstd-1.4.0/dictBuilder/cover.h +++ b/internal-complibs/zstd-1.4.3/dictBuilder/cover.h @@ -46,6 +46,15 @@ typedef struct { U32 size; } COVER_epoch_info_t; +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + /** * Computes the number of epochs and the size of each epoch. * We will make sure that each epoch gets at least 10 * k bytes. @@ -107,6 +116,32 @@ void COVER_best_start(COVER_best_t *best); * Decrements liveJobs and signals any waiting threads if liveJobs == 0. * If this dictionary is the best so far save it and its parameters. */ -void COVER_best_finish(COVER_best_t *best, size_t compressedSize, - ZDICT_cover_params_t parameters, void *dict, - size_t dictSize); +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. 
+ */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/divsufsort.c b/internal-complibs/zstd-1.4.3/dictBuilder/divsufsort.c similarity index 100% rename from internal-complibs/zstd-1.4.0/dictBuilder/divsufsort.c rename to internal-complibs/zstd-1.4.3/dictBuilder/divsufsort.c diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/divsufsort.h b/internal-complibs/zstd-1.4.3/dictBuilder/divsufsort.h similarity index 100% rename from internal-complibs/zstd-1.4.0/dictBuilder/divsufsort.h rename to internal-complibs/zstd-1.4.3/dictBuilder/divsufsort.h diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/fastcover.c b/internal-complibs/zstd-1.4.3/dictBuilder/fastcover.c similarity index 93% rename from internal-complibs/zstd-1.4.0/dictBuilder/fastcover.c rename to internal-complibs/zstd-1.4.3/dictBuilder/fastcover.c index 5b6b941a9..941bb5a26 100644 --- a/internal-complibs/zstd-1.4.0/dictBuilder/fastcover.c +++ b/internal-complibs/zstd-1.4.3/dictBuilder/fastcover.c @@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) * Prepare a context for dictionary building. * The context is only dependent on the parameter `d` and can used multiple * times. - * Returns 1 on success or zero on error. + * Returns 0 on success or error code on error. * The context must be destroyed with `FASTCOVER_ctx_destroy()`. 
*/ -static int +static size_t FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, @@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); - return 0; + return ERROR(srcSize_wrong); } /* Check if there are at least 5 training samples */ if (nbTrainSamples < 5) { DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); - return 0; + return ERROR(srcSize_wrong); } /* Check if there's testing sample */ if (nbTestSamples < 1) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); - return 0; + return ERROR(srcSize_wrong); } /* Zero the context */ @@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, if (ctx->offsets == NULL) { DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); FASTCOVER_ctx_destroy(ctx); - return 0; + return ERROR(memory_allocation); } /* Fill offsets from the samplesSizes */ @@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, if (ctx->freqs == NULL) { DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); FASTCOVER_ctx_destroy(ctx); - return 0; + return ERROR(memory_allocation); } DISPLAYLEVEL(2, "Computing frequencies\n"); FASTCOVER_computeFrequency(ctx->freqs, ctx); - return 1; + return 0; } @@ -435,7 +435,6 @@ FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, return tail; } - /** * Parameters for FASTCOVER_tryParameters(). */ @@ -464,6 +463,7 @@ static void FASTCOVER_tryParameters(void *opaque) U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); /* Allocate space for hash table, dict, and freqs */ BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); if (!segmentFreqs || !dict || !freqs) { DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); @@ -473,27 +473,24 @@ static void FASTCOVER_tryParameters(void *opaque) memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); /* Build the dictionary */ { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, - parameters, segmentFreqs); + parameters, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); - dictBufferCapacity = ZDICT_finalizeDictionary( - dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, - ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams); - if (ZDICT_isError(dictBufferCapacity)) { - DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); + selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); goto _cleanup; } } - /* Check total compressed size */ - totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, - ctx->samples, ctx->offsets, - ctx->nbTrainSamples, ctx->nbSamples, - dict, dictBufferCapacity); _cleanup: - COVER_best_finish(data->best, totalCompressedSize, parameters, dict, - dictBufferCapacity); + free(dict); + COVER_best_finish(data->best, 
parameters, selection); free(data); free(segmentFreqs); - free(dict); + COVER_dictSelectionFree(selection); free(freqs); } @@ -508,6 +505,7 @@ FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, coverParams->nbThreads = fastCoverParams.nbThreads; coverParams->splitPoint = fastCoverParams.splitPoint; coverParams->zParams = fastCoverParams.zParams; + coverParams->shrinkDict = fastCoverParams.shrinkDict; } @@ -524,6 +522,7 @@ FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, fastCoverParams->f = f; fastCoverParams->accel = accel; fastCoverParams->zParams = coverParams.zParams; + fastCoverParams->shrinkDict = coverParams.shrinkDict; } @@ -550,11 +549,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, parameters.accel)) { DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -564,11 +563,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, /* Assign corresponding FASTCOVER_accel_t to accelParams*/ accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; /* Initialize context */ - if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, coverParams.d, parameters.splitPoint, parameters.f, - accelParams)) { - DISPLAYLEVEL(1, "Failed to initialize context\n"); - return ERROR(GENERIC); + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } } COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); /* Build the dictionary */ @@ -616,6 +618,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; + const unsigned shrinkDict = 0; /* Local variables */ const int displayLevel = parameters->zParams.notificationLevel; unsigned iteration = 1; @@ -627,19 +630,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", @@ -666,11 +669,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* Initialize the context for this value of d */ FASTCOVER_ctx_t ctx; LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - COVER_best_destroy(&best); - POOL_free(pool); - return ERROR(GENERIC); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } } if (!warned) { COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); @@ -687,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( COVER_best_destroy(&best); FASTCOVER_ctx_destroy(&ctx); POOL_free(pool); - return ERROR(GENERIC); + return ERROR(memory_allocation); } data->ctx = &ctx; data->best = &best; @@ -697,6 +703,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( data->parameters.d = d; data->parameters.splitPoint = splitPoint; data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; data->parameters.zParams.notificationLevel = g_displayLevel; /* Check the parameters */ if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/zdict.c b/internal-complibs/zstd-1.4.3/dictBuilder/zdict.c similarity index 99% rename from internal-complibs/zstd-1.4.0/dictBuilder/zdict.c rename to internal-complibs/zstd-1.4.3/dictBuilder/zdict.c index c753da0db..ee21ee1a9 100644 --- a/internal-complibs/zstd-1.4.0/dictBuilder/zdict.c +++ b/internal-complibs/zstd-1.4.3/dictBuilder/zdict.c @@ -741,7 +741,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* analyze, build stats, starting with literals */ { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); if (HUF_isError(maxNbBits)) { - eSize = ERROR(GENERIC); + eSize = maxNbBits; DISPLAYLEVEL(1, " HUF_buildCTable error \n"); goto _cleanup; } @@ -764,7 +764,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); if (FSE_isError(errorCode)) { - eSize = ERROR(GENERIC); + eSize = errorCode; DISPLAYLEVEL(1, 
"FSE_normalizeCount error with offcodeCount \n"); goto _cleanup; } @@ -773,7 +773,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); if (FSE_isError(errorCode)) { - eSize = ERROR(GENERIC); + eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); goto _cleanup; } @@ -782,7 +782,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); if (FSE_isError(errorCode)) { - eSize = ERROR(GENERIC); + eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); goto _cleanup; } @@ -791,7 +791,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* write result to buffer */ { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog); if (HUF_isError(hhSize)) { - eSize = ERROR(GENERIC); + eSize = hhSize; DISPLAYLEVEL(1, "HUF_writeCTable error \n"); goto _cleanup; } @@ -802,7 +802,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); if (FSE_isError(ohSize)) { - eSize = ERROR(GENERIC); + eSize = ohSize; DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); goto _cleanup; } @@ -813,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); if (FSE_isError(mhSize)) { - eSize = ERROR(GENERIC); + eSize = mhSize; DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); goto _cleanup; } @@ -824,7 +824,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); if (FSE_isError(lhSize)) { - eSize = ERROR(GENERIC); + eSize = lhSize; DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); goto _cleanup; } @@ -834,7 +834,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, } if (maxDstSize<12) { - eSize = ERROR(GENERIC); + eSize = ERROR(dstSize_tooSmall); DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); goto _cleanup; } diff --git a/internal-complibs/zstd-1.4.0/dictBuilder/zdict.h b/internal-complibs/zstd-1.4.3/dictBuilder/zdict.h similarity index 94% rename from internal-complibs/zstd-1.4.0/dictBuilder/zdict.h rename to internal-complibs/zstd-1.4.3/dictBuilder/zdict.h index e22973173..37978ecdf 100644 --- a/internal-complibs/zstd-1.4.0/dictBuilder/zdict.h +++ b/internal-complibs/zstd-1.4.3/dictBuilder/zdict.h @@ -94,6 +94,8 @@ typedef struct { unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ + unsigned shrinkDict; 
/* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ ZDICT_params_t zParams; } ZDICT_cover_params_t; @@ -105,6 +107,9 @@ typedef struct { unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ + ZDICT_params_t zParams; } ZDICT_fastCover_params_t; diff --git a/internal-complibs/zstd-1.4.0/dll/example/Makefile b/internal-complibs/zstd-1.4.3/dll/example/Makefile similarity index 100% rename from internal-complibs/zstd-1.4.0/dll/example/Makefile rename to internal-complibs/zstd-1.4.3/dll/example/Makefile diff --git a/internal-complibs/zstd-1.4.0/dll/example/README.md b/internal-complibs/zstd-1.4.3/dll/example/README.md similarity index 100% rename from internal-complibs/zstd-1.4.0/dll/example/README.md rename to internal-complibs/zstd-1.4.3/dll/example/README.md diff --git a/internal-complibs/zstd-1.4.0/dll/example/build_package.bat b/internal-complibs/zstd-1.4.3/dll/example/build_package.bat similarity index 100% rename from internal-complibs/zstd-1.4.0/dll/example/build_package.bat rename to internal-complibs/zstd-1.4.3/dll/example/build_package.bat diff --git a/internal-complibs/zstd-1.4.0/dll/example/fullbench-dll.sln b/internal-complibs/zstd-1.4.3/dll/example/fullbench-dll.sln similarity index 100% rename from internal-complibs/zstd-1.4.0/dll/example/fullbench-dll.sln rename to internal-complibs/zstd-1.4.3/dll/example/fullbench-dll.sln diff --git a/internal-complibs/zstd-1.4.0/dll/example/fullbench-dll.vcxproj b/internal-complibs/zstd-1.4.3/dll/example/fullbench-dll.vcxproj similarity index 100% rename from internal-complibs/zstd-1.4.0/dll/example/fullbench-dll.vcxproj rename to internal-complibs/zstd-1.4.3/dll/example/fullbench-dll.vcxproj diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_legacy.h b/internal-complibs/zstd-1.4.3/legacy/zstd_legacy.h similarity index 98% rename from internal-complibs/zstd-1.4.0/legacy/zstd_legacy.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_legacy.h index e5b383ee4..0dbd3c7a4 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_legacy.h +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_legacy.h @@ -238,6 +238,10 @@ MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size frameSizeInfo.decompressedBound = 
ZSTD_CONTENTSIZE_ERROR; break; } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } return frameSizeInfo; } diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v01.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v01.c similarity index 93% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v01.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v01.c index cad2b99b4..ae8cba2a3 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v01.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v01.c @@ -1073,99 +1073,102 @@ static size_t HUF_decompress_usingDTable( /* -3% slower when non static */ const void* cSrc, size_t cSrcSize, const U16* DTable) { - BYTE* const ostart = (BYTE*) dst; - BYTE* op = ostart; - BYTE* const omax = op + maxDstSize; - BYTE* const olimit = omax-15; - - const void* ptr = DTable; - const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; - const U32 dtLog = DTable[0]; - size_t errorCode; - U32 reloadStatus; - - /* Init */ - - const U16* jumpTable = (const U16*)cSrc; - const size_t length1 = FSE_readLE16(jumpTable); - const size_t length2 = FSE_readLE16(jumpTable+1); - const size_t length3 = FSE_readLE16(jumpTable+2); - const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency !! - const char* const start1 = (const char*)(cSrc) + 6; - const char* const start2 = start1 + length1; - const char* const start3 = start2 + length2; - const char* const start4 = start3 + length3; - FSE_DStream_t bitD1, bitD2, bitD3, bitD4; - - if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; - - errorCode = FSE_initDStream(&bitD1, start1, length1); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD2, start2, length2); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD3, start3, length3); - if (FSE_isError(errorCode)) return errorCode; - errorCode = FSE_initDStream(&bitD4, start4, length4); - if (FSE_isError(errorCode)) return errorCode; - - reloadStatus=FSE_reloadDStream(&bitD2); - - /* 16 symbols per loop */ - for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) - -#define HUF_DECODE_SYMBOL_2(n, Dstream) \ - op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ - if (FSE_32bits()) FSE_reloadDStream(&Dstream) - - HUF_DECODE_SYMBOL_1( 0, bitD1); - HUF_DECODE_SYMBOL_1( 1, bitD2); - HUF_DECODE_SYMBOL_1( 2, bitD3); - HUF_DECODE_SYMBOL_1( 3, bitD4); - HUF_DECODE_SYMBOL_2( 4, bitD1); - HUF_DECODE_SYMBOL_2( 5, bitD2); - HUF_DECODE_SYMBOL_2( 6, bitD3); - HUF_DECODE_SYMBOL_2( 7, bitD4); - HUF_DECODE_SYMBOL_1( 8, bitD1); - HUF_DECODE_SYMBOL_1( 9, bitD2); - HUF_DECODE_SYMBOL_1(10, bitD3); - HUF_DECODE_SYMBOL_1(11, bitD4); - HUF_DECODE_SYMBOL_0(12, bitD1); - HUF_DECODE_SYMBOL_0(13, bitD2); - HUF_DECODE_SYMBOL_0(14, bitD3); - HUF_DECODE_SYMBOL_0(15, bitD4); - } + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-15; + + const void* ptr = DTable; + const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1; + const U32 dtLog = DTable[0]; + size_t errorCode; + U32 reloadStatus; + + /* Init */ + + const U16* jumpTable = (const U16*)cSrc; + const size_t length1 = FSE_readLE16(jumpTable); + const size_t length2 = FSE_readLE16(jumpTable+1); + const size_t length3 = FSE_readLE16(jumpTable+2); + const size_t length4 = cSrcSize - 6 - length1 - length2 - length3; // check coherency 
!! + const char* const start1 = (const char*)(cSrc) + 6; + const char* const start2 = start1 + length1; + const char* const start3 = start2 + length2; + const char* const start4 = start3 + length3; + FSE_DStream_t bitD1, bitD2, bitD3, bitD4; + + if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong; + + errorCode = FSE_initDStream(&bitD1, start1, length1); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD2, start2, length2); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD3, start3, length3); + if (FSE_isError(errorCode)) return errorCode; + errorCode = FSE_initDStream(&bitD4, start4, length4); + if (FSE_isError(errorCode)) return errorCode; + + reloadStatus=FSE_reloadDStream(&bitD2); + + /* 16 symbols per loop */ + for ( ; (reloadStatus12)) FSE_reloadDStream(&Dstream) + + #define HUF_DECODE_SYMBOL_2(n, Dstream) \ + op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \ + if (FSE_32bits()) FSE_reloadDStream(&Dstream) + + HUF_DECODE_SYMBOL_1( 0, bitD1); + HUF_DECODE_SYMBOL_1( 1, bitD2); + HUF_DECODE_SYMBOL_1( 2, bitD3); + HUF_DECODE_SYMBOL_1( 3, bitD4); + HUF_DECODE_SYMBOL_2( 4, bitD1); + HUF_DECODE_SYMBOL_2( 5, bitD2); + HUF_DECODE_SYMBOL_2( 6, bitD3); + HUF_DECODE_SYMBOL_2( 7, bitD4); + HUF_DECODE_SYMBOL_1( 8, bitD1); + HUF_DECODE_SYMBOL_1( 9, bitD2); + HUF_DECODE_SYMBOL_1(10, bitD3); + HUF_DECODE_SYMBOL_1(11, bitD4); + HUF_DECODE_SYMBOL_0(12, bitD1); + HUF_DECODE_SYMBOL_0(13, bitD2); + HUF_DECODE_SYMBOL_0(14, bitD3); + HUF_DECODE_SYMBOL_0(15, bitD4); + } - if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ - return (size_t)-FSE_ERROR_corruptionDetected; + if (reloadStatus!=FSE_DStream_completed) /* not complete : some bitStream might be FSE_DStream_unfinished */ + return (size_t)-FSE_ERROR_corruptionDetected; - /* tail */ - { - // bitTail = bitD1; // *much* slower : -20% !??! 
- FSE_DStream_t bitTail; - bitTail.ptr = bitD1.ptr; - bitTail.bitsConsumed = bitD1.bitsConsumed; - bitTail.bitContainer = bitD1.bitContainer; // required in case of FSE_DStream_endOfBuffer - bitTail.start = start1; - for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (opprevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = dumps 1 byte */ + litLength = ZSTD_readLE24(dumps); dumps += 3; } } @@ -1732,13 +1726,13 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = dumps 1 byte */ + matchLength = ZSTD_readLE24(dumps); dumps += 3; } } diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v01.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v01.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v01.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v01.h diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v02.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v02.c similarity index 99% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v02.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v02.c index 561bc412e..793df6024 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v02.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v02.c @@ -217,6 +217,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) @@ -3043,11 +3048,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) seqState->prevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + litLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ @@ -3073,11 +3078,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + matchLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v02.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v02.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v02.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v02.h diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v03.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v03.c similarity index 99% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v03.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v03.c index a1bf0fa9b..dbc83f1ee 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v03.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v03.c @@ -219,6 +219,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) @@ -2525,6 +2530,7 @@ static size_t ZSTD_decodeLiteralsBlock(void* ctx, const size_t litSize = (MEM_readLE32(istart) & 0xFFFFFF) >> 2; /* no buffer issue : srcSize >= MIN_CBLOCK_SIZE */ if (litSize > srcSize-11) /* risk of reading too far with wildcopy */ { + if 
(litSize > BLOCKSIZE) return ERROR(corruption_detected); if (litSize > srcSize-3) return ERROR(corruption_detected); memcpy(dctx->litBuffer, istart, litSize); dctx->litPtr = dctx->litBuffer; @@ -2684,11 +2690,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) seqState->prevOffset = seq->offset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + litLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ @@ -2714,11 +2720,11 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ + matchLength = MEM_readLE24(dumps); dumps += 3; } if (dumps >= de) dumps = de-1; /* late correction, to avoid read overflow (data is now corrupted anyway) */ diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v03.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v03.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v03.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v03.h diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v04.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v04.c similarity index 99% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v04.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v04.c index 4342330e2..645a6e313 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v04.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v04.c @@ -189,6 +189,11 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) } } +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); +} + MEM_STATIC U32 MEM_readLE32(const void* memPtr) { if (MEM_isLittleEndian()) @@ -2808,13 +2813,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); prevOffset = litLength ? 
seq->offset : seqState->prevOffset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = dumps de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } @@ -2837,13 +2841,12 @@ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) /* MatchLength */ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } matchLength += MINMATCH; diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v04.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v04.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v04.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v04.h diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v05.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v05.c similarity index 97% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v05.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v05.c index caaf15f9b..e347b00dc 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v05.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v05.c @@ -1998,91 +1998,92 @@ size_t HUFv05_decompress4X2_usingDTable( const void* cSrc, size_t cSrcSize, const U16* DTable) { - const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable; - const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1; - const U32 dtLog = DTable[0]; - size_t errorCode; - - /* Init */ - BITv05_DStream_t bitD1; - BITv05_DStream_t bitD2; - BITv05_DStream_t bitD3; - BITv05_DStream_t bitD4; - const size_t length1 = MEM_readLE16(istart); - const size_t length2 = MEM_readLE16(istart+2); - const size_t length3 = MEM_readLE16(istart+4); - size_t length4; - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal; - /* Check */ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable; + const HUFv05_DEltX2* const dt = ((const HUFv05_DEltX2*)dtPtr) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; - length4 = cSrcSize - (length1 + length2 + length3 + 6); - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - errorCode = BITv05_initDStream(&bitD1, istart1, length1); - if (HUFv05_isError(errorCode)) return errorCode; - errorCode = BITv05_initDStream(&bitD2, istart2, length2); - if (HUFv05_isError(errorCode)) return errorCode; - errorCode = BITv05_initDStream(&bitD3, istart3, length3); - if (HUFv05_isError(errorCode)) return errorCode; - errorCode = BITv05_initDStream(&bitD4, istart4, length4); 
- if (HUFv05_isError(errorCode)) return errorCode; + /* Init */ + BITv05_DStream_t bitD1; + BITv05_DStream_t bitD2; + BITv05_DStream_t bitD3; + BITv05_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4); - for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) { - HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); - HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4); - HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); - HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1); - HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2); - HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3); - HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4); + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BITv05_initDStream(&bitD1, istart1, length1); + if (HUFv05_isError(errorCode)) return errorCode; + errorCode = BITv05_initDStream(&bitD2, istart2, length2); + if (HUFv05_isError(errorCode)) return errorCode; + errorCode = BITv05_initDStream(&bitD3, istart3, length3); + if (HUFv05_isError(errorCode)) return errorCode; + errorCode = BITv05_initDStream(&bitD4, istart4, length4); + if (HUFv05_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4); - } + for ( ; (endSignal==BITv05_DStream_unfinished) && (op4<(oend-7)) ; ) { + HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); + HUFv05_DECODE_SYMBOLX2_1(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_1(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_1(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_1(op4, &bitD4); + HUFv05_DECODE_SYMBOLX2_2(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_2(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_2(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_2(op4, &bitD4); + HUFv05_DECODE_SYMBOLX2_0(op1, &bitD1); + HUFv05_DECODE_SYMBOLX2_0(op2, &bitD2); + HUFv05_DECODE_SYMBOLX2_0(op3, &bitD3); + HUFv05_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = BITv05_reloadDStream(&bitD1) | BITv05_reloadDStream(&bitD2) | BITv05_reloadDStream(&bitD3) | BITv05_reloadDStream(&bitD4); + } - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 
> opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ - /* finish bitStreams one by one */ - HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + /* finish bitStreams one by one */ + HUFv05_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUFv05_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUFv05_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUFv05_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - /* check */ - endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4); - if (!endSignal) return ERROR(corruption_detected); + /* check */ + endSignal = BITv05_endOfDStream(&bitD1) & BITv05_endOfDStream(&bitD2) & BITv05_endOfDStream(&bitD3) & BITv05_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); - /* decoded size */ - return dstSize; + /* decoded size */ + return dstSize; + } } @@ -3150,14 +3151,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState) litLength = FSEv05_peakSymbol(&(seqState->stateLL)); prevOffset = litLength ? seq->offset : seqState->prevOffset; if (litLength == MaxLL) { - U32 add = *dumps++; + const U32 add = *dumps++; if (add < 255) litLength += add; - else { - litLength = MEM_readLE32(dumps) & 0xFFFFFF; /* no risk : dumps is always followed by seq tables > 1 byte */ - if (litLength&1) litLength>>=1, dumps += 3; - else litLength = (U16)(litLength)>>1, dumps += 2; + else if (dumps + 2 <= de) { + litLength = MEM_readLE16(dumps); + dumps += 2; + if ((litLength & 1) && dumps < de) { + litLength += *dumps << 16; + dumps += 1; + } + litLength>>=1; } - if (dumps > de) { litLength = MaxLL+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } @@ -3184,14 +3188,17 @@ static void ZSTDv05_decodeSequence(seq_t* seq, seqState_t* seqState) /* MatchLength */ matchLength = FSEv05_decodeSymbol(&(seqState->stateML), &(seqState->DStream)); if (matchLength == MaxML) { - U32 add = *dumps++; + const U32 add = dumps 1 byte */ - if (matchLength&1) matchLength>>=1, dumps += 3; - else matchLength = (U16)(matchLength)>>1, dumps += 2; + else if (dumps + 2 <= de) { + matchLength = MEM_readLE16(dumps); + dumps += 2; + if ((matchLength & 1) && dumps < de) { + matchLength += *dumps << 16; + dumps += 1; + } + matchLength >>= 1; } - if (dumps > de) { matchLength = MaxML+255; } /* late correction, to avoid using uninitialized memory */ if (dumps >= de) { dumps = de-1; } /* late correction, to avoid read overflow (data is now corrupted anyway) */ } matchLength += MINMATCH; diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v05.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v05.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v05.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v05.h diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v06.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v06.c similarity index 99% rename from 
internal-complibs/zstd-1.4.0/legacy/zstd_v06.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v06.c index a695cbb8a..f907a3a71 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v06.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v06.c @@ -3242,14 +3242,12 @@ static size_t ZSTDv06_decodeSeqHeaders(int* nbSeqPtr, } /* FSE table descriptors */ + if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */ { U32 const LLtype = *ip >> 6; U32 const Offtype = (*ip >> 4) & 3; U32 const MLtype = (*ip >> 2) & 3; ip++; - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* Build DTables */ { size_t const bhSize = ZSTDv06_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); if (ZSTDv06_isError(bhSize)) return ERROR(corruption_detected); @@ -3672,7 +3670,7 @@ void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS blockProperties_t blockProperties = { bt_compressed, 0 }; /* Frame Header */ - { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, ZSTDv06_frameHeaderSize_min); + { size_t const frameHeaderSize = ZSTDv06_frameHeaderSize(src, srcSize); if (ZSTDv06_isError(frameHeaderSize)) { ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize); return; diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v06.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v06.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v06.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v06.h diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v07.c b/internal-complibs/zstd-1.4.3/legacy/zstd_v07.c similarity index 99% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v07.c rename to internal-complibs/zstd-1.4.3/legacy/zstd_v07.c index 6b9488931..a83ddc9a6 100644 --- a/internal-complibs/zstd-1.4.0/legacy/zstd_v07.c +++ b/internal-complibs/zstd-1.4.3/legacy/zstd_v07.c @@ -3470,14 +3470,12 @@ static size_t ZSTDv07_decodeSeqHeaders(int* nbSeqPtr, } /* FSE table descriptors */ + if (ip + 4 > iend) return ERROR(srcSize_wrong); /* min : header byte + all 3 are "raw", hence no header, but at least xxLog bits per type */ { U32 const LLtype = *ip >> 6; U32 const OFtype = (*ip >> 4) & 3; U32 const MLtype = (*ip >> 2) & 3; ip++; - /* check */ - if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ - /* Build DTables */ { size_t const llhSize = ZSTDv07_buildSeqTable(DTableLL, LLtype, MaxLL, LLFSELog, ip, iend-ip, LL_defaultNorm, LL_defaultNormLog, flagRepeatTable); if (ZSTDv07_isError(llhSize)) return ERROR(corruption_detected); @@ -3918,7 +3916,7 @@ void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cS } /* Frame Header */ - { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, ZSTDv07_frameHeaderSize_min); + { size_t const frameHeaderSize = ZSTDv07_frameHeaderSize(src, srcSize); if (ZSTDv07_isError(frameHeaderSize)) { ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, frameHeaderSize); return; diff --git a/internal-complibs/zstd-1.4.0/legacy/zstd_v07.h b/internal-complibs/zstd-1.4.3/legacy/zstd_v07.h similarity index 100% rename from internal-complibs/zstd-1.4.0/legacy/zstd_v07.h rename to internal-complibs/zstd-1.4.3/legacy/zstd_v07.h diff --git a/internal-complibs/zstd-1.4.0/libzstd.pc.in b/internal-complibs/zstd-1.4.3/libzstd.pc.in 
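Note on the dictBuilder hunks above: the single ZDICT_finalizeDictionary() call in COVER_tryParameters()/FASTCOVER_tryParameters() is replaced by COVER_selectDict(), which finalizes a candidate at a small size, doubles the candidate size until it reaches the largest dictionary, and returns the first candidate whose total compressed size of the training set stays within a regression tolerance of what the largest dictionary achieves. A minimal sketch of that selection rule follows; the function and callback names are illustrative and the tolerance computation is an assumption, not the actual cover.c code.

/* Sketch (not the cover.c implementation) of the shrink-dict selection rule
 * used by COVER_selectDict(): keep doubling the candidate dictionary size and
 * accept the first candidate whose total compressed size stays within a
 * tolerance of the largest dictionary's result.  `measure` is a hypothetical
 * callback returning the total compressed size of the training set for a
 * given dictionary size. */
#include <stddef.h>

typedef size_t (*measure_fn)(size_t dictSize);

static size_t select_smallest_dict(size_t minSize, size_t maxSize,
                                   unsigned maxRegressionPct, measure_fn measure)
{
    const size_t largestCompressed = measure(maxSize);
    const double tolerance = 1.0 + (double)maxRegressionPct / 100.0;
    size_t candidate = minSize;
    while (candidate < maxSize) {
        if ((double)measure(candidate) <= (double)largestCompressed * tolerance)
            return candidate;   /* small enough and close enough in ratio */
        candidate *= 2;         /* same doubling step as the loop in the diff */
    }
    return maxSize;             /* otherwise fall back to the largest dictionary */
}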
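A second theme in these hunks is error reporting: paths that previously returned the catch-all ERROR(GENERIC) now return specific codes (parameter_outOfBound, srcSize_wrong, memory_allocation) or propagate the underlying FSE/HUF error, so callers get a meaningful name back from ZDICT_getErrorName(). A short, hedged usage sketch; the sample buffers are placeholders, and the exact error strings shown in the comment are examples rather than guaranteed output.

/* Sketch of a caller benefiting from the more specific error codes.
 * `samples`, `sampleSizes` and `nbSamples` are placeholders for a real
 * training corpus. */
#include <stdio.h>
#include "zdict.h"

static int build_dict(void* dictBuf, size_t dictCap,
                      const void* samples, const size_t* sampleSizes,
                      unsigned nbSamples)
{
    size_t const r = ZDICT_trainFromBuffer(dictBuf, dictCap,
                                           samples, sampleSizes, nbSamples);
    if (ZDICT_isError(r)) {
        /* With zstd 1.4.3 this can report e.g. "Src size is incorrect" or
         * "Parameter is out of bound" instead of a generic failure. */
        fprintf(stderr, "dictionary training failed: %s\n", ZDICT_getErrorName(r));
        return -1;
    }
    printf("trained dictionary of %zu bytes\n", r);
    return 0;
}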
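The zstd.h hunks that follow also expose the experimental ZSTD_c_targetCBlockSize parameter (bounded by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX). A hedged sketch of how a caller could opt in is below; the target value 2048 is an arbitrary example, and since the parameter is experimental it is only visible with ZSTD_STATIC_LINKING_ONLY defined.

/* Hedged sketch: requesting a target compressed block size through the new
 * experimental parameter.  There is no guarantee on the resulting block
 * size; 0 (the default) means no target. */
#define ZSTD_STATIC_LINKING_ONLY   /* ZSTD_c_targetCBlockSize is experimental */
#include "zstd.h"

static size_t compress_with_target(void* dst, size_t dstCap,
                                   const void* src, size_t srcSize)
{
    ZSTD_CCtx* const cctx = ZSTD_createCCtx();
    size_t ret;
    if (cctx == NULL) return (size_t)-1;   /* treated as an error by ZSTD_isError() */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetCBlockSize, 2048);
    ret = ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
    ZSTD_freeCCtx(cctx);
    return ret;
}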
similarity index 100% rename from internal-complibs/zstd-1.4.0/libzstd.pc.in rename to internal-complibs/zstd-1.4.3/libzstd.pc.in diff --git a/internal-complibs/zstd-1.4.0/zstd.h b/internal-complibs/zstd-1.4.3/zstd.h similarity index 97% rename from internal-complibs/zstd-1.4.0/zstd.h rename to internal-complibs/zstd-1.4.3/zstd.h index 53470c18f..f8e95f228 100644 --- a/internal-complibs/zstd-1.4.0/zstd.h +++ b/internal-complibs/zstd-1.4.3/zstd.h @@ -71,7 +71,7 @@ extern "C" { /*------ Version ------*/ #define ZSTD_VERSION_MAJOR 1 #define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_RELEASE 3 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ @@ -82,16 +82,16 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library v #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ -/*************************************** -* Default constant -***************************************/ +/* ************************************* + * Default constant + ***************************************/ #ifndef ZSTD_CLEVEL_DEFAULT # define ZSTD_CLEVEL_DEFAULT 3 #endif -/*************************************** -* Constants -***************************************/ +/* ************************************* + * Constants + ***************************************/ /* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ @@ -183,9 +183,14 @@ ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compres ***************************************/ /*= Compression context * When compressing many times, - * it is recommended to allocate a context just once, and re-use it for each successive compression operation. + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution in multi-threaded environments. */ + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ typedef struct ZSTD_CCtx_s ZSTD_CCtx; ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); @@ -380,6 +385,7 @@ typedef enum { * ZSTD_c_forceMaxWindow * ZSTD_c_forceAttachDict * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. * note : never ever use experimentalParam? names directly; * also, the enums values themselves are unstable and can still change. @@ -389,6 +395,7 @@ typedef enum { ZSTD_c_experimentalParam3=1000, ZSTD_c_experimentalParam4=1001, ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, } ZSTD_cParameter; typedef struct { @@ -657,17 +664,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, ZSTD_inBuffer* input, ZSTD_EndDirective endOp); + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. 
+ * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ -/******************************************************************************* - * This is a legacy streaming API, and can be replaced by ZSTD_CCtx_reset() and - * ZSTD_compressStream2(). It is redundant, but is still fully supported. + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. * Advanced parameters and dictionary compression can only be used through the * new API. ******************************************************************************/ -/** +/*! * Equivalent to: * * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); @@ -675,16 +698,16 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); */ ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/** +/*! * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). * NOTE: The return value is different. ZSTD_compressStream() returns a hint for * the next read size (if non-zero and not an error). ZSTD_compressStream2() - * returns the number of bytes left to flush (if non-zero and not an error). + * returns the minimum nb of bytes left to flush (if non-zero and not an error). */ ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/** Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). 
*/ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); @@ -969,7 +992,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #endif /* ZSTD_H_235446 */ -/**************************************************************************************** +/* ************************************************************************************** * ADVANCED AND EXPERIMENTAL FUNCTIONS **************************************************************************************** * The definitions in the following section are considered experimental. @@ -1037,6 +1060,10 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); #define ZSTD_LDM_HASHRATELOG_MIN 0 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) +/* Advanced parameter bounds */ +#define ZSTD_TARGETCBLOCKSIZE_MIN 64 +#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX + /* internal */ #define ZSTD_HASHLOG3_MAX 17 @@ -1162,7 +1189,7 @@ typedef enum { * however it does mean that all frame data must be present and valid. */ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); -/** ZSTD_decompressBound() : +/*! ZSTD_decompressBound() : * `src` should point to the start of a series of ZSTD encoded and/or skippable frames * `srcSize` must be the _exact_ size of this series * (i.e. there should be a frame boundary at `src + srcSize`) @@ -1409,6 +1436,11 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre */ #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + /*! ZSTD_CCtx_getParameter() : * Get the requested compression parameter value, selected by enum ZSTD_cParameter, * and store it into int* value. @@ -1843,7 +1875,7 @@ typedef struct { unsigned checksumFlag; } ZSTD_frameHeader; -/** ZSTD_getFrameHeader() : +/*! ZSTD_getFrameHeader() : * decode Frame Header, or requires larger `srcSize`. * @return : 0, `zfhPtr` is correctly filled, * >0, `srcSize` is too small, value is wanted `srcSize` amount, @@ -1877,7 +1909,7 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); /*! Block functions produce and decode raw zstd blocks, without frame metadata. Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). - User will have to take in charge required information to regenerate data, such as compressed and content sizes. + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. A few rules to respect : - Compressing and decompressing require a context structure @@ -1888,12 +1920,14 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + copyCCtx() and copyDCtx() can be used too - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, really consider using regular ZSTD_compress() instead. - Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. - - When a block is considered not compressible enough, ZSTD_compressBlock() result will be zero. - In which case, nothing is produced into `dst` ! 
- + User must test for such outcome and deal directly with uncompressed data - + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + In case of multiple successive blocks, should some of them be uncompressed, decoder must be informed of their existence in order to follow proper history. Use ZSTD_insertBlock() for such a case. diff --git a/tests/test_dict_schunk.c b/tests/test_dict_schunk.c index 3c0972a35..120628db5 100644 --- a/tests/test_dict_schunk.c +++ b/tests/test_dict_schunk.c @@ -113,11 +113,11 @@ static char* test_dict() { break; case 4 * KB: mu_assert("ERROR: Dict does not reach expected compression ratio", - 20 * cbytes < nbytes); + 15 * cbytes < nbytes); break; case 32 * KB: mu_assert("ERROR: Dict does not reach expected compression ratio", - 150 * cbytes < nbytes); + 100 * cbytes < nbytes); break; case 256 * KB: mu_assert("ERROR: Dict does not reach expected compression ratio",