From be024da3073ec72b6b59c2239c2d1ce8056a13c9 Mon Sep 17 00:00:00 2001 From: "Jangra, Ravi" Date: Tue, 18 Nov 2025 16:40:25 +0530 Subject: [PATCH] Improve compression performance by conditionally skipping lazy evaluation Add an optional ZSTD_COMPRESS_FAST flag that speeds up compression by restricting lazy evaluation to short matches. It applies to levels 6 and 7 and improves the compression speed of the lazy strategy. The determinism test is skipped when the flag is enabled. The optimization is disabled by default. --- lib/README.md | 5 ++++ lib/compress/clevels.h | 17 +++++++++++++ lib/compress/zstd_compress.c | 10 ++++++++ lib/compress/zstd_compress_internal.h | 35 +++++++++++++++++++++++++++ lib/compress/zstd_lazy.c | 7 ++++++ lib/libzstd.mk | 7 ++++++ tests/cli-tests/common/platform.sh | 2 +- tests/cli-tests/run.py | 1 + tests/fuzzer.c | 13 ++++++++++ 9 files changed, 96 insertions(+), 1 deletion(-) diff --git a/lib/README.md b/lib/README.md index 3974de160ec..7159f5111e9 100644 --- a/lib/README.md +++ b/lib/README.md @@ -212,6 +212,11 @@ The file structure is designed to make this selection manually achievable for an For this scenario, it can be set as `ZDICT_QSORT=ZDICT_QSORT_C90`. Other selectable suffixes are `_GNU`, `_APPLE`, `_MSVC` and `_C11`. +- The build macro `ZSTD_COMPRESS_FAST` can be defined to enable fast lazy evaluation + at compression levels 6 and 7. This improves compression speed with + little to no tradeoff in compression ratio or decompression speed. + This option is designed for applications with demanding compression speed requirements. + #### Windows : using MinGW+MSYS to create DLL DLL can be created using MinGW+MSYS with the `make libzstd` command. 
diff --git a/lib/compress/clevels.h b/lib/compress/clevels.h index c18da465f32..26461d640d0 100644 --- a/lib/compress/clevels.h +++ b/lib/compress/clevels.h @@ -31,8 +31,15 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ +#ifdef ZSTD_ENABLE_COMPRESS_FAST + /* Larger windows and settings from next levels to improve ratio. + * Accounts for ratio drop from selective lazy evaluation */ + { 23, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 6 */ + { 23, 19, 20, 4, 5, 16, ZSTD_lazy }, /* level 7 */ +#else { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ +#endif { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */ @@ -57,8 +64,13 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ +#ifdef ZSTD_ENABLE_COMPRESS_FAST + { 20, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 6 */ + { 20, 18, 19, 4, 5, 8, ZSTD_lazy }, /* level 7 */ +#else { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ +#endif { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ @@ -83,8 +95,13 @@ static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEV { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ +#ifdef ZSTD_ENABLE_COMPRESS_FAST + { 19, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */ + { 19, 16, 17, 4, 5, 16, ZSTD_lazy }, /* 
level 7 */ +#else { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ +#endif { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 1d6f0fcae0e..ab152e46c15 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -5260,12 +5260,22 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, ZSTD_buffered_policy_e zbuff) { size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; +#ifdef ZSTD_ENABLE_COMPRESS_FAST + int compressionLevel = 0; +#endif #if ZSTD_TRACE cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0; #endif DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); /* params are supposed to be fully validated at this point */ assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); +#ifdef ZSTD_ENABLE_COMPRESS_FAST + compressionLevel = (params->compressionLevel == 0) ? cctx->requestedParams.compressionLevel + : params->compressionLevel; + compressionLevel = MIN(MAX(0, compressionLevel), ZSTD_maxCLevel()); + assert(compressionLevel >= 0 && compressionLevel <= ZSTD_MAX_CLEVEL); + cctx->seqStore.lazyLimit = ZSTD_compressFastLazyLimit[compressionLevel]; +#endif assert(!((dict) && (cdict))); /* either dict or cdict, not both */ if ( (cdict) && (cdict->dictContentSize > 0) diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 13a394b3816..dbe647005b7 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -39,6 +39,38 @@ The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. 
This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ +#ifdef ZSTD_ENABLE_COMPRESS_FAST +/* Lazy evaluation is performed only if the first match length + * is <= ZSTD_compressFastLazyLimit[level]; a limit of 0 means no restriction */ +#define ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT 5 + +#define ZSTD_MAX_CLEVEL 22 +static const size_t ZSTD_compressFastLazyLimit[ZSTD_MAX_CLEVEL + 1] = { + ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT, /* level <= 0 (negative levels are clamped to index 0) */ + 0, /* level 1 */ + 0, /* level 2 */ + 0, /* level 3 */ + 0, /* level 4 */ + 0, /* level 5 */ + ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT, /* level 6 */ + ZSTD_COMPRESS_FAST_BASE_LAZY_LIMIT + 1, /* level 7 */ + 0, /* level 8.*/ + 0, /* level 9.*/ + 0, /* level 10.*/ + 0, /* level 11.*/ + 0, /* level 12.*/ + 0, /* level 13 */ + 0, /* level 14 */ + 0, /* level 15 */ + 0, /* level 16 */ + 0, /* level 17 */ + 0, /* level 18 */ + 0, /* level 19 */ + 0, /* level 20 */ + 0, /* level 21 */ + 0, /* level 22 */ +}; +#endif /*-************************************* * Context memory management @@ -105,6 +137,9 @@ typedef struct { BYTE* ofCode; size_t maxNbSeq; size_t maxNbLit; +#ifdef ZSTD_ENABLE_COMPRESS_FAST + size_t lazyLimit; /* Max first-match length for which lazy evaluation is attempted; 0 = no limit */ +#endif /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength * in the seqStore that has a value larger than U16 (if it exists). 
To do so, we increment diff --git a/lib/compress/zstd_lazy.c b/lib/compress/zstd_lazy.c index 18b7b43948f..f60bb59afce 100644 --- a/lib/compress/zstd_lazy.c +++ b/lib/compress/zstd_lazy.c @@ -1590,6 +1590,9 @@ size_t ZSTD_compressBlock_lazy_generic( prefixLowestIndex - (U32)(dictEnd - dictBase) : 0; const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); +#ifdef ZSTD_ENABLE_COMPRESS_FAST + size_t lazyLimit = seqStore->lazyLimit; +#endif DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); ip += (dictAndPrefixLength == 0); @@ -1669,7 +1672,11 @@ size_t ZSTD_compressBlock_lazy_generic( } /* let's try to find a better solution */ +#ifdef ZSTD_ENABLE_COMPRESS_FAST + if ((lazyLimit == 0 || matchLength <= lazyLimit) && depth >= 1) /* restrict lazy eval to short matches only */ +#else if (depth>=1) +#endif while (ip None: env["COMMON"] = os.path.abspath(os.path.join(args.test_dir, "common")) env["PATH"] = bin_dir + ":" + os.getenv("PATH", "") env["LC_ALL"] = "C" + env["ZSTD_COMPRESS_FAST"] = os.environ.get('ZSTD_COMPRESS_FAST', '0') opts = Options( env=env, diff --git a/tests/fuzzer.c b/tests/fuzzer.c index 0bc160efa39..fca7c608339 100644 --- a/tests/fuzzer.c +++ b/tests/fuzzer.c @@ -2866,6 +2866,18 @@ static int basicUnitTests(U32 const seed, double compressibility) const void* const dict = (const char*)CNBuffer; const void* const contentStart = (const char*)dict + flatdictSize; /* These upper bounds are generally within a few bytes of the compressed size */ +#ifdef ZSTD_ENABLE_COMPRESS_FAST + size_t target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770, + 3770, 3772, 3772, 3750, 3750, + 3742, 3675, 3674, 3665, 3664, + 3663, 3662, 3661, 3660, 3660, + 3660, 3660, 3660 }; + size_t const target_wdict_cSize[22+1] = { 2830, 2896, 2893, 2840, 2950, + 2950, 2952, 2927, 2900, 2892, + 2910, 2910, 2910, 2780, 2775, + 2765, 2760, 2755, 2754, 2753, + 2753, 2753, 2753 }; +#else size_t 
target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770, 3770, 3770, 3770, 3750, 3750, 3742, 3675, 3674, 3665, 3664, @@ -2876,6 +2888,7 @@ static int basicUnitTests(U32 const seed, double compressibility) 2910, 2910, 2910, 2780, 2775, 2765, 2760, 2755, 2754, 2753, 2753, 2753, 2753 }; +#endif int l = 1; int const maxLevel = ZSTD_maxCLevel(); /* clevels with strategies that support rowhash on small inputs */