From 3b42bfa5d649646c7bf1c735048e04dff1c9de0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anders=20F=20Bj=C3=B6rklund?= Date: Fri, 30 Jan 2026 17:40:10 +0100 Subject: [PATCH] Add support for zlib format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the compression frame and a checksum, without the gzip header. It is used by, for instance, git objects, and is supported by zlib. It is similar to the lzma frames being included in the xz support. There are no real .zlib files, but it made testing more consistent. Signed-off-by: Anders F Björklund --- programs/Makefile | 4 ++-- programs/fileio.c | 31 ++++++++++++++++----------- programs/fileio.h | 1 + programs/fileio_types.h | 2 +- programs/zstd.1.md | 2 +- programs/zstdcli.c | 4 +++- tests/cli-tests/compression/format.sh | 2 +- tests/playTests.sh | 3 +++ 8 files changed, 31 insertions(+), 18 deletions(-) diff --git a/programs/Makefile b/programs/Makefile index 94f2179d028..8b22b241e52 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -74,10 +74,10 @@ else endif # zlib detection -NO_ZLIB_MSG := ==> no zlib, building zstd without .gz support +NO_ZLIB_MSG := ==> no zlib, building zstd without .gz/.zlib support HAVE_ZLIB ?= $(shell printf '$(NUM_SYMBOL)include \nint main(void) { return 0; }' > have_zlib.c && $(CC) $(FLAGS) -o have_zlib$(EXT) have_zlib.c -lz 2> $(VOID) && rm have_zlib$(EXT) && echo 1 || echo 0; rm have_zlib.c) ifeq ($(HAVE_ZLIB), 1) - ZLIB_MSG := ==> building zstd with .gz compression support + ZLIB_MSG := ==> building zstd with .gz/.zlib compression support ZLIBCPP = -DZSTD_GZCOMPRESS -DZSTD_GZDECOMPRESS ZLIBLD = -lz else diff --git a/programs/fileio.c b/programs/fileio.c index 4000c5b62f4..b8e7d61e3d0 100644 --- a/programs/fileio.c +++ b/programs/fileio.c @@ -1484,7 +1484,7 @@ static void FIO_freeCResources(cRess_t* const ress) static unsigned long long FIO_compressGzFrame(cRess_t* ress, const char* srcFileName, U64 const srcFileSize, - int compressionLevel, 
U64* readsize) + int compressionLevel, U64* readsize, int plain_zlib) { FIO_SyncCompressIO* const syncIO = &ress->io; unsigned long long inFileSize = 0, outFileSize = 0; @@ -1498,7 +1498,7 @@ FIO_compressGzFrame(cRess_t* ress, strm.opaque = Z_NULL; { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED, - 15 /* maxWindowLogSize */ + 16 /* gzip only */, + 15 /* maxWindowLogSize */ + (plain_zlib ? 0 /* zlib */: 16 /* gzip only */), 8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */ if (ret != Z_OK) { EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret); @@ -2010,11 +2010,12 @@ FIO_compressFilename_internal(FIO_ctx_t* const fCtx, break; case FIO_gzipCompression: + case FIO_zlibCompression: #ifdef ZSTD_GZCOMPRESS - compressedfilesize = FIO_compressGzFrame(ress, srcFileName, fileSize, compressionLevel, &readsize); + compressedfilesize = FIO_compressGzFrame(ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_zlibCompression); #else (void)compressionLevel; - EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", + EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip/zlib (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n", srcFileName); #endif break; @@ -2155,6 +2156,7 @@ static const char *compressedFileExtensions[] = { ZSTD_EXTENSION, TZSTD_EXTENSION, GZ_EXTENSION, + ZLIB_EXTENSION, TGZ_EXTENSION, LZMA_EXTENSION, XZ_EXTENSION, @@ -2358,8 +2360,8 @@ checked_index(const char* options[], size_t length, size_t index) { void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) { - static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION, - LZMA_EXTENSION, LZ4_EXTENSION}; + static const char* formatOptions[6] = {ZSTD_EXTENSION, GZ_EXTENSION, ZLIB_EXTENSION, + XZ_EXTENSION, LZMA_EXTENSION, LZ4_EXTENSION}; /* same order as FIO_compressionType_t */ static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"}; static const char* 
checkSumOptions[3] = {" --no-check", "", " --check"}; static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"}; @@ -2758,7 +2760,7 @@ FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, #ifdef ZSTD_GZDECOMPRESS static unsigned long long -FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName) +FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName, int plain_zlib) { unsigned long long outFileSize = 0; z_stream strm; @@ -2772,7 +2774,7 @@ FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName) strm.next_in = 0; strm.avail_in = 0; /* see https://www.zlib.net/manual.html */ - if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK) + if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + (plain_zlib ? 0 /* zlib */: 16 /* gzip only */)) != Z_OK) return FIO_ERROR_FRAME_DECODING; writeJob = AIO_WritePool_acquireJob(ress->writeCtx); @@ -3015,13 +3017,17 @@ static int FIO_decompressFrames(FIO_ctx_t* const fCtx, unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; - } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */ + } else if ((buf[0] == 31 && buf[1] == 139) /* gz magic number */ + || (buf[0] == 0x78 && buf[1] == 0x01) /* zlib header (no/fastest compression) */ + || (buf[0] == 0x78 && buf[1] == 0x5E) /* zlib header (fast compression) */ + || (buf[0] == 0x78 && buf[1] == 0x9C) /* zlib header (default compression) */ + || (buf[0] == 0x78 && buf[1] == 0xDA)) { /* zlib header (best compression) */ #ifdef ZSTD_GZDECOMPRESS - unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName); + unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName, buf[0] != 31); if (frameSize == FIO_ERROR_FRAME_DECODING) return 1; filesize += frameSize; #else - DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled 
without HAVE_ZLIB) -- ignored \n", srcFileName); + DISPLAYLEVEL(1, "zstd: %s: gzip/zlib file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName); return 1; #endif } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */ @@ -3210,6 +3216,7 @@ static const char *suffixList[] = { ZSTD_ALT_EXTENSION, #endif #ifdef ZSTD_GZDECOMPRESS + ZLIB_EXTENSION, GZ_EXTENSION, TGZ_EXTENSION, #endif @@ -3228,7 +3235,7 @@ static const char *suffixList[] = { static const char *suffixListStr = ZSTD_EXTENSION "/" TZSTD_EXTENSION #ifdef ZSTD_GZDECOMPRESS - "/" GZ_EXTENSION "/" TGZ_EXTENSION + "/" ZLIB_EXTENSION "/" GZ_EXTENSION "/" TGZ_EXTENSION #endif #ifdef ZSTD_LZMADECOMPRESS "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION diff --git a/programs/fileio.h b/programs/fileio.h index 5d7334ef5f0..1d454cb0c5d 100644 --- a/programs/fileio.h +++ b/programs/fileio.h @@ -36,6 +36,7 @@ #define XZ_EXTENSION ".xz" #define TXZ_EXTENSION ".txz" +#define ZLIB_EXTENSION ".zlib" #define GZ_EXTENSION ".gz" #define TGZ_EXTENSION ".tgz" diff --git a/programs/fileio_types.h b/programs/fileio_types.h index 9bbb5154979..3e6de627c54 100644 --- a/programs/fileio_types.h +++ b/programs/fileio_types.h @@ -28,7 +28,7 @@ struct FIO_display_prefs_s { }; -typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t; +typedef enum { FIO_zstdCompression, FIO_gzipCompression, FIO_zlibCompression, FIO_xzCompression, FIO_lzmaCompression, FIO_lz4Compression } FIO_compressionType_t; typedef struct FIO_prefs_s { diff --git a/programs/zstd.1.md b/programs/zstd.1.md index bb9258d571b..d739a8199e6 100644 --- a/programs/zstd.1.md +++ b/programs/zstd.1.md @@ -298,7 +298,7 @@ the last one takes effect. * `--format=FORMAT`: compress and decompress in other formats. If compiled with support, zstd can compress to or decompress from other compression algorithm - formats. 
Possibly available options are `zstd`, `gzip`, `xz`, `lzma`, and `lz4`. + formats. Possibly available options are `zstd`, `gzip`, `zlib`, `xz`, `lzma`, and `lz4`. If no such format is provided, `zstd` is the default. * `-h`/`-H`, `--help`: display help/long help and exit diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 01760ff8c45..a6a7064f308 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -258,6 +258,7 @@ static void usageAdvanced(const char* programName) DISPLAYOUT(" --[no-]mmap-dict Memory-map dictionary file rather than mallocing and loading all at once\n"); #ifdef ZSTD_GZCOMPRESS DISPLAYOUT(" --format=gzip Compress files to the `.gz` format.\n"); + DISPLAYOUT(" --format=zlib Compress files to the `.zlib` format.\n"); #endif #ifdef ZSTD_LZMACOMPRESS DISPLAYOUT(" --format=xz Compress files to the `.xz` format.\n"); @@ -673,7 +674,7 @@ static void printVersion(void) DISPLAYOUT(", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT); #endif #ifdef ZSTD_GZCOMPRESS - DISPLAYOUT(", gzip"); + DISPLAYOUT(", zlib, gzip"); #endif #ifdef ZSTD_LZ4COMPRESS DISPLAYOUT(", lz4"); @@ -1015,6 +1016,7 @@ int main(int argCount, const char* argv[]) if (!strcmp(argument, "--mmap-dict")) { mmapDict = ZSTD_ps_enable; continue; } if (!strcmp(argument, "--no-mmap-dict")) { mmapDict = ZSTD_ps_disable; continue; } #ifdef ZSTD_GZCOMPRESS + if (!strcmp(argument, "--format=zlib")) { suffix = ZLIB_EXTENSION; cType = FIO_zlibCompression; continue; } if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; cType = FIO_gzipCompression; continue; } if (exeNameMatch(programName, ZSTD_GZ)) { /* behave like gzip */ if (!strcmp(argument, "--best")) { dictCLevel = cLevel = 9; continue; } diff --git a/tests/cli-tests/compression/format.sh b/tests/cli-tests/compression/format.sh index 192fa2cf29f..b1e9f2065cc 100755 --- a/tests/cli-tests/compression/format.sh +++ b/tests/cli-tests/compression/format.sh @@ -7,7 +7,7 @@ set -e # Test --format zstd --format=zstd file -f zstd -t file.zst 
-for format in "gzip" "lz4" "xz" "lzma"; do +for format in "gzip" "zlib" "lz4" "xz" "lzma"; do if zstd_supports_format $format; then zstd --format=$format file zstd -t file.$(format_extension $format) diff --git a/tests/playTests.sh b/tests/playTests.sh index d3ce39846ce..eb4201ff61a 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -1345,8 +1345,11 @@ println "\n===> gzip frame tests " if [ $GZIPMODE -eq 1 ]; then datagen > tmp + zstd -f --format=zlib tmp zstd -f --format=gzip tmp zstd -f tmp + zstd -d -f -v tmp.gz + zstd -d -f -v tmp.zlib cat tmp.gz tmp.zst tmp.gz tmp.zst | zstd -d -f -o tmp truncateLastByte tmp.gz | zstd -t > $INTOVOID && die "incomplete frame not detected !" rm -f tmp*