From acfbbaa745ff7ae6895fc5aa94fc2e0e322a4255 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Mon, 1 Dec 2025 12:12:49 +0100 Subject: [PATCH 1/3] Switch zstandard library to stdlib compression.zstd or backports.zstd fixes #197 --- flow/record/base.py | 23 ++++++++++++++--------- pyproject.toml | 4 ++-- tests/record/test_adapter.py | 4 +--- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/flow/record/base.py b/flow/record/base.py index 0573af7d..dc8ef3e1 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -27,25 +27,33 @@ from flow.record.exceptions import RecordAdapterNotFound, RecordDescriptorError from flow.record.utils import get_stdin, get_stdout +# lz4 try: import lz4.frame as lz4 HAS_LZ4 = True except ImportError: HAS_LZ4 = False + +# bzip2 try: import bz2 HAS_BZ2 = True except ImportError: HAS_BZ2 = False -try: - import zstandard as zstd +# zstandard +try: + if sys.version_info >= (3, 14): + from compression import zstd # novermin + else: + from backports import zstd HAS_ZSTD = True except ImportError: HAS_ZSTD = False +# fastavro try: import fastavro as avro # noqa @@ -727,8 +735,7 @@ def open_stream(fp: BinaryIO, mode: str) -> BinaryIO: elif HAS_LZ4 and peek_data[:4] == LZ4_MAGIC: fp = lz4.open(fp, mode=mode) elif HAS_ZSTD and peek_data[:4] == ZSTD_MAGIC: - dctx = zstd.ZstdDecompressor() - fp = dctx.stream_reader(fp) + fp = zstd.ZstdFile(fp, mode=mode) return fp @@ -804,13 +811,11 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO: fp = lz4.open(path, mode) elif path.endswith((".zstd", ".zst")): if not HAS_ZSTD: - raise RuntimeError("zstandard python module not available") + raise RuntimeError("zstd python module not available") if not out: - dctx = zstd.ZstdDecompressor() - fp = dctx.stream_reader(pathobj.open("rb")) + fp = zstd.ZstdFile(pathobj.open("rb"), mode="rb") else: - cctx = zstd.ZstdCompressor() - fp = cctx.stream_writer(pathobj.open("wb")) + fp = zstd.ZstdFile(pathobj.open("wb"), mode="wb") # normal file or stdio for reading or writing if not fp: diff --git a/pyproject.toml b/pyproject.toml index 2b75e7f2..af59dd77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ repository = "https://github.com/fox-it/flow.record" # Note: these compression libraries do not work well with pypy compression = [ "lz4", - "zstandard; platform_python_implementation != 'PyPy'", + "backports.zstd; python_version < '3.14'", ] elastic = [ "elasticsearch", @@ -68,7 +68,7 @@ full = [ [dependency-groups] compression = [ "lz4", - "zstandard; platform_python_implementation != 'PyPy'", + "backports.zstd; python_version < '3.14'", ] elastic = [ "elasticsearch", diff --git a/tests/record/test_adapter.py b/tests/record/test_adapter.py index 45eb80ad..a11c8470 100644 --- a/tests/record/test_adapter.py +++ b/tests/record/test_adapter.py @@ -85,12 +85,10 @@ def test_compressed_writer_reader(tmp_path: Path, compression: str) -> None: if compression == "lz4" and not HAS_LZ4: pytest.skip("lz4 module not installed") if compression == "zstd" and not HAS_ZSTD: - pytest.skip("zstandard module not installed") + pytest.skip("backports.zstd module not installed") if compression == "lz4" and platform.python_implementation() == "PyPy": pytest.skip("lz4 module not supported on PyPy") - if compression == "zstd" and platform.python_implementation() == "PyPy": - pytest.skip("zstandard module not supported on PyPy") p = tmp_path.joinpath(f"{compression}-test") p.mkdir() From 676432f490a8fea21fd5707b696baba03fb0d757 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Mon, 1 Dec 2025 12:24:17 +0100 Subject: [PATCH 2/3] Apply code suggestions --- flow/record/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flow/record/base.py b/flow/record/base.py index dc8ef3e1..bfa4a2db 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -813,9 +813,9 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO: if not HAS_ZSTD: raise RuntimeError("zstd python module not available") if not out: - fp = zstd.ZstdFile(pathobj.open("rb"), mode="rb") + fp = zstd.ZstdFile(path, mode) else: - fp = zstd.ZstdFile(pathobj.open("wb"), mode="wb") + fp = zstd.ZstdFile(path, mode) # normal file or stdio for reading or writing if not fp: From 3c9940a718e93a7be94dfb3d615269e20c080fc9 Mon Sep 17 00:00:00 2001 From: Yun Zheng Hu Date: Mon, 1 Dec 2025 12:36:52 +0100 Subject: [PATCH 3/3] Fixes --- flow/record/base.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/flow/record/base.py b/flow/record/base.py index bfa4a2db..7b574ee4 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -811,11 +811,8 @@ def open_path(path: str, mode: str, clobber: bool = True) -> IO: fp = lz4.open(path, mode) elif path.endswith((".zstd", ".zst")): if not HAS_ZSTD: - raise RuntimeError("zstd python module not available") - if not out: - fp = zstd.ZstdFile(path, mode) - else: - fp = zstd.ZstdFile(path, mode) + raise RuntimeError("backports.zstd python module not available") + fp = zstd.ZstdFile(path, mode) # normal file or stdio for reading or writing if not fp: