From ad983c15b872deab98c7dc0832cbaff57e31359a Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Mon, 22 Sep 2025 15:21:25 +0000 Subject: [PATCH 01/10] Change the minimum python version to 3.10 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0b1ec9c2..cf8ef8e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "flow.record" description = "A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record" readme = "README.md" -requires-python = ">=3.9.0" +requires-python = ">=3.10" license = "AGPL-3.0-or-later" license-files = ["LICENSE", "COPYRIGHT"] authors = [ From 0fad9446189439c0969de4f13a1c6892b2fe8b5c Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Mon, 22 Sep 2025 15:21:45 +0000 Subject: [PATCH 02/10] Change vermin minimum version to python3.10 --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index e90b5845..f7fc079c 100644 --- a/tox.ini +++ b/tox.ini @@ -42,7 +42,7 @@ dependency_groups = lint commands = ruff check flow tests ruff format --check flow tests - vermin -t=3.9- --no-tips --lint flow tests + vermin -t=3.10- --no-tips --lint flow tests [testenv:docs-build] allowlist_externals = make From 2160e5f3aeb85a9709628b66beb1d658507da58d Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Tue, 23 Sep 2025 09:31:58 +0000 Subject: [PATCH 03/10] Bump ruff version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cf8ef8e0..f7ffaf95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -173,7 +173,7 @@ build = [ "build", ] lint = [ - "ruff==0.12.9", + "ruff==0.13.1", "vermin", ] dev = [ From ca4ee51811b7ef64d934a7f44d22dbc3f536279d Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Mon, 22 Sep 2025 15:22:19 +0000 Subject: [PATCH 04/10] Fix linting for python3.10 --- flow/record/adapter/broker.py | 1 + flow/record/adapter/csvfile.py | 2 +- flow/record/adapter/sqlite.py | 2 +- flow/record/adapter/xlsx.py | 2 +- flow/record/base.py | 5 ++--- flow/record/context.py | 6 +----- flow/record/fieldtypes/net/ip.py | 11 +++++------ flow/record/fieldtypes/net/ipv4.py | 6 +++--- flow/record/selector.py | 4 ++-- flow/record/utils.py | 5 ++++- tests/adapter/test_splunk.py | 6 +++++- tests/adapter/test_sqlite_duckdb.py | 2 +- tests/fieldtypes/test_fieldtypes.py | 9 ++++++--- tests/fieldtypes/test_ip.py | 4 ++-- tests/packer/test_json_packer.py | 2 +- tests/packer/test_packer.py | 2 +- tests/record/test_adapter.py | 12 ++++++------ tests/record/test_record.py | 4 ++-- tests/selector/test_selectors.py | 10 +++++++--- tests/test_regressions.py | 9 ++++++--- tests/test_utils.py | 2 +- tests/tools/test_rdump.py | 4 ++-- 22 files changed, 61 insertions(+), 49 deletions(-) diff --git a/flow/record/adapter/broker.py b/flow/record/adapter/broker.py index 1c3b0431..48d08919 100644 --- a/flow/record/adapter/broker.py +++ b/flow/record/adapter/broker.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from flow.broker import Publisher, Subscriber + from flow.record.adapter import AbstractReader, AbstractWriter if TYPE_CHECKING: diff --git a/flow/record/adapter/csvfile.py b/flow/record/adapter/csvfile.py index 197abf60..54a4c814 100644 --- a/flow/record/adapter/csvfile.py +++ b/flow/record/adapter/csvfile.py @@ -118,7 +118,7 @@ def __iter__(self) -> Iterator[Record]: ctx = get_app_context() selector = self.selector for row in self.reader: - rdict = dict(zip(self.fields, row)) + rdict = dict(zip(self.fields, row, strict=False)) record = self.desc.init_from_dict(rdict) if match_record_with_context(record, selector, ctx): yield record diff --git a/flow/record/adapter/sqlite.py b/flow/record/adapter/sqlite.py index 78555a42..a48c83de 100644 --- a/flow/record/adapter/sqlite.py +++ b/flow/record/adapter/sqlite.py @@ -192,7 +192,7 @@ def read_table(self, table_name: str) -> Iterator[Record]: row[idx] = None elif isinstance(value, str): row[idx] = value.encode(errors="surrogateescape") - yield descriptor_cls.init_from_dict(dict(zip(fnames, row))) + yield descriptor_cls.init_from_dict(dict(zip(fnames, row, strict=False))) def __iter__(self) -> Iterator[Record]: """Iterate over all tables in the database and yield records.""" diff --git a/flow/record/adapter/xlsx.py b/flow/record/adapter/xlsx.py index 069bf463..6a7a6e8a 100644 --- a/flow/record/adapter/xlsx.py +++ b/flow/record/adapter/xlsx.py @@ -144,7 +144,7 @@ def __iter__(self) -> Iterator[Record]: for col in row if col.value and not col.value.startswith("_") ] - desc = record.RecordDescriptor(desc_name, list(zip(field_types, field_names))) + desc = record.RecordDescriptor(desc_name, list(zip(field_types, field_names, strict=False))) continue record_values = [] diff --git a/flow/record/base.py b/flow/record/base.py index 527ac39c..0573af7d 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -21,7 +21,6 @@ TYPE_CHECKING, Any, BinaryIO, - Callable, ) from urllib.parse import parse_qsl, urlparse @@ -60,7 +59,7 @@ from flow.record.whitelist import WHITELIST, WHITELIST_TREE if TYPE_CHECKING: - from collections.abc import Iterator, Mapping, Sequence + from collections.abc import Callable, Iterator, Mapping, Sequence from flow.record.adapter import AbstractReader, AbstractWriter @@ -1000,7 +999,7 @@ def merge_record_descriptors( field_map[fname] = ftype if name is None and descriptors: name = descriptors[0].name - return RecordDescriptor(name, zip(field_map.values(), field_map.keys())) + return RecordDescriptor(name, zip(field_map.values(), field_map.keys(), strict=False)) def extend_record( diff --git a/flow/record/context.py b/flow/record/context.py index 740bd5d9..f57b4837 100644 --- a/flow/record/context.py +++ b/flow/record/context.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from contextlib import contextmanager from contextvars import ContextVar from dataclasses import dataclass @@ -39,10 +38,7 @@ def fresh_app_context() -> Generator[AppContext, None, None]: # Use slots=True on dataclass for better performance which requires Python 3.10 or later. # This can be removed when we drop support for Python 3.9. -if sys.version_info >= (3, 10): - app_dataclass = dataclass(slots=True) # novermin -else: - app_dataclass = dataclass +app_dataclass = dataclass(slots=True) @app_dataclass diff --git a/flow/record/fieldtypes/net/ip.py b/flow/record/fieldtypes/net/ip.py index 85965d89..fdf6df88 100644 --- a/flow/record/fieldtypes/net/ip.py +++ b/flow/record/fieldtypes/net/ip.py @@ -11,16 +11,15 @@ ip_interface, ip_network, ) -from typing import Union from flow.record.base import FieldType from flow.record.fieldtypes import defang -_IPNetwork = Union[IPv4Network, IPv6Network] -_IPAddress = Union[IPv4Address, IPv6Address] -_IPInterface = Union[IPv4Interface, IPv6Interface] -_ConversionTypes = Union[str, int, bytes] -_IPTypes = Union[_IPNetwork, _IPAddress, _IPInterface] +_IPNetwork = IPv4Network | IPv6Network +_IPAddress = IPv4Address | IPv6Address +_IPInterface = IPv4Interface | IPv6Interface +_ConversionTypes = str | int | bytes +_IPTypes = _IPNetwork | _IPAddress | _IPInterface class ipaddress(FieldType): diff --git a/flow/record/fieldtypes/net/ipv4.py b/flow/record/fieldtypes/net/ipv4.py index 8397dc02..caa6e638 100644 --- a/flow/record/fieldtypes/net/ipv4.py +++ b/flow/record/fieldtypes/net/ipv4.py @@ -29,7 +29,7 @@ def addr_str(s: address | int | str) -> str: def mask_to_bits(n: int) -> int: - return bin(n).count("1") + return (n).bit_count() def bits_to_mask(b: int) -> int: @@ -51,7 +51,7 @@ def __init__(self, addr: str, netmask: int | None = None): raise TypeError(f"Subnet() argument 1 must be string, not {type(addr).__name__}") if netmask is None: - ip, sep, mask = addr.partition("/") + ip, _sep, mask = addr.partition("/") self.mask = bits_to_mask(int(mask)) if mask else 0xFFFFFFFF self.net = addr_long(ip) else: @@ -93,7 +93,7 @@ def __init__(self): def load(self, path: str | Path) -> None: with Path(path).open() as fh: for line in fh: - entry, desc = line.split(" ", 1) + entry, _desc = line.split(" ", 1) self.subnets.append(subnet(entry)) def add(self, entry: str) -> None: diff --git a/flow/record/selector.py b/flow/record/selector.py index 42518a94..ee54081a 100644 --- a/flow/record/selector.py +++ b/flow/record/selector.py @@ -4,14 +4,14 @@ import ast import operator import re -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any from flow.record.base import GroupedRecord, Record, dynamic_fieldtype from flow.record.fieldtypes import net from flow.record.whitelist import WHITELIST, WHITELIST_TREE if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable, Iterator try: import astor diff --git a/flow/record/utils.py b/flow/record/utils.py index 857e0d39..89b3a3f2 100644 --- a/flow/record/utils.py +++ b/flow/record/utils.py @@ -5,7 +5,10 @@ import sys import warnings from functools import wraps -from typing import Any, BinaryIO, Callable, TextIO +from typing import TYPE_CHECKING, Any, BinaryIO, TextIO + +if TYPE_CHECKING: + from collections.abc import Callable LOGGING_TRACE_LEVEL = 5 diff --git a/tests/adapter/test_splunk.py b/tests/adapter/test_splunk.py index 9d6ef1d8..933a9d04 100644 --- a/tests/adapter/test_splunk.py +++ b/tests/adapter/test_splunk.py @@ -56,7 +56,11 @@ def mock_httpx_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock]: @pytest.mark.parametrize( - ("field", "escaped"), [*list(zip(escaped_fields, [True] * len(escaped_fields))), ("not_escaped", False)] + ("field", "escaped"), + [ + *list(zip(escaped_fields, [True] * len(escaped_fields), strict=False)), + ("not_escaped", False), + ], ) def test_escape_field_name(field: str, escaped: bool) -> None: if escaped: diff --git a/tests/adapter/test_sqlite_duckdb.py b/tests/adapter/test_sqlite_duckdb.py index 6a5ab26d..0538c407 100644 --- a/tests/adapter/test_sqlite_duckdb.py +++ b/tests/adapter/test_sqlite_duckdb.py @@ -327,7 +327,7 @@ def test_invalid_field_names_quoting(tmp_path: Path, invalid_field_name: str) -> # However, these field names are invalid in flow.record and should raise an exception with ( - pytest.raises(RecordDescriptorError, match="Field .* is an invalid or reserved field name."), + pytest.raises(RecordDescriptorError, match=r"Field .* is an invalid or reserved field name."), RecordReader(f"sqlite://{db}") as reader, ): _ = next(iter(reader)) diff --git a/tests/fieldtypes/test_fieldtypes.py b/tests/fieldtypes/test_fieldtypes.py index 87411125..c4784b31 100644 --- a/tests/fieldtypes/test_fieldtypes.py +++ b/tests/fieldtypes/test_fieldtypes.py @@ -6,7 +6,7 @@ import posixpath import types from datetime import datetime, timedelta, timezone -from typing import Callable +from typing import TYPE_CHECKING import pytest @@ -30,6 +30,9 @@ ) from flow.record.fieldtypes import datetime as dt +if TYPE_CHECKING: + from collections.abc import Callable + UTC = timezone.utc INT64_MAX = (1 << 63) - 1 @@ -1006,10 +1009,10 @@ def test_datetime_timezone_aware(tmp_path: pathlib.Path, record_filename: str) - def test_datetime_comparisions() -> None: - with pytest.raises(TypeError, match=".* compare .*naive"): + with pytest.raises(TypeError, match=r".* compare .*naive"): assert dt("2023-01-01") > datetime(2022, 1, 1) # noqa: DTZ001 - with pytest.raises(TypeError, match=".* compare .*naive"): + with pytest.raises(TypeError, match=r".* compare .*naive"): assert datetime(2022, 1, 1) < dt("2023-01-01") # noqa: DTZ001 assert dt("2023-01-01") > datetime(2022, 1, 1, tzinfo=UTC) diff --git a/tests/fieldtypes/test_ip.py b/tests/fieldtypes/test_ip.py index a1c224ee..e9c17bdb 100644 --- a/tests/fieldtypes/test_ip.py +++ b/tests/fieldtypes/test_ip.py @@ -18,7 +18,7 @@ def test_field_ipaddress() -> None: a = net.IPAddress("192.168.1.1") assert a == "192.168.1.1" - with pytest.raises(ValueError, match=".* does not appear to be an IPv4 or IPv6 address"): + with pytest.raises(ValueError, match=r".* does not appear to be an IPv4 or IPv6 address"): net.IPAddress("a.a.a.a") @@ -27,7 +27,7 @@ def test_field_ipnetwork() -> None: assert a == "192.168.1.0/24" # Host bits set - with pytest.raises(ValueError, match=".* has host bits set"): + with pytest.raises(ValueError, match=r".* has host bits set"): net.IPNetwork("192.168.1.10/24") diff --git a/tests/packer/test_json_packer.py b/tests/packer/test_json_packer.py index 7a139c5a..025b30d8 100644 --- a/tests/packer/test_json_packer.py +++ b/tests/packer/test_json_packer.py @@ -69,7 +69,7 @@ def test_record_descriptor_not_found() -> None: # create a new packer and try to unpack the bytes packer = JsonRecordPacker() - with pytest.raises(RecordDescriptorNotFound, match="No RecordDescriptor found for: .*test/descriptor_not_found"): + with pytest.raises(RecordDescriptorNotFound, match=r"No RecordDescriptor found for: .*test/descriptor_not_found"): packer.unpack(data) diff --git a/tests/packer/test_packer.py b/tests/packer/test_packer.py index 17eadc64..3eaf6e53 100644 --- a/tests/packer/test_packer.py +++ b/tests/packer/test_packer.py @@ -269,5 +269,5 @@ def test_record_descriptor_not_found() -> None: # create a new packer and try to unpack the bytes packer = RecordPacker() - with pytest.raises(RecordDescriptorNotFound, match="No RecordDescriptor found for: .*test/descriptor_not_found"): + with pytest.raises(RecordDescriptorNotFound, match=r"No RecordDescriptor found for: .*test/descriptor_not_found"): packer.unpack(data) diff --git a/tests/record/test_adapter.py b/tests/record/test_adapter.py index b6cb8b37..5d611cf2 100644 --- a/tests/record/test_adapter.py +++ b/tests/record/test_adapter.py @@ -357,13 +357,13 @@ def test_text_record_adapter(capsys: pytest.CaptureFixture) -> None: # Format string with existing variables rec = TestRecordWithFooBar(name="world", foo="foo", bar="bar") writer.write(rec) - out, err = capsys.readouterr() + out, _err = capsys.readouterr() assert out == "Hello world, foo is bar!\n" # Format string with non-existing variables rec = TestRecordWithoutFooBar(name="planet") writer.write(rec) - out, err = capsys.readouterr() + out, _err = capsys.readouterr() assert out == "Hello planet, {foo} is {bar}!\n" @@ -405,23 +405,23 @@ def test_recordstream_header(tmp_path: Path) -> None: def test_recordstream_header_stdout(capsysbinary: pytest.CaptureFixture) -> None: with RecordWriter() as writer: pass - out, err = capsysbinary.readouterr() + out, _err = capsysbinary.readouterr() assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" writer = RecordWriter() del writer - out, err = capsysbinary.readouterr() + out, _err = capsysbinary.readouterr() assert out == b"" writer = RecordWriter() writer.close() - out, err = capsysbinary.readouterr() + out, _err = capsysbinary.readouterr() assert out == b"" writer = RecordWriter() writer.flush() writer.close() - out, err = capsysbinary.readouterr() + out, _err = capsysbinary.readouterr() assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" diff --git a/tests/record/test_record.py b/tests/record/test_record.py index 4a507de2..ada29cb7 100644 --- a/tests/record/test_record.py +++ b/tests/record/test_record.py @@ -225,7 +225,7 @@ def test_grouped_records_packing(tmp_path: Path) -> None: assert not isinstance(a, GroupedRecord) grouped = GroupedRecord("grouped/ab", [a, b]) - assert isinstance(grouped, (Record, GroupedRecord)) + assert isinstance(grouped, Record | GroupedRecord) assert [(f.typename, f.name) for f in grouped._desc.fields.values()] == [ ("string", "a_string"), ("string", "common"), @@ -523,7 +523,7 @@ def test_record_replace() -> None: assert t4._source == "pytest" assert t4._generated == t2._generated - with pytest.raises(ValueError, match=".*Got unexpected field names:.*foobar.*"): + with pytest.raises(ValueError, match=r".*Got unexpected field names:.*foobar.*"): t._replace(foobar="keyword does not exist") diff --git a/tests/selector/test_selectors.py b/tests/selector/test_selectors.py index a25935f8..fc7b4b10 100644 --- a/tests/selector/test_selectors.py +++ b/tests/selector/test_selectors.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from datetime import datetime, timezone import pytest @@ -228,13 +229,15 @@ def test_selector_function_call_whitelisting() -> None: assert rec in Selector("'pe32' in lower(r.filetype)") # But functions on types are not with pytest.raises( - Exception, match="Call 'r.filetype.lower' not allowed. No calls other then whitelisted 'global' calls allowed!" + Exception, + match=re.escape("Call 'r.filetype.lower' not allowed. No calls other then whitelisted 'global' calls allowed!"), ): assert rec in Selector("'pe' in r.filetype.lower()") assert rec in Selector("'EXECUTABLE' in upper(r.filetype)") with pytest.raises( - Exception, match="Call 'r.filetype.upper' not allowed. No calls other then whitelisted 'global' calls allowed!" + Exception, + match=re.escape("Call 'r.filetype.upper' not allowed. No calls other then whitelisted 'global' calls allowed!"), ): assert rec in Selector("'EXECUTABLE' in r.filetype.upper()") @@ -251,7 +254,8 @@ def test_selector_function_call_whitelisting() -> None: # We call net.ipv4 instead of net.ipv4.Subnet, which should fail with pytest.raises( - Exception, match="Call 'net.ipv4' not allowed. No calls other then whitelisted 'global' calls allowed!" + Exception, + match=re.escape("Call 'net.ipv4' not allowed. No calls other then whitelisted 'global' calls allowed!"), ): assert rec in Selector("r.ip in net.ipv4('192.168.1.0/24')") diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 5a76a20d..d2d06ad9 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -9,7 +9,7 @@ from datetime import datetime, timezone from io import BytesIO from pathlib import Path -from typing import Callable +from typing import TYPE_CHECKING from unittest.mock import MagicMock, patch import msgpack @@ -34,6 +34,9 @@ from flow.record.tools import rdump from flow.record.utils import is_stdout +if TYPE_CHECKING: + from collections.abc import Callable + def test_datetime_serialization() -> None: packer = RecordPacker() @@ -435,7 +438,7 @@ def test_grouped_replace() -> None: assert replaced_grouped_record._source == "testcase" # Replacement with non existing field should raise a ValueError - with pytest.raises(ValueError, match=".*Got unexpected field names:.*non_existing_field.*"): + with pytest.raises(ValueError, match=r".*Got unexpected field names:.*non_existing_field.*"): grouped_record._replace(number=100, other="changed", non_existing_field="oops") @@ -467,7 +470,7 @@ def test_is_stdout(tmp_path: pathlib.Path, capsysbinary: pytest.CaptureFixture) with RecordWriter() as writer: assert is_stdout(writer.fp) - out, err = capsysbinary.readouterr() + out, _err = capsysbinary.readouterr() assert out.startswith(b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n") with RecordWriter(tmp_path / "output.records") as writer: diff --git a/tests/test_utils.py b/tests/test_utils.py index 441ae8de..eeb6cdc9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,5 +21,5 @@ def test_boolean_argument() -> None: assert boolean_argument(False) is False assert boolean_argument(1) is True assert boolean_argument(0) is False - with pytest.raises(ValueError, match="Invalid boolean argument: .*"): + with pytest.raises(ValueError, match=r"Invalid boolean argument: .*"): boolean_argument("maybe") diff --git a/tests/tools/test_rdump.py b/tests/tools/test_rdump.py index a9ab376e..49a29c1b 100644 --- a/tests/tools/test_rdump.py +++ b/tests/tools/test_rdump.py @@ -113,7 +113,7 @@ def test_rdump_format_template(tmp_path: Path) -> None: args = ["rdump", str(path), "-f", "TEST: {count},{foo}"] print(args) res = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, stderr = res.communicate() + stdout, _stderr = res.communicate() for i, line in enumerate(stdout.decode().splitlines()): assert line == f"TEST: {i},bar" @@ -155,7 +155,7 @@ def test_rdump_json(tmp_path: Path) -> None: # dump records as JSON lines args = ["rdump", str(record_path), "-w", "jsonfile://-?descriptors=true"] process = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, stderr = process.communicate() + stdout, _stderr = process.communicate() assert process.returncode == 0 From 20d5ddbd05ed59ce758b8a75ff7521368ef1c9ec Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Tue, 23 Sep 2025 13:48:06 +0000 Subject: [PATCH 05/10] move dependency-groups up --- pyproject.toml | 107 +++++++++++++++++++++++++------------------------ tox.ini | 2 +- 2 files changed, 55 insertions(+), 54 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f7ffaf95..68614811 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,6 +66,60 @@ full = [ "structlog", ] +# This list is duplicated due to https://github.com/fox-it/flow.record/pull/182#discussion_r2284582481 +[dependency-groups] +compression = [ + "lz4", + "zstandard==0.23.0; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 + "zstandard; platform_python_implementation != 'PyPy' and python_version != '3.9'", # Otherwise, pick the latest +] +elastic = [ + "elasticsearch", +] +geoip = [ + "maxminddb", +] +avro = [ + "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 + "fastavro[snappy]", +] +duckdb = [ + "duckdb; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy + "pytz; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy +] +splunk = [ + "httpx", +] +xlsx = [ + "openpyxl", +] +test = [ + {include-group = "compression"}, + {include-group = "avro"}, + {include-group = "elastic"}, + {include-group = "xlsx"}, + {include-group = "duckdb"}, + "tqdm", + "structlog", + "pytest", +] +full = [ + {include-group = "compression"}, + "tqdm", + "structlog", +] +build = [ + "build", +] +lint = [ + "ruff==0.13.1", + "vermin", +] +dev = [ + {include-group = "test"}, + {include-group = "lint"}, +] + [project.scripts] rdump = "flow.record.tools.rdump:main" rgeoip = "flow.record.tools.geoip:main" @@ -127,56 +181,3 @@ include = ["flow.*"] [tool.setuptools_scm] version_file = "flow/record/version.py" -# This list is duplicated due to https://github.com/fox-it/flow.record/pull/182#discussion_r2284582481 -[dependency-groups] -compression = [ - "lz4", - "zstandard==0.23.0; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "zstandard; platform_python_implementation != 'PyPy' and python_version != '3.9'", # Otherwise, pick the latest -] -elastic = [ - "elasticsearch", -] -geoip = [ - "maxminddb", -] -avro = [ - "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "fastavro[snappy]", -] -duckdb = [ - "duckdb; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy - "pytz; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy -] -splunk = [ - "httpx", -] -xlsx = [ - "openpyxl", -] -test = [ - {include-group = "compression"}, - {include-group = "avro"}, - {include-group = "elastic"}, - {include-group = "xlsx"}, - {include-group = "duckdb"}, - "tqdm", - "structlog", - "pytest", -] -full = [ - {include-group = "compression"}, - "tqdm", - "structlog", -] -build = [ - "build", -] -lint = [ - "ruff==0.13.1", - "vermin", -] -dev = [ - {include-group = "test"}, - {include-group = "lint"}, -] diff --git a/tox.ini b/tox.ini index f7fc079c..72ab0dd0 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = lint, py3, pypy3 # requires if they are not available on the host system. This requires the # locally installed tox to have a minimum version 3.3.0. This means the names # of the configuration options are still according to the tox 3.x syntax. -minversion = 4.11.4 +minversion = 4.27.0 # This version of virtualenv will install setuptools version 68.2.2 and pip # 23.3.1. These versions fully support python projects defined only through a # pyproject.toml file (PEP-517/PEP-518/PEP-621). This pip version also support From 3376dccd4e31c91917df7837d44406bd754858f1 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Tue, 23 Sep 2025 13:53:15 +0000 Subject: [PATCH 06/10] remove unnecessary dependency conditions --- pyproject.toml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 68614811..34318b61 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,8 +37,7 @@ repository = "https://github.com/fox-it/flow.record" # Note: these compression libraries do not work well with pypy compression = [ "lz4", - "zstandard==0.23.0; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "zstandard; platform_python_implementation != 'PyPy' and python_version != '3.9'", # Otherwise, pick the latest + "zstandard; platform_python_implementation != 'PyPy'", ] elastic = [ "elasticsearch", @@ -47,12 +46,11 @@ geoip = [ "maxminddb", ] avro = [ - "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 "fastavro[snappy]", ] duckdb = [ - "duckdb", - "pytz", # duckdb requires pytz for timezone support + "duckdb; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy + "pytz; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy ] splunk = [ "httpx", @@ -70,8 +68,7 @@ full = [ [dependency-groups] compression = [ "lz4", - "zstandard==0.23.0; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "zstandard; platform_python_implementation != 'PyPy' and python_version != '3.9'", # Otherwise, pick the latest + "zstandard; platform_python_implementation != 'PyPy'", ] elastic = [ "elasticsearch", @@ -80,7 +77,6 @@ geoip = [ "maxminddb", ] avro = [ - "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 "fastavro[snappy]", ] duckdb = [ From 094706ff39d573919389a74d411a3df11c9b8f1b Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Wed, 24 Sep 2025 12:25:49 +0000 Subject: [PATCH 07/10] Rename all unused variables to `_` --- flow/record/fieldtypes/net/ipv4.py | 6 +++--- tests/record/test_adapter.py | 12 ++++++------ tests/test_regressions.py | 2 +- tests/tools/test_rdump.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/flow/record/fieldtypes/net/ipv4.py b/flow/record/fieldtypes/net/ipv4.py index caa6e638..6701577c 100644 --- a/flow/record/fieldtypes/net/ipv4.py +++ b/flow/record/fieldtypes/net/ipv4.py @@ -29,7 +29,7 @@ def addr_str(s: address | int | str) -> str: def mask_to_bits(n: int) -> int: - return (n).bit_count() + return n.bit_count() def bits_to_mask(b: int) -> int: @@ -51,7 +51,7 @@ def __init__(self, addr: str, netmask: int | None = None): raise TypeError(f"Subnet() argument 1 must be string, not {type(addr).__name__}") if netmask is None: - ip, _sep, mask = addr.partition("/") + ip, _, mask = addr.partition("/") self.mask = bits_to_mask(int(mask)) if mask else 0xFFFFFFFF self.net = addr_long(ip) else: @@ -93,7 +93,7 @@ def __init__(self): def load(self, path: str | Path) -> None: with Path(path).open() as fh: for line in fh: - entry, _desc = line.split(" ", 1) + entry, _ = line.split(" ", 1) self.subnets.append(subnet(entry)) def add(self, entry: str) -> None: diff --git a/tests/record/test_adapter.py b/tests/record/test_adapter.py index 5d611cf2..45eb80ad 100644 --- a/tests/record/test_adapter.py +++ b/tests/record/test_adapter.py @@ -357,13 +357,13 @@ def test_text_record_adapter(capsys: pytest.CaptureFixture) -> None: # Format string with existing variables rec = TestRecordWithFooBar(name="world", foo="foo", bar="bar") writer.write(rec) - out, _err = capsys.readouterr() + out, _ = capsys.readouterr() assert out == "Hello world, foo is bar!\n" # Format string with non-existing variables rec = TestRecordWithoutFooBar(name="planet") writer.write(rec) - out, _err = capsys.readouterr() + out, _ = capsys.readouterr() assert out == "Hello planet, {foo} is {bar}!\n" @@ -405,23 +405,23 @@ def test_recordstream_header(tmp_path: Path) -> None: def test_recordstream_header_stdout(capsysbinary: pytest.CaptureFixture) -> None: with RecordWriter() as writer: pass - out, _err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" writer = RecordWriter() del writer - out, _err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"" writer = RecordWriter() writer.close() - out, _err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"" writer = RecordWriter() writer.flush() writer.close() - out, _err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" diff --git a/tests/test_regressions.py b/tests/test_regressions.py index d2d06ad9..810ed9b1 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -470,7 +470,7 @@ def test_is_stdout(tmp_path: pathlib.Path, capsysbinary: pytest.CaptureFixture) with RecordWriter() as writer: assert is_stdout(writer.fp) - out, _err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out.startswith(b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n") with RecordWriter(tmp_path / "output.records") as writer: diff --git a/tests/tools/test_rdump.py b/tests/tools/test_rdump.py index 49a29c1b..64bcad76 100644 --- a/tests/tools/test_rdump.py +++ b/tests/tools/test_rdump.py @@ -113,7 +113,7 @@ def test_rdump_format_template(tmp_path: Path) -> None: args = ["rdump", str(path), "-f", "TEST: {count},{foo}"] print(args) res = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, _stderr = res.communicate() + stdout, _ = res.communicate() for i, line in enumerate(stdout.decode().splitlines()): assert line == f"TEST: {i},bar" @@ -155,7 +155,7 @@ def test_rdump_json(tmp_path: Path) -> None: # dump records as JSON lines args = ["rdump", str(record_path), "-w", "jsonfile://-?descriptors=true"] process = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, _stderr = process.communicate() + stdout, _ = process.communicate() assert process.returncode == 0 From 1389a6814f8a4fe3db00a08a493399b8c027c2a3 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Wed, 24 Sep 2025 12:28:25 +0000 Subject: [PATCH 08/10] Remove app_dataclass as it doesn't need to exist anymore we can consider adding a deprecation warning instead, but I doubt that someone directly imports app_dataclass --- flow/record/context.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/flow/record/context.py b/flow/record/context.py index f57b4837..c0211d4f 100644 --- a/flow/record/context.py +++ b/flow/record/context.py @@ -36,12 +36,7 @@ def fresh_app_context() -> Generator[AppContext, None, None]: APP_CONTEXT.reset(token) -# Use slots=True on dataclass for better performance which requires Python 3.10 or later. -# This can be removed when we drop support for Python 3.9. -app_dataclass = dataclass(slots=True) - - -@app_dataclass +@dataclass(slots=True) class AppContext: """Context for the application, holding metrics like amount of processed records.""" From c10e69d4353737bc3b7fec62a3320ec24c3a80c6 Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Wed, 24 Sep 2025 12:38:16 +0000 Subject: [PATCH 09/10] Changing required ruff version to ">=0.13.1" --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 34318b61..2b75e7f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,7 +122,7 @@ rgeoip = "flow.record.tools.geoip:main" [tool.ruff] line-length = 120 -required-version = ">=0.9.0" +required-version = ">=0.13.1" extend-exclude = ["flow/record/version.py"] [tool.ruff.format] From 379d7023159d1192a6f5c12799ecd40586fda52b Mon Sep 17 00:00:00 2001 From: Miauwkeru Date: Thu, 25 Sep 2025 14:45:02 +0000 Subject: [PATCH 10/10] Add suggestion --- tests/record/test_record.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/record/test_record.py b/tests/record/test_record.py index ada29cb7..72b0506f 100644 --- a/tests/record/test_record.py +++ b/tests/record/test_record.py @@ -225,7 +225,7 @@ def test_grouped_records_packing(tmp_path: Path) -> None: assert not isinstance(a, GroupedRecord) grouped = GroupedRecord("grouped/ab", [a, b]) - assert isinstance(grouped, Record | GroupedRecord) + assert isinstance(grouped, (Record, GroupedRecord)) assert [(f.typename, f.name) for f in grouped._desc.fields.values()] == [ ("string", "a_string"), ("string", "common"),