diff --git a/flow/record/adapter/broker.py b/flow/record/adapter/broker.py index 1c3b0431..48d08919 100644 --- a/flow/record/adapter/broker.py +++ b/flow/record/adapter/broker.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from flow.broker import Publisher, Subscriber + from flow.record.adapter import AbstractReader, AbstractWriter if TYPE_CHECKING: diff --git a/flow/record/adapter/csvfile.py b/flow/record/adapter/csvfile.py index 197abf60..54a4c814 100644 --- a/flow/record/adapter/csvfile.py +++ b/flow/record/adapter/csvfile.py @@ -118,7 +118,7 @@ def __iter__(self) -> Iterator[Record]: ctx = get_app_context() selector = self.selector for row in self.reader: - rdict = dict(zip(self.fields, row)) + rdict = dict(zip(self.fields, row, strict=False)) record = self.desc.init_from_dict(rdict) if match_record_with_context(record, selector, ctx): yield record diff --git a/flow/record/adapter/sqlite.py b/flow/record/adapter/sqlite.py index 78555a42..a48c83de 100644 --- a/flow/record/adapter/sqlite.py +++ b/flow/record/adapter/sqlite.py @@ -192,7 +192,7 @@ def read_table(self, table_name: str) -> Iterator[Record]: row[idx] = None elif isinstance(value, str): row[idx] = value.encode(errors="surrogateescape") - yield descriptor_cls.init_from_dict(dict(zip(fnames, row))) + yield descriptor_cls.init_from_dict(dict(zip(fnames, row, strict=False))) def __iter__(self) -> Iterator[Record]: """Iterate over all tables in the database and yield records.""" diff --git a/flow/record/adapter/xlsx.py b/flow/record/adapter/xlsx.py index 069bf463..6a7a6e8a 100644 --- a/flow/record/adapter/xlsx.py +++ b/flow/record/adapter/xlsx.py @@ -144,7 +144,7 @@ def __iter__(self) -> Iterator[Record]: for col in row if col.value and not col.value.startswith("_") ] - desc = record.RecordDescriptor(desc_name, list(zip(field_types, field_names))) + desc = record.RecordDescriptor(desc_name, list(zip(field_types, field_names, strict=False))) continue record_values = [] diff --git a/flow/record/base.py b/flow/record/base.py index 527ac39c..0573af7d 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -21,7 +21,6 @@ TYPE_CHECKING, Any, BinaryIO, - Callable, ) from urllib.parse import parse_qsl, urlparse @@ -60,7 +59,7 @@ from flow.record.whitelist import WHITELIST, WHITELIST_TREE if TYPE_CHECKING: - from collections.abc import Iterator, Mapping, Sequence + from collections.abc import Callable, Iterator, Mapping, Sequence from flow.record.adapter import AbstractReader, AbstractWriter @@ -1000,7 +999,7 @@ def merge_record_descriptors( field_map[fname] = ftype if name is None and descriptors: name = descriptors[0].name - return RecordDescriptor(name, zip(field_map.values(), field_map.keys())) + return RecordDescriptor(name, zip(field_map.values(), field_map.keys(), strict=False)) def extend_record( diff --git a/flow/record/context.py b/flow/record/context.py index 740bd5d9..c0211d4f 100644 --- a/flow/record/context.py +++ b/flow/record/context.py @@ -1,6 +1,5 @@ from __future__ import annotations -import sys from contextlib import contextmanager from contextvars import ContextVar from dataclasses import dataclass @@ -37,15 +36,7 @@ def fresh_app_context() -> Generator[AppContext, None, None]: APP_CONTEXT.reset(token) -# Use slots=True on dataclass for better performance which requires Python 3.10 or later. -# This can be removed when we drop support for Python 3.9. -if sys.version_info >= (3, 10): - app_dataclass = dataclass(slots=True) # novermin -else: - app_dataclass = dataclass - - -@app_dataclass +@dataclass(slots=True) class AppContext: """Context for the application, holding metrics like amount of processed records.""" diff --git a/flow/record/fieldtypes/net/ip.py b/flow/record/fieldtypes/net/ip.py index 85965d89..fdf6df88 100644 --- a/flow/record/fieldtypes/net/ip.py +++ b/flow/record/fieldtypes/net/ip.py @@ -11,16 +11,15 @@ ip_interface, ip_network, ) -from typing import Union from flow.record.base import FieldType from flow.record.fieldtypes import defang -_IPNetwork = Union[IPv4Network, IPv6Network] -_IPAddress = Union[IPv4Address, IPv6Address] -_IPInterface = Union[IPv4Interface, IPv6Interface] -_ConversionTypes = Union[str, int, bytes] -_IPTypes = Union[_IPNetwork, _IPAddress, _IPInterface] +_IPNetwork = IPv4Network | IPv6Network +_IPAddress = IPv4Address | IPv6Address +_IPInterface = IPv4Interface | IPv6Interface +_ConversionTypes = str | int | bytes +_IPTypes = _IPNetwork | _IPAddress | _IPInterface class ipaddress(FieldType): diff --git a/flow/record/fieldtypes/net/ipv4.py b/flow/record/fieldtypes/net/ipv4.py index 8397dc02..6701577c 100644 --- a/flow/record/fieldtypes/net/ipv4.py +++ b/flow/record/fieldtypes/net/ipv4.py @@ -29,7 +29,7 @@ def addr_str(s: address | int | str) -> str: def mask_to_bits(n: int) -> int: - return bin(n).count("1") + return n.bit_count() def bits_to_mask(b: int) -> int: @@ -51,7 +51,7 @@ def __init__(self, addr: str, netmask: int | None = None): raise TypeError(f"Subnet() argument 1 must be string, not {type(addr).__name__}") if netmask is None: - ip, sep, mask = addr.partition("/") + ip, _, mask = addr.partition("/") self.mask = bits_to_mask(int(mask)) if mask else 0xFFFFFFFF self.net = addr_long(ip) else: @@ -93,7 +93,7 @@ def __init__(self): def load(self, path: str | Path) -> None: with Path(path).open() as fh: for line in fh: - entry, desc = line.split(" ", 1) + entry, _ = line.split(" ", 1) self.subnets.append(subnet(entry)) def add(self, entry: str) -> None: diff --git a/flow/record/selector.py b/flow/record/selector.py index 42518a94..ee54081a 100644 --- a/flow/record/selector.py +++ b/flow/record/selector.py @@ -4,14 +4,14 @@ import ast import operator import re -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any from flow.record.base import GroupedRecord, Record, dynamic_fieldtype from flow.record.fieldtypes import net from flow.record.whitelist import WHITELIST, WHITELIST_TREE if TYPE_CHECKING: - from collections.abc import Iterator + from collections.abc import Callable, Iterator try: import astor diff --git a/flow/record/utils.py b/flow/record/utils.py index 857e0d39..89b3a3f2 100644 --- a/flow/record/utils.py +++ b/flow/record/utils.py @@ -5,7 +5,10 @@ import sys import warnings from functools import wraps -from typing import Any, BinaryIO, Callable, TextIO +from typing import TYPE_CHECKING, Any, BinaryIO, TextIO + +if TYPE_CHECKING: + from collections.abc import Callable LOGGING_TRACE_LEVEL = 5 diff --git a/pyproject.toml b/pyproject.toml index 0b1ec9c2..2b75e7f2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" name = "flow.record" description = "A library for defining and creating structured data (called records) that can be streamed to disk or piped to other tools that use flow.record" readme = "README.md" -requires-python = ">=3.9.0" +requires-python = ">=3.10" license = "AGPL-3.0-or-later" license-files = ["LICENSE", "COPYRIGHT"] authors = [ @@ -37,8 +37,7 @@ repository = "https://github.com/fox-it/flow.record" # Note: these compression libraries do not work well with pypy compression = [ "lz4", - "zstandard==0.23.0; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "zstandard; platform_python_implementation != 'PyPy' and python_version != '3.9'", # Otherwise, pick the latest + "zstandard; platform_python_implementation != 'PyPy'", ] elastic = [ "elasticsearch", @@ -47,12 +46,11 @@ geoip = [ "maxminddb", ] avro = [ - "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 "fastavro[snappy]", ] duckdb = [ - "duckdb", - "pytz", # duckdb requires pytz for timezone support + "duckdb; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy + "pytz; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy ] splunk = [ "httpx", @@ -66,13 +64,65 @@ full = [ "structlog", ] +# This list is duplicated due to https://github.com/fox-it/flow.record/pull/182#discussion_r2284582481 +[dependency-groups] +compression = [ + "lz4", + "zstandard; platform_python_implementation != 'PyPy'", +] +elastic = [ + "elasticsearch", +] +geoip = [ + "maxminddb", +] +avro = [ + "fastavro[snappy]", +] +duckdb = [ + "duckdb; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy + "pytz; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy +] +splunk = [ + "httpx", +] +xlsx = [ + "openpyxl", +] +test = [ + {include-group = "compression"}, + {include-group = "avro"}, + {include-group = "elastic"}, + {include-group = "xlsx"}, + {include-group = "duckdb"}, + "tqdm", + "structlog", + "pytest", +] +full = [ + {include-group = "compression"}, + "tqdm", + "structlog", +] +build = [ + "build", +] +lint = [ + "ruff==0.13.1", + "vermin", +] +dev = [ + {include-group = "test"}, + {include-group = "lint"}, +] + [project.scripts] rdump = "flow.record.tools.rdump:main" rgeoip = "flow.record.tools.geoip:main" [tool.ruff] line-length = 120 -required-version = ">=0.9.0" +required-version = ">=0.13.1" extend-exclude = ["flow/record/version.py"] [tool.ruff.format] @@ -127,56 +177,3 @@ include = ["flow.*"] [tool.setuptools_scm] version_file = "flow/record/version.py" -# This list is duplicated due to https://github.com/fox-it/flow.record/pull/182#discussion_r2284582481 -[dependency-groups] -compression = [ - "lz4", - "zstandard==0.23.0; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "zstandard; platform_python_implementation != 'PyPy' and python_version != '3.9'", # Otherwise, pick the latest -] -elastic = [ - "elasticsearch", -] -geoip = [ - "maxminddb", -] -avro = [ - "cramjam<2.8.4; platform_python_implementation == 'PyPy' and python_version == '3.9'", # Pin to last working for PyPy3.9 - "fastavro[snappy]", -] -duckdb = [ - "duckdb; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy - "pytz; platform_python_implementation != 'PyPy'", # Don't install duckdb on PyPy -] -splunk = [ - "httpx", -] -xlsx = [ - "openpyxl", -] -test = [ - {include-group = "compression"}, - {include-group = "avro"}, - {include-group = "elastic"}, - {include-group = "xlsx"}, - {include-group = "duckdb"}, - "tqdm", - "structlog", - "pytest", -] -full = [ - {include-group = "compression"}, - "tqdm", - "structlog", -] -build = [ - "build", -] -lint = [ - "ruff==0.12.9", - "vermin", -] -dev = [ - {include-group = "test"}, - {include-group = "lint"}, -] diff --git a/tests/adapter/test_splunk.py b/tests/adapter/test_splunk.py index 9d6ef1d8..933a9d04 100644 --- a/tests/adapter/test_splunk.py +++ b/tests/adapter/test_splunk.py @@ -56,7 +56,11 @@ def mock_httpx_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock]: @pytest.mark.parametrize( - ("field", "escaped"), [*list(zip(escaped_fields, [True] * len(escaped_fields))), ("not_escaped", False)] + ("field", "escaped"), + [ + *list(zip(escaped_fields, [True] * len(escaped_fields), strict=False)), + ("not_escaped", False), + ], ) def test_escape_field_name(field: str, escaped: bool) -> None: if escaped: diff --git a/tests/adapter/test_sqlite_duckdb.py b/tests/adapter/test_sqlite_duckdb.py index 6a5ab26d..0538c407 100644 --- a/tests/adapter/test_sqlite_duckdb.py +++ b/tests/adapter/test_sqlite_duckdb.py @@ -327,7 +327,7 @@ def test_invalid_field_names_quoting(tmp_path: Path, invalid_field_name: str) -> # However, these field names are invalid in flow.record and should raise an exception with ( - pytest.raises(RecordDescriptorError, match="Field .* is an invalid or reserved field name."), + pytest.raises(RecordDescriptorError, match=r"Field .* is an invalid or reserved field name."), RecordReader(f"sqlite://{db}") as reader, ): _ = next(iter(reader)) diff --git a/tests/fieldtypes/test_fieldtypes.py b/tests/fieldtypes/test_fieldtypes.py index 87411125..c4784b31 100644 --- a/tests/fieldtypes/test_fieldtypes.py +++ b/tests/fieldtypes/test_fieldtypes.py @@ -6,7 +6,7 @@ import posixpath import types from datetime import datetime, timedelta, timezone -from typing import Callable +from typing import TYPE_CHECKING import pytest @@ -30,6 +30,9 @@ ) from flow.record.fieldtypes import datetime as dt +if TYPE_CHECKING: + from collections.abc import Callable + UTC = timezone.utc INT64_MAX = (1 << 63) - 1 @@ -1006,10 +1009,10 @@ def test_datetime_timezone_aware(tmp_path: pathlib.Path, record_filename: str) - def test_datetime_comparisions() -> None: - with pytest.raises(TypeError, match=".* compare .*naive"): + with pytest.raises(TypeError, match=r".* compare .*naive"): assert dt("2023-01-01") > datetime(2022, 1, 1) # noqa: DTZ001 - with pytest.raises(TypeError, match=".* compare .*naive"): + with pytest.raises(TypeError, match=r".* compare .*naive"): assert datetime(2022, 1, 1) < dt("2023-01-01") # noqa: DTZ001 assert dt("2023-01-01") > datetime(2022, 1, 1, tzinfo=UTC) diff --git a/tests/fieldtypes/test_ip.py b/tests/fieldtypes/test_ip.py index a1c224ee..e9c17bdb 100644 --- a/tests/fieldtypes/test_ip.py +++ b/tests/fieldtypes/test_ip.py @@ -18,7 +18,7 @@ def test_field_ipaddress() -> None: a = net.IPAddress("192.168.1.1") assert a == "192.168.1.1" - with pytest.raises(ValueError, match=".* does not appear to be an IPv4 or IPv6 address"): + with pytest.raises(ValueError, match=r".* does not appear to be an IPv4 or IPv6 address"): net.IPAddress("a.a.a.a") @@ -27,7 +27,7 @@ def test_field_ipnetwork() -> None: assert a == "192.168.1.0/24" # Host bits set - with pytest.raises(ValueError, match=".* has host bits set"): + with pytest.raises(ValueError, match=r".* has host bits set"): net.IPNetwork("192.168.1.10/24") diff --git a/tests/packer/test_json_packer.py b/tests/packer/test_json_packer.py index 7a139c5a..025b30d8 100644 --- a/tests/packer/test_json_packer.py +++ b/tests/packer/test_json_packer.py @@ -69,7 +69,7 @@ def test_record_descriptor_not_found() -> None: # create a new packer and try to unpack the bytes packer = JsonRecordPacker() - with pytest.raises(RecordDescriptorNotFound, match="No RecordDescriptor found for: .*test/descriptor_not_found"): + with pytest.raises(RecordDescriptorNotFound, match=r"No RecordDescriptor found for: .*test/descriptor_not_found"): packer.unpack(data) diff --git a/tests/packer/test_packer.py b/tests/packer/test_packer.py index 17eadc64..3eaf6e53 100644 --- a/tests/packer/test_packer.py +++ b/tests/packer/test_packer.py @@ -269,5 +269,5 @@ def test_record_descriptor_not_found() -> None: # create a new packer and try to unpack the bytes packer = RecordPacker() - with pytest.raises(RecordDescriptorNotFound, match="No RecordDescriptor found for: .*test/descriptor_not_found"): + with pytest.raises(RecordDescriptorNotFound, match=r"No RecordDescriptor found for: .*test/descriptor_not_found"): packer.unpack(data) diff --git a/tests/record/test_adapter.py b/tests/record/test_adapter.py index b6cb8b37..45eb80ad 100644 --- a/tests/record/test_adapter.py +++ b/tests/record/test_adapter.py @@ -357,13 +357,13 @@ def test_text_record_adapter(capsys: pytest.CaptureFixture) -> None: # Format string with existing variables rec = TestRecordWithFooBar(name="world", foo="foo", bar="bar") writer.write(rec) - out, err = capsys.readouterr() + out, _ = capsys.readouterr() assert out == "Hello world, foo is bar!\n" # Format string with non-existing variables rec = TestRecordWithoutFooBar(name="planet") writer.write(rec) - out, err = capsys.readouterr() + out, _ = capsys.readouterr() assert out == "Hello planet, {foo} is {bar}!\n" @@ -405,23 +405,23 @@ def test_recordstream_header(tmp_path: Path) -> None: def test_recordstream_header_stdout(capsysbinary: pytest.CaptureFixture) -> None: with RecordWriter() as writer: pass - out, err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" writer = RecordWriter() del writer - out, err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"" writer = RecordWriter() writer.close() - out, err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"" writer = RecordWriter() writer.flush() writer.close() - out, err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out == b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n" diff --git a/tests/record/test_record.py b/tests/record/test_record.py index 4a507de2..72b0506f 100644 --- a/tests/record/test_record.py +++ b/tests/record/test_record.py @@ -523,7 +523,7 @@ def test_record_replace() -> None: assert t4._source == "pytest" assert t4._generated == t2._generated - with pytest.raises(ValueError, match=".*Got unexpected field names:.*foobar.*"): + with pytest.raises(ValueError, match=r".*Got unexpected field names:.*foobar.*"): t._replace(foobar="keyword does not exist") diff --git a/tests/selector/test_selectors.py b/tests/selector/test_selectors.py index a25935f8..fc7b4b10 100644 --- a/tests/selector/test_selectors.py +++ b/tests/selector/test_selectors.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from datetime import datetime, timezone import pytest @@ -228,13 +229,15 @@ def test_selector_function_call_whitelisting() -> None: assert rec in Selector("'pe32' in lower(r.filetype)") # But functions on types are not with pytest.raises( - Exception, match="Call 'r.filetype.lower' not allowed. No calls other then whitelisted 'global' calls allowed!" + Exception, + match=re.escape("Call 'r.filetype.lower' not allowed. No calls other then whitelisted 'global' calls allowed!"), ): assert rec in Selector("'pe' in r.filetype.lower()") assert rec in Selector("'EXECUTABLE' in upper(r.filetype)") with pytest.raises( - Exception, match="Call 'r.filetype.upper' not allowed. No calls other then whitelisted 'global' calls allowed!" + Exception, + match=re.escape("Call 'r.filetype.upper' not allowed. No calls other then whitelisted 'global' calls allowed!"), ): assert rec in Selector("'EXECUTABLE' in r.filetype.upper()") @@ -251,7 +254,8 @@ def test_selector_function_call_whitelisting() -> None: # We call net.ipv4 instead of net.ipv4.Subnet, which should fail with pytest.raises( - Exception, match="Call 'net.ipv4' not allowed. No calls other then whitelisted 'global' calls allowed!" + Exception, + match=re.escape("Call 'net.ipv4' not allowed. No calls other then whitelisted 'global' calls allowed!"), ): assert rec in Selector("r.ip in net.ipv4('192.168.1.0/24')") diff --git a/tests/test_regressions.py b/tests/test_regressions.py index 5a76a20d..810ed9b1 100644 --- a/tests/test_regressions.py +++ b/tests/test_regressions.py @@ -9,7 +9,7 @@ from datetime import datetime, timezone from io import BytesIO from pathlib import Path -from typing import Callable +from typing import TYPE_CHECKING from unittest.mock import MagicMock, patch import msgpack @@ -34,6 +34,9 @@ from flow.record.tools import rdump from flow.record.utils import is_stdout +if TYPE_CHECKING: + from collections.abc import Callable + def test_datetime_serialization() -> None: packer = RecordPacker() @@ -435,7 +438,7 @@ def test_grouped_replace() -> None: assert replaced_grouped_record._source == "testcase" # Replacement with non existing field should raise a ValueError - with pytest.raises(ValueError, match=".*Got unexpected field names:.*non_existing_field.*"): + with pytest.raises(ValueError, match=r".*Got unexpected field names:.*non_existing_field.*"): grouped_record._replace(number=100, other="changed", non_existing_field="oops") @@ -467,7 +470,7 @@ def test_is_stdout(tmp_path: pathlib.Path, capsysbinary: pytest.CaptureFixture) with RecordWriter() as writer: assert is_stdout(writer.fp) - out, err = capsysbinary.readouterr() + out, _ = capsysbinary.readouterr() assert out.startswith(b"\x00\x00\x00\x0f\xc4\rRECORDSTREAM\n") with RecordWriter(tmp_path / "output.records") as writer: diff --git a/tests/test_utils.py b/tests/test_utils.py index 441ae8de..eeb6cdc9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -21,5 +21,5 @@ def test_boolean_argument() -> None: assert boolean_argument(False) is False assert boolean_argument(1) is True assert boolean_argument(0) is False - with pytest.raises(ValueError, match="Invalid boolean argument: .*"): + with pytest.raises(ValueError, match=r"Invalid boolean argument: .*"): boolean_argument("maybe") diff --git a/tests/tools/test_rdump.py b/tests/tools/test_rdump.py index a9ab376e..64bcad76 100644 --- a/tests/tools/test_rdump.py +++ b/tests/tools/test_rdump.py @@ -113,7 +113,7 @@ def test_rdump_format_template(tmp_path: Path) -> None: args = ["rdump", str(path), "-f", "TEST: {count},{foo}"] print(args) res = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, stderr = res.communicate() + stdout, _ = res.communicate() for i, line in enumerate(stdout.decode().splitlines()): assert line == f"TEST: {i},bar" @@ -155,7 +155,7 @@ def test_rdump_json(tmp_path: Path) -> None: # dump records as JSON lines args = ["rdump", str(record_path), "-w", "jsonfile://-?descriptors=true"] process = subprocess.Popen(args, stdout=subprocess.PIPE) - stdout, stderr = process.communicate() + stdout, _ = process.communicate() assert process.returncode == 0 diff --git a/tox.ini b/tox.ini index e90b5845..72ab0dd0 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ envlist = lint, py3, pypy3 # requires if they are not available on the host system. This requires the # locally installed tox to have a minimum version 3.3.0. This means the names # of the configuration options are still according to the tox 3.x syntax. -minversion = 4.11.4 +minversion = 4.27.0 # This version of virtualenv will install setuptools version 68.2.2 and pip # 23.3.1. These versions fully support python projects defined only through a # pyproject.toml file (PEP-517/PEP-518/PEP-621). This pip version also support @@ -42,7 +42,7 @@ dependency_groups = lint commands = ruff check flow tests ruff format --check flow tests - vermin -t=3.9- --no-tips --lint flow tests + vermin -t=3.10- --no-tips --lint flow tests [testenv:docs-build] allowlist_externals = make