diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/examples/filesystem.py b/examples/filesystem.py index b5d8d469..eb35bff9 100644 --- a/examples/filesystem.py +++ b/examples/filesystem.py @@ -1,10 +1,15 @@ -import os -import stat +from __future__ import annotations -from datetime import datetime +import stat +from pathlib import Path +from typing import TYPE_CHECKING from flow.record import RecordDescriptor, RecordWriter +if TYPE_CHECKING: + from collections.abc import Iterator + + descriptor = """ filesystem/unix/entry string path; @@ -22,34 +27,32 @@ FilesystemFile = RecordDescriptor(descriptor) -def hash_file(path, t): - f = open(path, "rb") - while 1: - d = f.read(4096) - if d == "": - break - f.close() +def hash_file(path: str | Path) -> None: + with Path(path).open("rb") as f: + while True: + d = f.read(4096) + if not d: + break class FilesystemIterator: basepath = None - def __init__(self, basepath): + def __init__(self, basepath: str | None): self.basepath = basepath self.recordType = FilesystemFile - def classify(self, source, classification): + def classify(self, source: str, classification: str) -> None: self.recordType = FilesystemFile.base(_source=source, _classification=classification) - def iter(self, path): - path = os.path.abspath(path) - return self._iter(path) + def iter(self, path: str | Path) -> Iterator[FilesystemFile]: + return self._iter(Path(path).resolve()) - def _iter(self, path): - if path.startswith("/proc"): + def _iter(self, path: Path) -> Iterator[FilesystemFile]: + if path.is_relative_to("/proc"): return - st = os.lstat(path) + st = path.lstat() abspath = path if self.basepath and abspath.startswith(self.basepath): @@ -59,7 +62,7 @@ def _iter(self, path): link = None if ifmt == stat.S_IFLNK: - link = os.readlink(path) + link = path.readlink() yield self.recordType( path=abspath, @@ -69,20 +72,16 @@ def _iter(self, path): size=st.st_size, uid=st.st_uid, gid=st.st_gid, - ctime=datetime.fromtimestamp(st.st_ctime), - mtime=datetime.fromtimestamp(st.st_mtime), - atime=datetime.fromtimestamp(st.st_atime), + ctime=st.st_ctime, + mtime=st.st_mtime, + atime=st.st_atime, link=link, ) if ifmt == stat.S_IFDIR: - for i in os.listdir(path): - if i in (".", ".."): - continue - - fullpath = os.path.join(path, i) - for e in self.iter(fullpath): - yield e + for i in path.iterdir(): + fullpath = path.joinpath(i) + yield from self.iter(fullpath) chunk = [] diff --git a/examples/passivedns.py b/examples/passivedns.py index 28361d80..57e3a6b5 100644 --- a/examples/passivedns.py +++ b/examples/passivedns.py @@ -1,18 +1,21 @@ #!/usr/bin/env pypy -import record +from __future__ import annotations + import sys -import datetime +from datetime import datetime, timezone import net.ipv4 - +import record from fileprocessing import DirectoryProcessor +UTC_TIMEZONE = timezone.utc + -def ts(s): - return datetime.datetime.fromtimestamp(float(s)) +def ts(s: float) -> datetime: + return datetime.fromtimestamp(float(s), tz=UTC_TIMEZONE) -def ip(s): +def ip(s: str) -> net.ipv4.Address: return net.ipv4.Address(s) @@ -21,7 +24,7 @@ class SeparatedFile: seperator = None format = None - def __init__(self, fp, seperator, format): + def __init__(self, fp: list[str], seperator: str | None, format: list[tuple]): self.fp = fp self.seperator = seperator self.format = format @@ -46,7 +49,7 @@ def __iter__(self): yield recordtype(**r) -def PassiveDnsFile(fp): +def PassiveDnsFile(fp: list[str]) -> SeparatedFile: return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT) @@ -63,7 +66,7 @@ def PassiveDnsFile(fp): ] -def main(): +def main() -> None: rs = record.RecordOutput(sys.stdout) for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"): rs.write(r) diff --git a/examples/tcpconn.py b/examples/tcpconn.py index 0c10faa0..078d6d3e 100644 --- a/examples/tcpconn.py +++ b/examples/tcpconn.py @@ -1,8 +1,10 @@ import random +from datetime import datetime, timezone -from datetime import datetime from flow import record +UTC_TIMEZONE = timezone.utc + descriptor = """ network/traffic/tcp/connection datetime ts; @@ -32,9 +34,9 @@ rs = record.RecordWriter() -for i in range(500): +for _ in range(500): r = conn( - ts=datetime.now(), + ts=datetime.now(tz=UTC_TIMEZONE), src=random.choice(ip_list), srcport=random.choice(port_list), dst=random.choice(ip_list), diff --git a/flow/record/adapter/elastic.py b/flow/record/adapter/elastic.py index c47e0bc9..c39d9f33 100644 --- a/flow/record/adapter/elastic.py +++ b/flow/record/adapter/elastic.py @@ -6,6 +6,8 @@ import threading from typing import TYPE_CHECKING +import urllib3 + try: import elasticsearch import elasticsearch.helpers @@ -103,8 +105,6 @@ def __init__( if not verify_certs: # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag. - import urllib3 - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) self.metadata_fields = {} @@ -235,8 +235,6 @@ def __init__( if not verify_certs: # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag. - import urllib3 - urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) def __iter__(self) -> Iterator[Record]: diff --git a/flow/record/base.py b/flow/record/base.py index 7584f34c..32649807 100644 --- a/flow/record/base.py +++ b/flow/record/base.py @@ -186,6 +186,7 @@ def _asdict(self, fields: list[str] | None = None, exclude: list[str] | None = N return OrderedDict((k, getattr(self, k)) for k in self.__slots__ if k not in exclude) if TYPE_CHECKING: + def __getattr__(self, name: str) -> Any: ... def __setattr__(self, k: str, v: Any) -> None: diff --git a/flow/record/tools/rdump.py b/flow/record/tools/rdump.py index b630e003..e175c2bc 100644 --- a/flow/record/tools/rdump.py +++ b/flow/record/tools/rdump.py @@ -1,6 +1,7 @@ #!/usr/bin/env python from __future__ import annotations +import argparse import logging import sys from importlib import import_module @@ -69,8 +70,6 @@ def list_adapters() -> None: @catch_sigpipe def main(argv: list[str] | None = None) -> int: - import argparse - parser = argparse.ArgumentParser( description="Record dumper, a tool that can read, write and filter records", formatter_class=argparse.ArgumentDefaultsHelpFormatter, diff --git a/pyproject.toml b/pyproject.toml index 9776758a..8892b42a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,10 +56,14 @@ duckdb = [ splunk = [ "httpx", ] +xlsx = [ + "openpyxl", +] test = [ "flow.record[compression]", "flow.record[avro]", "flow.record[elastic]", + "flow.record[xlsx]", "duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb "pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb "tqdm", diff --git a/tests/standalone_test.py b/tests/standalone_test.py index 9bd0137c..6389754e 100644 --- a/tests/standalone_test.py +++ b/tests/standalone_test.py @@ -1,5 +1,6 @@ from __future__ import annotations +import traceback from typing import Callable @@ -14,6 +15,5 @@ def main(glob: dict[str, Callable[..., None]]) -> None: print("PASSED") except Exception: print("FAILED") - import traceback traceback.print_exc() diff --git a/tests/test_fieldtypes.py b/tests/test_fieldtypes.py index e6402892..87411125 100644 --- a/tests/test_fieldtypes.py +++ b/tests/test_fieldtypes.py @@ -376,15 +376,21 @@ def test_uri_type() -> None: assert r.path.protocol == "http" assert r.path.hostname == "example.com" - with pytest.warns(DeprecationWarning): + with pytest.warns( + DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)" + ): r = TestRecord(uri.from_windows(r"c:\windows\program files\Fox-IT B.V\flow.exe")) assert r.path.filename == "flow.exe" r = TestRecord() - with pytest.warns(DeprecationWarning): + with pytest.warns( + DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)" + ): r.path = uri.normalize(r"c:\Users\Fox-IT\Downloads\autoruns.exe") assert r.path.filename == "autoruns.exe" - with pytest.warns(DeprecationWarning): + with pytest.warns( + DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)" + ): assert r.path.dirname == uri.normalize(r"\Users\Fox-IT\Downloads") assert r.path.dirname == "/Users/Fox-IT/Downloads" diff --git a/tests/test_packer.py b/tests/test_packer.py index cff4ca35..17eadc64 100644 --- a/tests/test_packer.py +++ b/tests/test_packer.py @@ -22,7 +22,7 @@ def test_uri_packing() -> None: ], ) - # construct with an url + # Construct with an url record = TestRecord("http://www.google.com/evil.bin") data = packer.pack(record) record = packer.unpack(data) @@ -30,8 +30,9 @@ def test_uri_packing() -> None: assert record.path.filename == "evil.bin" assert record.path.dirname == "/" - # construct from uri() -> for windows=True - with pytest.warns(DeprecationWarning): + with pytest.warns( + DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)" + ): path = uri.from_windows(r"c:\Program Files\Fox-IT\flow is awesome.exe") record = TestRecord(path) data = packer.pack(record) @@ -40,8 +41,9 @@ def test_uri_packing() -> None: assert record.path.filename == "flow is awesome.exe" assert record.path.dirname == "/Program Files/Fox-IT" - # construct using uri.from_windows() - with pytest.warns(DeprecationWarning): + with pytest.warns( + DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)" + ): path = uri.from_windows(r"c:\Users\Hello World\foo.bar.exe") record = TestRecord(path) data = packer.pack(record) diff --git a/tests/test_rdump.py b/tests/test_rdump.py index 783855a2..723e2978 100644 --- a/tests/test_rdump.py +++ b/tests/test_rdump.py @@ -17,6 +17,7 @@ import flow.record.fieldtypes from flow.record import RecordDescriptor, RecordReader, RecordWriter +from flow.record.adapter.line import field_types_for_record_descriptor from flow.record.fieldtypes import flow_record_tz from flow.record.tools import rdump @@ -681,8 +682,6 @@ def test_rdump_line_verbose(tmp_path: Path, capsys: pytest.CaptureFixture, rdump writer.write(TestRecord(counter=2)) writer.write(TestRecord(counter=3)) - from flow.record.adapter.line import field_types_for_record_descriptor - field_types_for_record_descriptor.cache_clear() assert field_types_for_record_descriptor.cache_info().currsize == 0 rdump.main([str(record_path), *rdump_params]) diff --git a/tests/test_xlsx_adapter.py b/tests/test_xlsx_adapter.py index 91fa1594..558b7234 100644 --- a/tests/test_xlsx_adapter.py +++ b/tests/test_xlsx_adapter.py @@ -9,6 +9,7 @@ import pytest from flow.record import fieldtypes +from flow.record.adapter.xlsx import sanitize_fieldvalues if TYPE_CHECKING: from collections.abc import Iterator @@ -27,8 +28,6 @@ def mock_openpyxl_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock def test_sanitize_field_values(mock_openpyxl_package: MagicMock) -> None: - from flow.record.adapter.xlsx import sanitize_fieldvalues - assert list( sanitize_fieldvalues( [