fox-it · yunzheng · Jun 22, 2025 · Jun 19, 2025 · Jun 22, 2025 · Jun 22, 2025
diff --git a/examples/__init__.py b/examples/__init__.py
diff --git a/examples/filesystem.py b/examples/filesystem.py
@@ -1,10 +1,15 @@
-import os
-import stat
+from __future__ import annotations
 
-from datetime import datetime
+import stat
+from pathlib import Path
+from typing import TYPE_CHECKING
 
 from flow.record import RecordDescriptor, RecordWriter
 
+if TYPE_CHECKING:
+    from collections.abc import Iterator
+
+
 descriptor = """
 filesystem/unix/entry
     string path;
@@ -22,34 +27,32 @@
 FilesystemFile = RecordDescriptor(descriptor)
 
 
-def hash_file(path, t):
-    f = open(path, "rb")
-    while 1:
-        d = f.read(4096)
-        if d == "":
-            break
-    f.close()
+def hash_file(path: str | Path) -> None:
+    with Path(path).open("rb") as f:
+        while True:
+            d = f.read(4096)
+            if not d:
+                break
 
 
 class FilesystemIterator:
     basepath = None
 
-    def __init__(self, basepath):
+    def __init__(self, basepath: str | None):
         self.basepath = basepath
         self.recordType = FilesystemFile
 
-    def classify(self, source, classification):
+    def classify(self, source: str, classification: str) -> None:
         self.recordType = FilesystemFile.base(_source=source, _classification=classification)
 
-    def iter(self, path):
-        path = os.path.abspath(path)
-        return self._iter(path)
+    def iter(self, path: str | Path) -> Iterator[FilesystemFile]:
+        return self._iter(Path(path).resolve())
 
-    def _iter(self, path):
-        if path.startswith("/proc"):
+    def _iter(self, path: Path) -> Iterator[FilesystemFile]:
+        if path.is_relative_to("/proc"):
             return
 
-        st = os.lstat(path)
+        st = path.lstat()
 
         abspath = path
         if self.basepath and abspath.startswith(self.basepath):
@@ -59,7 +62,7 @@ def _iter(self, path):
 
         link = None
         if ifmt == stat.S_IFLNK:
-            link = os.readlink(path)
+            link = path.readlink()
 
         yield self.recordType(
             path=abspath,
@@ -69,20 +72,16 @@ def _iter(self, path):
             size=st.st_size,
             uid=st.st_uid,
             gid=st.st_gid,
-            ctime=datetime.fromtimestamp(st.st_ctime),
-            mtime=datetime.fromtimestamp(st.st_mtime),
-            atime=datetime.fromtimestamp(st.st_atime),
+            ctime=st.st_ctime,
+            mtime=st.st_mtime,
+            atime=st.st_atime,
             link=link,
         )
 
         if ifmt == stat.S_IFDIR:
-            for i in os.listdir(path):
-                if i in (".", ".."):
-                    continue
-
-                fullpath = os.path.join(path, i)
-                for e in self.iter(fullpath):
-                    yield e
+            # iterdir() yields full child paths; _iter() avoids resolve() so lstat() still sees symlinks
+            for child in path.iterdir():
+                yield from self._iter(child)
 
 
 chunk = []

diff --git a/examples/passivedns.py b/examples/passivedns.py
@@ -1,18 +1,21 @@
 #!/usr/bin/env pypy
-import record
+from __future__ import annotations
+
 import sys
-import datetime
+from datetime import datetime, timezone
 
 import net.ipv4
-
+import record
 from fileprocessing import DirectoryProcessor
 
+UTC_TIMEZONE = timezone.utc
+
 
-def ts(s):
-    return datetime.datetime.fromtimestamp(float(s))
+def ts(s: float) -> datetime:
+    return datetime.fromtimestamp(float(s), tz=UTC_TIMEZONE)
 
 
-def ip(s):
+def ip(s: str) -> net.ipv4.Address:
     return net.ipv4.Address(s)
 
 
@@ -21,7 +24,7 @@ class SeparatedFile:
     seperator = None
     format = None
 
-    def __init__(self, fp, seperator, format):
+    def __init__(self, fp: list[str], seperator: str | None, format: list[tuple]):
         self.fp = fp
         self.seperator = seperator
         self.format = format
@@ -46,7 +49,7 @@ def __iter__(self):
             yield recordtype(**r)
 
 
-def PassiveDnsFile(fp):
+def PassiveDnsFile(fp: list[str]) -> SeparatedFile:
     return SeparatedFile(fp, "||", PASSIVEDNS_FORMAT)
 
 
@@ -63,7 +66,7 @@ def PassiveDnsFile(fp):
 ]
 
 
-def main():
+def main() -> None:
     rs = record.RecordOutput(sys.stdout)
     for r in DirectoryProcessor(sys.argv[1], PassiveDnsFile, r"\.log\.gz"):
         rs.write(r)

diff --git a/examples/tcpconn.py b/examples/tcpconn.py
@@ -1,8 +1,10 @@
 import random
+from datetime import datetime, timezone
 
-from datetime import datetime
 from flow import record
 
+UTC_TIMEZONE = timezone.utc
+
 descriptor = """
 network/traffic/tcp/connection
     datetime ts;
@@ -32,9 +34,9 @@
 
 rs = record.RecordWriter()
 
-for i in range(500):
+for _ in range(500):
     r = conn(
-        ts=datetime.now(),
+        ts=datetime.now(tz=UTC_TIMEZONE),
         src=random.choice(ip_list),
         srcport=random.choice(port_list),
         dst=random.choice(ip_list),

diff --git a/flow/record/adapter/elastic.py b/flow/record/adapter/elastic.py
@@ -6,6 +6,8 @@
 import threading
 from typing import TYPE_CHECKING
 
+import urllib3
+
 try:
     import elasticsearch
     import elasticsearch.helpers
@@ -103,8 +105,6 @@ def __init__(
 
         if not verify_certs:
             # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
-            import urllib3
-
             urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
         self.metadata_fields = {}
@@ -235,8 +235,6 @@ def __init__(
 
         if not verify_certs:
             # Disable InsecureRequestWarning of urllib3, caused by the verify_certs flag.
-            import urllib3
-
             urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
     def __iter__(self) -> Iterator[Record]:

diff --git a/flow/record/base.py b/flow/record/base.py
@@ -186,6 +186,7 @@ def _asdict(self, fields: list[str] | None = None, exclude: list[str] | None = N
         return OrderedDict((k, getattr(self, k)) for k in self.__slots__ if k not in exclude)
 
     if TYPE_CHECKING:
+
         def __getattr__(self, name: str) -> Any: ...
 
     def __setattr__(self, k: str, v: Any) -> None:

diff --git a/flow/record/tools/rdump.py b/flow/record/tools/rdump.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 from __future__ import annotations
 
+import argparse
 import logging
 import sys
 from importlib import import_module
@@ -69,8 +70,6 @@ def list_adapters() -> None:
 
 @catch_sigpipe
 def main(argv: list[str] | None = None) -> int:
-    import argparse
-
     parser = argparse.ArgumentParser(
         description="Record dumper, a tool that can read, write and filter records",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,

diff --git a/pyproject.toml b/pyproject.toml
@@ -56,10 +56,14 @@ duckdb = [
 splunk = [
     "httpx",
 ]
+xlsx = [
+    "openpyxl",
+]
 test = [
     "flow.record[compression]",
     "flow.record[avro]",
     "flow.record[elastic]",
+    "flow.record[xlsx]",
     "duckdb; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
     "pytz; platform_python_implementation != 'PyPy' and python_version < '3.12'", # duckdb
     "tqdm",

diff --git a/tests/standalone_test.py b/tests/standalone_test.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import traceback
 from typing import Callable
 
 
@@ -14,6 +15,5 @@ def main(glob: dict[str, Callable[..., None]]) -> None:
             print("PASSED")
         except Exception:
             print("FAILED")
-            import traceback
 
             traceback.print_exc()
diff --git a/tests/test_fieldtypes.py b/tests/test_fieldtypes.py
@@ -376,15 +376,21 @@ def test_uri_type() -> None:
     assert r.path.protocol == "http"
     assert r.path.hostname == "example.com"
 
-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(
+        DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
+    ):
         r = TestRecord(uri.from_windows(r"c:\windows\program files\Fox-IT B.V\flow.exe"))
     assert r.path.filename == "flow.exe"
 
     r = TestRecord()
-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(
+        DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
+    ):
         r.path = uri.normalize(r"c:\Users\Fox-IT\Downloads\autoruns.exe")
     assert r.path.filename == "autoruns.exe"
-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(
+        DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
+    ):
         assert r.path.dirname == uri.normalize(r"\Users\Fox-IT\Downloads")
     assert r.path.dirname == "/Users/Fox-IT/Downloads"
 

diff --git a/tests/test_packer.py b/tests/test_packer.py
@@ -22,16 +22,17 @@ def test_uri_packing() -> None:
         ],
     )
 
-    # construct with an url
+    # Construct with an url
     record = TestRecord("http://www.google.com/evil.bin")
     data = packer.pack(record)
     record = packer.unpack(data)
     assert record.path == "http://www.google.com/evil.bin"
     assert record.path.filename == "evil.bin"
     assert record.path.dirname == "/"
 
-    # construct from uri() -> for windows=True
-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(
+        DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
+    ):
         path = uri.from_windows(r"c:\Program Files\Fox-IT\flow is awesome.exe")
     record = TestRecord(path)
     data = packer.pack(record)
@@ -40,8 +41,9 @@ def test_uri_packing() -> None:
     assert record.path.filename == "flow is awesome.exe"
     assert record.path.dirname == "/Program Files/Fox-IT"
 
-    # construct using uri.from_windows()
-    with pytest.warns(DeprecationWarning):
+    with pytest.warns(
+        DeprecationWarning, match=r"Do not use class uri\(...\) for filesystem paths, use class path\(...\)"
+    ):
         path = uri.from_windows(r"c:\Users\Hello World\foo.bar.exe")
     record = TestRecord(path)
     data = packer.pack(record)

diff --git a/tests/test_rdump.py b/tests/test_rdump.py
@@ -17,6 +17,7 @@
 
 import flow.record.fieldtypes
 from flow.record import RecordDescriptor, RecordReader, RecordWriter
+from flow.record.adapter.line import field_types_for_record_descriptor
 from flow.record.fieldtypes import flow_record_tz
 from flow.record.tools import rdump
 
@@ -681,8 +682,6 @@ def test_rdump_line_verbose(tmp_path: Path, capsys: pytest.CaptureFixture, rdump
         writer.write(TestRecord(counter=2))
         writer.write(TestRecord(counter=3))
 
-    from flow.record.adapter.line import field_types_for_record_descriptor
-
     field_types_for_record_descriptor.cache_clear()
     assert field_types_for_record_descriptor.cache_info().currsize == 0
     rdump.main([str(record_path), *rdump_params])

diff --git a/tests/test_xlsx_adapter.py b/tests/test_xlsx_adapter.py
@@ -9,6 +9,7 @@
 import pytest
 
 from flow.record import fieldtypes
+from flow.record.adapter.xlsx import sanitize_fieldvalues
 
 if TYPE_CHECKING:
     from collections.abc import Iterator
@@ -27,8 +28,6 @@ def mock_openpyxl_package(monkeypatch: pytest.MonkeyPatch) -> Iterator[MagicMock
 
 
 def test_sanitize_field_values(mock_openpyxl_package: MagicMock) -> None:
-    from flow.record.adapter.xlsx import sanitize_fieldvalues
-
     assert list(
         sanitize_fieldvalues(
             [