From 673812fa7e6984d3ca319af6ce9055f889812307 Mon Sep 17 00:00:00 2001 From: Gyatso <218940752+respondersGY@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:28:49 +0200 Subject: [PATCH 1/6] Handle surrogates with `errors=surrogateescape` --- flow/record/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/record/stream.py b/flow/record/stream.py index 4fa665f1..e1c10ed4 100644 --- a/flow/record/stream.py +++ b/flow/record/stream.py @@ -45,7 +45,7 @@ def __init__(self, fp: BinaryIO, flush: bool = True): self.auto_flush = flush def write(self, obj: Record) -> None: - buf = repr(obj).encode() + b"\n" + buf = repr(obj).encode(errors="surrogateescape") + b"\n" self.fp.write(buf) if self.auto_flush: self.flush() From be7cef53722abb225d74884c7e3c8babfcfbf09f Mon Sep 17 00:00:00 2001 From: Gyatso <218940752+respondersGY@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:00:56 +0200 Subject: [PATCH 2/6] Add non-working test `UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 38: invalid continuation byte` --- tests/record/test_record.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/record/test_record.py b/tests/record/test_record.py index 00c471fb..e1ccf8f7 100644 --- a/tests/record/test_record.py +++ b/tests/record/test_record.py @@ -29,6 +29,7 @@ set_ignored_fields_for_comparison, ) from flow.record.exceptions import RecordDescriptorError +from flow.record.fieldtypes import windows_path from flow.record.stream import RecordFieldRewriter if TYPE_CHECKING: @@ -317,9 +318,13 @@ def test_record_printer_stdout_surrogateescape(capsys: pytest.CaptureFixture) -> "test/a", [ ("string", "name"), + ("path", "value"), ], ) - record = Record(b"R\xc3\xa9\xeamy") + record = Record( + b"R\xc3\xa9\xeamy\xc3\xa4\xc3\x84", + windows_path("\udce4"), + ) # fake capsys to be a tty. def isatty() -> bool: From 0fb58ec0b1b7c43ef433b96732c8289a91d3f5b5 Mon Sep 17 00:00:00 2001 From: Gyatso <218940752+respondersGY@users.noreply.github.com> Date: Wed, 13 Aug 2025 11:28:49 +0200 Subject: [PATCH 3/6] Handle surrogates with `errors=surrogateescape` --- flow/record/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/record/stream.py b/flow/record/stream.py index 4fa665f1..e1c10ed4 100644 --- a/flow/record/stream.py +++ b/flow/record/stream.py @@ -45,7 +45,7 @@ def __init__(self, fp: BinaryIO, flush: bool = True): self.auto_flush = flush def write(self, obj: Record) -> None: - buf = repr(obj).encode() + b"\n" + buf = repr(obj).encode(errors="surrogateescape") + b"\n" self.fp.write(buf) if self.auto_flush: self.flush() From b410276ab1e3ed9521d66163fa602d6a1b6f2b6c Mon Sep 17 00:00:00 2001 From: Gyatso <218940752+respondersGY@users.noreply.github.com> Date: Wed, 20 Aug 2025 10:00:56 +0200 Subject: [PATCH 4/6] Add non-working test `UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe4 in position 38: invalid continuation byte` --- tests/record/test_record.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/record/test_record.py b/tests/record/test_record.py index ff46904a..f3d8ccec 100644 --- a/tests/record/test_record.py +++ b/tests/record/test_record.py @@ -29,6 +29,7 @@ set_ignored_fields_for_comparison, ) from flow.record.exceptions import RecordDescriptorError +from flow.record.fieldtypes import windows_path from flow.record.stream import RecordFieldRewriter if TYPE_CHECKING: @@ -317,9 +318,13 @@ def test_record_printer_stdout_surrogateescape(capsys: pytest.CaptureFixture) -> "test/a", [ ("string", "name"), + ("path", "value"), ], ) - record = Record(b"R\xc3\xa9\xeamy") + record = Record( + b"R\xc3\xa9\xeamy\xc3\xa4\xc3\x84", + windows_path("\udce4"), + ) # fake capsys to be a tty. def isatty() -> bool: From 9324f509b3f16a599f5191621efab28a775abee6 Mon Sep 17 00:00:00 2001 From: Gyatso <218940752+respondersGY@users.noreply.github.com> Date: Thu, 21 Aug 2025 12:51:51 +0200 Subject: [PATCH 5/6] Revert changes and change `__repr__` --- flow/record/fieldtypes/__init__.py | 10 ++++++++++ flow/record/stream.py | 2 +- tests/record/test_record.py | 10 +++++----- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/flow/record/fieldtypes/__init__.py b/flow/record/fieldtypes/__init__.py index 95f62b8b..ed1337f9 100644 --- a/flow/record/fieldtypes/__init__.py +++ b/flow/record/fieldtypes/__init__.py @@ -722,6 +722,16 @@ class posix_path(pathlib.PurePosixPath, path): class windows_path(pathlib.PureWindowsPath, path): def __repr__(self) -> str: s = str(self) + # Only use repr() if we have surrogates that need escaping + try: + s.encode("utf-8") + except UnicodeEncodeError: + # Has surrogates - use repr but fix the over-escaping + s = repr(s)[1:-1] # This escapes surrogates as \udcXX + s = s.replace("\\\\", "\\") # Fix double backslashes + s = s.replace("\\'", "'") # Fix over-escaped quotes + s = s.replace('\\"', '"') # Fix over-escaped double quotes + quote = "'" if "'" in s: if '"' in s: diff --git a/flow/record/stream.py b/flow/record/stream.py index e1c10ed4..4fa665f1 100644 --- a/flow/record/stream.py +++ b/flow/record/stream.py @@ -45,7 +45,7 @@ def __init__(self, fp: BinaryIO, flush: bool = True): self.auto_flush = flush def write(self, obj: Record) -> None: - buf = repr(obj).encode(errors="surrogateescape") + b"\n" + buf = repr(obj).encode() + b"\n" self.fp.write(buf) if self.auto_flush: self.flush() diff --git a/tests/record/test_record.py b/tests/record/test_record.py index e1ccf8f7..f227a8f5 100644 --- a/tests/record/test_record.py +++ b/tests/record/test_record.py @@ -308,7 +308,7 @@ def isatty() -> bool: writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout)) writer.write(record) - out, err = capsys.readouterr() + out, _ = capsys.readouterr() expected = "\n" assert out == expected @@ -322,8 +322,8 @@ def test_record_printer_stdout_surrogateescape(capsys: pytest.CaptureFixture) -> ], ) record = Record( - b"R\xc3\xa9\xeamy\xc3\xa4\xc3\x84", - windows_path("\udce4"), + b"R\xc3\xa9\xeamy", + windows_path(b"\x43\x3a\x5c\xc3\xa4\xc3\x84\xe4".decode(errors="surrogateescape")), ) # fake capsys to be a tty. @@ -335,8 +335,8 @@ def isatty() -> bool: writer = RecordPrinter(getattr(sys.stdout, "buffer", sys.stdout)) writer.write(record) - out, err = capsys.readouterr() - expected = "\n" + out, _ = capsys.readouterr() + expected = "\n" assert out == expected From c2edb71d8de4ffa70d050f2b3d02dfdd4e9cbf01 Mon Sep 17 00:00:00 2001 From: Gyatso <218940752+respondersGY@users.noreply.github.com> Date: Thu, 21 Aug 2025 14:16:28 +0200 Subject: [PATCH 6/6] Remove `errors=surrogateescape` --- flow/record/stream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flow/record/stream.py b/flow/record/stream.py index e1c10ed4..4fa665f1 100644 --- a/flow/record/stream.py +++ b/flow/record/stream.py @@ -45,7 +45,7 @@ def __init__(self, fp: BinaryIO, flush: bool = True): self.auto_flush = flush def write(self, obj: Record) -> None: - buf = repr(obj).encode(errors="surrogateescape") + b"\n" + buf = repr(obj).encode() + b"\n" self.fp.write(buf) if self.auto_flush: self.flush()