From e51c0f7d92b0c3e001cea00553cc62964bf2beb7 Mon Sep 17 00:00:00 2001 From: bbopen Date: Wed, 21 Jan 2026 10:08:50 -0800 Subject: [PATCH 1/2] feat(codec): integrate SafeCodec into Python bridge for NaN rejection Integrate SafeCodec from safe_codec.py into python_bridge.py to: - Reject NaN/Infinity values at encoding time with clear error messages - Handle numpy scalars via .item() extraction for JSON serialization - Handle pandas NaT, Timestamp, and Timedelta types properly - Convert Python sets to lists for JSON serialization Changes: - Import SafeCodec and CodecError in python_bridge.py - Create _response_codec instance with allow_nan=False - Replace json.dumps with _response_codec.encode in encode_response() - Update adversarial_module.py to use lambda (truly non-serializable) instead of set (now serializable as list) - Update test regex to accept new NaN rejection error message Fixes #95, #45, #41 Co-Authored-By: Claude Opus 4.5 --- runtime/python_bridge.py | 17 ++++++++++++++++- test/adversarial_playground.test.ts | 2 +- test/fixtures/python/adversarial_module.py | 4 +++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/runtime/python_bridge.py b/runtime/python_bridge.py index 3edbd37..94e993c 100644 --- a/runtime/python_bridge.py +++ b/runtime/python_bridge.py @@ -11,6 +11,8 @@ import uuid from pathlib import Path, PurePath +from safe_codec import SafeCodec, CodecError + # Ensure the working directory is importable so local modules can be resolved when # the bridge is launched as a script from a different directory. try: @@ -95,6 +97,14 @@ def get_codec_max_bytes(): # Why: parse once at startup to avoid per-response env lookups. CODEC_MAX_BYTES = get_codec_max_bytes() +# Why: use SafeCodec for final JSON encoding to reject NaN/Infinity and handle +# edge cases like numpy scalars. We disable SafeCodec's internal size limit since +# we use our own CODEC_MAX_BYTES logic with specific error messages. +_response_codec = SafeCodec( + allow_nan=False, + max_payload_bytes=1024 * 1024 * 1024, # 1GB, effectively unlimited for SafeCodec +) + def get_request_max_bytes(): """ @@ -779,8 +789,13 @@ def encode_response(out): Serialize the response and enforce size limits. Why: keep payload size checks outside the main loop for clarity and lint compliance. + Uses SafeCodec to reject NaN/Infinity and handle edge cases like numpy scalars. """ - payload = json.dumps(out) + try: + payload = _response_codec.encode(out) + except CodecError as exc: + # Convert CodecError to ValueError for consistent error handling + raise ValueError(str(exc)) from exc payload_bytes = len(payload.encode('utf-8')) if CODEC_MAX_BYTES is not None and payload_bytes > CODEC_MAX_BYTES: raise PayloadTooLargeError(payload_bytes, CODEC_MAX_BYTES) diff --git a/test/adversarial_playground.test.ts b/test/adversarial_playground.test.ts index 1734c0f..14aebe1 100644 --- a/test/adversarial_playground.test.ts +++ b/test/adversarial_playground.test.ts @@ -290,7 +290,7 @@ describeAdversarial('Adversarial playground', () => { try { await expect(callAdversarial(bridge, 'return_nan_payload', [])).rejects.toThrow( - /Protocol error|Invalid JSON|JSON parse failed/ + /Protocol error|Invalid JSON|JSON parse failed|Cannot serialize NaN|NaN.*not allowed/ ); } finally { await bridge.dispose(); diff --git a/test/fixtures/python/adversarial_module.py b/test/fixtures/python/adversarial_module.py index af3a4d3..e60906a 100644 --- a/test/fixtures/python/adversarial_module.py +++ b/test/fixtures/python/adversarial_module.py @@ -41,8 +41,10 @@ def return_unserializable() -> Any: """Return a non-JSON-serializable value. Why: ensure serialization failures surface as explicit errors. + Note: sets are now serialized as lists, so we return a function which + cannot be JSON serialized. """ - return {1, 2, 3} + return lambda x: x def return_circular_reference() -> list: From 09557ce1d33fbf7db9d88947164e92a165272f01 Mon Sep 17 00:00:00 2001 From: bbopen Date: Wed, 21 Jan 2026 10:21:42 -0800 Subject: [PATCH 2/2] fix: use sys.maxsize for SafeCodec limit to preserve original behavior Address Codex review feedback: The original python_bridge.py had no size limit unless TYWRAP_CODEC_MAX_BYTES was set. Using sys.maxsize preserves this behavior while letting the explicit size check in encode_response() provide the specific error message mentioning the env var name. Co-Authored-By: Claude Opus 4.5 --- runtime/python_bridge.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/runtime/python_bridge.py b/runtime/python_bridge.py index 94e993c..e82cc56 100644 --- a/runtime/python_bridge.py +++ b/runtime/python_bridge.py @@ -98,11 +98,13 @@ def get_codec_max_bytes(): CODEC_MAX_BYTES = get_codec_max_bytes() # Why: use SafeCodec for final JSON encoding to reject NaN/Infinity and handle -# edge cases like numpy scalars. We disable SafeCodec's internal size limit since -# we use our own CODEC_MAX_BYTES logic with specific error messages. +# edge cases like numpy scalars. We use sys.maxsize for SafeCodec's internal limit +# to preserve the original "no limit unless TYWRAP_CODEC_MAX_BYTES is set" behavior. +# The explicit size check in encode_response() provides the specific error message +# mentioning the env var name, which is important for debugging. _response_codec = SafeCodec( allow_nan=False, - max_payload_bytes=1024 * 1024 * 1024, # 1GB, effectively unlimited for SafeCodec + max_payload_bytes=sys.maxsize, )