From a810a11efe599b036a5ae4fa693d7c2afb8d2cae Mon Sep 17 00:00:00 2001
From: sonniki <son.nik@mail.ru>
Date: Fri, 20 Mar 2026 20:49:24 +0000
Subject: [PATCH 1/3] HelpersTask1165: Cache as pickle
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 helpers/hllm.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/helpers/hllm.py b/helpers/hllm.py
index b306d83e7..0c360fa85 100644
--- a/helpers/hllm.py
+++ b/helpers/hllm.py
@@ -256,7 +256,9 @@ def _call_api_sync(
     return completion_obj
 
 
-@hcacsimp.simple_cache(write_through=True, exclude_keys=["client", "cache_mode"])
+@hcacsimp.simple_cache(
+    cache_type="pickle", write_through=True, exclude_keys=["client", "cache_mode"]
+)
 def _call_structured_api_sync(
     # pylint: disable=unused-argument
     # This is needed to support caching.
@@ -270,7 +272,7 @@ def _call_structured_api_sync(
     *,
     images_as_base64: Optional[Tuple[str, ...]] = None,
     **create_kwargs,
-) -> Any:
+) -> T:
     """
     Make a non-streaming structured API call.
 
@@ -278,7 +280,7 @@ def _call_structured_api_sync(
 
     :param client: LLM client
     :param response_format: expected structured output format
-    :return: OpenAI Response object with parsed output
+    :return: parsed output as the specified Pydantic model
     """
     user_input = build_responses_input(
         user_prompt, images_as_base64=images_as_base64
@@ -291,7 +293,9 @@ def _call_structured_api_sync(
         text_format=response_format,
         **create_kwargs,
     )
-    return response
+    # Extract the parsed output.
+    parsed_output: T = response.output_parsed
+    return parsed_output
 
 
 # #############################################################################
@@ -572,7 +576,7 @@ def get_structured_completion(
             f"Got provider_name='{llm_client.provider_name}'."
         )
     # Retrieve a structured response.
-    response = _call_structured_api_sync(
+    parsed_output: T = _call_structured_api_sync(
         cache_mode=cache_mode,
         client=llm_client.client,
         model=llm_client.model,
@@ -583,14 +587,13 @@ def get_structured_completion(
         images_as_base64=images_as_base64,
         **create_kwargs,
     )
-    parsed_output: T = response.output_parsed
-    # Track costs.
+    # Note: Cost tracking is not supported for structured completions when caching
+    # is enabled, since we only cache the parsed output, not the full response object
+    # with token usage information. Cached responses have zero cost anyway.
     if cost_tracker is not None:
-        hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker)
-        cost = cost_tracker.calculate_cost(response)
-        cost_tracker.accumulate_cost(cost)
-        if print_cost:
-            _LOG.info("cost=%.6f", cost)
+        _LOG.debug(
+            "Cost tracking not available for cached structured completions"
+        )
     return parsed_output
 
 

From 89e4501966ff5fb79b2c0a573114196aa8ea016a Mon Sep 17 00:00:00 2001
From: sonniki <son.nik@mail.ru>
Date: Fri, 20 Mar 2026 20:56:40 +0000
Subject: [PATCH 2/3] HelpersTask1165: Fix cost tracking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 helpers/hllm.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/helpers/hllm.py b/helpers/hllm.py
index 0c360fa85..f9873cd45 100644
--- a/helpers/hllm.py
+++ b/helpers/hllm.py
@@ -271,6 +271,8 @@ def _call_structured_api_sync(
     response_format: type[T],
     *,
     images_as_base64: Optional[Tuple[str, ...]] = None,
+    cost_tracker: Optional[hllmcost.LLMCostTracker] = None,
+    print_cost: bool = False,
     **create_kwargs,
 ) -> T:
     """
@@ -295,6 +297,13 @@ def _call_structured_api_sync(
     )
     # Extract the parsed output.
     parsed_output: T = response.output_parsed
+    # Track costs.
+    if cost_tracker is not None:
+        hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker)
+        cost = cost_tracker.calculate_cost(response)
+        cost_tracker.accumulate_cost(cost)
+        if print_cost:
+            _LOG.info("cost=%.6f", cost)
     return parsed_output
 
 
@@ -585,15 +594,10 @@ def get_structured_completion(
         temperature=temperature,
         response_format=response_format,
         images_as_base64=images_as_base64,
+        cost_tracker=cost_tracker,
+        print_cost=print_cost,
         **create_kwargs,
     )
-    # Note: Cost tracking is not supported for structured completions when caching
-    # is enabled, since we only cache the parsed output, not the full response object
-    # with token usage information. Cached responses have zero cost anyway.
-    if cost_tracker is not None:
-        _LOG.debug(
-            "Cost tracking not available for cached structured completions"
-        )
     return parsed_output
 
 

From 688e226483bb06f804ee7a0c5085a3be3b85e912 Mon Sep 17 00:00:00 2001
From: sonniki <son.nik@mail.ru>
Date: Fri, 20 Mar 2026 21:03:03 +0000
Subject: [PATCH 3/3] HelpersTask1165: Exclude key from cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pre-commit checks:
All checks passed ✅
---
 helpers/hllm.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/helpers/hllm.py b/helpers/hllm.py
index f9873cd45..5375121ba 100644
--- a/helpers/hllm.py
+++ b/helpers/hllm.py
@@ -257,7 +257,9 @@ def _call_api_sync(
 
 
 @hcacsimp.simple_cache(
-    cache_type="pickle", write_through=True, exclude_keys=["client", "cache_mode"]
+    cache_type="pickle",
+    write_through=True,
+    exclude_keys=["client", "cache_mode", "cost_tracker", "print_cost"],
 )
 def _call_structured_api_sync(
     # pylint: disable=unused-argument