From a810a11efe599b036a5ae4fa693d7c2afb8d2cae Mon Sep 17 00:00:00 2001 From: sonniki Date: Fri, 20 Mar 2026 20:49:24 +0000 Subject: [PATCH 1/3] HelpersTask1165: Cache as pickle MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hllm.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/helpers/hllm.py b/helpers/hllm.py index b306d83e7..0c360fa85 100644 --- a/helpers/hllm.py +++ b/helpers/hllm.py @@ -256,7 +256,9 @@ def _call_api_sync( return completion_obj -@hcacsimp.simple_cache(write_through=True, exclude_keys=["client", "cache_mode"]) +@hcacsimp.simple_cache( + cache_type="pickle", write_through=True, exclude_keys=["client", "cache_mode"] +) def _call_structured_api_sync( # pylint: disable=unused-argument # This is needed to support caching. @@ -270,7 +272,7 @@ def _call_structured_api_sync( *, images_as_base64: Optional[Tuple[str, ...]] = None, **create_kwargs, -) -> Any: +) -> T: """ Make a non-streaming structured API call. @@ -278,7 +280,7 @@ def _call_structured_api_sync( :param client: LLM client :param response_format: expected structured output format - :return: OpenAI Response object with parsed output + :return: parsed output as the specified Pydantic model """ user_input = build_responses_input( user_prompt, images_as_base64=images_as_base64 @@ -291,7 +293,9 @@ def _call_structured_api_sync( text_format=response_format, **create_kwargs, ) - return response + # Extract the parsed output. + parsed_output: T = response.output_parsed + return parsed_output # ############################################################################# @@ -572,7 +576,7 @@ def get_structured_completion( f"Got provider_name='{llm_client.provider_name}'." ) # Retrieve a structured response. - response = _call_structured_api_sync( + parsed_output: T = _call_structured_api_sync( cache_mode=cache_mode, client=llm_client.client, model=llm_client.model, @@ -583,14 +587,13 @@ def get_structured_completion( images_as_base64=images_as_base64, **create_kwargs, ) - parsed_output: T = response.output_parsed - # Track costs. + # Note: Cost tracking is not supported for structured completions when caching + # is enabled, since we only cache the parsed output, not the full response object + # with token usage information. Cached responses have zero cost anyway. if cost_tracker is not None: - hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) - cost = cost_tracker.calculate_cost(response) - cost_tracker.accumulate_cost(cost) - if print_cost: - _LOG.info("cost=%.6f", cost) + _LOG.debug( + "Cost tracking not available for cached structured completions" + ) return parsed_output From 89e4501966ff5fb79b2c0a573114196aa8ea016a Mon Sep 17 00:00:00 2001 From: sonniki Date: Fri, 20 Mar 2026 20:56:40 +0000 Subject: [PATCH 2/3] HelpersTask1165: Fix cost tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hllm.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/helpers/hllm.py b/helpers/hllm.py index 0c360fa85..f9873cd45 100644 --- a/helpers/hllm.py +++ b/helpers/hllm.py @@ -271,6 +271,8 @@ def _call_structured_api_sync( response_format: type[T], *, images_as_base64: Optional[Tuple[str, ...]] = None, + cost_tracker: Optional[hllmcost.LLMCostTracker] = None, + print_cost: bool = False, **create_kwargs, ) -> T: """ @@ -295,6 +297,13 @@ def _call_structured_api_sync( ) # Extract the parsed output. parsed_output: T = response.output_parsed + # Track costs. + if cost_tracker is not None: + hdbg.dassert_isinstance(cost_tracker, hllmcost.LLMCostTracker) + cost = cost_tracker.calculate_cost(response) + cost_tracker.accumulate_cost(cost) + if print_cost: + _LOG.info("cost=%.6f", cost) return parsed_output @@ -585,15 +594,10 @@ def get_structured_completion( temperature=temperature, response_format=response_format, images_as_base64=images_as_base64, + cost_tracker=cost_tracker, + print_cost=print_cost, **create_kwargs, ) - # Note: Cost tracking is not supported for structured completions when caching - # is enabled, since we only cache the parsed output, not the full response object - # with token usage information. Cached responses have zero cost anyway. - if cost_tracker is not None: - _LOG.debug( - "Cost tracking not available for cached structured completions" - ) return parsed_output From 688e226483bb06f804ee7a0c5085a3be3b85e912 Mon Sep 17 00:00:00 2001 From: sonniki Date: Fri, 20 Mar 2026 21:03:03 +0000 Subject: [PATCH 3/3] HelpersTask1165: Exclude key from cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pre-commit checks: All checks passed ✅ --- helpers/hllm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/helpers/hllm.py b/helpers/hllm.py index f9873cd45..5375121ba 100644 --- a/helpers/hllm.py +++ b/helpers/hllm.py @@ -257,7 +257,9 @@ def _call_api_sync( @hcacsimp.simple_cache( - cache_type="pickle", write_through=True, exclude_keys=["client", "cache_mode"] + cache_type="pickle", + write_through=True, + exclude_keys=["client", "cache_mode", "cost_tracker"], ) def _call_structured_api_sync( # pylint: disable=unused-argument