From 5fd27305e05621b0e52e9118b3199e40fbf3e616 Mon Sep 17 00:00:00 2001 From: Emilio Garcia Date: Wed, 25 Mar 2026 13:03:50 -0400 Subject: [PATCH] fix: make tool call assertions flexible to prevent CI flakes Some models return multiple parallel tool calls for a single-tool prompt, which is a valid API response. The previous assertions required exactly one function call, causing intermittent CI failures when models produced logically correct but duplicated tool invocations. This relaxes the assertions to accept one or more function calls and responds to all of them in follow-up turns, preventing the "tool_call_id not responded to" error that occurs when only the first call is acknowledged. Signed-off-by: Emilio Garcia Made-with: Cursor --- .../responses/test_tool_responses.py | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/tests/integration/responses/test_tool_responses.py b/tests/integration/responses/test_tool_responses.py index 427281c251..298b125059 100644 --- a/tests/integration/responses/test_tool_responses.py +++ b/tests/integration/responses/test_tool_responses.py @@ -462,10 +462,11 @@ def test_response_non_streaming_custom_tool(responses_client, text_model_id, cas tools=case.tools, stream=False, ) - assert len(response.output) == 1 - assert response.output[0].type == "function_call" - assert response.output[0].status == "completed" - assert response.output[0].name == "get_weather" + assert len(response.output) >= 1 + function_calls = [o for o in response.output if o.type == "function_call"] + assert len(function_calls) >= 1 + assert function_calls[0].status == "completed" + assert function_calls[0].name == "get_weather" @pytest.mark.parametrize("case", custom_tool_test_cases) @@ -582,16 +583,16 @@ def test_function_call_output_list_text(responses_client, text_model_id): tools=tools, stream=False, ) - assert len(response.output) == 1 - assert response.output[0].type == "function_call" - call_id = response.output[0].call_id + function_calls = [o for o in response.output if o.type == "function_call"] + assert len(function_calls) >= 1 inputs = [ { "type": "function_call_output", - "call_id": call_id, + "call_id": fc.call_id, "output": [{"type": "input_text", "text": "It is sunny and 22 degrees Celsius in Paris."}], - }, + } + for fc in function_calls ] response2 = responses_client.responses.create( model=text_model_id, @@ -600,8 +601,8 @@ def test_function_call_output_list_text(responses_client, text_model_id): stream=False, previous_response_id=response.id, ) - assert len(response2.output) == 1 - assert response2.output[0].type == "message" + messages = [o for o in response2.output if o.type == "message"] + assert len(messages) >= 1 assert response2.output_text @@ -631,19 +632,19 @@ def test_function_call_output_list_text_multi_block(responses_client, text_model tools=tools, stream=False, ) - assert len(response.output) == 1 - assert response.output[0].type == "function_call" - call_id = response.output[0].call_id + function_calls = [o for o in response.output if o.type == "function_call"] + assert len(function_calls) >= 1 inputs = [ { "type": "function_call_output", - "call_id": call_id, + "call_id": fc.call_id, "output": [ {"type": "input_text", "text": "Current conditions: overcast skies."}, {"type": "input_text", "text": "Temperature: 15 degrees Celsius."}, ], - }, + } + for fc in function_calls ] response2 = responses_client.responses.create( model=text_model_id, @@ -652,8 +653,8 @@ def test_function_call_output_list_text_multi_block(responses_client, text_model stream=False, previous_response_id=response.id, ) - assert len(response2.output) == 1 - assert response2.output[0].type == "message" + messages = [o for o in response2.output if o.type == "message"] + assert len(messages) >= 1 assert response2.output_text