diff --git a/pyproject.toml b/pyproject.toml
index 372abd1b05..3a47531ed8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -491,7 +491,6 @@ exclude = [
     "^src/llama_stack/core/routing_tables/",
     # Provider directories - Inline
     "^src/llama_stack/providers/inline/datasetio/localfs/",
-    "^src/llama_stack/providers/inline/responses/builtin/",
     "^src/llama_stack/providers/inline/safety/code_scanner/",
     "^src/llama_stack/providers/inline/safety/llama_guard/",
     "^src/llama_stack/providers/inline/scoring/basic/",
diff --git a/src/llama_stack/providers/inline/responses/builtin/responses/openai_responses.py b/src/llama_stack/providers/inline/responses/builtin/responses/openai_responses.py
index be93f04cec..91c7a65404 100644
--- a/src/llama_stack/providers/inline/responses/builtin/responses/openai_responses.py
+++ b/src/llama_stack/providers/inline/responses/builtin/responses/openai_responses.py
@@ -632,7 +632,7 @@ async def create_openai_response(
         presence_penalty: float | None = None,
         extra_body: dict | None = None,
         stream_options: ResponseStreamOptions | None = None,
-    ):
+    ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         stream = bool(stream)
         background = bool(background)
         text = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) if text is None else text
@@ -841,7 +841,9 @@ async def _create_background_response(
         created_at = int(time.time())

         # Normalize input to list format for storage
-        input_items = [OpenAIResponseMessage(content=input, role="user")] if isinstance(input, str) else input
+        input_items: list[OpenAIResponseInput] = (
+            [OpenAIResponseMessage(content=input, role="user")] if isinstance(input, str) else input
+        )

         # Create initial queued response
         queued_response = OpenAIResponseObject(
diff --git a/src/llama_stack/providers/inline/responses/builtin/responses/streaming.py b/src/llama_stack/providers/inline/responses/builtin/responses/streaming.py
index 734e1b90ac..1888d4647f 100644
--- a/src/llama_stack/providers/inline/responses/builtin/responses/streaming.py
+++ b/src/llama_stack/providers/inline/responses/builtin/responses/streaming.py
@@ -28,6 +28,7 @@
     OpenAIAssistantMessageParam,
     OpenAIChatCompletion,
     OpenAIChatCompletionChunk,
+    OpenAIChatCompletionCustomToolCall,
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAIChatCompletionResponseMessage,
     OpenAIChatCompletionToolCall,
@@ -176,7 +177,7 @@ def extract_openai_error(exc: Exception) -> tuple[str, str]:
         raw_message = body.get("message")

         if raw_code and isinstance(raw_code, str):
-            final_code: str = _RESPONSES_API_ERROR_CODES[raw_code] if raw_code in _RESPONSES_API_ERROR_CODES else raw_code
+            final_code: str = _RESPONSES_API_ERROR_CODES.get(raw_code, raw_code)
         else:
             final_code = "server_error"
@@ -424,10 +425,11 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
             async for stream_event in self._process_tools(output_messages):
                 yield stream_event

-        chat_tool_choice = None
+        chat_tool_choice: str | dict[str, Any] | None = None
         # Track allowed tools for filtering (persists across iterations)
         allowed_tool_names: set[str] | None = None
-        if self.ctx.tool_choice and len(self.ctx.chat_tools) > 0:
+        # check truthiness of self.ctx.chat_tools to avoid len(None)
+        if self.ctx.tool_choice and self.ctx.chat_tools:
             processed_tool_choice = await _process_tool_choice(
                 self.ctx.chat_tools,
                 self.ctx.tool_choice,
@@ -484,7 +486,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
             )
             # Filter tools to only allowed ones if tool_choice specified an allowed list
             effective_tools = self.ctx.chat_tools
-            if allowed_tool_names is not None:
+            if allowed_tool_names is not None and self.ctx.chat_tools is not None:
                 effective_tools = [
                     tool
                     for tool in self.ctx.chat_tools
@@ -525,7 +527,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
                 parallel_tool_calls=effective_parallel_tool_calls,
                 reasoning_effort=self.reasoning.effort if self.reasoning else None,
                 safety_identifier=self.safety_identifier,
-                service_tier=self.service_tier,
+                service_tier=ServiceTier(self.service_tier) if self.service_tier else None,
                 max_completion_tokens=remaining_output_tokens,
                 prompt_cache_key=self.prompt_cache_key,
                 top_logprobs=self.top_logprobs,
@@ -1224,13 +1226,14 @@ async def _process_streaming_chunks(
             message_item_id=message_item_id,
             tool_call_item_ids=tool_call_item_ids,
             content_part_emitted=content_part_emitted,
-            logprobs=OpenAIChoiceLogprobs(content=chat_response_logprobs) if chat_response_logprobs else None,
+            logprobs=chat_response_logprobs if chat_response_logprobs else None,
             service_tier=chunk_service_tier,
         )

     def _build_chat_completion(self, result: ChatCompletionResult) -> OpenAIChatCompletion:
         """Build OpenAIChatCompletion from ChatCompletionResult."""
         # Convert collected chunks to complete response
+        tool_calls: list[OpenAIChatCompletionToolCall | OpenAIChatCompletionCustomToolCall] | None
         if result.tool_calls:
             tool_calls = [result.tool_calls[i] for i in sorted(result.tool_calls.keys())]
         else:
@@ -1247,7 +1250,7 @@ def _build_chat_completion(self, result: ChatCompletionResult) -> OpenAIChatComp
                     message=assistant_message,
                     finish_reason=result.finish_reason,
                     index=0,
-                    logprobs=result.logprobs,
+                    logprobs=OpenAIChoiceLogprobs(content=result.logprobs) if result.logprobs else None,
                 )
             ],
             created=result.created,
@@ -1409,8 +1412,8 @@ def make_openai_tool(tool_name: str, tool: ToolDef) -> ChatCompletionToolParam:
         for input_tool in tools:
             if input_tool.type == "function":
                 self.ctx.chat_tools.append(
-                    ChatCompletionToolParam(type="function", function=input_tool.model_dump(exclude_none=True))
-                )  # type: ignore[typeddict-item,arg-type]  # Dict compatible with FunctionDefinition
+                    ChatCompletionToolParam(type="function", function=input_tool.model_dump(exclude_none=True))  # type: ignore[typeddict-item,arg-type]  # Dict compatible with FunctionDefinition
+                )
             elif input_tool.type in WebSearchToolTypes:
                 tool_name = "web_search"
                 # Need to access tool_groups_api from tool_executor
@@ -1446,7 +1449,8 @@ async def _process_mcp_tool(
     ) -> AsyncIterator[OpenAIResponseObjectStream]:
         """Process an MCP tool configuration and emit appropriate streaming events."""
         # Resolve connector_id to server_url if provided
-        mcp_tool = await resolve_mcp_connector_id(mcp_tool, self.connectors_api)
+        if self.connectors_api is not None:
+            mcp_tool = await resolve_mcp_connector_id(mcp_tool, self.connectors_api)

         # Emit mcp_list_tools.in_progress
         self.sequence_number += 1
@@ -1467,15 +1471,21 @@
         # Call list_mcp_tools
         tool_defs = None
         list_id = f"mcp_list_{uuid.uuid4()}"
+
+        # Get session manager from tool_executor if available (fix for #4452)
+        session_manager = getattr(self.tool_executor, "mcp_session_manager", None)
+
+        if not mcp_tool.server_url:
+            raise ValueError(
+                f"Failed to list MCP tools for server '{mcp_tool.server_label}': server_url is not set"
+            )
+
         attributes = {
             "server_label": mcp_tool.server_label,
             "server_url": mcp_tool.server_url,
             "mcp_list_tools_id": list_id,
         }

-        # Get session manager from tool_executor if available (fix for #4452)
-        session_manager = getattr(self.tool_executor, "mcp_session_manager", None)
-
         # TODO: follow semantic conventions for Open Telemetry tool spans
         # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
         with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):
@@ -1681,7 +1691,7 @@ async def _process_tool_choice(
         elif isinstance(tool_choice, OpenAIResponseInputToolChoiceAllowedTools):
             # ensure that specified tool choices are available in the chat tools, if not, remove them from the list
-            final_tools = []
+            final_tools: list[dict[str, Any]] = []
             for tool in tool_choice.tools:
                 match tool.get("type"):
                     case "function":
@@ -1714,19 +1724,18 @@
         else:
             # Handle specific tool choice by type
             # Each case validates the tool exists in chat_tools before returning
-            tool_name = getattr(tool_choice, "name", None)
             match tool_choice:
                 case OpenAIResponseInputToolChoiceCustomTool():
-                    if tool_name and tool_name not in chat_tool_names:
-                        logger.warning(f"Tool {tool_name} not found in chat tools")
+                    if tool_choice.name not in chat_tool_names:
+                        logger.warning(f"Tool {tool_choice.name} not found in chat tools")
                         return None
-                    return OpenAIChatCompletionToolChoiceCustomTool(name=tool_name)
+                    return OpenAIChatCompletionToolChoiceCustomTool(name=tool_choice.name)

                 case OpenAIResponseInputToolChoiceFunctionTool():
-                    if tool_name and tool_name not in chat_tool_names:
-                        logger.warning(f"Tool {tool_name} not found in chat tools")
+                    if tool_choice.name not in chat_tool_names:
+                        logger.warning(f"Tool {tool_choice.name} not found in chat tools")
                         return None
-                    return OpenAIChatCompletionToolChoiceFunctionTool(name=tool_name)
+                    return OpenAIChatCompletionToolChoiceFunctionTool(name=tool_choice.name)

                 case OpenAIResponseInputToolChoiceFileSearch():
                     if "file_search" not in chat_tool_names:
@@ -1741,21 +1750,25 @@
                    return OpenAIChatCompletionToolChoiceFunctionTool(name="web_search")

                case OpenAIResponseInputToolChoiceMCPTool():
-                    tool_choice = convert_mcp_tool_choice(
+                    mcp_result = convert_mcp_tool_choice(
                        chat_tool_names,
                        tool_choice.server_label,
                        server_label_to_tools,
-                        tool_name,
+                        tool_choice.name,
                    )
-                    if isinstance(tool_choice, dict):
+                    if isinstance(mcp_result, dict):
                        # for single tool choice, return as function tool choice
-                        return OpenAIChatCompletionToolChoiceFunctionTool(name=tool_choice["function"]["name"])
-                    elif isinstance(tool_choice, list):
+                        function_info = mcp_result["function"]
+                        if not isinstance(function_info, dict):
+                            return None
+                        return OpenAIChatCompletionToolChoiceFunctionTool(name=function_info["name"])
+                    elif isinstance(mcp_result, list):
                        # for multiple tool choices, return as allowed tools
                        return OpenAIChatCompletionToolChoiceAllowedTools(
-                            tools=tool_choice,
+                            tools=mcp_result,
                            mode="required",
                        )
+        return None


 async def resolve_mcp_connector_id(
diff --git a/src/llama_stack/providers/inline/responses/builtin/responses/tool_executor.py b/src/llama_stack/providers/inline/responses/builtin/responses/tool_executor.py
index e6f61a4b3f..33135f7f7f 100644
--- a/src/llama_stack/providers/inline/responses/builtin/responses/tool_executor.py
+++ b/src/llama_stack/providers/inline/responses/builtin/responses/tool_executor.py
@@ -328,6 +328,8 @@ async def _execute_tool(
             from llama_stack.providers.utils.tools.mcp import invoke_mcp_tool

             mcp_tool = mcp_tool_to_server[function_name]
+            if not mcp_tool.server_url:
+                raise ValueError(f"Failed to invoke MCP tool {function_name}: server_url is not set")
attributes = { "server_label": mcp_tool.server_label, "server_url": mcp_tool.server_url, diff --git a/src/llama_stack/providers/inline/responses/builtin/responses/types.py b/src/llama_stack/providers/inline/responses/builtin/responses/types.py index 83ae4a7bf4..14ccb6368b 100644 --- a/src/llama_stack/providers/inline/responses/builtin/responses/types.py +++ b/src/llama_stack/providers/inline/responses/builtin/responses/types.py @@ -38,7 +38,8 @@ def _json_equal(a: str, b: str) -> bool: """Compare two JSON strings by value, falling back to string comparison.""" try: - return json.loads(a) == json.loads(b) + # json.loads() returns Any, so == on two Any values is also Any + return cast(bool, json.loads(a) == json.loads(b)) except (json.JSONDecodeError, TypeError): return a == b diff --git a/src/llama_stack/providers/inline/responses/builtin/responses/utils.py b/src/llama_stack/providers/inline/responses/builtin/responses/utils.py index 12d9af0778..5e0f150f52 100644 --- a/src/llama_stack/providers/inline/responses/builtin/responses/utils.py +++ b/src/llama_stack/providers/inline/responses/builtin/responses/utils.py @@ -574,7 +574,7 @@ def convert_mcp_tool_choice( server_label: str | None = None, server_label_to_tools: dict[str, list[str]] | None = None, tool_name: str | None = None, -) -> dict[str, str] | list[dict[str, str]]: +) -> dict[str, str | dict[str, str]] | list[dict[str, str | dict[str, str]]] | None: """Convert a responses tool choice of type mcp to a chat completions compatible function tool choice.""" if tool_name: @@ -589,6 +589,8 @@ def convert_mcp_tool_choice( tool_names = server_label_to_tools.get(server_label, []) if not tool_names: return None - matching_tools = [{"type": "function", "function": {"name": tool_name}} for tool_name in tool_names] + matching_tools: list[dict[str, str | dict[str, str]]] = [ + {"type": "function", "function": {"name": name}} for name in tool_names + ] return matching_tools return [] diff --git a/src/llama_stack/providers/utils/responses/responses_store.py b/src/llama_stack/providers/utils/responses/responses_store.py index 764b705e65..8bbbaca864 100644 --- a/src/llama_stack/providers/utils/responses/responses_store.py +++ b/src/llama_stack/providers/utils/responses/responses_store.py @@ -20,6 +20,7 @@ OpenAIResponseObjectWithInput, Order, ResponseInputItemNotFoundError, + ResponseItemInclude, ResponseNotFoundError, ) from llama_stack_api.internal.sqlstore import ColumnDefinition, ColumnType @@ -255,7 +256,7 @@ async def list_response_input_items( response_id: str, after: str | None = None, before: str | None = None, - include: list[str] | None = None, + include: list[ResponseItemInclude] | None = None, limit: int | None = 20, order: Order | None = Order.desc, ) -> ListOpenAIResponseInputItem:
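
A note on the `cast(bool, ...)` change in `types.py`: `json.loads()` is typed as returning `Any`, and `==` between two `Any` values is itself `Any`, which strict mypy settings (for example `warn_return_any`) reject in a function annotated `-> bool`. The following standalone sketch, not part of the patch, shows the pattern in isolation; the helper name mirrors `_json_equal` for illustration only.

```python
# Standalone sketch of the cast() pattern used in types.py. Only the standard
# library is assumed; json_equal mirrors _json_equal for illustration.
import json
from typing import cast


def json_equal(a: str, b: str) -> bool:
    """Compare two JSON strings by value, falling back to string comparison."""
    try:
        # json.loads() returns Any, so `==` on the two results is also Any;
        # cast() satisfies mypy's warn_return_any without changing behavior.
        return cast(bool, json.loads(a) == json.loads(b))
    except (json.JSONDecodeError, TypeError):
        return a == b


if __name__ == "__main__":
    print(json_equal('{"a": 1, "b": 2}', '{"b": 2, "a": 1}'))  # True: equal by value
    print(json_equal("not json", "not json"))  # True: string-comparison fallback
```

Similarly, the widened return type of `convert_mcp_tool_choice` means callers now receive a union (`str | dict[str, str]`) for each dict value and must narrow it before indexing, which is why `_process_tool_choice` checks `isinstance(function_info, dict)` before reading `"name"`. A minimal sketch of that narrowing, assuming the `{"type": "function", "function": {"name": ...}}` payload shape from the diff; the function and variable names here are illustrative, not part of the patch.

```python
# Sketch of the union narrowing the widened return type requires.
def extract_function_name(tool: dict[str, str | dict[str, str]]) -> str | None:
    function_info = tool.get("function")
    # function_info is `str | dict[str, str] | None`; isinstance() narrows it
    # to dict before the "name" lookup, mirroring the check in streaming.py.
    if not isinstance(function_info, dict):
        return None
    return function_info.get("name")


print(extract_function_name({"type": "function", "function": {"name": "web_search"}}))  # web_search
```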