From 4306f1a28d83d8e9c4db9ec3e0679eae94c7ad6b Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 16:04:25 +0000 Subject: [PATCH 01/11] response api support --- src/sequrity/__init__.py | 4 + src/sequrity/control/_client.py | 5 + src/sequrity/control/resources/__init__.py | 3 + src/sequrity/control/resources/responses.py | 249 +++++ src/sequrity/types/responses/__init__.py | 258 +++++ src/sequrity/types/responses/request.py | 1002 +++++++++++++++++++ src/sequrity/types/responses/response.py | 578 +++++++++++ test/control/test_responses.py | 268 +++++ 8 files changed, 2367 insertions(+) create mode 100644 src/sequrity/control/resources/responses.py create mode 100644 src/sequrity/types/responses/__init__.py create mode 100644 src/sequrity/types/responses/request.py create mode 100644 src/sequrity/types/responses/response.py create mode 100644 test/control/test_responses.py diff --git a/src/sequrity/__init__.py b/src/sequrity/__init__.py index 6b5a318..f45b541 100644 --- a/src/sequrity/__init__.py +++ b/src/sequrity/__init__.py @@ -23,6 +23,8 @@ from .types.chat_completion.response import ChatCompletionResponse from .types.messages.request import AnthropicMessageRequest from .types.messages.response import AnthropicMessageResponse +from .types.responses.request import ResponsesRequest +from .types.responses.response import ResponsesResponse try: from ._version import __version__ @@ -47,6 +49,8 @@ "ChatCompletionResponse", "AnthropicMessageRequest", "AnthropicMessageResponse", + "ResponsesRequest", + "ResponsesResponse", # Version "__version__", ] diff --git a/src/sequrity/control/_client.py b/src/sequrity/control/_client.py index d361f5e..d680c3c 100644 --- a/src/sequrity/control/_client.py +++ b/src/sequrity/control/_client.py @@ -12,6 +12,7 @@ from .resources.langgraph import LangGraphResource from .resources.messages import AsyncMessagesResource, MessagesResource from .resources.policy import AsyncPolicyResource, PolicyResource +from 
.resources.responses import AsyncResponsesResource, ResponsesResource class ControlClient: @@ -57,6 +58,9 @@ def __init__( self.policy = PolicyResource(self._transport) """Policy generation.""" + self.responses = ResponsesResource(self._transport) + """OpenAI Responses API.""" + self.langgraph = LangGraphResource(self._transport) """LangGraph execution.""" @@ -156,6 +160,7 @@ def __init__( self.chat = AsyncChatResource(self._transport) self.messages = AsyncMessagesResource(self._transport) + self.responses = AsyncResponsesResource(self._transport) self.policy = AsyncPolicyResource(self._transport) # -- Session management -------------------------------------------------- diff --git a/src/sequrity/control/resources/__init__.py b/src/sequrity/control/resources/__init__.py index 72a2db3..3a58e31 100644 --- a/src/sequrity/control/resources/__init__.py +++ b/src/sequrity/control/resources/__init__.py @@ -3,6 +3,7 @@ from .chat import AsyncChatResource, ChatResource from .messages import AsyncMessagesResource, MessagesResource from .policy import AsyncPolicyResource, PolicyResource +from .responses import AsyncResponsesResource, ResponsesResource __all__ = [ "ChatResource", @@ -11,4 +12,6 @@ "AsyncMessagesResource", "PolicyResource", "AsyncPolicyResource", + "ResponsesResource", + "AsyncResponsesResource", ] diff --git a/src/sequrity/control/resources/responses.py b/src/sequrity/control/resources/responses.py new file mode 100644 index 0000000..6934a84 --- /dev/null +++ b/src/sequrity/control/resources/responses.py @@ -0,0 +1,249 @@ +"""OpenAI Responses API resource.""" + +from __future__ import annotations + +from typing import Literal + +from ..._sentinel import NOT_GIVEN, _NotGiven +from ...types.enums import LlmServiceProvider, LlmServiceProviderStr, RestApiType +from ...types.responses.request import ( + ConversationParam, + ReasoningParam, + ResponsePromptParam, + ResponsesRequest, + ResponseTextConfigParam, + StreamOptionsParam, + ToolChoiceFunctionParam, + 
ToolParam, +) +from ...types.responses.response import ResponsesResponse +from .._transport import ControlAsyncTransport, ControlSyncTransport +from ..types.headers import FeaturesHeader, FineGrainedConfigHeader, SecurityPolicyHeader + + +class ResponsesResource: + """OpenAI Responses API — ``client.control.responses``.""" + + def __init__(self, transport: ControlSyncTransport) -> None: + self._transport = transport + + def create( + self, + model: str, + *, + # Responses API parameters + input: str | list | None = None, + instructions: str | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | dict | None = None, + stream: bool | None = None, + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + reasoning: ReasoningParam | dict | None = None, + text: ResponseTextConfigParam | dict | None = None, + metadata: dict[str, str] | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + store: bool | None = None, + truncation: Literal["auto", "disabled"] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + background: bool | None = None, + conversation: str | ConversationParam | dict | None = None, + prompt: ResponsePromptParam | dict | None = None, + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None, + stream_options: StreamOptionsParam | dict | None = None, + top_logprobs: int | None = None, + timeout: float | None = None, + # Sequrity overrides (NOT_GIVEN -> client defaults) + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + 
endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> ResponsesResponse: + """Send a Responses API request through Sequrity's secure orchestrator. + + Args: + model: The LLM model identifier (e.g., ``"gpt-4o"``, ``"o3"``). + input: Text, image, or file inputs to the model. + instructions: A system (or developer) message. + tools: List of tools available to the model. + tool_choice: How the model should select which tool to use. + stream: Whether to stream the response. + temperature: Sampling temperature (0-2). + top_p: Nucleus sampling parameter. + max_output_tokens: Upper bound for generated tokens. + reasoning: Configuration for reasoning models. + text: Text response format configuration. + metadata: Key-value pairs attached to the response. + previous_response_id: ID of the previous response for multi-turn. + include: Additional output data to include. + store: Whether to store the response for later retrieval. + truncation: Truncation strategy for the response. + parallel_tool_calls: Whether to allow parallel tool execution. + max_tool_calls: Maximum number of calls to built-in tools. + background: Whether to run the response in the background. + conversation: Conversation context. + prompt: Prompt template reference. + service_tier: Processing tier for serving the request. + stream_options: Options for streaming responses. + top_logprobs: Number of most likely tokens to return (0-20). + timeout: Client-side timeout in seconds. + provider: LLM service provider override. + llm_api_key: LLM provider API key override. + features: Security features override. + security_policy: Security policy override. + fine_grained_config: Fine-grained config override. + endpoint_type: Endpoint type override. + session_id: Explicit session ID override. + + Returns: + Parsed ``ResponsesResponse`` with ``session_id`` populated. 
+ """ + payload = ResponsesRequest.model_validate( + { + "model": model, + "input": input, + "instructions": instructions, + "tools": tools, + "tool_choice": tool_choice, + "stream": stream, + "temperature": temperature, + "top_p": top_p, + "max_output_tokens": max_output_tokens, + "reasoning": reasoning, + "text": text, + "metadata": metadata, + "previous_response_id": previous_response_id, + "include": include, + "store": store, + "truncation": truncation, + "parallel_tool_calls": parallel_tool_calls, + "max_tool_calls": max_tool_calls, + "background": background, + "conversation": conversation, + "prompt": prompt, + "service_tier": service_tier, + "stream_options": stream_options, + "top_logprobs": top_logprobs, + "timeout": timeout, + } + ).model_dump(exclude_none=True) + + url = self._transport.build_url( + RestApiType.RESPONSES, + provider=provider, + endpoint_type=endpoint_type, + ) + + response = self._transport.request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + + result = ResponsesResponse.model_validate(response.json()) + result.session_id = response.headers.get("X-Session-ID") + return result + + +class AsyncResponsesResource: + """Async variant of :class:`ResponsesResource`.""" + + def __init__(self, transport: ControlAsyncTransport) -> None: + self._transport = transport + + async def create( + self, + model: str, + *, + input: str | list | None = None, + instructions: str | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | dict | None = None, + stream: bool | None = None, + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + reasoning: ReasoningParam | dict | None = None, + text: ResponseTextConfigParam | dict | None = None, + metadata: dict[str, str] | None = None, + 
previous_response_id: str | None = None, + include: list[str] | None = None, + store: bool | None = None, + truncation: Literal["auto", "disabled"] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + background: bool | None = None, + conversation: str | ConversationParam | dict | None = None, + prompt: ResponsePromptParam | dict | None = None, + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None, + stream_options: StreamOptionsParam | dict | None = None, + top_logprobs: int | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> ResponsesResponse: + """Async variant of :meth:`ResponsesResource.create`.""" + payload = ResponsesRequest.model_validate( + { + "model": model, + "input": input, + "instructions": instructions, + "tools": tools, + "tool_choice": tool_choice, + "stream": stream, + "temperature": temperature, + "top_p": top_p, + "max_output_tokens": max_output_tokens, + "reasoning": reasoning, + "text": text, + "metadata": metadata, + "previous_response_id": previous_response_id, + "include": include, + "store": store, + "truncation": truncation, + "parallel_tool_calls": parallel_tool_calls, + "max_tool_calls": max_tool_calls, + "background": background, + "conversation": conversation, + "prompt": prompt, + "service_tier": service_tier, + "stream_options": stream_options, + "top_logprobs": top_logprobs, + "timeout": timeout, + } + ).model_dump(exclude_none=True) + + url = self._transport.build_url( + RestApiType.RESPONSES, + provider=provider, + 
endpoint_type=endpoint_type, + ) + + response = await self._transport.request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + + result = ResponsesResponse.model_validate(response.json()) + result.session_id = response.headers.get("X-Session-ID") + return result diff --git a/src/sequrity/types/responses/__init__.py b/src/sequrity/types/responses/__init__.py new file mode 100644 index 0000000..b116460 --- /dev/null +++ b/src/sequrity/types/responses/__init__.py @@ -0,0 +1,258 @@ +"""OpenAI Responses API types. + +This package provides request and response types for the OpenAI Responses API +used through Sequrity's Control API. +""" + +from .request import ( + ApplyPatchCallOutputParam, + ApplyPatchCallParam, + ApplyPatchToolParam, + CodeInterpreterContainerAuto, + CodeInterpreterToolParam, + ComputerCallOutputAcknowledgedSafetyCheck, + ComputerCallOutputParam, + ComputerToolParam, + ConversationParam, + CustomToolParam, + FileSearchToolParam, + FunctionCallOutputParam, + FunctionShellToolParam, + FunctionToolParam, + ImageGenerationCallParam, + ImageGenerationInputImageMask, + ImageGenerationToolParam, + InputAudioParam, + InputFileParam, + InputImageParam, + InputMessageParam, + InputTextParam, + ItemReferenceParam, + LocalShellCallActionParam, + LocalShellCallOutputParam, + LocalShellCallParam, + LocalShellToolParam, + McpAllowedToolsFilter, + McpApprovalFilter, + McpApprovalRequestParam, + McpApprovalResponseParam, + McpCallParam, + McpListToolsParam, + McpListToolsToolParam, + McpToolParam, + MessageParam, + OutputRefusalParam, + OutputTextParam, + ReasoningItemParam, + ReasoningItemSummaryParam, + ReasoningParam, + ResponseCustomToolCallOutputParam, + ResponseCustomToolCallParam, + ResponseFunctionToolCallParam, + ResponseOutputMessageParam, + ResponsePromptParam, + ResponsesRequest, + ResponseTextConfigParam, + 
ShellCallActionParam, + ShellCallOutputParam, + ShellCallParam, + StreamOptionsParam, + TextConfigJSONObjectParam, + TextConfigJSONSchemaParam, + TextConfigTextParam, + ToolChoiceAutoParam, + ToolChoiceFunctionParam, + ToolChoiceNoneParam, + ToolChoiceRequiredParam, + WebSearchPreviewToolParam, + WebSearchPreviewUserLocation, + WebSearchToolParam, +) +from .response import ( + AnnotationContainerFileCitation, + AnnotationFileCitation, + AnnotationFilePath, + AnnotationURLCitation, + ApplyPatchToolCall, + CodeInterpreterOutputImage, + CodeInterpreterOutputLogs, + CodeInterpreterToolCall, + ComputerActionClick, + ComputerActionDoubleClick, + ComputerActionDrag, + ComputerActionKeypress, + ComputerActionMove, + ComputerActionScreenshot, + ComputerActionScroll, + ComputerActionType, + ComputerActionWait, + ComputerPendingSafetyCheck, + ComputerToolCall, + ConversationInfo, + FileSearchResult, + FileSearchToolCall, + FunctionShellToolCall, + FunctionToolCall, + ImageGenerationCall, + IncompleteDetails, + InputTokensDetails, + LocalShellCall, + LocalShellCallAction, + McpApprovalRequest, + McpCall, + McpListTools, + McpListToolsTool, + OutputText, + OutputTextLogprob, + OutputTextLogprobTopLogprob, + ReasoningContent, + ReasoningItem, + ReasoningSummary, + Refusal, + ResponseCustomToolCall, + ResponseError, + ResponseOutputMessage, + ResponsePromptInfo, + ResponsesResponse, + ResponseUsage, + ToolInfo, + WebSearchActionFind, + WebSearchActionOpenPage, + WebSearchActionSearch, + WebSearchActionSource, + WebSearchToolCall, +) + +__all__ = [ + # Request - main + "ResponsesRequest", + # Request - input content types + "InputTextParam", + "InputImageParam", + "InputFileParam", + "InputAudioParam", + # Request - message types + "InputMessageParam", + "MessageParam", + # Request - output message (for multi-turn input) + "OutputTextParam", + "OutputRefusalParam", + "ResponseOutputMessageParam", + # Request - tool call types (for multi-turn input) + 
"ResponseFunctionToolCallParam", + "FunctionCallOutputParam", + "ResponseCustomToolCallParam", + "ResponseCustomToolCallOutputParam", + "ComputerCallOutputParam", + "ComputerCallOutputAcknowledgedSafetyCheck", + "LocalShellCallParam", + "LocalShellCallActionParam", + "LocalShellCallOutputParam", + "ShellCallParam", + "ShellCallActionParam", + "ShellCallOutputParam", + "ApplyPatchCallParam", + "ApplyPatchCallOutputParam", + # Request - MCP types + "McpListToolsParam", + "McpListToolsToolParam", + "McpApprovalRequestParam", + "McpApprovalResponseParam", + "McpCallParam", + # Request - other input types + "ReasoningItemParam", + "ReasoningItemSummaryParam", + "ImageGenerationCallParam", + "ItemReferenceParam", + # Request - tool definitions + "FunctionToolParam", + "FileSearchToolParam", + "WebSearchToolParam", + "WebSearchPreviewToolParam", + "WebSearchPreviewUserLocation", + "CodeInterpreterToolParam", + "CodeInterpreterContainerAuto", + "ComputerToolParam", + "ImageGenerationToolParam", + "ImageGenerationInputImageMask", + "McpToolParam", + "McpAllowedToolsFilter", + "McpApprovalFilter", + "LocalShellToolParam", + "FunctionShellToolParam", + "CustomToolParam", + "ApplyPatchToolParam", + # Request - tool choice + "ToolChoiceNoneParam", + "ToolChoiceAutoParam", + "ToolChoiceRequiredParam", + "ToolChoiceFunctionParam", + # Request - text config + "TextConfigJSONSchemaParam", + "TextConfigJSONObjectParam", + "TextConfigTextParam", + "ResponseTextConfigParam", + # Request - config types + "ConversationParam", + "ResponsePromptParam", + "ReasoningParam", + "StreamOptionsParam", + # Response - main + "ResponsesResponse", + # Response - output message + "ResponseOutputMessage", + "OutputText", + "Refusal", + # Response - reasoning + "ReasoningSummary", + "ReasoningContent", + "ReasoningItem", + # Response - tool calls + "FunctionToolCall", + "FileSearchToolCall", + "FileSearchResult", + "WebSearchToolCall", + "WebSearchActionSearch", + "WebSearchActionOpenPage", + 
"WebSearchActionFind", + "WebSearchActionSource", + "CodeInterpreterToolCall", + "CodeInterpreterOutputLogs", + "CodeInterpreterOutputImage", + "ComputerToolCall", + "ComputerActionClick", + "ComputerActionDoubleClick", + "ComputerActionDrag", + "ComputerActionKeypress", + "ComputerActionMove", + "ComputerActionScreenshot", + "ComputerActionScroll", + "ComputerActionType", + "ComputerActionWait", + "ComputerPendingSafetyCheck", + "ImageGenerationCall", + "LocalShellCall", + "LocalShellCallAction", + "FunctionShellToolCall", + "ApplyPatchToolCall", + "McpCall", + "McpListTools", + "McpListToolsTool", + "McpApprovalRequest", + "ResponseCustomToolCall", + # Response - annotations + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationContainerFileCitation", + "AnnotationFilePath", + # Response - supporting types + "ResponseUsage", + "InputTokensDetails", + "OutputTextLogprob", + "OutputTextLogprobTopLogprob", + "ResponseError", + "ToolInfo", + "ConversationInfo", + "ResponsePromptInfo", + "IncompleteDetails", +] diff --git a/src/sequrity/types/responses/request.py b/src/sequrity/types/responses/request.py new file mode 100644 index 0000000..07cdef6 --- /dev/null +++ b/src/sequrity/types/responses/request.py @@ -0,0 +1,1002 @@ +"""Pydantic models for OpenAI Responses API request types.""" + +from __future__ import annotations + +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +# ============================================================================= +# Input Content Types +# ============================================================================= + + +class InputTextParam(BaseModel): + text: str = Field(..., description="The text content.") + type: Literal["input_text"] = Field( + ..., description="The type of the input content." + ) + + +class InputImageParam(BaseModel): + """An image input. 
Supports both URL and file_id references.""" + + type: Literal["input_image"] = Field( + ..., description="The type of the input content." + ) + detail: Literal["auto", "low", "high"] | None = Field( + default=None, description="The detail level of the image." + ) + file_id: str | None = Field( + default=None, description="The ID of the uploaded file." + ) + image_url: str | None = Field( + default=None, description="The URL of the image or base64 data URL." + ) + + +class InputFileParam(BaseModel): + file_id: str = Field(..., description="The ID of the uploaded file.") + type: Literal["input_file"] = Field( + ..., description="The type of the input content." + ) + + +class InputAudioParam(BaseModel): + audio: str = Field(..., description="Base64-encoded audio data.") + type: Literal["input_audio"] = Field( + ..., description="The type of the input content." + ) + + +InputContentParam = Annotated[ + InputTextParam | InputImageParam | InputFileParam | InputAudioParam, + Field(discriminator="type"), +] + +# ============================================================================= +# Message Types +# ============================================================================= + + +class InputMessageParam(BaseModel): + role: Literal["user", "system", "developer"] = Field( + ..., + description="The role of the message. One of 'user', 'system', or 'developer'.", + ) + content: str | list[InputContentParam] = Field( + ..., + description="The content of the message. 
Can be a string or list of content items.", + ) + + +# ============================================================================= +# Message (type="message") - full message with status, distinct from InputMessageParam +# ============================================================================= + + +class MessageParam(BaseModel): + """A message input to the model with a role indicating instruction following hierarchy.""" + + content: str | list[InputContentParam] = Field( + ..., + description="A list of one or many input items to the model, containing different content types.", + ) + role: Literal["user", "system", "developer"] = Field( + ..., + description="The role of the message input. One of 'user', 'system', or 'developer'.", + ) + type: Literal["message"] = Field( + ..., description="The type of the message input. Always 'message'." + ) + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status of the item." + ) + + +# ============================================================================= +# Output Content Types (for ResponseOutputMessageParam) +# ============================================================================= + + +class OutputTextParam(BaseModel): + """A text output from the model.""" + + text: str = Field(..., description="The text output from the model.") + type: Literal["output_text"] = Field( + ..., description="The type of the output text. Always 'output_text'." + ) + annotations: list[dict[str, Any]] | None = Field( + default=None, description="Annotations for the text, such as citations." + ) + + +class OutputRefusalParam(BaseModel): + """A refusal from the model.""" + + refusal: str = Field(..., description="The refusal explanation from the model.") + type: Literal["refusal"] = Field( + ..., description="The type of the refusal. Always 'refusal'." 
+ ) + + +OutputMessageContentParam = Annotated[ + OutputTextParam | OutputRefusalParam, + Field(discriminator="type"), +] + + +class ResponseOutputMessageParam(BaseModel): + """An output message from the model, used as input for multi-turn conversations.""" + + id: str = Field(..., description="The unique ID of the output message.") + content: list[OutputMessageContentParam] = Field( + ..., description="The content of the output message." + ) + role: Literal["assistant"] = Field( + ..., description="The role of the output message. Always 'assistant'." + ) + status: Literal["in_progress", "completed", "incomplete"] = Field( + ..., description="The status of the message." + ) + type: Literal["message"] = Field( + ..., description="The type of the output message. Always 'message'." + ) + + +# ============================================================================= +# Function Tool Call (type="function_call") - for feeding back previous tool calls +# ============================================================================= + + +class ResponseFunctionToolCallParam(BaseModel): + """A tool call to run a function, used as input for multi-turn conversations.""" + + arguments: str = Field( + ..., description="A JSON string of the arguments to pass to the function." + ) + call_id: str = Field( + ..., + description="The unique ID of the function tool call generated by the model.", + ) + name: str = Field(..., description="The name of the function to run.") + type: Literal["function_call"] = Field( + ..., description="The type of the function tool call. Always 'function_call'." + ) + id: str | None = Field( + default=None, description="The unique ID of the function tool call." + ) + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status of the item." 
+ ) + + +# ============================================================================= +# Function Call Output (type="function_call_output") - tool result +# ============================================================================= + + +class FunctionCallOutputParam(BaseModel): + """The output of a function tool call.""" + + call_id: str = Field( + ..., + description="The unique ID of the function tool call generated by the model.", + ) + output: str = Field(..., description="Text output of the function tool call.") + type: Literal["function_call_output"] = Field( + ..., + description="The type of the function tool call output. Always 'function_call_output'.", + ) + id: str | None = Field( + default=None, description="The unique ID of the function tool call output." + ) + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status of the item." + ) + + +# ============================================================================= +# Custom Tool Call (type="custom_tool_call") - for custom/grammar tools +# ============================================================================= + + +class ResponseCustomToolCallParam(BaseModel): + """A call to a custom tool created by the model.""" + + call_id: str = Field( + ..., + description="An identifier used to map this custom tool call to a tool call output.", + ) + input: str = Field( + ..., description="The input for the custom tool call generated by the model." + ) + name: str = Field(..., description="The name of the custom tool being called.") + type: Literal["custom_tool_call"] = Field( + ..., description="The type of the custom tool call. Always 'custom_tool_call'." + ) + id: str | None = Field( + default=None, description="The unique ID of the custom tool call." 
+ ) + + +# ============================================================================= +# Custom Tool Call Output (type="custom_tool_call_output") - tool result for custom tools +# ============================================================================= + + +class ResponseCustomToolCallOutputParam(BaseModel): + """The output of a custom tool call from your code, being sent back to the model.""" + + call_id: str = Field( + ..., + description="The call ID, used to map this custom tool call output to a custom tool call.", + ) + output: str = Field( + ..., description="The output from the custom tool call generated by your code." + ) + type: Literal["custom_tool_call_output"] = Field( + ..., + description="The type of the custom tool call output. Always 'custom_tool_call_output'.", + ) + id: str | None = Field( + default=None, description="The unique ID of the custom tool call output." + ) + + +# ============================================================================= +# Computer Call Output (type="computer_call_output") +# ============================================================================= + + +class ComputerCallOutputAcknowledgedSafetyCheck(BaseModel): + id: str = Field(..., description="The safety check ID.") + code: str | None = Field( + default=None, description="The type of the pending safety check." + ) + message: str | None = Field( + default=None, description="Details about the pending safety check." + ) + + +class ComputerCallOutputParam(BaseModel): + """Output from a computer tool call.""" + + call_id: str = Field(..., description="The computer tool call ID.") + output: dict[str, Any] = Field(..., description="Screenshot output.") + type: Literal["computer_call_output"] = Field( + ..., description="Always 'computer_call_output'." 
+ ) + id: str | None = Field(default=None, description="The unique ID.") + acknowledged_safety_checks: ( + list[ComputerCallOutputAcknowledgedSafetyCheck] | None + ) = Field(default=None, description="Acknowledged safety checks.") + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status." + ) + + +# ============================================================================= +# Local Shell Call types (for input) +# ============================================================================= + + +class LocalShellCallActionParam(BaseModel): + command: list[str] = Field(..., description="The command to run.") + env: dict[str, str] = Field( + default_factory=dict, description="Environment variables." + ) + type: Literal["exec"] = Field(..., description="Always 'exec'.") + timeout_ms: int | None = Field( + default=None, description="Optional timeout in milliseconds." + ) + user: str | None = Field( + default=None, description="Optional user to run the command as." + ) + working_directory: str | None = Field( + default=None, description="Optional working directory." + ) + + +class LocalShellCallParam(BaseModel): + """A local shell tool call.""" + + id: str = Field(..., description="The unique ID.") + action: LocalShellCallActionParam = Field( + ..., description="The shell command to execute." + ) + call_id: str = Field(..., description="The tool call ID from the model.") + status: Literal["in_progress", "completed", "incomplete"] = Field( + ..., description="The status." + ) + type: Literal["local_shell_call"] = Field( + ..., description="Always 'local_shell_call'." + ) + + +class LocalShellCallOutputParam(BaseModel): + """Output from a local shell tool call.""" + + id: str = Field(..., description="The tool call output ID.") + output: str = Field(..., description="JSON string of output.") + type: Literal["local_shell_call_output"] = Field( + ..., description="Always 'local_shell_call_output'." 
+ ) + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status." + ) + + +# ============================================================================= +# Shell Call types (function_shell) +# ============================================================================= + + +class ShellCallActionParam(BaseModel): + commands: list[str] = Field(..., description="Shell commands to run.") + max_output_length: int | None = Field( + default=None, description="Max output characters." + ) + timeout_ms: int | None = Field( + default=None, description="Max wall-clock time in ms." + ) + + +class ShellCallParam(BaseModel): + """A shell command request.""" + + action: ShellCallActionParam = Field(..., description="The shell action.") + call_id: str = Field(..., description="The tool call ID.") + type: Literal["shell_call"] = Field(..., description="Always 'shell_call'.") + id: str | None = Field(default=None, description="The unique ID.") + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status." + ) + + +class ShellCallOutputParam(BaseModel): + """Output from a shell tool call.""" + + call_id: str = Field(..., description="The shell tool call ID.") + output: list[dict[str, Any]] = Field(..., description="Output chunks.") + type: Literal["shell_call_output"] = Field( + ..., description="Always 'shell_call_output'." + ) + id: str | None = Field(default=None, description="The unique ID.") + max_output_length: int | None = Field( + default=None, description="Max output length." 
+ ) + + +# ============================================================================= +# Apply Patch Call types +# ============================================================================= + + +class ApplyPatchCallParam(BaseModel): + """An apply patch tool call.""" + + call_id: str = Field(..., description="The tool call ID.") + operation: dict[str, Any] = Field( + ..., description="The file operation (create_file, delete_file, update_file)." + ) + status: Literal["in_progress", "completed"] = Field(..., description="The status.") + type: Literal["apply_patch_call"] = Field( + ..., description="Always 'apply_patch_call'." + ) + id: str | None = Field(default=None, description="The unique ID.") + + +class ApplyPatchCallOutputParam(BaseModel): + """Output from an apply patch tool call.""" + + call_id: str = Field(..., description="The tool call ID.") + status: Literal["completed", "failed"] = Field(..., description="The status.") + type: Literal["apply_patch_call_output"] = Field( + ..., description="Always 'apply_patch_call_output'." + ) + id: str | None = Field(default=None, description="The unique ID.") + output: str | None = Field(default=None, description="Log text.") + + +# ============================================================================= +# MCP types (for input) +# ============================================================================= + + +class McpListToolsToolParam(BaseModel): + input_schema: dict[str, Any] = Field( + ..., description="The JSON schema describing the tool's input." + ) + name: str = Field(..., description="The name of the tool.") + annotations: dict[str, Any] | None = Field( + default=None, description="Additional annotations." + ) + description: str | None = Field( + default=None, description="The description of the tool." 
+ ) + + +class McpListToolsParam(BaseModel): + """MCP server tools list.""" + + id: str = Field(..., description="The unique ID.") + server_label: str = Field(..., description="The MCP server label.") + tools: list[McpListToolsToolParam] = Field(..., description="Available tools.") + type: Literal["mcp_list_tools"] = Field(..., description="Always 'mcp_list_tools'.") + error: str | None = Field(default=None, description="Error message.") + + +class McpApprovalRequestParam(BaseModel): + """MCP tool approval request.""" + + id: str = Field(..., description="The request ID.") + arguments: str = Field(..., description="JSON string of tool arguments.") + name: str = Field(..., description="The tool name.") + server_label: str = Field(..., description="The MCP server label.") + type: Literal["mcp_approval_request"] = Field( + ..., description="Always 'mcp_approval_request'." + ) + + +class McpApprovalResponseParam(BaseModel): + """MCP approval response.""" + + approval_request_id: str = Field(..., description="The request ID being answered.") + approve: bool = Field(..., description="The approval decision.") + type: Literal["mcp_approval_response"] = Field( + ..., description="Always 'mcp_approval_response'." + ) + id: str | None = Field(default=None, description="The unique ID.") + reason: str | None = Field(default=None, description="The decision reason.") + + +class McpCallParam(BaseModel): + """MCP tool invocation.""" + + id: str = Field(..., description="The tool call ID.") + arguments: str = Field(..., description="JSON string of arguments.") + name: str = Field(..., description="The tool name.") + server_label: str = Field(..., description="The MCP server label.") + type: Literal["mcp_call"] = Field(..., description="Always 'mcp_call'.") + approval_request_id: str | None = Field( + default=None, description="For approval flow." 
+ ) + error: str | None = Field(default=None, description="Tool error.") + output: str | None = Field(default=None, description="Tool output.") + status: ( + Literal["in_progress", "completed", "incomplete", "calling", "failed"] | None + ) = Field(default=None, description="The status.") + + +# ============================================================================= +# Item Reference +# ============================================================================= + + +class ItemReferenceParam(BaseModel): + """A reference to an existing item.""" + + id: str = Field(..., description="The item ID.") + type: Literal["item_reference"] | None = Field( + default=None, description="Always 'item_reference'." + ) + + +# ============================================================================= +# Reasoning Item (for input) +# ============================================================================= + + +class ReasoningItemSummaryParam(BaseModel): + text: str = Field(..., description="A summary of the reasoning output.") + type: Literal["summary_text"] = Field(..., description="Always 'summary_text'.") + + +class ReasoningItemParam(BaseModel): + """A reasoning item for multi-turn conversations.""" + + id: str = Field(..., description="The unique identifier.") + type: Literal["reasoning"] = Field(..., description="Always 'reasoning'.") + summary: list[ReasoningItemSummaryParam] = Field( + default_factory=list, description="Reasoning summary." + ) + encrypted_content: str | None = Field( + default=None, description="Encrypted content for multi-turn." 
+ ) + + +# ============================================================================= +# Image Generation Call (for input) +# ============================================================================= + + +class ImageGenerationCallParam(BaseModel): + """An image generation call for multi-turn conversations.""" + + id: str = Field(..., description="The unique ID.") + result: str | None = Field( + default=None, description="The generated image encoded in base64." + ) + status: Literal["in_progress", "completed", "generating", "failed"] = Field( + ..., description="The status." + ) + type: Literal["image_generation_call"] = Field( + ..., description="Always 'image_generation_call'." + ) + + +# ============================================================================= +# ResponseInputItemParam - union of all item types that can appear in input +# ============================================================================= + +ResponseInputItemParam = ( + InputMessageParam + | MessageParam + | ResponseOutputMessageParam + | ResponseFunctionToolCallParam + | FunctionCallOutputParam + | ResponseCustomToolCallParam + | ResponseCustomToolCallOutputParam + | ComputerCallOutputParam + | LocalShellCallParam + | LocalShellCallOutputParam + | ShellCallParam + | ShellCallOutputParam + | ApplyPatchCallParam + | ApplyPatchCallOutputParam + | McpListToolsParam + | McpApprovalRequestParam + | McpApprovalResponseParam + | McpCallParam + | ReasoningItemParam + | ImageGenerationCallParam + | ItemReferenceParam + | dict[str, Any] +) +ResponseInputParam = str | list[ResponseInputItemParam] + +# ============================================================================= +# Tool Definition Types +# ============================================================================= + + +class FunctionToolParam(BaseModel): + """Defines a function tool the model can call. 
+ + Note: The Responses API uses a flat structure (name, parameters, strict, description + at the top level), unlike the Chat Completions API which nests them under a + ``function`` key. + """ + + name: str = Field(..., description="The name of the function to call.") + parameters: dict[str, Any] | None = Field( + default=None, + description="A JSON Schema object describing the parameters of the function.", + ) + strict: bool | None = Field( + default=None, + description="Whether to enforce strict parameter validation. Default true.", + ) + type: Literal["function"] = Field( + ..., description="The type of the function tool. Always 'function'." + ) + description: str | None = Field( + default=None, description="A description of the function." + ) + + +class FileSearchToolParam(BaseModel): + type: Literal["file_search"] = Field(..., description="The type of the tool.") + file_ids: list[str] | None = Field(default=None, description="Files to search.") + + +class WebSearchToolParam(BaseModel): + type: Literal["web_search"] = Field(..., description="The type of the tool.") + model_config = ConfigDict(extra="allow") + + +class WebSearchPreviewUserLocation(BaseModel): + type: Literal["approximate"] = Field(..., description="Always 'approximate'.") + city: str | None = Field(default=None, description="City name.") + country: str | None = Field(default=None, description="ISO country code.") + region: str | None = Field(default=None, description="Region name.") + timezone: str | None = Field(default=None, description="IANA timezone.") + + +class WebSearchPreviewToolParam(BaseModel): + type: Literal["web_search_preview", "web_search_preview_2025_03_11"] = Field( + ..., description="The type of the tool." + ) + search_context_size: Literal["low", "medium", "high"] | None = Field( + default=None, description="Search context size." + ) + user_location: WebSearchPreviewUserLocation | None = Field( + default=None, description="User location." 
+ ) + + +class CodeInterpreterContainerAuto(BaseModel): + type: Literal["auto"] = Field(..., description="Always 'auto'.") + file_ids: list[str] | None = Field(default=None, description="Uploaded files.") + memory_limit: Literal["1g", "4g", "16g", "64g"] | None = Field( + default=None, description="Memory limit." + ) + + +class CodeInterpreterToolParam(BaseModel): + type: Literal["code_interpreter"] = Field(..., description="The type of the tool.") + container: str | CodeInterpreterContainerAuto = Field( + ..., description="The code interpreter container." + ) + + +class ComputerToolParam(BaseModel): + type: Literal["computer_use_preview"] = Field( + ..., description="The type of the tool." + ) + model_config = ConfigDict(extra="allow") + + +class ImageGenerationInputImageMask(BaseModel): + file_id: str | None = Field(default=None, description="Mask image file ID.") + image_url: str | None = Field(default=None, description="Base64-encoded mask.") + + +class ImageGenerationToolParam(BaseModel): + type: Literal["image_generation"] = Field(..., description="The type of the tool.") + model: str | Literal["gpt-image-1", "gpt-image-1-mini"] | None = Field( + default=None, description="The image generation model to use." + ) + size: Literal["1024x1024", "1024x1536", "1536x1024", "auto"] | None = Field( + default=None, description="The size of the generated image." + ) + quality: Literal["low", "medium", "high", "auto"] | None = Field( + default=None, description="The quality of the generated image." + ) + background: Literal["transparent", "opaque", "auto"] | None = Field( + default=None, description="The background of the generated image." + ) + input_fidelity: Literal["high", "low"] | None = Field( + default=None, description="Input fidelity." + ) + input_image_mask: ImageGenerationInputImageMask | None = Field( + default=None, description="Inpainting mask." + ) + moderation: Literal["auto", "low"] | None = Field( + default=None, description="Moderation level." 
+ ) + output_compression: int | None = Field( + default=None, description="Compression level." + ) + output_format: Literal["png", "webp", "jpeg"] | None = Field( + default=None, description="Output format." + ) + partial_images: int | None = Field( + default=None, description="Partial images for streaming (0-3)." + ) + + +class McpAllowedToolsFilter(BaseModel): + read_only: bool | None = Field(default=None, description="Tool is read-only.") + tool_names: list[str] | None = Field( + default=None, description="Allowed tool names." + ) + + +class McpApprovalFilter(BaseModel): + always: McpAllowedToolsFilter | None = Field( + default=None, description="Always require approval." + ) + never: McpAllowedToolsFilter | None = Field( + default=None, description="Never require approval." + ) + + +class McpToolParam(BaseModel): + """MCP (Model Context Protocol) tool configuration.""" + + type: Literal["mcp"] = Field(..., description="The type of the tool.") + server_label: str = Field(..., description="Label for the MCP server.") + server_url: str | None = Field(default=None, description="URL for the MCP server.") + connector_id: str | None = Field(default=None, description="Service connector ID.") + allowed_tools: list[str] | McpAllowedToolsFilter | None = Field( + default=None, description="Allowed tools filter." + ) + require_approval: Literal["always", "never"] | McpApprovalFilter | None = Field( + default=None, description="Approval requirement." + ) + authorization: str | None = Field(default=None, description="OAuth access token.") + headers: dict[str, str] | None = Field( + default=None, description="Custom HTTP headers." + ) + server_description: str | None = Field( + default=None, description="MCP server description." 
+ ) + + +class LocalShellToolParam(BaseModel): + type: Literal["local_shell"] = Field(..., description="The type of the tool.") + + +class FunctionShellToolParam(BaseModel): + type: Literal["function_shell"] = Field(..., description="The type of the tool.") + + +class CustomToolParam(BaseModel): + """Custom tool with grammar-based input format.""" + + type: Literal["custom"] = Field(..., description="The type of the tool.") + name: str = Field(..., description="The custom tool name.") + description: str | None = Field(default=None, description="Tool description.") + format: dict[str, Any] | None = Field( + default=None, description="Input format specification." + ) + + +class ApplyPatchToolParam(BaseModel): + type: Literal["apply_patch"] = Field(..., description="The type of the tool.") + + +ToolParam = ( + FunctionToolParam + | FileSearchToolParam + | WebSearchToolParam + | WebSearchPreviewToolParam + | CodeInterpreterToolParam + | ComputerToolParam + | ImageGenerationToolParam + | McpToolParam + | LocalShellToolParam + | FunctionShellToolParam + | CustomToolParam + | ApplyPatchToolParam + | dict[str, Any] +) + +# ============================================================================= +# Tool Choice Types +# ============================================================================= + + +class ToolChoiceNoneParam(BaseModel): + type: Literal["none"] = Field(..., description="The model will not use any tools.") + + +class ToolChoiceAutoParam(BaseModel): + type: Literal["auto"] = Field( + ..., description="The model will automatically decide whether to use tools." + ) + + +class ToolChoiceRequiredParam(BaseModel): + type: Literal["required"] = Field( + ..., description="The model must use at least one tool." + ) + + +class ToolChoiceFunctionParam(BaseModel): + type: Literal["function"] = Field( + ..., description="The model will use the specified function." 
+ ) + name: str = Field(..., description="The name of the function to use.") + + +ToolChoiceParam = Annotated[ + ToolChoiceNoneParam + | ToolChoiceAutoParam + | ToolChoiceRequiredParam + | ToolChoiceFunctionParam, + Field(discriminator="type"), +] + +# ============================================================================= +# Text Config Types +# ============================================================================= + + +class TextConfigJSONSchemaParam(BaseModel): + type: Literal["json_schema"] = Field(..., description="Structured JSON output.") + name: str = Field(..., description="The name of the schema.") + description: str | None = Field( + default=None, description="A description of the schema." + ) + schema_: dict[str, Any] = Field(..., alias="schema", description="The JSON schema.") + strict: bool | None = Field( + default=None, + description="Whether to enable strict schema adherence when generating the output.", + ) + + model_config = ConfigDict(extra="ignore", populate_by_name=True) + + +class TextConfigJSONObjectParam(BaseModel): + type: Literal["json_object"] = Field(..., description="JSON object output.") + + +class TextConfigTextParam(BaseModel): + type: Literal["text"] = Field(..., description="Plain text output.") + + +TextFormatParam = Annotated[ + TextConfigJSONSchemaParam | TextConfigJSONObjectParam | TextConfigTextParam, + Field(discriminator="type"), +] + + +class ResponseTextConfigParam(BaseModel): + format: TextFormatParam | None = Field( + default=None, description="The format of the text response." 
+ ) + + +# ============================================================================= +# Conversation and Prompt Types +# ============================================================================= + + +class ConversationParam(BaseModel): + id: str = Field(..., description="The unique ID of the conversation.") + + +class ResponsePromptParam(BaseModel): + id: str = Field(..., description="The unique ID of the prompt template.") + variables: dict[str, str] | None = Field( + default=None, description="Variables to substitute in the prompt template." + ) + + +# ============================================================================= +# Reasoning Config Types +# ============================================================================= + + +class ReasoningParam(BaseModel): + effort: Literal["none", "low", "medium", "high", "xhigh"] | None = Field( + default=None, description="The reasoning effort level." + ) + generate_summary: Literal["auto", "concise", "detailed"] | None = Field( + default=None, description="Whether to generate a summary of the reasoning." + ) + + +# ============================================================================= +# Stream Options +# ============================================================================= + + +class StreamOptionsParam(BaseModel): + include_usage: bool | None = Field( + default=None, + description="If set, an additional chunk will be streamed with usage statistics.", + ) + + +# ============================================================================= +# Main Request Class +# ============================================================================= + + +class ResponsesRequest(BaseModel): + """OpenAI Responses API request. 
See https://platform.openai.com/docs/api-reference/responses.""" + + model: str = Field( + ..., + description="Model ID used to generate the response, like 'gpt-4o' or 'o3'.", + ) + background: bool | None = Field( + default=None, + description="Whether to run the model response in the background.", + ) + conversation: str | ConversationParam | None = Field( + default=None, + description="The conversation that this response belongs to.", + ) + include: list[str] | None = Field( + default=None, + description="Specify additional output data to include in the model response.", + ) + input: ResponseInputParam | None = Field( + default=None, + description="Text, image, or file inputs to the model, used to generate a response.", + ) + instructions: str | None = Field( + default=None, + description="A system (or developer) message inserted into the model's context.", + ) + max_output_tokens: int | None = Field( + default=None, + description="An upper bound for the number of tokens that can be generated for a response.", + ) + max_tool_calls: int | None = Field( + default=None, + description="The maximum number of total calls to built-in tools.", + ) + metadata: dict[str, str] | None = Field( + default=None, + description="Set of 16 key-value pairs that can be attached to an object.", + ) + parallel_tool_calls: bool | None = Field( + default=None, + description="Whether to allow the model to run tool calls in parallel.", + ) + previous_response_id: str | None = Field( + default=None, + description="The unique ID of the previous response to the model.", + ) + prompt: ResponsePromptParam | None = Field( + default=None, + description="Reference to a prompt template and its variables.", + ) + prompt_cache_key: str | None = Field( + default=None, + description="Used by OpenAI to cache responses for similar requests.", + ) + prompt_cache_retention: Literal["in-memory", "24h"] | None = Field( + default=None, + description="Retention policy for prompt cache.", + ) + reasoning: 
ReasoningParam | None = Field( + default=None, + description="Configuration options for reasoning models.", + ) + safety_identifier: str | None = Field( + default=None, + description="A stable identifier used to help detect users violating usage policies.", + ) + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = ( + Field( + default=None, + description="Specifies the processing type used for serving the request.", + ) + ) + store: bool | None = Field( + default=None, + description="Whether to store the generated model response for later retrieval via API.", + ) + stream: bool | None = Field( + default=None, + description="If set to true, the model response data will be streamed.", + ) + stream_options: StreamOptionsParam | None = Field( + default=None, description="Options for streaming responses." + ) + temperature: float | None = Field( + default=None, + description="Sampling temperature (0-2). Higher values make output more random.", + ) + text: ResponseTextConfigParam | None = Field( + default=None, + description="Configuration options for a text response from the model.", + ) + tool_choice: ( + Literal["none", "auto", "required"] | ToolChoiceFunctionParam | None + ) = Field( + default=None, description="How the model should select which tool to use." 
+ ) + tools: list[ToolParam] | None = Field( + default=None, + description="An array of tools the model may call while generating a response.", + ) + top_logprobs: int | None = Field( + default=None, + description="Number of most likely tokens to return at each position (0-20).", + ) + top_p: float | None = Field( + default=None, + description="Nucleus sampling parameter.", + ) + truncation: Literal["auto", "disabled"] | None = Field( + default=None, + description="The truncation strategy to use for the model response.", + ) + user: str | None = Field( + default=None, + description="Deprecated: use safety_identifier and prompt_cache_key.", + ) + timeout: float | None = Field( + default=None, + description="Client-side timeout in seconds.", + ) + + model_config = ConfigDict(extra="ignore") diff --git a/src/sequrity/types/responses/response.py b/src/sequrity/types/responses/response.py new file mode 100644 index 0000000..cb904da --- /dev/null +++ b/src/sequrity/types/responses/response.py @@ -0,0 +1,578 @@ +"""Pydantic models for OpenAI Responses API response types.""" + +from __future__ import annotations + +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +# ============================================================================= +# Output Content Types +# ============================================================================= + + +class OutputTextLogprobTopLogprob(BaseModel): + token: str = Field(..., description="The token.") + bytes: list[int] = Field(..., description="A list of integers representing the UTF-8 bytes of the token.") + logprob: float = Field(..., description="The log probability of the token.") + + +class OutputTextLogprob(BaseModel): + token: str = Field(..., description="The token.") + bytes: list[int] = Field(..., description="A list of integers representing the UTF-8 bytes of the token.") + logprob: float = Field(..., description="The log probability of the token.") + top_logprobs: 
list[OutputTextLogprobTopLogprob] = Field( + ..., description="List of the most likely tokens and their log probability." + ) + + +class AnnotationFileCitation(BaseModel): + file_id: str = Field(..., description="The ID of the file.") + filename: str = Field(..., description="The filename of the file cited.") + index: int = Field(..., description="The index of the file in the list of files.") + type: Literal["file_citation"] = Field(..., description="Always 'file_citation'.") + + +class AnnotationURLCitation(BaseModel): + end_index: int = Field(..., description="The index of the last character of the URL citation.") + start_index: int = Field(..., description="The index of the first character of the URL citation.") + title: str = Field(..., description="The title of the web resource.") + type: Literal["url_citation"] = Field(..., description="Always 'url_citation'.") + url: str = Field(..., description="The URL of the web resource.") + + +class AnnotationContainerFileCitation(BaseModel): + container_id: str = Field(..., description="The ID of the container.") + end_index: int = Field(..., description="The index of the last character of the citation.") + file_id: str = Field(..., description="The ID of the file.") + filename: str = Field(..., description="The filename of the container file cited.") + start_index: int = Field(..., description="The index of the first character of the citation.") + type: Literal["container_file_citation"] = Field(..., description="Always 'container_file_citation'.") + + +class AnnotationFilePath(BaseModel): + file_id: str = Field(..., description="The ID of the file.") + index: int = Field(..., description="The index of the file in the list of files.") + type: Literal["file_path"] = Field(..., description="Always 'file_path'.") + + +Annotation = Annotated[ + AnnotationFileCitation | AnnotationURLCitation | AnnotationContainerFileCitation | AnnotationFilePath, + Field(discriminator="type"), +] + + +class OutputText(BaseModel): + type: 
Literal["output_text"] = Field(..., description="The type of the output content.") + text: str = Field(..., description="The text content.") + annotations: list[Annotation] = Field(default_factory=list, description="Annotations for the text.") + logprobs: list[OutputTextLogprob] | None = Field(default=None, description="Log probability information.") + + +class Refusal(BaseModel): + type: Literal["refusal"] = Field(..., description="The type of the output content.") + refusal: str = Field(..., description="The refusal message generated by the model.") + + +OutputContentItem = Annotated[ + OutputText | Refusal, + Field(discriminator="type"), +] + +# ============================================================================= +# Output Message Type +# ============================================================================= + + +class ResponseOutputMessage(BaseModel): + id: str = Field(..., description="The unique ID of the output message.") + type: Literal["message"] = Field(..., description="The type of the output item.") + role: Literal["assistant"] = Field(..., description="The role of the message. 
Always 'assistant'.") + content: list[OutputContentItem] = Field(..., description="The content items of the message.") + status: Literal["in_progress", "completed", "incomplete"] = Field(..., description="The status of the message.") + + +# ============================================================================= +# Reasoning Item +# ============================================================================= + + +class ReasoningSummary(BaseModel): + text: str = Field(..., description="A summary of the reasoning output.") + type: Literal["summary_text"] = Field(..., description="Always 'summary_text'.") + + +class ReasoningContent(BaseModel): + text: str = Field(..., description="The reasoning text from the model.") + type: Literal["reasoning_text"] = Field(..., description="Always 'reasoning_text'.") + + +class ReasoningItem(BaseModel): + id: str = Field(..., description="The unique identifier of the reasoning content.") + type: Literal["reasoning"] = Field(..., description="The type of the output item.") + summary: list[ReasoningSummary] = Field(default_factory=list, description="Reasoning summary content.") + content: list[ReasoningContent] | None = Field(default=None, description="Reasoning text content.") + encrypted_content: str | None = Field(default=None, description="Encrypted content for multi-turn.") + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status of the reasoning item." 
+ ) + + +# ============================================================================= +# Tool Call Types +# ============================================================================= + + +class FunctionToolCall(BaseModel): + type: Literal["function_call"] = Field(..., description="The type of the tool call.") + call_id: str = Field(..., description="The unique ID of the function tool call generated by the model.") + name: str = Field(..., description="The name of the function to call.") + arguments: str = Field(..., description="The arguments to call the function with, as a JSON string.") + id: str | None = Field(default=None, description="The unique ID of the function tool call.") + status: Literal["in_progress", "completed", "incomplete"] | None = Field( + default=None, description="The status of the tool call." + ) + + +class FileSearchResult(BaseModel): + attributes: dict[str, str | float | bool] | None = Field(default=None, description="Metadata key-value pairs.") + file_id: str | None = Field(default=None, description="The unique ID of the file.") + filename: str | None = Field(default=None, description="The name of the file.") + score: float | None = Field(default=None, description="The relevance score of the file (0-1).") + text: str | None = Field(default=None, description="The text that was retrieved from the file.") + + +class FileSearchToolCall(BaseModel): + type: Literal["file_search_call"] = Field(..., description="The type of the tool call.") + id: str = Field(..., description="The unique ID of the file search tool call.") + queries: list[str] = Field(default_factory=list, description="The queries used to search for files.") + status: Literal["in_progress", "searching", "completed", "incomplete", "failed"] = Field( + ..., description="The status of the tool call." 
+ ) + results: list[FileSearchResult] | None = Field(default=None, description="The results of the file search.") + + +# --- Web Search --- + + +class WebSearchActionSource(BaseModel): + type: Literal["url"] = Field(..., description="Always 'url'.") + url: str = Field(..., description="The URL of the source.") + + +class WebSearchActionSearch(BaseModel): + query: str = Field(..., description="The search query.") + type: Literal["search"] = Field(..., description="The action type.") + queries: list[str] | None = Field(default=None, description="The search queries.") + sources: list[WebSearchActionSource] | None = Field(default=None, description="The sources used.") + + +class WebSearchActionOpenPage(BaseModel): + type: Literal["open_page"] = Field(..., description="The action type.") + url: str = Field(..., description="The URL opened by the model.") + + +class WebSearchActionFind(BaseModel): + pattern: str = Field(..., description="The pattern or text to search for within the page.") + type: Literal["find"] = Field(..., description="The action type.") + url: str = Field(..., description="The URL of the page searched.") + + +WebSearchAction = Annotated[ + WebSearchActionSearch | WebSearchActionOpenPage | WebSearchActionFind, + Field(discriminator="type"), +] + + +class WebSearchToolCall(BaseModel): + type: Literal["web_search_call"] = Field(..., description="The type of the tool call.") + id: str = Field(..., description="The unique ID of the web search tool call.") + action: WebSearchAction | None = Field(default=None, description="The action taken.") + status: Literal["in_progress", "searching", "completed", "failed"] = Field( + ..., description="The status of the tool call." 
+ ) + + +# --- Code Interpreter --- + + +class CodeInterpreterOutputLogs(BaseModel): + logs: str = Field(..., description="The logs output from the code interpreter.") + type: Literal["logs"] = Field(..., description="Always 'logs'.") + + +class CodeInterpreterOutputImage(BaseModel): + type: Literal["image"] = Field(..., description="Always 'image'.") + url: str = Field(..., description="The URL of the image output.") + + +CodeInterpreterOutput = Annotated[ + CodeInterpreterOutputLogs | CodeInterpreterOutputImage, + Field(discriminator="type"), +] + + +class CodeInterpreterToolCall(BaseModel): + type: Literal["code_interpreter_call"] = Field(..., description="The type of the tool call.") + id: str = Field(..., description="The unique ID of the code interpreter tool call.") + code: str | None = Field(default=None, description="The code that was executed.") + container_id: str = Field(..., description="The ID of the container used to run the code.") + outputs: list[CodeInterpreterOutput] | None = Field( + default=None, description="The outputs from the code execution." + ) + status: Literal["in_progress", "completed", "incomplete", "interpreting", "failed"] | None = Field( + default=None, description="The status of the tool call." + ) + + +# --- Computer --- + + +class ComputerActionClick(BaseModel): + button: Literal["left", "right", "wheel", "back", "forward"] = Field( + ..., description="Which mouse button was pressed." 
+ ) + type: Literal["click"] = Field(..., description="Always 'click'.") + x: int = Field(..., description="The x-coordinate.") + y: int = Field(..., description="The y-coordinate.") + + +class ComputerActionDoubleClick(BaseModel): + type: Literal["double_click"] = Field(..., description="Always 'double_click'.") + x: int = Field(..., description="The x-coordinate.") + y: int = Field(..., description="The y-coordinate.") + + +class ComputerActionDragPath(BaseModel): + x: int = Field(..., description="The x-coordinate.") + y: int = Field(..., description="The y-coordinate.") + + +class ComputerActionDrag(BaseModel): + path: list[ComputerActionDragPath] = Field(..., description="Coordinates representing the drag path.") + type: Literal["drag"] = Field(..., description="Always 'drag'.") + + +class ComputerActionKeypress(BaseModel): + keys: list[str] = Field(..., description="The combination of keys to press.") + type: Literal["keypress"] = Field(..., description="Always 'keypress'.") + + +class ComputerActionMove(BaseModel): + type: Literal["move"] = Field(..., description="Always 'move'.") + x: int = Field(..., description="The x-coordinate.") + y: int = Field(..., description="The y-coordinate.") + + +class ComputerActionScreenshot(BaseModel): + type: Literal["screenshot"] = Field(..., description="Always 'screenshot'.") + + +class ComputerActionScroll(BaseModel): + scroll_x: int = Field(..., description="The horizontal scroll distance.") + scroll_y: int = Field(..., description="The vertical scroll distance.") + type: Literal["scroll"] = Field(..., description="Always 'scroll'.") + x: int = Field(..., description="The x-coordinate.") + y: int = Field(..., description="The y-coordinate.") + + +class ComputerActionType(BaseModel): + text: str = Field(..., description="The text to type.") + type: Literal["type"] = Field(..., description="Always 'type'.") + + +class ComputerActionWait(BaseModel): + type: Literal["wait"] = Field(..., description="Always 'wait'.") + + 
+ComputerAction = Annotated[ + ComputerActionClick + | ComputerActionDoubleClick + | ComputerActionDrag + | ComputerActionKeypress + | ComputerActionMove + | ComputerActionScreenshot + | ComputerActionScroll + | ComputerActionType + | ComputerActionWait, + Field(discriminator="type"), +] + + +class ComputerPendingSafetyCheck(BaseModel): + id: str = Field(..., description="The ID of the pending safety check.") + code: str | None = Field(default=None, description="The type of the pending safety check.") + message: str | None = Field(default=None, description="Details about the pending safety check.") + + +class ComputerToolCall(BaseModel): + type: Literal["computer_call"] = Field(..., description="The type of the tool call.") + id: str = Field(..., description="The unique ID of the computer call.") + action: ComputerAction = Field(..., description="The action performed by the computer tool.") + call_id: str = Field(..., description="An identifier used when responding to the tool call with output.") + pending_safety_checks: list[ComputerPendingSafetyCheck] = Field( + default_factory=list, description="The pending safety checks." + ) + status: Literal["in_progress", "completed", "incomplete"] = Field(..., description="The status of the tool call.") + + +# --- Image Generation --- + + +class ImageGenerationCall(BaseModel): + type: Literal["image_generation_call"] = Field(..., description="The type of the output item.") + id: str = Field(..., description="The unique ID of the image generation call.") + result: str | None = Field(default=None, description="The generated image encoded in base64.") + status: Literal["in_progress", "completed", "generating", "failed"] = Field( + ..., description="The status of the image generation call." 
+ ) + + +# --- Local Shell --- + + +class LocalShellCallAction(BaseModel): + command: list[str] = Field(..., description="The command to run.") + env: dict[str, str] = Field(default_factory=dict, description="Environment variables.") + type: Literal["exec"] = Field(..., description="Always 'exec'.") + timeout_ms: int | None = Field(default=None, description="Optional timeout in milliseconds.") + user: str | None = Field(default=None, description="Optional user to run the command as.") + working_directory: str | None = Field(default=None, description="Optional working directory.") + + +class LocalShellCall(BaseModel): + type: Literal["local_shell_call"] = Field(..., description="The type of the output item.") + id: str = Field(..., description="The unique ID of the local shell call.") + action: LocalShellCallAction = Field(..., description="The shell command to execute.") + call_id: str = Field(..., description="The unique ID of the tool call generated by the model.") + status: Literal["in_progress", "completed", "incomplete"] = Field(..., description="The status of the call.") + + +# --- Function Shell --- + + +class FunctionShellToolCall(BaseModel): + type: Literal["shell_call"] = Field(..., description="The type of the output item.") + call_id: str = Field(..., description="The tool call ID.") + id: str | None = Field(default=None, description="The unique ID.") + status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None, description="The status.") + model_config = ConfigDict(extra="allow") + + +# --- Apply Patch --- + + +class ApplyPatchToolCall(BaseModel): + type: Literal["apply_patch_call"] = Field(..., description="The type of the output item.") + call_id: str = Field(..., description="The tool call ID.") + id: str | None = Field(default=None, description="The unique ID.") + status: Literal["in_progress", "completed"] = Field(..., description="The status.") + model_config = ConfigDict(extra="allow") + + +# --- MCP --- + + +class 
McpListToolsTool(BaseModel): + input_schema: dict[str, Any] = Field(..., description="The JSON schema describing the tool's input.") + name: str = Field(..., description="The name of the tool.") + annotations: dict[str, Any] | None = Field(default=None, description="Additional annotations about the tool.") + description: str | None = Field(default=None, description="The description of the tool.") + + +class McpListTools(BaseModel): + type: Literal["mcp_list_tools"] = Field(..., description="The type of the output item.") + id: str = Field(..., description="The unique ID of the list.") + server_label: str = Field(..., description="The label of the MCP server.") + tools: list[McpListToolsTool] = Field(..., description="The tools available on the server.") + error: str | None = Field(default=None, description="Error message if the server could not list tools.") + + +class McpApprovalRequest(BaseModel): + type: Literal["mcp_approval_request"] = Field(..., description="The type of the output item.") + id: str = Field(..., description="The unique ID of the approval request.") + arguments: str = Field(..., description="A JSON string of arguments for the tool.") + name: str = Field(..., description="The name of the tool to run.") + server_label: str = Field(..., description="The label of the MCP server making the request.") + + +class McpCall(BaseModel): + type: Literal["mcp_call"] = Field(..., description="The type of the output item.") + id: str = Field(..., description="The unique ID of the tool call.") + arguments: str = Field(..., description="A JSON string of the arguments passed to the tool.") + name: str = Field(..., description="The name of the tool that was run.") + server_label: str = Field(..., description="The label of the MCP server running the tool.") + approval_request_id: str | None = Field(default=None, description="Unique ID for the approval request.") + error: str | None = Field(default=None, description="The error from the tool call, if any.") + 
output: str | None = Field(default=None, description="The output from the tool call.") + status: Literal["in_progress", "completed", "incomplete", "calling", "failed"] | None = Field( + default=None, description="The status." + ) + + +# --- Custom Tool Call (response output) --- + + +class ResponseCustomToolCall(BaseModel): + type: Literal["custom_tool_call"] = Field(..., description="The type of the output item.") + call_id: str = Field(..., description="An identifier used to map this call to output.") + name: str = Field(..., description="The name of the custom tool being called.") + input: str = Field(..., description="The input for the custom tool call.") + id: str | None = Field(default=None, description="The unique ID of the custom tool call.") + + +# ============================================================================= +# ResponseOutputItem union +# ============================================================================= + +# Note: Using plain union (no discriminator) to allow for dict fallback flexibility +ResponseOutputItem = ( + ResponseOutputMessage + | FunctionToolCall + | FileSearchToolCall + | WebSearchToolCall + | CodeInterpreterToolCall + | ComputerToolCall + | ReasoningItem + | ImageGenerationCall + | LocalShellCall + | FunctionShellToolCall + | ApplyPatchToolCall + | McpCall + | McpListTools + | McpApprovalRequest + | ResponseCustomToolCall + | dict[str, Any] +) + +# ============================================================================= +# Usage Statistics +# ============================================================================= + + +class InputTokensDetails(BaseModel): + cached_tokens: int = Field(..., description="The number of tokens that were retrieved from the cache.") + + +class OutputTokensDetails(BaseModel): + reasoning_tokens: int = Field(..., description="The number of reasoning tokens.") + + +class ResponseUsage(BaseModel): + input_tokens: int = Field(..., description="The number of input tokens.") + 
input_tokens_details: InputTokensDetails = Field(..., description="A detailed breakdown of the input tokens.") + output_tokens: int = Field(..., description="The number of output tokens.") + output_tokens_details: OutputTokensDetails = Field(..., description="A detailed breakdown of the output tokens.") + total_tokens: int = Field(..., description="The total number of tokens used.") + + +# ============================================================================= +# Error Types +# ============================================================================= + + +class ResponseError(BaseModel): + code: str = Field(..., description="The error code.") + message: str = Field(..., description="A human-readable error message.") + + +# ============================================================================= +# Supporting Info Types +# ============================================================================= + + +class ToolInfo(BaseModel): + type: str = Field(..., description="The type of the tool.") + model_config = ConfigDict(extra="allow") + + +class ConversationInfo(BaseModel): + id: str = Field(..., description="The unique ID of the conversation.") + + +class ResponsePromptInfo(BaseModel): + id: str = Field(..., description="The unique ID of the prompt template.") + variables: dict[str, str] | None = Field(default=None, description="Variables used in the prompt template.") + + +class IncompleteDetails(BaseModel): + reason: Literal["max_output_tokens", "content_filter"] | None = Field( + default=None, description="The reason why the response is incomplete." 
+ ) + + +# ============================================================================= +# Main Response Class +# ============================================================================= + + +class ResponsesResponse(BaseModel): + """OpenAI Responses API response object.""" + + id: str = Field(..., description="Unique identifier for this Response.") + object: Literal["response"] = Field(..., description="The object type - always 'response'.") + created_at: float = Field(..., description="Unix timestamp (in seconds) of when this Response was created.") + model: str = Field(..., description="Model ID used to generate the response.") + output: list[ResponseOutputItem] = Field(..., description="An array of content items generated by the model.") + parallel_tool_calls: bool = Field(..., description="Whether the model can run tool calls in parallel.") + tool_choice: Literal["none", "auto", "required"] | dict[str, Any] = Field( + ..., description="How the model should select which tool to use." + ) + tools: list[ToolInfo] = Field(..., description="An array of tools the model may call.") + status: Literal["completed", "failed", "in_progress", "cancelled", "queued", "incomplete"] | None = Field( + default=None, description="The status of the response generation." + ) + error: ResponseError | None = Field(default=None, description="An error object if the model fails.") + incomplete_details: IncompleteDetails | None = Field( + default=None, description="Details about why the response is incomplete." + ) + instructions: str | list[dict[str, Any]] | None = Field( + default=None, description="A system (or developer) message." 
+ ) + metadata: dict[str, str] | None = Field(default=None, description="Key-value pairs for additional information.") + temperature: float | None = Field(default=None, description="Sampling temperature (0-2).") + top_p: float | None = Field(default=None, description="Nucleus sampling parameter.") + background: bool | None = Field(default=None, description="Whether to run in the background.") + completed_at: float | None = Field( + default=None, description="Unix timestamp (in seconds) of when this Response was completed." + ) + conversation: ConversationInfo | None = Field( + default=None, description="The conversation this response belongs to." + ) + max_output_tokens: int | None = Field(default=None, description="Upper bound for generated tokens.") + max_tool_calls: int | None = Field(default=None, description="The maximum number of calls to built-in tools.") + previous_response_id: str | None = Field(default=None, description="The ID of the previous response.") + prompt: ResponsePromptInfo | None = Field(default=None, description="Reference to a prompt template.") + prompt_cache_key: str | None = Field(default=None, description="Used for prompt caching optimization.") + prompt_cache_retention: Literal["in-memory", "24h"] | None = Field( + default=None, description="Retention policy for prompt cache." + ) + reasoning: dict[str, Any] | None = Field(default=None, description="Configuration for reasoning models.") + safety_identifier: str | None = Field(default=None, description="Identifier for abuse detection.") + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = Field( + default=None, description="Processing type used for serving the request." 
+ ) + text: dict[str, Any] | None = Field(default=None, description="Configuration for text response.") + top_logprobs: int | None = Field(default=None, description="Number of most likely tokens to return (0-20).") + truncation: Literal["auto", "disabled"] | None = Field(default=None, description="Truncation strategy.") + usage: ResponseUsage | None = Field(default=None, description="Token usage details.") + user: str | None = Field(default=None, description="Deprecated: use safety_identifier and prompt_cache_key.") + + session_id: str | None = Field(default=None, description="Sequrity session ID.") + + model_config = ConfigDict(extra="ignore") + + @property + def output_text(self) -> str: + """Convenience property that aggregates all output_text items from the output list.""" + texts: list[str] = [] + for item in self.output: + if isinstance(item, ResponseOutputMessage): + for content in item.content: + if isinstance(content, OutputText): + texts.append(content.text) + elif isinstance(item, dict) and item.get("type") == "message": + for content in item.get("content", []): + if isinstance(content, dict) and content.get("type") == "output_text": + texts.append(content.get("text", "")) + return "".join(texts) diff --git a/test/control/test_responses.py b/test/control/test_responses.py new file mode 100644 index 0000000..e9e059c --- /dev/null +++ b/test/control/test_responses.py @@ -0,0 +1,268 @@ +import pytest + +from sequrity import SequrityClient +from sequrity.control import FeaturesHeader, FineGrainedConfigHeader, FsmOverrides, SecurityPolicyHeader +from sequrity.types.enums import LlmServiceProvider +from sequrity_unittest.config import get_test_config + + +class TestResponses: + def setup_method(self): + self.test_config = get_test_config() + self.sequrity_client = SequrityClient( + api_key=self.test_config.api_key, base_url=self.test_config.base_url, timeout=300 + ) + + @pytest.mark.parametrize( + "service_provider", + [LlmServiceProvider.OPENAI, 
LlmServiceProvider.SEQURITY_AZURE], + ) + def test_minimal_no_headers(self, service_provider: LlmServiceProvider): + """Truly minimal request — no config headers at all.""" + response = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(service_provider), + input="What is the largest prime number below 100?", + llm_api_key=self.test_config.get_llm_api_key(service_provider), + provider=service_provider, + fine_grained_config=FineGrainedConfigHeader(fsm=FsmOverrides(enabled_internal_tools=[])), + ) + + assert response is not None + assert response.id is not None + assert response.object == "response" + assert response.status == "completed" + assert len(response.output) > 0 + assert response.output_text is not None + assert "97" in response.output_text + + @pytest.mark.parametrize( + "service_provider", + [LlmServiceProvider.OPENAI, LlmServiceProvider.SEQURITY_AZURE], + ) + def test_dual_llm_multi_turn(self, service_provider: LlmServiceProvider): + features_header = FeaturesHeader.dual_llm() + config_header = FineGrainedConfigHeader(fsm=FsmOverrides(max_n_turns=5, enabled_internal_tools=[])) + + # First turn: ask model to book a flight + response = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(service_provider), + input=[ + { + "role": "user", + "content": "Book me the flight BA263 from New York to San Francisco on 10th June, 2026.", + } + ], + llm_api_key=self.test_config.get_llm_api_key(service_provider), + features=features_header, + fine_grained_config=config_header, + provider=service_provider, + tools=[ + { + "type": "function", + "name": "book_flight", + "description": "Books a flight with given flight number, origin, destination and date. 
Returns a booking reference number (str).", + "parameters": { + "type": "object", + "properties": { + "flight_number": {"type": "string", "description": "The flight number."}, + "origin": {"type": "string", "description": "The origin city."}, + "destination": {"type": "string", "description": "The destination city."}, + "date": {"type": "string", "description": "The date of the flight in YYYY-MM-DD format."}, + }, + "required": ["flight_number", "origin", "destination", "date"], + }, + } + ], + ) + print("First response:", response) + assert response is not None + assert response.status == "completed" + assert len(response.output) > 0 + + # Find the function_call in output + from sequrity.types.responses.response import FunctionToolCall + + tool_calls = [item for item in response.output if isinstance(item, FunctionToolCall)] + assert len(tool_calls) > 0 + assert tool_calls[0].name == "book_flight" + assert "BA263" in tool_calls[0].arguments + assert "2026-06-10" in tool_calls[0].arguments + + # Second turn: provide tool result and ask for return flight + second_input = [] + # Add tool result + second_input.append( + { + "type": "function_call_output", + "call_id": tool_calls[0].call_id, + "output": "Flight booked successfully. Your booking reference number is ABC12345.", + } + ) + # Add follow-up message + second_input.append( + { + "role": "user", + "content": "Thanks! Can you also book a return flight (flight number BA289) on 20th June, 2026?", + } + ) + response_2 = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(service_provider), + input=second_input, + llm_api_key=self.test_config.get_llm_api_key(service_provider), + features=features_header, + fine_grained_config=config_header, + provider=service_provider, + tools=[ + { + "type": "function", + "name": "book_flight", + "description": "Books a flight with given flight number, origin, destination and date. 
Returns a booking reference number (str).", + "parameters": { + "type": "object", + "properties": { + "flight_number": {"type": "string", "description": "The flight number."}, + "origin": {"type": "string", "description": "The origin city."}, + "destination": {"type": "string", "description": "The destination city."}, + "date": {"type": "string", "description": "The date of the flight in YYYY-MM-DD format."}, + }, + "required": ["flight_number", "origin", "destination", "date"], + }, + } + ], + previous_response_id=response.id, + ) + print("Second response:", response_2) + assert response_2 is not None + assert response_2.status == "completed" + tool_calls_2 = [item for item in response_2.output if isinstance(item, FunctionToolCall)] + assert len(tool_calls_2) > 0 + assert tool_calls_2[0].name == "book_flight" + assert "BA289" in tool_calls_2[0].arguments + assert "2026-06-20" in tool_calls_2[0].arguments + + # Third turn: provide tool result + third_input = [] + third_input.append( + { + "type": "function_call_output", + "call_id": tool_calls_2[0].call_id, + "output": "Return flight booked successfully. 
Your booking reference number is XYZ67890.", + } + ) + response_3 = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(service_provider), + input=third_input, + llm_api_key=self.test_config.get_llm_api_key(service_provider), + features=features_header, + fine_grained_config=config_header, + provider=service_provider, + previous_response_id=response_2.id, + ) + assert response_3 is not None + assert response_3.status == "completed" + assert len(response_3.output) > 0 + assert response_3.output_text is not None + print("Third response content:", response_3.output_text) + + @pytest.mark.parametrize( + "service_provider", + [LlmServiceProvider.OPENAI, LlmServiceProvider.SEQURITY_AZURE], + ) + def test_dual_llm_policy_enforcement(self, service_provider: LlmServiceProvider): + features_header = FeaturesHeader.dual_llm() + sqrt_codes = r""" + tool "load_applicant_profile" -> @tags |= {"internal_use_only", "tool/load_applicant_profile"}; + tool "send_email" { + must deny when body.tags superset of {"internal_use_only"} and (not to.value in {str matching r".*@trustedcorp\.com"}); + } + """ + policy_header = SecurityPolicyHeader.dual_llm(codes=sqrt_codes) + config_header = FineGrainedConfigHeader( + fsm=FsmOverrides(max_n_turns=1, retry_on_policy_violation=False, enabled_internal_tools=[]) + ) + + tools = [ + { + "type": "function", + "name": "load_applicant_profile", + "description": "Loads the profile of an applicant given their applicant ID. 
Returns the profile information as a string.", + "parameters": { + "type": "object", + "properties": { + "applicant_id": { + "type": "string", + "description": "The unique identifier of the applicant.", + }, + }, + "required": ["applicant_id"], + }, + }, + { + "type": "function", + "name": "send_email", + "description": "Sends an email to the specified recipient with the given subject and body.", + "parameters": { + "type": "object", + "properties": { + "to": {"type": "string", "description": "The recipient's email address."}, + "subject": {"type": "string", "description": "The subject of the email."}, + "body": {"type": "string", "description": "The body content of the email."}, + }, + "required": ["to", "subject", "body"], + }, + }, + ] + + # First turn: ask to load profile and send to untrusted email + response = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(service_provider), + input=[{"role": "user", "content": "Load 'applicant-829''s profile and send it to hr@gmail.com"}], + llm_api_key=self.test_config.get_llm_api_key(service_provider), + features=features_header, + security_policy=policy_header, + fine_grained_config=config_header, + provider=service_provider, + tools=tools, + ) + print("Response:", response) + assert response is not None + assert response.status == "completed" + assert len(response.output) > 0 + + # Find the function_call in output — should be load_applicant_profile + from sequrity.types.responses.response import FunctionToolCall + + tool_calls = [item for item in response.output if isinstance(item, FunctionToolCall)] + assert len(tool_calls) > 0 + assert tool_calls[0].name == "load_applicant_profile" + assert "applicant-829" in tool_calls[0].arguments + + # Simulate tool execution and provide result + second_input = [] + second_input.append( + { + "type": "function_call_output", + "call_id": tool_calls[0].call_id, + "output": "Applicant Profile: Name: John Doe, Experience: 5 years in software 
engineering.", + } + ) + + response_2 = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(service_provider), + input=second_input, + llm_api_key=self.test_config.get_llm_api_key(service_provider), + features=features_header, + security_policy=policy_header, + fine_grained_config=config_header, + provider=service_provider, + tools=tools, + previous_response_id=response.id, + ) + print("Second Response:", response_2) + assert response_2 is not None + assert response_2.status == "failed" + assert len(response_2.output) > 0 + # The model should refuse to send due to policy enforcement + assert response_2.output_text is not None + print("Second response content:", response_2.output_text) + assert "'send_email' is denied" in response_2.output_text From ce507fa1dd7d54e4d41ba4ba565458b9133073b5 Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 18:45:12 +0000 Subject: [PATCH 02/11] streaming support --- src/sequrity/__init__.py | 6 + src/sequrity/control/__init__.py | 4 + src/sequrity/control/_stream.py | 115 ++++++++ src/sequrity/control/_transport.py | 191 ++++++++++--- src/sequrity/control/resources/chat.py | 120 +++++++- src/sequrity/control/resources/messages.py | 148 +++++++++- src/sequrity/control/resources/responses.py | 184 ++++++++++++- src/sequrity/types/chat_completion/stream.py | 118 ++++++++ src/sequrity/types/messages/stream.py | 196 ++++++++++++++ src/sequrity/types/responses/stream.py | 271 +++++++++++++++++++ test/control/test_streaming.py | 154 +++++++++++ 11 files changed, 1439 insertions(+), 68 deletions(-) create mode 100644 src/sequrity/control/_stream.py create mode 100644 src/sequrity/types/chat_completion/stream.py create mode 100644 src/sequrity/types/messages/stream.py create mode 100644 src/sequrity/types/responses/stream.py create mode 100644 test/control/test_streaming.py diff --git a/src/sequrity/__init__.py b/src/sequrity/__init__.py index f45b541..e1286d2 100644 --- 
a/src/sequrity/__init__.py +++ b/src/sequrity/__init__.py @@ -21,10 +21,13 @@ # Universal provider request/response types from .types.chat_completion.request import ChatCompletionRequest from .types.chat_completion.response import ChatCompletionResponse +from .types.chat_completion.stream import ChatCompletionChunk from .types.messages.request import AnthropicMessageRequest from .types.messages.response import AnthropicMessageResponse +from .types.messages.stream import AnthropicStreamEvent from .types.responses.request import ResponsesRequest from .types.responses.response import ResponsesResponse +from .types.responses.stream import OpenAiResponseStreamEvent try: from ._version import __version__ @@ -47,10 +50,13 @@ # Universal request/response types "ChatCompletionRequest", "ChatCompletionResponse", + "ChatCompletionChunk", "AnthropicMessageRequest", "AnthropicMessageResponse", + "AnthropicStreamEvent", "ResponsesRequest", "ResponsesResponse", + "OpenAiResponseStreamEvent", # Version "__version__", ] diff --git a/src/sequrity/control/__init__.py b/src/sequrity/control/__init__.py index 3867446..56bd36e 100644 --- a/src/sequrity/control/__init__.py +++ b/src/sequrity/control/__init__.py @@ -24,6 +24,7 @@ """ from ._config import ControlConfig +from ._stream import AsyncStream, SyncStream from .types.dual_llm_response import ( ErrorInfo, MetaData, @@ -62,6 +63,9 @@ "ValueWithMeta", "ErrorInfo", "ResponseContentJsonSchema", + # Streaming + "SyncStream", + "AsyncStream", # Policy generation "PolicyGenRequest", "PolicyGenResponse", diff --git a/src/sequrity/control/_stream.py b/src/sequrity/control/_stream.py new file mode 100644 index 0000000..bd36632 --- /dev/null +++ b/src/sequrity/control/_stream.py @@ -0,0 +1,115 @@ +"""Generic SSE stream wrappers for typed chunk iteration.""" + +from __future__ import annotations + +import json +from typing import AsyncIterator, Generic, Iterator, TypeVar + +import httpx +from pydantic import TypeAdapter + +_T = TypeVar("_T") + 
+ +class SyncStream(Generic[_T]): + """Wraps an httpx streaming response, parses SSE lines, and yields typed chunks. + + Usage:: + + stream = SyncStream(response, ChatCompletionChunk) + for chunk in stream: + print(chunk) + stream.close() + + Or as a context manager:: + + with SyncStream(response, ChatCompletionChunk) as stream: + for chunk in stream: + print(chunk) + """ + + def __init__( + self, + response: httpx.Response, + chunk_type: type[_T], + *, + session_id: str | None = None, + ) -> None: + self._response = response + self._adapter = TypeAdapter(chunk_type) + self.session_id = session_id + + def __iter__(self) -> Iterator[_T]: + for line in self._response.iter_lines(): + chunk = _parse_sse_line(line, self._adapter) + if chunk is not None: + yield chunk + + def __enter__(self) -> SyncStream[_T]: + return self + + def __exit__(self, *_args: object) -> None: + self.close() + + def close(self) -> None: + """Close the underlying HTTP response.""" + self._response.close() + + +class AsyncStream(Generic[_T]): + """Async variant of :class:`SyncStream`. 
+ + Usage:: + + async with AsyncStream(response, ChatCompletionChunk) as stream: + async for chunk in stream: + print(chunk) + """ + + def __init__( + self, + response: httpx.Response, + chunk_type: type[_T], + *, + session_id: str | None = None, + ) -> None: + self._response = response + self._adapter = TypeAdapter(chunk_type) + self.session_id = session_id + + async def __aiter__(self) -> AsyncIterator[_T]: + async for line in self._response.aiter_lines(): + chunk = _parse_sse_line(line, self._adapter) + if chunk is not None: + yield chunk + + async def __aenter__(self) -> AsyncStream[_T]: + return self + + async def __aexit__(self, *_args: object) -> None: + await self.aclose() + + async def aclose(self) -> None: + """Close the underlying HTTP response.""" + await self._response.aclose() + + +def _parse_sse_line(line: str, adapter: TypeAdapter[_T]) -> _T | None: + """Parse a single SSE line and return a validated chunk, or None if the line should be skipped.""" + # Skip empty lines, comment lines, and event type lines + if not line or line.startswith(":") or line.startswith("event:"): + return None + + # Strip "data: " prefix + if line.startswith("data: "): + line = line[6:] + elif line.startswith("data:"): + line = line[5:] + else: + return None + + # Skip the [DONE] sentinel + if line.strip() == "[DONE]": + return None + + return adapter.validate_python(json.loads(line)) diff --git a/src/sequrity/control/_transport.py b/src/sequrity/control/_transport.py index 8489e1d..2c180e5 100644 --- a/src/sequrity/control/_transport.py +++ b/src/sequrity/control/_transport.py @@ -57,6 +57,37 @@ def build_policy_gen_url(self, request_type: str) -> str: """Build the policy generation endpoint URL for the given request type.""" return build_policy_gen_url(self._base_url, request_type) + # -- Header building (shared) -------------------------------------------- + + def _build_headers( + self, + *, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | 
None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> dict[str, str]: + eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) + eff_features = _resolve(features, self._config.features) + eff_policy = _resolve(security_policy, self._config.security_policy) + eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) + eff_session = _resolve(session_id, None) + + return build_sequrity_headers( + api_key=self._api_key, + llm_api_key=eff_llm_key, + features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, + policy=eff_policy.dump_for_headers(mode="json_str") if eff_policy else None, + config=eff_config.dump_for_headers(mode="json_str") if eff_config else None, + session_id=eff_session, + ) + + def _track_session(self, response: httpx.Response) -> None: + new_session = response.headers.get("X-Session-ID") + if new_session: + self._session_id = new_session + # -- Request execution --------------------------------------------------- def request( @@ -72,15 +103,6 @@ def request( ) -> httpx.Response: """POST *payload* as JSON to *url* with merged Sequrity headers. - Args: - url: Fully-qualified endpoint URL. - payload: JSON-serializable request body. - llm_api_key: LLM provider key override (``NOT_GIVEN`` -> config default). - features: ``FeaturesHeader`` override (``NOT_GIVEN`` -> config default). - security_policy: ``SecurityPolicyHeader`` override. - fine_grained_config: ``FineGrainedConfigHeader`` override. - session_id: Explicit session ID override. ``NOT_GIVEN`` uses self._session_id, which is auto-updated from responses. - Returns: The raw ``httpx.Response`` (status already validated). @@ -88,19 +110,12 @@ def request( SequrityAPIError: On HTTP 4xx/5xx responses. SequrityConnectionError: On network failures. 
""" - eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) - eff_features = _resolve(features, self._config.features) - eff_policy = _resolve(security_policy, self._config.security_policy) - eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) - eff_session = _resolve(session_id, self._session_id) - - headers = build_sequrity_headers( - api_key=self._api_key, - llm_api_key=eff_llm_key, - features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, - policy=eff_policy.dump_for_headers(mode="json_str") if eff_policy else None, - config=eff_config.dump_for_headers(mode="json_str") if eff_config else None, - session_id=eff_session, + headers = self._build_headers( + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, ) try: @@ -111,11 +126,51 @@ def request( if response.status_code >= 400: raise SequrityAPIError.from_response(response) - # Auto-track session ID from response - new_session = response.headers.get("X-Session-ID") - if new_session: - self._session_id = new_session + self._track_session(response) + return response + def stream_request( + self, + *, + url: str, + payload: dict, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """Open a streaming POST request. + + Returns the raw ``httpx.Response`` in streaming mode. The caller + is responsible for closing the response (typically via a + :class:`SyncStream` wrapper). + + Raises: + SequrityAPIError: On HTTP 4xx/5xx responses. + SequrityConnectionError: On network failures. 
+ """ + headers = self._build_headers( + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + + request = self._http.build_request("POST", url, json=payload, headers=headers) + + try: + response = self._http.send(request, stream=True) + except httpx.ConnectError as exc: + raise SequrityConnectionError(str(exc)) from exc + + if response.status_code >= 400: + response.read() # consume body for error message + response.close() + raise SequrityAPIError.from_response(response) + + self._track_session(response) return response @@ -146,26 +201,24 @@ def build_url( def build_policy_gen_url(self, request_type: str) -> str: return build_policy_gen_url(self._base_url, request_type) - async def request( + # -- Header building (shared) -------------------------------------------- + + def _build_headers( self, *, - url: str, - payload: dict, llm_api_key: str | None | _NotGiven = NOT_GIVEN, features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, - ) -> httpx.Response: - """Async variant of :meth:`ControlSyncTransport.request`.""" + ) -> dict[str, str]: eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) eff_features = _resolve(features, self._config.features) eff_policy = _resolve(security_policy, self._config.security_policy) eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) - eff_session = _resolve(session_id, self._session_id) - headers = build_sequrity_headers( + return build_sequrity_headers( api_key=self._api_key, llm_api_key=eff_llm_key, features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, @@ -174,6 +227,31 @@ async def request( session_id=eff_session, ) + def _track_session(self, response: httpx.Response) -> None: 
+ new_session = response.headers.get("X-Session-ID") + if new_session: + self._session_id = new_session + + async def request( + self, + *, + url: str, + payload: dict, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """Async variant of :meth:`ControlSyncTransport.request`.""" + headers = self._build_headers( + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + try: response = await self._http.post(url, json=payload, headers=headers) except httpx.ConnectError as exc: @@ -182,8 +260,49 @@ async def request( if response.status_code >= 400: raise SequrityAPIError.from_response(response) - new_session = response.headers.get("X-Session-ID") - if new_session: - self._session_id = new_session + self._track_session(response) + return response + + async def stream_request( + self, + *, + url: str, + payload: dict, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> httpx.Response: + """Open an async streaming POST request. + + Returns the raw ``httpx.Response`` in streaming mode. The caller + is responsible for closing the response (typically via an + :class:`AsyncStream` wrapper). + + Raises: + SequrityAPIError: On HTTP 4xx/5xx responses. + SequrityConnectionError: On network failures. 
+ """ + headers = self._build_headers( + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + + request = self._http.build_request("POST", url, json=payload, headers=headers) + + try: + response = await self._http.send(request, stream=True) + except httpx.ConnectError as exc: + raise SequrityConnectionError(str(exc)) from exc + + if response.status_code >= 400: + await response.aread() # consume body for error message + await response.aclose() + raise SequrityAPIError.from_response(response) + self._track_session(response) return response diff --git a/src/sequrity/control/resources/chat.py b/src/sequrity/control/resources/chat.py index db3b0f2..37175bc 100644 --- a/src/sequrity/control/resources/chat.py +++ b/src/sequrity/control/resources/chat.py @@ -2,10 +2,14 @@ from __future__ import annotations +from typing import Literal, overload + from ..._sentinel import NOT_GIVEN, _NotGiven from ...types.chat_completion.request import ChatCompletionRequest, Message, ReasoningEffort, ResponseFormat, Tool from ...types.chat_completion.response import ChatCompletionResponse +from ...types.chat_completion.stream import ChatCompletionChunk from ...types.enums import LlmServiceProvider, LlmServiceProviderStr, RestApiType +from .._stream import AsyncStream, SyncStream from .._transport import ControlAsyncTransport, ControlSyncTransport from ..types.headers import FeaturesHeader, FineGrainedConfigHeader, SecurityPolicyHeader @@ -16,6 +20,50 @@ class ChatResource: def __init__(self, transport: ControlSyncTransport) -> None: self._transport = transport + @overload + def create( + self, + messages: list[Message | dict], + model: str, + *, + temperature: float | None = None, + top_p: float | None = None, + tools: list[Tool | dict] | None = None, + stream: Literal[True], + seed: int | None = None, + reasoning_effort: ReasoningEffort | None = None, + response_format: ResponseFormat | 
None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> SyncStream[ChatCompletionChunk]: ... + + @overload + def create( + self, + messages: list[Message | dict], + model: str, + *, + temperature: float | None = None, + top_p: float | None = None, + tools: list[Tool | dict] | None = None, + stream: Literal[False] | None = None, + seed: int | None = None, + reasoning_effort: ReasoningEffort | None = None, + response_format: ResponseFormat | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> ChatCompletionResponse: ... + def create( self, messages: list[Message | dict], @@ -37,7 +85,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse: + ) -> ChatCompletionResponse | SyncStream[ChatCompletionChunk]: """Send a chat completion request through Sequrity's secure orchestrator. Args: @@ -46,7 +94,8 @@ def create( temperature: Sampling temperature. top_p: Nucleus sampling parameter. tools: List of tools available to the model. - stream: Whether to stream the response. + stream: Whether to stream the response. 
When ``True``, returns a + :class:`SyncStream` of :class:`ChatCompletionChunk` objects. seed: Random seed for reproducibility. reasoning_effort: Reasoning effort level for supported models. response_format: Response format specification. @@ -59,7 +108,8 @@ def create( session_id: Explicit session ID override. Returns: - Parsed ``ChatCompletionResponse`` with ``session_id`` populated. + ``ChatCompletionResponse`` when ``stream`` is ``False``/``None``, + or ``SyncStream[ChatCompletionChunk]`` when ``stream`` is ``True``. """ payload = ChatCompletionRequest.model_validate( { @@ -81,9 +131,7 @@ def create( endpoint_type=endpoint_type, ) - response = self._transport.request( - url=url, - payload=payload, + sequrity_kwargs = dict( llm_api_key=llm_api_key, features=features, security_policy=security_policy, @@ -91,6 +139,11 @@ def create( session_id=session_id, ) + if stream: + response = self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) + return SyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) + + response = self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ChatCompletionResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result @@ -102,6 +155,50 @@ class AsyncChatResource: def __init__(self, transport: ControlAsyncTransport) -> None: self._transport = transport + @overload + async def create( + self, + messages: list[Message | dict], + model: str, + *, + temperature: float | None = None, + top_p: float | None = None, + tools: list[Tool | dict] | None = None, + stream: Literal[True], + seed: int | None = None, + reasoning_effort: ReasoningEffort | None = None, + response_format: ResponseFormat | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: 
SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: ... + + @overload + async def create( + self, + messages: list[Message | dict], + model: str, + *, + temperature: float | None = None, + top_p: float | None = None, + tools: list[Tool | dict] | None = None, + stream: Literal[False] | None = None, + seed: int | None = None, + reasoning_effort: ReasoningEffort | None = None, + response_format: ResponseFormat | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> ChatCompletionResponse: ... 
+ async def create( self, messages: list[Message | dict], @@ -121,7 +218,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = None, - ) -> ChatCompletionResponse: + ) -> ChatCompletionResponse | AsyncStream[ChatCompletionChunk]: """Async variant of :meth:`ChatResource.create`.""" payload = ChatCompletionRequest.model_validate( { @@ -143,9 +240,7 @@ async def create( endpoint_type=endpoint_type, ) - response = await self._transport.request( - url=url, - payload=payload, + sequrity_kwargs = dict( llm_api_key=llm_api_key, features=features, security_policy=security_policy, @@ -153,6 +248,11 @@ async def create( session_id=session_id, ) + if stream: + response = await self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) + return AsyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) + + response = await self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ChatCompletionResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result diff --git a/src/sequrity/control/resources/messages.py b/src/sequrity/control/resources/messages.py index 28c4ae4..dfb7a89 100644 --- a/src/sequrity/control/resources/messages.py +++ b/src/sequrity/control/resources/messages.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Literal +from typing import Literal, overload from ..._sentinel import NOT_GIVEN, _NotGiven from ...types.enums import LlmServiceProvider, LlmServiceProviderStr, RestApiType @@ -17,6 +17,8 @@ ToolParam, ) from ...types.messages.response import AnthropicMessageResponse +from ...types.messages.stream import AnthropicStreamEvent +from .._stream import AsyncStream, SyncStream from .._transport import ControlAsyncTransport, ControlSyncTransport from ..types.headers import FeaturesHeader, 
FineGrainedConfigHeader, SecurityPolicyHeader @@ -27,6 +29,64 @@ class MessagesResource: def __init__(self, transport: ControlSyncTransport) -> None: self._transport = transport + @overload + def create( + self, + messages: list[MessageParam | dict], + model: str, + max_tokens: int, + *, + system: str | list[TextBlockParam] | list[dict] | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: ToolChoiceParam | dict | None = None, + thinking: ThinkingConfigParam | dict | None = None, + stop_sequences: list[str] | None = None, + stream: Literal[True], + output_config: OutputConfigParam | dict | None = None, + metadata: MetadataParam | dict | None = None, + service_tier: Literal["auto", "standard_only"] | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> SyncStream[AnthropicStreamEvent]: ... 
+ + @overload + def create( + self, + messages: list[MessageParam | dict], + model: str, + max_tokens: int, + *, + system: str | list[TextBlockParam] | list[dict] | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: ToolChoiceParam | dict | None = None, + thinking: ThinkingConfigParam | dict | None = None, + stop_sequences: list[str] | None = None, + stream: Literal[False] | None = None, + output_config: OutputConfigParam | dict | None = None, + metadata: MetadataParam | dict | None = None, + service_tier: Literal["auto", "standard_only"] | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> AnthropicMessageResponse: ... + def create( self, messages: list[MessageParam | dict], @@ -55,7 +115,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, - ) -> AnthropicMessageResponse: + ) -> AnthropicMessageResponse | SyncStream[AnthropicStreamEvent]: """Send an Anthropic Messages API request through Sequrity. Args: @@ -70,7 +130,8 @@ def create( tool_choice: How the model should use the provided tools. thinking: Configuration for extended thinking. stop_sequences: Custom text sequences that cause the model to stop. - stream: Whether to stream the response. + stream: Whether to stream the response. When ``True``, returns a + :class:`SyncStream` of :class:`AnthropicStreamEvent` objects. 
output_config: Output format configuration. metadata: Request metadata. service_tier: Priority or standard capacity selection. @@ -84,7 +145,8 @@ def create( session_id: Explicit session ID override. Returns: - Parsed ``AnthropicMessageResponse`` with ``session_id`` populated. + ``AnthropicMessageResponse`` when ``stream`` is ``False``/``None``, + or ``SyncStream[AnthropicStreamEvent]`` when ``stream`` is ``True``. """ payload = AnthropicMessageRequest.model_validate( { @@ -113,9 +175,7 @@ def create( endpoint_type=endpoint_type, ) - response = self._transport.request( - url=url, - payload=payload, + sequrity_kwargs = dict( llm_api_key=llm_api_key, features=features, security_policy=security_policy, @@ -123,6 +183,11 @@ def create( session_id=session_id, ) + if stream: + response = self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) + return SyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = AnthropicMessageResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result @@ -134,6 +199,64 @@ class AsyncMessagesResource: def __init__(self, transport: ControlAsyncTransport) -> None: self._transport = transport + @overload + async def create( + self, + messages: list[MessageParam | dict], + model: str, + max_tokens: int, + *, + system: str | list[TextBlockParam] | list[dict] | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: ToolChoiceParam | dict | None = None, + thinking: ThinkingConfigParam | dict | None = None, + stop_sequences: list[str] | None = None, + stream: Literal[True], + output_config: OutputConfigParam | dict | None = None, + metadata: MetadataParam | dict | None = None, + service_tier: Literal["auto", "standard_only"] | None = None, + 
timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> AsyncStream[AnthropicStreamEvent]: ... + + @overload + async def create( + self, + messages: list[MessageParam | dict], + model: str, + max_tokens: int, + *, + system: str | list[TextBlockParam] | list[dict] | None = None, + temperature: float | None = None, + top_p: float | None = None, + top_k: int | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: ToolChoiceParam | dict | None = None, + thinking: ThinkingConfigParam | dict | None = None, + stop_sequences: list[str] | None = None, + stream: Literal[False] | None = None, + output_config: OutputConfigParam | dict | None = None, + metadata: MetadataParam | dict | None = None, + service_tier: Literal["auto", "standard_only"] | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> AnthropicMessageResponse: ... 
+ async def create( self, messages: list[MessageParam | dict], @@ -160,7 +283,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = None, - ) -> AnthropicMessageResponse: + ) -> AnthropicMessageResponse | AsyncStream[AnthropicStreamEvent]: """Async variant of :meth:`MessagesResource.create`.""" payload = AnthropicMessageRequest.model_validate( { @@ -189,9 +312,7 @@ async def create( endpoint_type=endpoint_type, ) - response = await self._transport.request( - url=url, - payload=payload, + sequrity_kwargs = dict( llm_api_key=llm_api_key, features=features, security_policy=security_policy, @@ -199,6 +320,11 @@ async def create( session_id=session_id, ) + if stream: + response = await self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) + return AsyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = await self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = AnthropicMessageResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result diff --git a/src/sequrity/control/resources/responses.py b/src/sequrity/control/resources/responses.py index 6934a84..5135741 100644 --- a/src/sequrity/control/resources/responses.py +++ b/src/sequrity/control/resources/responses.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Literal +from typing import Literal, overload from ..._sentinel import NOT_GIVEN, _NotGiven from ...types.enums import LlmServiceProvider, LlmServiceProviderStr, RestApiType @@ -17,6 +17,8 @@ ToolParam, ) from ...types.responses.response import ResponsesResponse +from ...types.responses.stream import OpenAiResponseStreamEvent +from .._stream import AsyncStream, SyncStream from .._transport import ControlAsyncTransport, ControlSyncTransport from ..types.headers import 
FeaturesHeader, FineGrainedConfigHeader, SecurityPolicyHeader @@ -27,6 +29,82 @@ class ResponsesResource: def __init__(self, transport: ControlSyncTransport) -> None: self._transport = transport + @overload + def create( + self, + model: str, + *, + input: str | list | None = None, + instructions: str | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | dict | None = None, + stream: Literal[True], + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + reasoning: ReasoningParam | dict | None = None, + text: ResponseTextConfigParam | dict | None = None, + metadata: dict[str, str] | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + store: bool | None = None, + truncation: Literal["auto", "disabled"] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + background: bool | None = None, + conversation: str | ConversationParam | dict | None = None, + prompt: ResponsePromptParam | dict | None = None, + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None, + stream_options: StreamOptionsParam | dict | None = None, + top_logprobs: int | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> SyncStream[OpenAiResponseStreamEvent]: ... 
+ + @overload + def create( + self, + model: str, + *, + input: str | list | None = None, + instructions: str | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | dict | None = None, + stream: Literal[False] | None = None, + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + reasoning: ReasoningParam | dict | None = None, + text: ResponseTextConfigParam | dict | None = None, + metadata: dict[str, str] | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + store: bool | None = None, + truncation: Literal["auto", "disabled"] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + background: bool | None = None, + conversation: str | ConversationParam | dict | None = None, + prompt: ResponsePromptParam | dict | None = None, + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None, + stream_options: StreamOptionsParam | dict | None = None, + top_logprobs: int | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> ResponsesResponse: ... 
+ def create( self, model: str, @@ -64,7 +142,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, - ) -> ResponsesResponse: + ) -> ResponsesResponse | SyncStream[OpenAiResponseStreamEvent]: """Send a Responses API request through Sequrity's secure orchestrator. Args: @@ -73,7 +151,8 @@ def create( instructions: A system (or developer) message. tools: List of tools available to the model. tool_choice: How the model should select which tool to use. - stream: Whether to stream the response. + stream: Whether to stream the response. When ``True``, returns a + :class:`SyncStream` of :class:`OpenAiResponseStreamEvent` objects. temperature: Sampling temperature (0-2). top_p: Nucleus sampling parameter. max_output_tokens: Upper bound for generated tokens. @@ -102,7 +181,8 @@ def create( session_id: Explicit session ID override. Returns: - Parsed ``ResponsesResponse`` with ``session_id`` populated. + ``ResponsesResponse`` when ``stream`` is ``False``/``None``, + or ``SyncStream[OpenAiResponseStreamEvent]`` when ``stream`` is ``True``. 
""" payload = ResponsesRequest.model_validate( { @@ -140,9 +220,7 @@ def create( endpoint_type=endpoint_type, ) - response = self._transport.request( - url=url, - payload=payload, + sequrity_kwargs = dict( llm_api_key=llm_api_key, features=features, security_policy=security_policy, @@ -150,6 +228,11 @@ def create( session_id=session_id, ) + if stream: + response = self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) + return SyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ResponsesResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result @@ -161,6 +244,82 @@ class AsyncResponsesResource: def __init__(self, transport: ControlAsyncTransport) -> None: self._transport = transport + @overload + async def create( + self, + model: str, + *, + input: str | list | None = None, + instructions: str | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | dict | None = None, + stream: Literal[True], + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + reasoning: ReasoningParam | dict | None = None, + text: ResponseTextConfigParam | dict | None = None, + metadata: dict[str, str] | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + store: bool | None = None, + truncation: Literal["auto", "disabled"] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + background: bool | None = None, + conversation: str | ConversationParam | dict | None = None, + prompt: ResponsePromptParam | dict | None = None, + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None, + stream_options: StreamOptionsParam | dict | None = None, + 
top_logprobs: int | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> AsyncStream[OpenAiResponseStreamEvent]: ... + + @overload + async def create( + self, + model: str, + *, + input: str | list | None = None, + instructions: str | None = None, + tools: list[ToolParam | dict] | None = None, + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | dict | None = None, + stream: Literal[False] | None = None, + temperature: float | None = None, + top_p: float | None = None, + max_output_tokens: int | None = None, + reasoning: ReasoningParam | dict | None = None, + text: ResponseTextConfigParam | dict | None = None, + metadata: dict[str, str] | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + store: bool | None = None, + truncation: Literal["auto", "disabled"] | None = None, + parallel_tool_calls: bool | None = None, + max_tool_calls: int | None = None, + background: bool | None = None, + conversation: str | ConversationParam | dict | None = None, + prompt: ResponsePromptParam | dict | None = None, + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None, + stream_options: StreamOptionsParam | dict | None = None, + top_logprobs: int | None = None, + timeout: float | None = None, + provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + llm_api_key: str | None | _NotGiven = NOT_GIVEN, + features: FeaturesHeader | None | _NotGiven = NOT_GIVEN, + security_policy: SecurityPolicyHeader | None | _NotGiven = 
NOT_GIVEN, + fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, + endpoint_type: str | _NotGiven = NOT_GIVEN, + session_id: str | None | _NotGiven = NOT_GIVEN, + ) -> ResponsesResponse: ... + async def create( self, model: str, @@ -196,7 +355,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, - ) -> ResponsesResponse: + ) -> ResponsesResponse | AsyncStream[OpenAiResponseStreamEvent]: """Async variant of :meth:`ResponsesResource.create`.""" payload = ResponsesRequest.model_validate( { @@ -234,9 +393,7 @@ async def create( endpoint_type=endpoint_type, ) - response = await self._transport.request( - url=url, - payload=payload, + sequrity_kwargs = dict( llm_api_key=llm_api_key, features=features, security_policy=security_policy, @@ -244,6 +401,11 @@ async def create( session_id=session_id, ) + if stream: + response = await self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) + return AsyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = await self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ResponsesResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result diff --git a/src/sequrity/types/chat_completion/stream.py b/src/sequrity/types/chat_completion/stream.py new file mode 100644 index 0000000..d65e455 --- /dev/null +++ b/src/sequrity/types/chat_completion/stream.py @@ -0,0 +1,118 @@ +""" +Pydantic models for OpenAI ChatCompletionChunk streaming types. 
+ +See: openai.types.chat.ChatCompletionChunk +""" + +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + + +# ============================================================================= +# Nested Types +# ============================================================================= + + +class ChunkChoiceDeltaFunctionCall(BaseModel): + """Function call delta within a tool call.""" + + name: str | None = None + arguments: str | None = None + + +class ChunkChoiceDeltaToolCall(BaseModel): + """A single tool call delta within a streaming choice.""" + + index: int + id: str | None = None + type: Literal["function"] | None = None + function: ChunkChoiceDeltaFunctionCall | None = None + + +class ChunkChoiceDelta(BaseModel): + """Delta object within a streaming choice.""" + + role: Literal["assistant"] | None = None + content: str | None = None + refusal: str | None = None + tool_calls: list[ChunkChoiceDeltaToolCall] | None = None + reasoning_content: str | None = Field( + default=None, + description="Reasoning content (non-standard extension used by some providers via OpenAI-compatible API).", + ) + + model_config = ConfigDict(extra="ignore") + + +class ChunkChoiceLogprobs(BaseModel): + """Logprob information for a streaming choice.""" + + content: list[dict] | None = None + refusal: list[dict] | None = None + + model_config = ConfigDict(extra="allow") + + +class ChunkChoice(BaseModel): + """A single choice in a streaming chunk.""" + + index: int + delta: ChunkChoiceDelta + finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] | None = None + logprobs: ChunkChoiceLogprobs | None = None + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Usage (optional, on final chunk when stream_options.include_usage=true) +# ============================================================================= + + +class 
ChunkCompletionTokensDetails(BaseModel): + accepted_prediction_tokens: int | None = None + audio_tokens: int | None = None + reasoning_tokens: int | None = None + rejected_prediction_tokens: int | None = None + + model_config = ConfigDict(extra="ignore") + + +class ChunkPromptTokensDetails(BaseModel): + audio_tokens: int | None = None + cached_tokens: int | None = None + + model_config = ConfigDict(extra="ignore") + + +class ChunkCompletionUsage(BaseModel): + """Usage stats on the final chunk.""" + + prompt_tokens: int + completion_tokens: int + total_tokens: int + completion_tokens_details: ChunkCompletionTokensDetails | None = None + prompt_tokens_details: ChunkPromptTokensDetails | None = None + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Root Chunk Type +# ============================================================================= + + +class ChatCompletionChunk(BaseModel): + """OpenAI ChatCompletionChunk for streaming responses.""" + + id: str + choices: list[ChunkChoice] + created: int + model: str + object: Literal["chat.completion.chunk"] + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = None + system_fingerprint: str | None = None + usage: ChunkCompletionUsage | None = None + + model_config = ConfigDict(extra="ignore") diff --git a/src/sequrity/types/messages/stream.py b/src/sequrity/types/messages/stream.py new file mode 100644 index 0000000..9fa2586 --- /dev/null +++ b/src/sequrity/types/messages/stream.py @@ -0,0 +1,196 @@ +""" +Pydantic models for Anthropic RawMessageStreamEvent types. + +See: anthropic.types.RawMessageStreamEvent + +Anthropic uses a discriminated union of 6 event types for streaming, +each carrying different data. 
+""" + +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +# ============================================================================= +# Delta Types (for content_block_delta events) +# ============================================================================= + + +class TextDelta(BaseModel): + type: Literal["text_delta"] = "text_delta" + text: str + + +class InputJSONDelta(BaseModel): + type: Literal["input_json_delta"] = "input_json_delta" + partial_json: str + + +class ThinkingDelta(BaseModel): + type: Literal["thinking_delta"] = "thinking_delta" + thinking: str + + +class SignatureDelta(BaseModel): + type: Literal["signature_delta"] = "signature_delta" + signature: str + + +class CitationsDelta(BaseModel): + type: Literal["citations_delta"] = "citations_delta" + citation: dict[str, Any] + + model_config = ConfigDict(extra="allow") + + +type AnthropicContentDelta = Annotated[ + TextDelta | InputJSONDelta | ThinkingDelta | SignatureDelta | CitationsDelta, + Field(discriminator="type"), +] + + +# ============================================================================= +# Content Block Start Types (for content_block_start events) +# ============================================================================= + + +class TextBlockStart(BaseModel): + """TextBlock as it appears in content_block_start (empty text).""" + + type: Literal["text"] = "text" + text: str = "" + citations: list[dict[str, Any]] | None = None + + +class ThinkingBlockStart(BaseModel): + """ThinkingBlock as it appears in content_block_start.""" + + type: Literal["thinking"] = "thinking" + thinking: str = "" + signature: str = "" + + +class RedactedThinkingBlockStart(BaseModel): + """RedactedThinkingBlock as it appears in content_block_start.""" + + type: Literal["redacted_thinking"] = "redacted_thinking" + + +class ToolUseBlockStart(BaseModel): + """ToolUseBlock as it appears in content_block_start (input is always {}).""" + + type: 
Literal["tool_use"] = "tool_use" + id: str + name: str + input: dict[str, Any] = Field(default_factory=dict) + + +class ServerToolUseBlockStart(BaseModel): + """ServerToolUseBlock as it appears in content_block_start.""" + + type: Literal["server_tool_use"] = "server_tool_use" + id: str + name: str + + model_config = ConfigDict(extra="allow") + + +type AnthropicContentBlockStart = Annotated[ + TextBlockStart | ThinkingBlockStart | RedactedThinkingBlockStart | ToolUseBlockStart | ServerToolUseBlockStart, + Field(discriminator="type"), +] + + +# ============================================================================= +# Message Delta Types +# ============================================================================= + + +class MessageDeltaBody(BaseModel): + stop_reason: Literal["end_turn", "max_tokens", "stop_sequence", "tool_use", "pause_turn", "refusal"] | None = None + stop_sequence: str | None = None + + +class MessageDeltaUsage(BaseModel): + output_tokens: int + input_tokens: int | None = None + cache_creation_input_tokens: int | None = None + cache_read_input_tokens: int | None = None + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# The 6 Event Types +# ============================================================================= + + +class RawMessageStartEvent(BaseModel): + """Carries the initial Message object with metadata and usage.""" + + type: Literal["message_start"] = "message_start" + message: dict[str, Any] = Field(description="Full Message object as dict (id, model, role, content, usage, etc.).") + + model_config = ConfigDict(extra="ignore") + + +class RawContentBlockStartEvent(BaseModel): + """A new content block begins.""" + + type: Literal["content_block_start"] = "content_block_start" + index: int + content_block: AnthropicContentBlockStart + + model_config = ConfigDict(extra="ignore") + + +class RawContentBlockDeltaEvent(BaseModel): + """Incremental 
content within a block.""" + + type: Literal["content_block_delta"] = "content_block_delta" + index: int + delta: AnthropicContentDelta + + model_config = ConfigDict(extra="ignore") + + +class RawContentBlockStopEvent(BaseModel): + """A content block has ended.""" + + type: Literal["content_block_stop"] = "content_block_stop" + index: int + + model_config = ConfigDict(extra="ignore") + + +class RawMessageDeltaEvent(BaseModel): + """End-of-stream metadata: stop reason and usage update.""" + + type: Literal["message_delta"] = "message_delta" + delta: MessageDeltaBody + usage: MessageDeltaUsage + + model_config = ConfigDict(extra="ignore") + + +class RawMessageStopEvent(BaseModel): + """Terminal event: stream is complete.""" + + type: Literal["message_stop"] = "message_stop" + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Union Type +# ============================================================================= + +type AnthropicStreamEvent = Annotated[ + RawMessageStartEvent + | RawContentBlockStartEvent + | RawContentBlockDeltaEvent + | RawContentBlockStopEvent + | RawMessageDeltaEvent + | RawMessageStopEvent, + Field(discriminator="type"), +] diff --git a/src/sequrity/types/responses/stream.py b/src/sequrity/types/responses/stream.py new file mode 100644 index 0000000..11c63d8 --- /dev/null +++ b/src/sequrity/types/responses/stream.py @@ -0,0 +1,271 @@ +""" +Pydantic models for OpenAI Responses API streaming events. 
+ +See: openai.types.responses.ResponseStreamEvent + +Events are grouped into: +- Lifecycle: response created / in_progress / completed / failed / incomplete +- Structure: output_item added/done, content_part added/done +- Content: text delta/done, function_call_arguments delta/done +- Reasoning: reasoning_summary_text delta/done, reasoning_summary_part added/done +""" + +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + +# ============================================================================= +# Lifecycle Events +# ============================================================================= + + +class ResponseCreatedEvent(BaseModel): + """Emitted once when the response is first created.""" + + type: Literal["response.created"] = "response.created" + response: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseInProgressEvent(BaseModel): + """Emitted when the response transitions to in_progress.""" + + type: Literal["response.in_progress"] = "response.in_progress" + response: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseCompletedEvent(BaseModel): + """Emitted when the response completes successfully.""" + + type: Literal["response.completed"] = "response.completed" + response: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseFailedEvent(BaseModel): + """Emitted when the response fails.""" + + type: Literal["response.failed"] = "response.failed" + response: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseIncompleteEvent(BaseModel): + """Emitted when the response is incomplete (e.g. 
max_tokens).""" + + type: Literal["response.incomplete"] = "response.incomplete" + response: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Output Item Events +# ============================================================================= + + +class ResponseOutputItemAddedEvent(BaseModel): + """Emitted when a new output item (message, reasoning, function_call) starts.""" + + type: Literal["response.output_item.added"] = "response.output_item.added" + output_index: int + item: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseOutputItemDoneEvent(BaseModel): + """Emitted when an output item is fully completed.""" + + type: Literal["response.output_item.done"] = "response.output_item.done" + output_index: int + item: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Content Part Events (within message output items) +# ============================================================================= + + +class ResponseContentPartAddedEvent(BaseModel): + """Emitted when a new content part starts within a message.""" + + type: Literal["response.content_part.added"] = "response.content_part.added" + item_id: str + output_index: int + content_index: int + part: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseContentPartDoneEvent(BaseModel): + """Emitted when a content part within a message is completed.""" + + type: Literal["response.content_part.done"] = "response.content_part.done" + item_id: str + output_index: int + content_index: int + part: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Text Content 
Events +# ============================================================================= + + +class ResponseTextDeltaEvent(BaseModel): + """Incremental text content within an output_text content part.""" + + type: Literal["response.output_text.delta"] = "response.output_text.delta" + item_id: str + output_index: int + content_index: int + delta: str + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseTextDoneEvent(BaseModel): + """Emitted when an output_text content part is finalized.""" + + type: Literal["response.output_text.done"] = "response.output_text.done" + item_id: str + output_index: int + content_index: int + text: str + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Function Call Events +# ============================================================================= + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + """Incremental function call arguments.""" + + type: Literal["response.function_call_arguments.delta"] = "response.function_call_arguments.delta" + item_id: str + output_index: int + delta: str + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + """Emitted when function call arguments are finalized.""" + + type: Literal["response.function_call_arguments.done"] = "response.function_call_arguments.done" + item_id: str + output_index: int + name: str + arguments: str + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Reasoning Summary Events +# ============================================================================= + + +class ResponseReasoningSummaryPartAddedEvent(BaseModel): + """Emitted when a new reasoning summary part starts.""" + + type: Literal["response.reasoning_summary_part.added"] = 
"response.reasoning_summary_part.added" + item_id: str + output_index: int + summary_index: int + part: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseReasoningSummaryPartDoneEvent(BaseModel): + """Emitted when a reasoning summary part is completed.""" + + type: Literal["response.reasoning_summary_part.done"] = "response.reasoning_summary_part.done" + item_id: str + output_index: int + summary_index: int + part: dict[str, Any] + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseReasoningSummaryTextDeltaEvent(BaseModel): + """Incremental reasoning summary text.""" + + type: Literal["response.reasoning_summary_text.delta"] = "response.reasoning_summary_text.delta" + item_id: str + output_index: int + summary_index: int + delta: str + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +class ResponseReasoningSummaryTextDoneEvent(BaseModel): + """Emitted when reasoning summary text is finalized.""" + + type: Literal["response.reasoning_summary_text.done"] = "response.reasoning_summary_text.done" + item_id: str + output_index: int + summary_index: int + text: str + sequence_number: int + + model_config = ConfigDict(extra="ignore") + + +# ============================================================================= +# Union Type +# ============================================================================= + +type OpenAiResponseStreamEvent = Annotated[ + ResponseCreatedEvent + | ResponseInProgressEvent + | ResponseCompletedEvent + | ResponseFailedEvent + | ResponseIncompleteEvent + | ResponseOutputItemAddedEvent + | ResponseOutputItemDoneEvent + | ResponseContentPartAddedEvent + | ResponseContentPartDoneEvent + | ResponseTextDeltaEvent + | ResponseTextDoneEvent + | ResponseFunctionCallArgumentsDeltaEvent + | ResponseFunctionCallArgumentsDoneEvent + | ResponseReasoningSummaryPartAddedEvent + | ResponseReasoningSummaryPartDoneEvent + | 
ResponseReasoningSummaryTextDeltaEvent + | ResponseReasoningSummaryTextDoneEvent, + Field(discriminator="type"), +] diff --git a/test/control/test_streaming.py b/test/control/test_streaming.py new file mode 100644 index 0000000..5f4f7fe --- /dev/null +++ b/test/control/test_streaming.py @@ -0,0 +1,154 @@ +import pytest + +from sequrity import SequrityClient +from sequrity.control import FineGrainedConfigHeader, FsmOverrides +from sequrity.types.chat_completion.stream import ChatCompletionChunk +from sequrity.types.enums import LlmServiceProvider +from sequrity.types.messages.stream import ( + RawContentBlockDeltaEvent, + RawContentBlockStartEvent, + RawMessageDeltaEvent, + RawMessageStartEvent, + RawMessageStopEvent, +) +from sequrity.types.responses.stream import ResponseCompletedEvent, ResponseCreatedEvent, ResponseTextDeltaEvent +from sequrity_unittest.config import get_test_config + + +class TestChatCompletionStreaming: + def setup_method(self): + self.test_config = get_test_config() + self.sequrity_client = SequrityClient( + api_key=self.test_config.api_key, base_url=self.test_config.base_url, timeout=300 + ) + + @pytest.mark.parametrize( + "service_provider", + [LlmServiceProvider.OPENAI, LlmServiceProvider.SEQURITY_AZURE, LlmServiceProvider.OPENROUTER], + ) + def test_chat_completion_stream(self, service_provider: LlmServiceProvider): + """Basic streaming chat completion returns typed chunks.""" + messages = [{"role": "user", "content": "Say hello in one word."}] + stream = self.sequrity_client.control.chat.create( + messages=messages, + model=self.test_config.get_model_name(service_provider), + llm_api_key=self.test_config.get_llm_api_key(service_provider), + provider=service_provider, + stream=True, + fine_grained_config=FineGrainedConfigHeader(fsm=FsmOverrides(enabled_internal_tools=[])), + ) + + chunks = list(stream) + stream.close() + + assert len(chunks) > 0 + assert all(isinstance(c, ChatCompletionChunk) for c in chunks) + assert chunks[0].object == 
"chat.completion.chunk" + + # At least one chunk should have content + content_parts = [c.choices[0].delta.content for c in chunks if c.choices and c.choices[0].delta.content] + assert len(content_parts) > 0 + + # Last chunk with choices should have a finish_reason + chunks_with_choices = [c for c in chunks if c.choices and c.choices[0].finish_reason] + assert len(chunks_with_choices) > 0 + + @pytest.mark.parametrize( + "service_provider", + [LlmServiceProvider.OPENAI], + ) + def test_chat_completion_stream_session_id(self, service_provider: LlmServiceProvider): + """Streaming response should expose session_id.""" + messages = [{"role": "user", "content": "Hi"}] + stream = self.sequrity_client.control.chat.create( + messages=messages, + model=self.test_config.get_model_name(service_provider), + llm_api_key=self.test_config.get_llm_api_key(service_provider), + provider=service_provider, + stream=True, + fine_grained_config=FineGrainedConfigHeader(fsm=FsmOverrides(enabled_internal_tools=[])), + ) + + # Consume stream + for _ in stream: + pass + stream.close() + + assert stream.session_id is not None + + +class TestMessagesStreaming: + def setup_method(self): + self.test_config = get_test_config() + self.sequrity_client = SequrityClient( + api_key=self.test_config.api_key, base_url=self.test_config.base_url, timeout=300 + ) + + def test_messages_stream(self): + """Basic streaming Anthropic Messages returns typed events.""" + stream = self.sequrity_client.control.messages.create( + messages=[{"role": "user", "content": "Say hello in one word."}], + model=self.test_config.get_model_name(LlmServiceProvider.ANTHROPIC), + max_tokens=100, + llm_api_key=self.test_config.get_llm_api_key(LlmServiceProvider.ANTHROPIC), + provider=LlmServiceProvider.ANTHROPIC, + stream=True, + fine_grained_config=FineGrainedConfigHeader(fsm=FsmOverrides(enabled_internal_tools=[])), + ) + + events = list(stream) + stream.close() + + assert len(events) > 0 + + # Should start with message_start + 
assert isinstance(events[0], RawMessageStartEvent) + + # Should contain content block events + content_starts = [e for e in events if isinstance(e, RawContentBlockStartEvent)] + assert len(content_starts) > 0 + + content_deltas = [e for e in events if isinstance(e, RawContentBlockDeltaEvent)] + assert len(content_deltas) > 0 + + # Should end with message_delta and message_stop + message_deltas = [e for e in events if isinstance(e, RawMessageDeltaEvent)] + assert len(message_deltas) > 0 + + assert isinstance(events[-1], RawMessageStopEvent) + + +class TestResponsesStreaming: + def setup_method(self): + self.test_config = get_test_config() + self.sequrity_client = SequrityClient( + api_key=self.test_config.api_key, base_url=self.test_config.base_url, timeout=300 + ) + + def test_responses_stream(self): + """Basic streaming OpenAI Responses API returns typed events.""" + stream = self.sequrity_client.control.responses.create( + model=self.test_config.get_model_name(LlmServiceProvider.OPENAI), + input="Say hello in one word.", + llm_api_key=self.test_config.get_llm_api_key(LlmServiceProvider.OPENAI), + provider=LlmServiceProvider.OPENAI, + stream=True, + fine_grained_config=FineGrainedConfigHeader(fsm=FsmOverrides(enabled_internal_tools=[])), + ) + + events = list(stream) + stream.close() + + assert len(events) > 0 + + # Should contain response lifecycle events + created_events = [e for e in events if isinstance(e, ResponseCreatedEvent)] + assert len(created_events) > 0 + + # Should contain text delta events + text_deltas = [e for e in events if isinstance(e, ResponseTextDeltaEvent)] + assert len(text_deltas) > 0 + + # Should end with response.completed + completed_events = [e for e in events if isinstance(e, ResponseCompletedEvent)] + assert len(completed_events) > 0 From daf76875b2a7a604a77894915e77c15b288529d0 Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 20:12:01 +0000 Subject: [PATCH 03/11] type check to fix --- docs/control/reference/index.md 
| 2 +- .../reference/rest_api/chat_completion.md | 16 +- .../rest_api/headers/security_config.md | 93 +++-- .../rest_api/headers/security_features.md | 2 +- .../rest_api/headers/security_policy.md | 2 +- docs/control/reference/rest_api/index.md | 37 +- docs/control/reference/rest_api/messages.md | 1 - docs/control/reference/rest_api/responses.md | 253 +++++++++++++ .../reference/sequrity_client/index.md | 2 + .../reference/sequrity_client/responses.md | 181 +++++++++ .../single_llm_vs_dual_llm_features.md | 12 + docs/release_notes.md | 36 +- justfile | 7 +- mkdocs.yml | 2 + src/sequrity/types/responses/request.py | 343 +++++------------- 15 files changed, 669 insertions(+), 320 deletions(-) create mode 100644 docs/control/reference/rest_api/responses.md create mode 100644 docs/control/reference/sequrity_client/responses.md diff --git a/docs/control/reference/index.md b/docs/control/reference/index.md index 56ba9f0..b720056 100644 --- a/docs/control/reference/index.md +++ b/docs/control/reference/index.md @@ -4,7 +4,7 @@ Complete reference documentation for Sequrity Control, including API interfaces, ## Quick Links -- **[REST API](rest_api/index.md)** - HTTP endpoints for chat completions and LangGraph execution +- **[REST API](rest_api/index.md)** - HTTP endpoints for chat completions, responses, messages, and LangGraph execution - **[SequrityClient.control](sequrity_client/index.md)** - Python client API reference - **[SQRT Policy Language](sqrt/index.md)** - Policy language specification and grammar diff --git a/docs/control/reference/rest_api/chat_completion.md b/docs/control/reference/rest_api/chat_completion.md index 3aaac14..d4f5dd9 100644 --- a/docs/control/reference/rest_api/chat_completion.md +++ b/docs/control/reference/rest_api/chat_completion.md @@ -26,7 +26,7 @@ Where `{endpoint_type}` is `chat`, `code`, `agent`, or `lang-graph`. See [URL Pa | `tools` | `array[Tool]` | No | Tools the model may call. See [Tools](#tools). 
| | `stream` | `boolean` | No | If `true`, partial deltas are sent as server-sent events. | | `seed` | `integer` | No | Seed for deterministic sampling (best-effort). | -| `reasoning_effort` | `string` | No | Reasoning effort for reasoning models: `"minimal"`, `"low"`, `"medium"`, `"high"`. | +| `reasoning_effort` | `string` | No | Reasoning effort for reasoning models: `"none"`, `"minimal"`, `"low"`, `"medium"`, `"high"`, `"xhigh"`. | | `response_format` | `object` | No | Output format constraint. See [Response Format](#response-format). | ### Message Types @@ -67,7 +67,6 @@ Messages are distinguished by the `role` field. | `refusal` | `string` | No | Refusal message by the assistant. | | `audio` | `object` | No | Reference to a previous audio response. | | `tool_calls` | `array[ToolCall]` | No | Tool calls generated by the model. | -| `function_call` | `object` | No | **Deprecated.** Use `tool_calls`. | #### Tool Message @@ -77,14 +76,6 @@ Messages are distinguished by the `role` field. | `content` | `string \| array[ContentPartText]` | Yes | The tool result. | | `tool_call_id` | `string` | Yes | ID of the tool call this responds to. | -#### Function Message (deprecated) - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `role` | `"function"` | Yes | | -| `content` | `string \| null` | Yes | The function result. | -| `name` | `string` | Yes | The function name. | - ### Content Parts User messages support multimodal content: @@ -124,7 +115,8 @@ User messages support multimodal content: | `model` | `string` | The model used. | | `choices` | `array[Choice]` | Completion choices. | | `usage` | `CompletionUsage` | Token usage statistics. | -| `session_id` | `string \| null` | Sequrity session ID (also available via `X-Session-ID` response header). | +| `service_tier` | `string \| null` | Service tier used (e.g., `"auto"`, `"default"`, `"flex"`, `"scale"`, `"priority"`). 
| +| `system_fingerprint` | `string \| null` | Backend configuration fingerprint. | ### Choice @@ -132,7 +124,7 @@ User messages support multimodal content: |-------|------|-------------| | `index` | `integer` | Index of this choice. | | `message` | `ResponseMessage` | The generated message. | -| `finish_reason` | `string` | Why generation stopped: `"stop"`, `"length"`, `"tool_calls"`, `"content_filter"`, `"error"`. | +| `finish_reason` | `string` | Why generation stopped: `"stop"`, `"length"`, `"tool_calls"`, `"content_filter"`, `"function_call"`. OpenRouter may also return `"error"`. | | `logprobs` | `object \| null` | Log probability information, if requested. | ### Response Message diff --git a/docs/control/reference/rest_api/headers/security_config.md b/docs/control/reference/rest_api/headers/security_config.md index 70929fa..899bf56 100644 --- a/docs/control/reference/rest_api/headers/security_config.md +++ b/docs/control/reference/rest_api/headers/security_config.md @@ -9,39 +9,41 @@ This header is **optional** and can be used in Headers-Only Mode to fine-tune se ```json { "fsm": { - "min_num_tools_for_filtering": 10, - "clear_session_meta": "never", + "min_num_tools_for_filtering": null, + "clear_session_meta": null, "max_n_turns": null, "history_mismatch_policy": null, "clear_history_every_n_attempts": null, - "disable_rllm": true, + "disable_rllm": null, "enable_multistep_planning": null, "enabled_internal_tools": null, - "prune_failed_steps": false, - "force_to_cache": [], + "prune_failed_steps": null, + "force_to_cache": null, "max_pllm_steps": null, "max_pllm_failed_steps": null, "max_tool_calls_per_step": null, - "reduced_grammar_for_rllm_review": true, - "retry_on_policy_violation": false, + "reduced_grammar_for_rllm_review": null, + "retry_on_policy_violation": null, "wrap_tool_result": null, "detect_tool_errors": null, "detect_tool_error_regex_pattern": null, "detect_tool_error_max_result_length": null, - "strict_tool_result_parsing": null + 
"strict_tool_result_parsing": null, + "tool_result_transform": null }, "prompt": { "pllm": { "flavor": null, "version": null, - "debug_info_level": "normal", + "debug_info_level": null, "clarify_ambiguous_queries": null, "context_var_visibility": null, "query_inline_roles": null, "query_role_name_overrides": null, "query_include_tool_calls": null, "query_include_tool_args": null, - "query_include_tool_results": null + "query_include_tool_results": null, + "custom_instructions": null }, "rllm": { "flavor": null, @@ -56,10 +58,11 @@ This header is **optional** and can be used in Headers-Only Mode to fine-tune se } }, "response_format": { - "strip_response_content": false, - "include_program": false, - "include_policy_check_history": false, - "include_namespace_snapshot": false + "strip_response_content": null, + "stream_thoughts": null, + "include_program": null, + "include_policy_check_history": null, + "include_namespace_snapshot": null } } ``` @@ -84,17 +87,17 @@ All fields are optional and have sensible defaults. | Type | Required | Default | Constraints | |------|----------|---------|-------------| -| `integer` or `null` | No | `10` | >= 2 | +| `integer` or `null` | No | `null` | >= 2 | -Minimum number of registered tools to enable tool-filtering LLM step. Set to `null` to disable. +Minimum number of registered tools to enable tool-filtering LLM step. When not set, the server default is `10`. #### `fsm.clear_session_meta` | Type | Required | Default | |------|----------|---------| -| `string` | No | `"never"` | +| `string` or `null` | No | `null` | -When to clear session meta information: +When to clear session meta information. When not set, the server default is `"never"`. - `"never"`: Never clear - `"every_attempt"`: Clear at the beginning of each PLLM attempt @@ -134,17 +137,17 @@ Single-step mode only. 
Clear all failed step history every N attempts to save to | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `true` | +| `boolean` or `null` | No | `null` | -Whether to skip the response LLM (RLLM) review step. +Whether to skip the response LLM (RLLM) review step. When not set, the server default is `true`. #### `fsm.enable_multistep_planning` | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `false` | +| `boolean` or `null` | No | `null` | -When `false` (single-step), each attempt solves independently. When `true` (multi-step), each step builds on previous. +When `false` (single-step), each attempt solves independently. When `true` (multi-step), each step builds on previous. When not set, the server default is `false`. #### `fsm.enabled_internal_tools` @@ -158,17 +161,17 @@ List of internal tool IDs available to planning LLM. Valid values: `"parse_with_ | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `false` | +| `boolean` or `null` | No | `null` | -Multi-step mode only. Remove failed steps from history after turn completes. +Multi-step mode only. Remove failed steps from history after turn completes. When not set, the server default is `true`. #### `fsm.force_to_cache` | Type | Required | Default | |------|----------|---------| -| `array[string]` | No | `[]` | +| `array[string]` or `null` | No | `null` | -List of tool ID regex patterns to always cache their results regardless of the cache_tool_result setting. +List of tool ID regex patterns to always cache their results regardless of the cache_tool_result setting. When not set, the server default is `[]`. #### `fsm.max_pllm_steps` @@ -198,17 +201,17 @@ Maximum number of tool calls allowed per PLLM attempt. 
If `null`, no limit is en | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `true` | +| `boolean` or `null` | No | `null` | -Whether to paraphrase RLLM output via reduced grammar before feeding back to planning LLM. +Whether to paraphrase RLLM output via reduced grammar before feeding back to planning LLM. When not set, the server default is `true`. #### `fsm.retry_on_policy_violation` | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `false` | +| `boolean` or `null` | No | `null` | -When `true`, allow planning LLM to retry after policy violation. +When `true`, allow planning LLM to retry after policy violation. When not set, the server default is `false`. #### `fsm.wrap_tool_result` @@ -254,6 +257,17 @@ The maximum length of tool result to consider for error detection. Longer result If `true`, only parse external tool results as JSON when the tool declares an output_schema. When `false`, always attempt `json.loads` on tool results. +#### `fsm.tool_result_transform` + +| Type | Required | Default | +|------|----------|---------| +| `string` or `null` | No | `null` | + +Transform applied to tool results before processing: + +- `"none"`: No transformation +- `"codex"`: Apply codex-style transformation to tool results + --- ## Prompt Overrides (`prompt`) @@ -268,7 +282,7 @@ Planning LLM prompt overrides: |-------|------|---------|-------------| | `flavor` | `string` | `null` | Prompt template variant to use (e.g., `"universal"`). | | `version` | `string` | `null` | Prompt template version. Combined with flavor to load template. | -| `debug_info_level` | `string` | `"normal"` | Level of detail for debug/execution information in planning LLM prompt: `"minimal"`, `"normal"`, `"extra"`. | +| `debug_info_level` | `string` | `null` | Level of detail for debug/execution information in planning LLM prompt: `"minimal"`, `"normal"`, `"extra"`. When not set, the server default is `"normal"`. 
| | `clarify_ambiguous_queries` | `boolean` | `null` | Whether planning LLM is allowed to ask for clarification on ambiguous queries. | | `context_var_visibility` | `string` | `null` | The visibility level of context variables in the PLLM prompts: `"none"`, `"basic-notext"`, `"basic-executable"`, `"all-executable"`, `"all"`. | | `query_inline_roles` | `array[string]` | `null` | List of roles whose messages will be inlined into the user query: `"assistant"`, `"tool"`, `"developer"`, `"system"`. | @@ -276,6 +290,7 @@ Planning LLM prompt overrides: | `query_include_tool_calls` | `boolean` | `null` | Whether to include upstream tool calls in inlined query. | | `query_include_tool_args` | `boolean` | `null` | Whether to include arguments of upstream tool calls. | | `query_include_tool_results` | `boolean` | `null` | Whether to include results of upstream tool calls. | +| `custom_instructions` | `string` | `null` | Custom instructions text appended to the planning LLM prompt. | ### `prompt.rllm` @@ -315,15 +330,23 @@ Tool-formulating LLM prompt overrides: | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `false` | +| `boolean` | No | `null` | When `true`, returns only essential result value as plain text, stripping all metadata. +#### `response_format.stream_thoughts` + +| Type | Required | Default | +|------|----------|---------| +| `boolean` or `null` | No | `null` | + +Whether to stream the model's thinking process in the response. + #### `response_format.include_program` | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `false` | +| `boolean` | No | `null` | Whether to include the generated program in the response. @@ -331,7 +354,7 @@ Whether to include the generated program in the response. | Type | Required | Default | |------|----------|---------| -| `boolean` | No | `false` | +| `boolean` | No | `null` | Whether to include policy check results even when there are no violations. 
@@ -339,6 +362,6 @@ Whether to include policy check results even when there are no violations.
 
 | Type | Required | Default |
 |------|----------|---------|
-| `boolean` | No | `false` |
+| `boolean` or `null` | No | `null` |
 
 Whether to include snapshot of all variables after program execution.
 
diff --git a/docs/control/reference/rest_api/headers/security_features.md b/docs/control/reference/rest_api/headers/security_features.md
index 7e5cb25..f2e6595 100644
--- a/docs/control/reference/rest_api/headers/security_features.md
+++ b/docs/control/reference/rest_api/headers/security_features.md
@@ -28,7 +28,7 @@ This header is **required** when using Headers-Only Mode (must be provided toget
 
 | Type | Required | Default |
 |------|----------|---------|
-| `string` | Yes | - |
+| `string` | No | `null` |
 
 The agent architecture to use. Valid values:
 
diff --git a/docs/control/reference/rest_api/headers/security_policy.md b/docs/control/reference/rest_api/headers/security_policy.md
index d1791a4..dbdb6f5 100644
--- a/docs/control/reference/rest_api/headers/security_policy.md
+++ b/docs/control/reference/rest_api/headers/security_policy.md
@@ -69,7 +69,7 @@ Whether to auto-generate policies based on tool metadata and natural language de
 |------|----------|---------|
 | `boolean` or `null` | No | `null` |
 
-Whether to fail fast on first hard denial during policy checks.
+Whether to fail fast on first hard denial during policy checks. When not set (i.e. `null`), the server default is `true`. 
### `presets` diff --git a/docs/control/reference/rest_api/index.md b/docs/control/reference/rest_api/index.md index 53c31f9..e4c5379 100644 --- a/docs/control/reference/rest_api/index.md +++ b/docs/control/reference/rest_api/index.md @@ -10,9 +10,10 @@ Sequrity Control API (`https://api.sequrity.ai/control`) provides the following | URL | Status | Description | |-----|--------| ------------| -| `POST /chat/v1/chat/completions` | :white_check_mark: | OpenAI-compatible chat completions (default provider) | +| `POST /chat/v1/chat/completions` | :white_check_mark: | OpenAI-compatible chat completions (default: [OpenRouter](https://openrouter.ai/)) | | `POST /chat/openai/v1/chat/completions` | :white_check_mark: | Chat completions with [OpenAI](https://openai.com/) | | `POST /chat/openrouter/v1/chat/completions` | :white_check_mark: | Chat completions with [OpenRouter](https://openrouter.ai/) | +| `POST /chat/sequrity_azure/v1/chat/completions` | :white_check_mark: | Chat completions with Sequrity Azure | ### Anthropic Messages @@ -25,24 +26,43 @@ Sequrity Control API (`https://api.sequrity.ai/control`) provides the following | URL | Status | Description | |-----|--------| ------------| -| `POST /code/v1/chat/completions` | :white_check_mark: | Code-oriented chat completions (default provider) | -| `POST /code/{service_provider}/v1/chat/completions` | :white_check_mark: | Code-oriented chat completions with specified [service provider](../../../general/rest_api/service_provider.md) | +| `POST /code/v1/chat/completions` | :white_check_mark: | Code-oriented chat completions (default: [OpenRouter](https://openrouter.ai/)) | +| `POST /code/openai/v1/chat/completions` | :white_check_mark: | Code-oriented chat completions with [OpenAI](https://openai.com/) | +| `POST /code/openrouter/v1/chat/completions` | :white_check_mark: | Code-oriented chat completions with [OpenRouter](https://openrouter.ai/) | +| `POST /code/sequrity_azure/v1/chat/completions` | :white_check_mark: | 
Code-oriented chat completions with Sequrity Azure | | `POST /code/v1/messages` | :white_check_mark: | Code-oriented Anthropic Messages (default provider) | | `POST /code/anthropic/v1/messages` | :white_check_mark: | Code-oriented Messages with Anthropic | +| `POST /code/v1/responses` | :white_check_mark: | Code-oriented Responses API (default: [OpenAI](https://openai.com/)) | +| `POST /code/openai/v1/responses` | :white_check_mark: | Code-oriented Responses with [OpenAI](https://openai.com/) | +| `POST /code/sequrity_azure/v1/responses` | :white_check_mark: | Code-oriented Responses with Sequrity Azure | + +### Responses + +| URL | Status | Description | +|-----|--------| ------------| +| `POST /chat/v1/responses` | :white_check_mark: | OpenAI-compatible Responses API (default provider) | +| `POST /chat/openai/v1/responses` | :white_check_mark: | Responses API with [OpenAI](https://openai.com/) | +| `POST /chat/sequrity_azure/v1/responses` | :white_check_mark: | Responses API with Sequrity Azure | ### LangGraph | URL | Status | Description | |-----|--------| ------------| -| `POST /lang-graph/v1/chat/completions` | :white_check_mark: | Chat completions for [LangGraphExecutor](../sequrity_client/langgraph.md) (default provider) | -| `POST /lang-graph/{service_provider}/v1/chat/completions` | :white_check_mark: | LangGraph chat completions with specified [service provider](../../../general/rest_api/service_provider.md) | +| `POST /lang-graph/v1/chat/completions` | :white_check_mark: | Chat completions for [LangGraphExecutor](../sequrity_client/langgraph.md) (default: [OpenRouter](https://openrouter.ai/)) | +| `POST /lang-graph/openai/v1/chat/completions` | :white_check_mark: | LangGraph chat completions with [OpenAI](https://openai.com/) | +| `POST /lang-graph/openrouter/v1/chat/completions` | :white_check_mark: | LangGraph chat completions with [OpenRouter](https://openrouter.ai/) | +| `POST /lang-graph/sequrity_azure/v1/chat/completions` | :white_check_mark: | 
LangGraph chat completions with Sequrity Azure | | `POST /lang-graph/anthropic/v1/messages` | :white_check_mark: | LangGraph Messages with Anthropic | ### Policy Generation | URL | Status | Description | |-----|--------| ------------| -| `POST /policy-gen/v1/generate` | :white_check_mark: | Generate security policies from natural language descriptions | +| `POST /policy-gen/v1/generate` | :white_check_mark: | Generate security policies (default: [OpenRouter](https://openrouter.ai/)) | +| `POST /policy-gen/openai/v1/generate` | :white_check_mark: | Policy generation with [OpenAI](https://openai.com/) | +| `POST /policy-gen/openrouter/v1/generate` | :white_check_mark: | Policy generation with [OpenRouter](https://openrouter.ai/) | +| `POST /policy-gen/anthropic/v1/generate` | :white_check_mark: | Policy generation with [Anthropic](https://anthropic.com/) | +| `POST /policy-gen/sequrity_azure/v1/generate` | :white_check_mark: | Policy generation with Sequrity Azure | ### Utility @@ -61,10 +81,10 @@ https://api.sequrity.ai/control/{endpoint_type}/{service_provider?}/{version}/{a | Segment | Description | Examples | |---------|-------------|---------| -| `endpoint_type` | The type of endpoint | `chat`, `code`, `lang-graph`, `policy-gen` | +| `endpoint_type` | The type of endpoint | `chat`, `code`, `agent`, `lang-graph`, `policy-gen` | | `service_provider` | Optional LLM service provider | `openai`, `openrouter`, `anthropic`, `sequrity_azure` | | `version` | API version | `v1` | -| `api_suffix` | API-specific suffix | `chat/completions`, `messages`, `generate` | +| `api_suffix` | API-specific suffix | `chat/completions`, `messages`, `responses`, `generate` | When `service_provider` is omitted, the default provider is used. @@ -72,6 +92,7 @@ When `service_provider` is omitted, the default provider is used. 
- **[Service Providers](../../../general/rest_api/service_provider.md)** - Available LLM service providers - **[Chat Completion](chat_completion.md)** - OpenAI-compatible Chat Completions API reference +- **[Responses](responses.md)** - OpenAI-compatible Responses API reference - **[Messages](messages.md)** - Anthropic-compatible Messages API reference ### Custom Headers diff --git a/docs/control/reference/rest_api/messages.md b/docs/control/reference/rest_api/messages.md index b1749f7..e798c74 100644 --- a/docs/control/reference/rest_api/messages.md +++ b/docs/control/reference/rest_api/messages.md @@ -152,7 +152,6 @@ Extended thinking configuration. Discriminated by `type`: | `stop_reason` | `string \| null` | Why generation stopped: `"end_turn"`, `"max_tokens"`, `"stop_sequence"`, `"tool_use"`, `"pause_turn"`, `"refusal"`. | | `stop_sequence` | `string \| null` | Which stop sequence was hit, if any. | | `usage` | `Usage` | Token usage statistics. | -| `session_id` | `string \| null` | Sequrity session ID (also available via `X-Session-ID` response header). | ### Response Content Blocks diff --git a/docs/control/reference/rest_api/responses.md b/docs/control/reference/rest_api/responses.md new file mode 100644 index 0000000..b738919 --- /dev/null +++ b/docs/control/reference/rest_api/responses.md @@ -0,0 +1,253 @@ +# Responses API + +The Sequrity Control Responses API is compatible with the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses). This allows you to use Sequrity's security features while maintaining compatibility with existing OpenAI Responses-based applications. + +For the Chat Completions format, see the [Chat Completion API](./chat_completion.md) reference. For the Anthropic Messages format, see the [Messages API](./messages.md) reference. 
+ +## Endpoints + +| Endpoint | Provider | +|----------|----------| +| `POST /control/{endpoint_type}/v1/responses` | Default | +| `POST /control/{endpoint_type}/openai/v1/responses` | OpenAI | +| `POST /control/{endpoint_type}/sequrity_azure/v1/responses` | Sequrity Azure | + +Where `{endpoint_type}` is `chat`, `code`, `agent`, or `lang-graph`. See [URL Pattern](./index.md#url-pattern) and [Service Providers](../../../general/rest_api/service_provider.md). + +## Request Body + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `model` | `string` | Yes | Model ID, e.g. `gpt-4o`, `o3`. | +| `input` | `string \| array[InputItem]` | No | Text, image, or file inputs to the model. See [Input Types](#input-types). | +| `instructions` | `string` | No | A system (or developer) message inserted into the model's context. | +| `tools` | `array[Tool]` | No | Tools the model may call. See [Tools](#tools). | +| `tool_choice` | `string \| object` | No | How the model should select which tool to use: `"none"`, `"auto"`, `"required"`, or a function object. | +| `stream` | `boolean` | No | If `true`, the response is streamed as server-sent events. | +| `temperature` | `float` | No | Sampling temperature (0–2). Higher values produce more random output. | +| `top_p` | `float` | No | Nucleus sampling parameter. | +| `max_output_tokens` | `integer` | No | Upper bound for generated tokens. | +| `reasoning` | `object` | No | Configuration for reasoning models. See [Reasoning](#reasoning). | +| `text` | `object` | No | Text response format configuration. See [Text Config](#text-config). | +| `metadata` | `object` | No | Key-value pairs (up to 16) attached to the response. | +| `previous_response_id` | `string` | No | ID of a previous response for multi-turn conversations. | +| `include` | `array[string]` | No | Additional output data to include in the response. | +| `store` | `boolean` | No | Whether to store the response for later retrieval. 
| +| `truncation` | `string` | No | Truncation strategy: `"auto"` or `"disabled"`. | +| `parallel_tool_calls` | `boolean` | No | Whether to allow parallel tool execution. | +| `max_tool_calls` | `integer` | No | Maximum number of calls to built-in tools. | +| `background` | `boolean` | No | Whether to run the response in the background. | +| `conversation` | `string \| object` | No | Conversation context. | +| `prompt` | `object` | No | Prompt template reference with `id` and optional `variables`. | +| `service_tier` | `string` | No | Processing tier: `"auto"`, `"default"`, `"flex"`, `"scale"`, `"priority"`. | +| `stream_options` | `object` | No | Options for streaming responses (e.g., `include_usage`). | +| `top_logprobs` | `integer` | No | Number of most likely tokens to return at each position (0–20). | +| `timeout` | `float` | No | Client-side timeout in seconds. | + +### Input Types + +The `input` field accepts either a plain string or an array of input items. Input items are distinguished by `role` or `type`: + +#### Input Message + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `role` | `"user" \| "system" \| "developer"` | Yes | The role of the message. | +| `content` | `string \| array[ContentItem]` | Yes | Message content (text, images, files, or audio). | + +#### Content Item Types + +| Type | Key Fields | Description | +|------|------------|-------------| +| `input_text` | `type`, `text` | Plain text input. | +| `input_image` | `type`, `detail`, `file_id`, `image_url` | Image via URL, base64, or file ID. Detail: `"auto"`, `"low"`, `"high"`. | +| `input_file` | `type`, `file_id` | File via file ID. | +| `input_audio` | `type`, `audio` | Base64-encoded audio data. 
| + +#### Function Call Output (for multi-turn) + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `type` | `"function_call_output"` | Yes | | +| `call_id` | `string` | Yes | The tool call ID from the model's function call. | +| `output` | `string` | Yes | The text output from the tool execution. | + +### Tools + +The Responses API supports multiple tool types: + +#### Function Tool + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `type` | `"function"` | Yes | | +| `name` | `string` | Yes | The function name. | +| `parameters` | `object` | No | JSON Schema describing the function parameters. | +| `description` | `string` | No | Description of the function. | +| `strict` | `boolean` | No | Enforce strict parameter validation. | + +#### Other Tool Types + +| Type | Description | +|------|-------------| +| `file_search` | Search uploaded files. | +| `web_search_preview` | Web search with user location support. | +| `code_interpreter` | Execute code in a sandbox. | +| `computer_use_preview` | Computer use tool. | +| `image_generation` | Generate images. | +| `mcp` | Model Context Protocol server tools. | +| `web_search` | Web search tool. | +| `local_shell` | Local shell execution tool. | +| `custom` | Custom tool type. | + +### Reasoning + +| Field | Type | Description | +|-------|------|-------------| +| `effort` | `string` | Reasoning effort level: `"none"`, `"low"`, `"medium"`, `"high"`, `"xhigh"`. | +| `generate_summary` | `string` | Summary generation: `"auto"`, `"concise"`, `"detailed"`. | + +### Text Config + +| Field | Type | Description | +|-------|------|-------------| +| `format.type` | `string` | Output format: `"text"`, `"json_object"`, or `"json_schema"`. | +| `format.name` | `string` | Schema name (for `json_schema`). | +| `format.schema` | `object` | JSON Schema (for `json_schema`). | +| `format.strict` | `boolean` | Strict schema adherence (for `json_schema`). 
| + +## Response Body + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `string` | Unique identifier for this response. | +| `object` | `"response"` | Always `"response"`. | +| `created_at` | `float` | Unix timestamp (seconds) when created. | +| `model` | `string` | The model used. | +| `output` | `array[OutputItem]` | Generated content items. See [Output Items](#output-items). | +| `status` | `string` | Response status: `"completed"`, `"failed"`, `"in_progress"`, `"cancelled"`, `"queued"`, `"incomplete"`. | +| `error` | `object \| null` | Error information with `code` and `message` fields. | +| `usage` | `ResponseUsage` | Token usage statistics. | +| `parallel_tool_calls` | `boolean` | Whether parallel tool calls were enabled. | +| `tool_choice` | `string \| object` | Tool choice used for this response. | +| `tools` | `array[object]` | Tools available for this response. | +| `incomplete_details` | `object \| null` | Details on why the response is incomplete. | +| `temperature` | `float \| null` | Sampling temperature used. | +| `top_p` | `float \| null` | Nucleus sampling used. | +| `max_output_tokens` | `integer \| null` | Max output tokens setting. | +| `truncation` | `string \| null` | Truncation strategy used. | +| `service_tier` | `string \| null` | Service tier used. | + +### Output Items + +Output items are distinguished by the `type` field: + +#### Message (`type: "message"`) + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `string` | Unique ID of the output message. | +| `type` | `"message"` | | +| `role` | `"assistant"` | Always `"assistant"`. | +| `content` | `array[ContentItem]` | Content items: `output_text` (with `text` and `annotations`) or `refusal`. | +| `status` | `string` | `"in_progress"`, `"completed"`, `"incomplete"`. 
| + +#### Function Call (`type: "function_call"`) + +| Field | Type | Description | +|-------|------|-------------| +| `type` | `"function_call"` | | +| `call_id` | `string` | Unique ID for responding with tool output. | +| `name` | `string` | The function name. | +| `arguments` | `string` | JSON-encoded arguments. | +| `id` | `string \| null` | Unique ID of the tool call. | +| `status` | `string \| null` | `"in_progress"`, `"completed"`, `"incomplete"`. | + +#### Reasoning (`type: "reasoning"`) + +| Field | Type | Description | +|-------|------|-------------| +| `id` | `string` | Unique ID of the reasoning item. | +| `type` | `"reasoning"` | | +| `summary` | `array[object]` | Reasoning summary text items. | +| `encrypted_content` | `string \| null` | Encrypted content for multi-turn continuity. | + +#### Other Output Item Types + +| Type | Description | +|------|-------------| +| `file_search_call` | File search tool call results. | +| `web_search_call` | Web search tool call results. | +| `code_interpreter_call` | Code interpreter execution results. | +| `computer_call` | Computer use tool call. | +| `image_generation_call` | Image generation results. | +| `local_shell_call` | Local shell execution results. | +| `mcp_call` | MCP server tool call results. | +| `mcp_list_tools` | MCP tool listing results. | +| `mcp_approval_request` | MCP approval request. | +| `custom_tool_call` | Custom tool call results. | + +### ResponseUsage + +| Field | Type | Description | +|-------|------|-------------| +| `input_tokens` | `integer` | Input tokens used. | +| `input_tokens_details.cached_tokens` | `integer` | Tokens retrieved from cache. | +| `output_tokens` | `integer` | Output tokens generated. | +| `output_tokens_details.reasoning_tokens` | `integer` | Reasoning tokens used. | +| `total_tokens` | `integer` | Total tokens used. | + +## Streaming Events + +When `stream` is `true`, the response is delivered as server-sent events. 
Each event has a `type` field:
+
+### Lifecycle Events
+
+| Event Type | Description |
+|------------|-------------|
+| `response.created` | Emitted once when the response is first created. |
+| `response.in_progress` | Response transitions to in-progress state. |
+| `response.completed` | Response completes successfully. |
+| `response.failed` | Response fails. |
+| `response.incomplete` | Response is incomplete (e.g. max tokens reached). |
+
+### Structure Events
+
+| Event Type | Description |
+|------------|-------------|
+| `response.output_item.added` | A new output item (message, reasoning, function_call) starts. |
+| `response.output_item.done` | An output item is fully completed. |
+| `response.content_part.added` | A new content part starts within a message. |
+| `response.content_part.done` | A content part is completed. |
+
+### Content Events
+
+| Event Type | Key Fields | Description |
+|------------|------------|-------------|
+| `response.output_text.delta` | `delta` | Incremental text content. |
+| `response.output_text.done` | `text` | Text content finalized. |
+| `response.function_call_arguments.delta` | `delta` | Incremental function call arguments. |
+| `response.function_call_arguments.done` | `name`, `arguments` | Function call arguments finalized. |
+
+### Reasoning Events
+
+| Event Type | Description |
+|------------|-------------|
+| `response.reasoning_summary_part.added` | A new reasoning summary part starts. |
+| `response.reasoning_summary_part.done` | A reasoning summary part completes. |
+| `response.reasoning_summary_text.delta` | Incremental reasoning summary text. |
+| `response.reasoning_summary_text.done` | Reasoning summary text finalized. |
+
+## Headers
+
+See [Custom Headers](./index.md#custom-headers) for the full list. Summary:
+
+| Header | Direction | Description |
+|--------|-----------|-------------|
+| `Authorization` | Request | `Bearer <token>` |
+| `X-Api-Key` | Request | LLM provider API key (BYOK). 
| +| `X-Features` | Request | [Security features](./headers/security_features.md) (agent arch, classifiers, blockers). | +| `X-Policy` | Request | [Security policy](./headers/security_policy.md) (SQRT rules). | +| `X-Config` | Request | [Fine-grained config](./headers/security_config.md) (FSM, prompts, response format). | +| `X-Session-ID` | Request | Explicit session ID for multi-turn conversations. | +| `X-Session-ID` | Response | Session ID assigned by the server. | diff --git a/docs/control/reference/sequrity_client/index.md b/docs/control/reference/sequrity_client/index.md index 919e8ba..8ce7b68 100644 --- a/docs/control/reference/sequrity_client/index.md +++ b/docs/control/reference/sequrity_client/index.md @@ -7,6 +7,7 @@ This section provides the API reference for `SequrityClient`, the Python interfa The Control API enables secure LLM interactions with policy enforcement. Key capabilities: - **Chat Completions**: OpenAI-compatible chat API with security features (toxicity filtering, PII redaction, topic guardrails) +- **Responses API**: OpenAI Responses API with function calling, multi-turn, reasoning, and streaming support - **Anthropic Messages**: Anthropic Messages API with security features - **LangGraph Integration**: Execute LangGraph workflows with security policies via Sequrity's Dual-LLM runtime - **Policy Generation**: Generate SQRT policies from natural language descriptions @@ -16,6 +17,7 @@ The Control API enables secure LLM interactions with policy enforcement. 
Key cap | Module | Description | |--------|-------------| | [Chat Completion](chat_completion.md) | Chat completion API, request/response types, and result schemas | +| [Responses API](responses.md) | OpenAI Responses API, request/response types, and streaming events | | [Anthropic Messages](message.md) | Anthropic Messages API, request/response types | | [LangGraph sequrity mode](langgraph.md) | LangGraph execution API and related types | | [Policy Generation](policy_gen.md) | Generate SQRT policies from natural language | diff --git a/docs/control/reference/sequrity_client/responses.md b/docs/control/reference/sequrity_client/responses.md new file mode 100644 index 0000000..135afd2 --- /dev/null +++ b/docs/control/reference/sequrity_client/responses.md @@ -0,0 +1,181 @@ +# Responses API + +The Responses API provides an OpenAI Responses-compatible interface for secure LLM interactions. It supports function calling, multi-turn conversations via `previous_response_id`, reasoning models, and streaming. 
+ +::: sequrity.control.resources.responses.ResponsesResource + options: + show_root_heading: true + show_source: false + +--- + +## Request Types + +::: sequrity.types.responses.request.ResponsesRequest + options: + show_root_heading: true + show_source: true + +### Input Types + +::: sequrity.types.responses.request.InputMessageParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.InputTextParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.InputImageParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.InputFileParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.InputAudioParam + options: + show_root_heading: true + show_source: false + +### Multi-turn Input Types + +::: sequrity.types.responses.request.FunctionCallOutputParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.ResponseOutputMessageParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.ResponseFunctionToolCallParam + options: + show_root_heading: true + show_source: false + +### Tool Definitions + +::: sequrity.types.responses.request.FunctionToolParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.WebSearchPreviewToolParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.CodeInterpreterToolParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.McpToolParam + options: + show_root_heading: true + show_source: false + +### Tool Choice + +::: sequrity.types.responses.request.ToolChoiceFunctionParam + options: + show_root_heading: true + show_source: false + +### Text Config + +::: sequrity.types.responses.request.ResponseTextConfigParam + options: + 
show_root_heading: true + show_source: false + +### Reasoning Config + +::: sequrity.types.responses.request.ReasoningParam + options: + show_root_heading: true + show_source: false + +### Other Config Types + +::: sequrity.types.responses.request.ConversationParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.ResponsePromptParam + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.request.StreamOptionsParam + options: + show_root_heading: true + show_source: false + +--- + +## Response Types + +::: sequrity.types.responses.response.ResponsesResponse + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.ResponseOutputMessage + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.OutputText + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.Refusal + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.FunctionToolCall + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.ReasoningItem + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.ResponseUsage + options: + show_root_heading: true + show_source: false + +::: sequrity.types.responses.response.ResponseError + options: + show_root_heading: true + show_source: false + +--- + +## Result Schema + +When using Dual-LLM mode, the response content follows `ResponseContentJsonSchema`. 
+ +::: sequrity.control.types.dual_llm_response.ResponseContentJsonSchema + options: + show_root_heading: true + show_source: true + +::: sequrity.control.types.dual_llm_response.ErrorInfo + options: + show_root_heading: true + show_source: true diff --git a/docs/control/reference/single_llm_vs_dual_llm_features.md b/docs/control/reference/single_llm_vs_dual_llm_features.md index 521add9..b5074d7 100644 --- a/docs/control/reference/single_llm_vs_dual_llm_features.md +++ b/docs/control/reference/single_llm_vs_dual_llm_features.md @@ -7,6 +7,10 @@ Sequrity Control supports two agent architectures for tool access control: **Sin Read the conceptual guide on [Single-LLM vs. Dual-LLM Agents](../learn/single-vs-dual-llm.md) to understand the differences between these architectures and their security implications. +!!! note "LangGraph defaults to Dual-LLM" + + The LangGraph endpoint (`/lang-graph/`) always uses dual-LLM configuration, even when `agent_arch` is set to `"single-llm"` in the `X-Features` header. This is because LangGraph integration requires the dual-LLM architecture for proper security enforcement. + ## Supported Features of Single-LLM Single-LLM supports a limited subset of features compared to Dual-LLM mode. The following table summarizes the feature availability in Single-LLM mode: @@ -56,6 +60,14 @@ Single-LLM supports a limited subset of features compared to Dual-LLM mode. 
The - :no_entry: `prune_failed_steps` - :no_entry: `enabled_internal_tools` - :no_entry: `force_to_cache` + - :no_entry: `history_mismatch_policy` + - :no_entry: `max_pllm_failed_steps` + - :no_entry: `wrap_tool_result` + - :no_entry: `detect_tool_errors` + - :no_entry: `detect_tool_error_regex_pattern` + - :no_entry: `detect_tool_error_max_result_length` + - :no_entry: `strict_tool_result_parsing` + - :no_entry: `tool_result_transform` - `prompt`: - :no_entry: All prompt overrides (dual-llm only) - `response_format`: diff --git a/docs/release_notes.md b/docs/release_notes.md index 7cf8da5..6c91f2e 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -5,6 +5,40 @@ hide: # Release Notes +## v0.5 + +`time: 2026-02-25` + +| Product | Version | +| ---------|---------| +| Control API | `094afd3174b700f104cab612d32e3f54ad1b152c` | + +??? info "v0.5 Release Notes" + + **New Features** + + - **Responses API**: Added OpenAI-compatible Responses API support (`/control/{endpoint_type}/v1/responses`) + - Full request/response support with function tools, file search, web search, code interpreter, computer use, MCP, and custom tools + - Streaming via server-sent events with lifecycle, structure, content, and reasoning events + - Available for `chat`, `code`, and `sequrity_azure` providers + - **Streaming Support**: Added streaming across all three main APIs (Chat Completions, Messages, Responses) + - New `SyncStream` and `AsyncStream` wrapper classes for SSE handling in the Python client + - Session ID tracking through streaming responses + - **Custom Headers Documentation**: New tutorial and reference documentation for typed Pydantic header classes (`FeaturesHeader`, `SecurityPolicyHeader`, `FineGrainedConfigHeader`) + + **Configuration Enhancements** + + - Added `tool_result_transform` FSM override (`"none"` or `"codex"`) for stripping Codex CLI metadata + - Added `history_mismatch_policy` FSM override (`"reject"`, `"restart_turn"`, `"continue"`) for handling 
message history divergence in stateless mode (dual-LLM only) + - Added `custom_instructions` prompt override for appending custom text to the planning LLM prompt + - Added `stream_thoughts` response format override for streaming model thinking process + + **Improvements** + + - Request/response classes now ignore extra fields for forward compatibility + - Enhanced type hints across LangGraph integration and headers + + ## v0.4 `time: 2026-02-18` @@ -60,7 +94,7 @@ hide: | ---------|---------| | Control API | `17620f2abd4646171fc8a462bad3fafbd2b0126b` | -- Change default value of `pllm_can_ask_for_clarification` in `security_config` header to `true`. +- Change default value of `clarify_ambiguous_queries` in `X-Config` header (`prompt.pllm.clarify_ambiguous_queries`) to `true`. - Update docs deployment workflow to set default to `dev` instead of `latest` until docs are stable. ## v0.0.1 diff --git a/justfile b/justfile index f1d63b6..2b8e695 100644 --- a/justfile +++ b/justfile @@ -8,12 +8,7 @@ test-examples: bash scripts/run_examples.sh --env-file .env.local serve-docs: sync-docs - uv run mike serve --dev-addr=localhost:8001 - -# Deploy docs for the latest git tag (e.g., v0.1.0 → 0.1.0) -deploy-docs: sync-docs - uv run mike deploy --update-aliases $(git describe --tags --abbrev=0 --match 'v*' | sed 's/^v//') latest - uv run mike set-default latest + uv run mkdocs serve --dev-addr=localhost:8001 format: uv run ruff format src/ test/ diff --git a/mkdocs.yml b/mkdocs.yml index 560a5f5..e51f248 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -162,6 +162,7 @@ nav: - control/reference/rest_api/index.md - Service Providers: general/rest_api/service_provider.md - Chat Completions API: control/reference/rest_api/chat_completion.md + - Responses API: control/reference/rest_api/responses.md - Messages API: control/reference/rest_api/messages.md - Custom Headers: - control/reference/rest_api/headers/index.md @@ -173,6 +174,7 @@ nav: - SequrityClient.control: - 
control/reference/sequrity_client/index.md - Chat Completions API: control/reference/sequrity_client/chat_completion.md + - Responses API: control/reference/sequrity_client/responses.md - Messages API: control/reference/sequrity_client/message.md - LangGraph: control/reference/sequrity_client/langgraph.md - Policy Generation: control/reference/sequrity_client/policy_gen.md diff --git a/src/sequrity/types/responses/request.py b/src/sequrity/types/responses/request.py index 07cdef6..9ff2964 100644 --- a/src/sequrity/types/responses/request.py +++ b/src/sequrity/types/responses/request.py @@ -13,40 +13,26 @@ class InputTextParam(BaseModel): text: str = Field(..., description="The text content.") - type: Literal["input_text"] = Field( - ..., description="The type of the input content." - ) + type: Literal["input_text"] = Field(..., description="The type of the input content.") class InputImageParam(BaseModel): """An image input. Supports both URL and file_id references.""" - type: Literal["input_image"] = Field( - ..., description="The type of the input content." - ) - detail: Literal["auto", "low", "high"] | None = Field( - default=None, description="The detail level of the image." - ) - file_id: str | None = Field( - default=None, description="The ID of the uploaded file." - ) - image_url: str | None = Field( - default=None, description="The URL of the image or base64 data URL." - ) + type: Literal["input_image"] = Field(..., description="The type of the input content.") + detail: Literal["auto", "low", "high"] | None = Field(default=None, description="The detail level of the image.") + file_id: str | None = Field(default=None, description="The ID of the uploaded file.") + image_url: str | None = Field(default=None, description="The URL of the image or base64 data URL.") class InputFileParam(BaseModel): file_id: str = Field(..., description="The ID of the uploaded file.") - type: Literal["input_file"] = Field( - ..., description="The type of the input content." 
- ) + type: Literal["input_file"] = Field(..., description="The type of the input content.") class InputAudioParam(BaseModel): audio: str = Field(..., description="Base64-encoded audio data.") - type: Literal["input_audio"] = Field( - ..., description="The type of the input content." - ) + type: Literal["input_audio"] = Field(..., description="The type of the input content.") InputContentParam = Annotated[ @@ -86,9 +72,7 @@ class MessageParam(BaseModel): ..., description="The role of the message input. One of 'user', 'system', or 'developer'.", ) - type: Literal["message"] = Field( - ..., description="The type of the message input. Always 'message'." - ) + type: Literal["message"] = Field(..., description="The type of the message input. Always 'message'.") status: Literal["in_progress", "completed", "incomplete"] | None = Field( default=None, description="The status of the item." ) @@ -103,9 +87,7 @@ class OutputTextParam(BaseModel): """A text output from the model.""" text: str = Field(..., description="The text output from the model.") - type: Literal["output_text"] = Field( - ..., description="The type of the output text. Always 'output_text'." - ) + type: Literal["output_text"] = Field(..., description="The type of the output text. Always 'output_text'.") annotations: list[dict[str, Any]] | None = Field( default=None, description="Annotations for the text, such as citations." ) @@ -115,9 +97,7 @@ class OutputRefusalParam(BaseModel): """A refusal from the model.""" refusal: str = Field(..., description="The refusal explanation from the model.") - type: Literal["refusal"] = Field( - ..., description="The type of the refusal. Always 'refusal'." - ) + type: Literal["refusal"] = Field(..., description="The type of the refusal. 
Always 'refusal'.") OutputMessageContentParam = Annotated[ @@ -130,18 +110,10 @@ class ResponseOutputMessageParam(BaseModel): """An output message from the model, used as input for multi-turn conversations.""" id: str = Field(..., description="The unique ID of the output message.") - content: list[OutputMessageContentParam] = Field( - ..., description="The content of the output message." - ) - role: Literal["assistant"] = Field( - ..., description="The role of the output message. Always 'assistant'." - ) - status: Literal["in_progress", "completed", "incomplete"] = Field( - ..., description="The status of the message." - ) - type: Literal["message"] = Field( - ..., description="The type of the output message. Always 'message'." - ) + content: list[OutputMessageContentParam] = Field(..., description="The content of the output message.") + role: Literal["assistant"] = Field(..., description="The role of the output message. Always 'assistant'.") + status: Literal["in_progress", "completed", "incomplete"] = Field(..., description="The status of the message.") + type: Literal["message"] = Field(..., description="The type of the output message. Always 'message'.") # ============================================================================= @@ -152,9 +124,7 @@ class ResponseOutputMessageParam(BaseModel): class ResponseFunctionToolCallParam(BaseModel): """A tool call to run a function, used as input for multi-turn conversations.""" - arguments: str = Field( - ..., description="A JSON string of the arguments to pass to the function." - ) + arguments: str = Field(..., description="A JSON string of the arguments to pass to the function.") call_id: str = Field( ..., description="The unique ID of the function tool call generated by the model.", @@ -163,9 +133,7 @@ class ResponseFunctionToolCallParam(BaseModel): type: Literal["function_call"] = Field( ..., description="The type of the function tool call. Always 'function_call'." 
) - id: str | None = Field( - default=None, description="The unique ID of the function tool call." - ) + id: str | None = Field(default=None, description="The unique ID of the function tool call.") status: Literal["in_progress", "completed", "incomplete"] | None = Field( default=None, description="The status of the item." ) @@ -188,9 +156,7 @@ class FunctionCallOutputParam(BaseModel): ..., description="The type of the function tool call output. Always 'function_call_output'.", ) - id: str | None = Field( - default=None, description="The unique ID of the function tool call output." - ) + id: str | None = Field(default=None, description="The unique ID of the function tool call output.") status: Literal["in_progress", "completed", "incomplete"] | None = Field( default=None, description="The status of the item." ) @@ -208,16 +174,12 @@ class ResponseCustomToolCallParam(BaseModel): ..., description="An identifier used to map this custom tool call to a tool call output.", ) - input: str = Field( - ..., description="The input for the custom tool call generated by the model." - ) + input: str = Field(..., description="The input for the custom tool call generated by the model.") name: str = Field(..., description="The name of the custom tool being called.") type: Literal["custom_tool_call"] = Field( ..., description="The type of the custom tool call. Always 'custom_tool_call'." ) - id: str | None = Field( - default=None, description="The unique ID of the custom tool call." - ) + id: str | None = Field(default=None, description="The unique ID of the custom tool call.") # ============================================================================= @@ -232,16 +194,12 @@ class ResponseCustomToolCallOutputParam(BaseModel): ..., description="The call ID, used to map this custom tool call output to a custom tool call.", ) - output: str = Field( - ..., description="The output from the custom tool call generated by your code." 
- ) + output: str = Field(..., description="The output from the custom tool call generated by your code.") type: Literal["custom_tool_call_output"] = Field( ..., description="The type of the custom tool call output. Always 'custom_tool_call_output'.", ) - id: str | None = Field( - default=None, description="The unique ID of the custom tool call output." - ) + id: str | None = Field(default=None, description="The unique ID of the custom tool call output.") # ============================================================================= @@ -251,12 +209,8 @@ class ResponseCustomToolCallOutputParam(BaseModel): class ComputerCallOutputAcknowledgedSafetyCheck(BaseModel): id: str = Field(..., description="The safety check ID.") - code: str | None = Field( - default=None, description="The type of the pending safety check." - ) - message: str | None = Field( - default=None, description="Details about the pending safety check." - ) + code: str | None = Field(default=None, description="The type of the pending safety check.") + message: str | None = Field(default=None, description="Details about the pending safety check.") class ComputerCallOutputParam(BaseModel): @@ -264,16 +218,12 @@ class ComputerCallOutputParam(BaseModel): call_id: str = Field(..., description="The computer tool call ID.") output: dict[str, Any] = Field(..., description="Screenshot output.") - type: Literal["computer_call_output"] = Field( - ..., description="Always 'computer_call_output'." - ) + type: Literal["computer_call_output"] = Field(..., description="Always 'computer_call_output'.") id: str | None = Field(default=None, description="The unique ID.") - acknowledged_safety_checks: ( - list[ComputerCallOutputAcknowledgedSafetyCheck] | None - ) = Field(default=None, description="Acknowledged safety checks.") - status: Literal["in_progress", "completed", "incomplete"] | None = Field( - default=None, description="The status." 
+ acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = Field( + default=None, description="Acknowledged safety checks." ) + status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None, description="The status.") # ============================================================================= @@ -283,35 +233,21 @@ class ComputerCallOutputParam(BaseModel): class LocalShellCallActionParam(BaseModel): command: list[str] = Field(..., description="The command to run.") - env: dict[str, str] = Field( - default_factory=dict, description="Environment variables." - ) + env: dict[str, str] = Field(default_factory=dict, description="Environment variables.") type: Literal["exec"] = Field(..., description="Always 'exec'.") - timeout_ms: int | None = Field( - default=None, description="Optional timeout in milliseconds." - ) - user: str | None = Field( - default=None, description="Optional user to run the command as." - ) - working_directory: str | None = Field( - default=None, description="Optional working directory." - ) + timeout_ms: int | None = Field(default=None, description="Optional timeout in milliseconds.") + user: str | None = Field(default=None, description="Optional user to run the command as.") + working_directory: str | None = Field(default=None, description="Optional working directory.") class LocalShellCallParam(BaseModel): """A local shell tool call.""" id: str = Field(..., description="The unique ID.") - action: LocalShellCallActionParam = Field( - ..., description="The shell command to execute." - ) + action: LocalShellCallActionParam = Field(..., description="The shell command to execute.") call_id: str = Field(..., description="The tool call ID from the model.") - status: Literal["in_progress", "completed", "incomplete"] = Field( - ..., description="The status." - ) - type: Literal["local_shell_call"] = Field( - ..., description="Always 'local_shell_call'." 
- ) + status: Literal["in_progress", "completed", "incomplete"] = Field(..., description="The status.") + type: Literal["local_shell_call"] = Field(..., description="Always 'local_shell_call'.") class LocalShellCallOutputParam(BaseModel): @@ -319,12 +255,8 @@ class LocalShellCallOutputParam(BaseModel): id: str = Field(..., description="The tool call output ID.") output: str = Field(..., description="JSON string of output.") - type: Literal["local_shell_call_output"] = Field( - ..., description="Always 'local_shell_call_output'." - ) - status: Literal["in_progress", "completed", "incomplete"] | None = Field( - default=None, description="The status." - ) + type: Literal["local_shell_call_output"] = Field(..., description="Always 'local_shell_call_output'.") + status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None, description="The status.") # ============================================================================= @@ -334,12 +266,8 @@ class LocalShellCallOutputParam(BaseModel): class ShellCallActionParam(BaseModel): commands: list[str] = Field(..., description="Shell commands to run.") - max_output_length: int | None = Field( - default=None, description="Max output characters." - ) - timeout_ms: int | None = Field( - default=None, description="Max wall-clock time in ms." - ) + max_output_length: int | None = Field(default=None, description="Max output characters.") + timeout_ms: int | None = Field(default=None, description="Max wall-clock time in ms.") class ShellCallParam(BaseModel): @@ -349,9 +277,7 @@ class ShellCallParam(BaseModel): call_id: str = Field(..., description="The tool call ID.") type: Literal["shell_call"] = Field(..., description="Always 'shell_call'.") id: str | None = Field(default=None, description="The unique ID.") - status: Literal["in_progress", "completed", "incomplete"] | None = Field( - default=None, description="The status." 
- ) + status: Literal["in_progress", "completed", "incomplete"] | None = Field(default=None, description="The status.") class ShellCallOutputParam(BaseModel): @@ -359,13 +285,9 @@ class ShellCallOutputParam(BaseModel): call_id: str = Field(..., description="The shell tool call ID.") output: list[dict[str, Any]] = Field(..., description="Output chunks.") - type: Literal["shell_call_output"] = Field( - ..., description="Always 'shell_call_output'." - ) + type: Literal["shell_call_output"] = Field(..., description="Always 'shell_call_output'.") id: str | None = Field(default=None, description="The unique ID.") - max_output_length: int | None = Field( - default=None, description="Max output length." - ) + max_output_length: int | None = Field(default=None, description="Max output length.") # ============================================================================= @@ -377,13 +299,9 @@ class ApplyPatchCallParam(BaseModel): """An apply patch tool call.""" call_id: str = Field(..., description="The tool call ID.") - operation: dict[str, Any] = Field( - ..., description="The file operation (create_file, delete_file, update_file)." - ) + operation: dict[str, Any] = Field(..., description="The file operation (create_file, delete_file, update_file).") status: Literal["in_progress", "completed"] = Field(..., description="The status.") - type: Literal["apply_patch_call"] = Field( - ..., description="Always 'apply_patch_call'." - ) + type: Literal["apply_patch_call"] = Field(..., description="Always 'apply_patch_call'.") id: str | None = Field(default=None, description="The unique ID.") @@ -392,9 +310,7 @@ class ApplyPatchCallOutputParam(BaseModel): call_id: str = Field(..., description="The tool call ID.") status: Literal["completed", "failed"] = Field(..., description="The status.") - type: Literal["apply_patch_call_output"] = Field( - ..., description="Always 'apply_patch_call_output'." 
- ) + type: Literal["apply_patch_call_output"] = Field(..., description="Always 'apply_patch_call_output'.") id: str | None = Field(default=None, description="The unique ID.") output: str | None = Field(default=None, description="Log text.") @@ -405,16 +321,10 @@ class ApplyPatchCallOutputParam(BaseModel): class McpListToolsToolParam(BaseModel): - input_schema: dict[str, Any] = Field( - ..., description="The JSON schema describing the tool's input." - ) + input_schema: dict[str, Any] = Field(..., description="The JSON schema describing the tool's input.") name: str = Field(..., description="The name of the tool.") - annotations: dict[str, Any] | None = Field( - default=None, description="Additional annotations." - ) - description: str | None = Field( - default=None, description="The description of the tool." - ) + annotations: dict[str, Any] | None = Field(default=None, description="Additional annotations.") + description: str | None = Field(default=None, description="The description of the tool.") class McpListToolsParam(BaseModel): @@ -434,9 +344,7 @@ class McpApprovalRequestParam(BaseModel): arguments: str = Field(..., description="JSON string of tool arguments.") name: str = Field(..., description="The tool name.") server_label: str = Field(..., description="The MCP server label.") - type: Literal["mcp_approval_request"] = Field( - ..., description="Always 'mcp_approval_request'." - ) + type: Literal["mcp_approval_request"] = Field(..., description="Always 'mcp_approval_request'.") class McpApprovalResponseParam(BaseModel): @@ -444,9 +352,7 @@ class McpApprovalResponseParam(BaseModel): approval_request_id: str = Field(..., description="The request ID being answered.") approve: bool = Field(..., description="The approval decision.") - type: Literal["mcp_approval_response"] = Field( - ..., description="Always 'mcp_approval_response'." 
- ) + type: Literal["mcp_approval_response"] = Field(..., description="Always 'mcp_approval_response'.") id: str | None = Field(default=None, description="The unique ID.") reason: str | None = Field(default=None, description="The decision reason.") @@ -459,14 +365,12 @@ class McpCallParam(BaseModel): name: str = Field(..., description="The tool name.") server_label: str = Field(..., description="The MCP server label.") type: Literal["mcp_call"] = Field(..., description="Always 'mcp_call'.") - approval_request_id: str | None = Field( - default=None, description="For approval flow." - ) + approval_request_id: str | None = Field(default=None, description="For approval flow.") error: str | None = Field(default=None, description="Tool error.") output: str | None = Field(default=None, description="Tool output.") - status: ( - Literal["in_progress", "completed", "incomplete", "calling", "failed"] | None - ) = Field(default=None, description="The status.") + status: Literal["in_progress", "completed", "incomplete", "calling", "failed"] | None = Field( + default=None, description="The status." + ) # ============================================================================= @@ -478,9 +382,7 @@ class ItemReferenceParam(BaseModel): """A reference to an existing item.""" id: str = Field(..., description="The item ID.") - type: Literal["item_reference"] | None = Field( - default=None, description="Always 'item_reference'." - ) + type: Literal["item_reference"] | None = Field(default=None, description="Always 'item_reference'.") # ============================================================================= @@ -498,12 +400,8 @@ class ReasoningItemParam(BaseModel): id: str = Field(..., description="The unique identifier.") type: Literal["reasoning"] = Field(..., description="Always 'reasoning'.") - summary: list[ReasoningItemSummaryParam] = Field( - default_factory=list, description="Reasoning summary." 
- ) - encrypted_content: str | None = Field( - default=None, description="Encrypted content for multi-turn." - ) + summary: list[ReasoningItemSummaryParam] = Field(default_factory=list, description="Reasoning summary.") + encrypted_content: str | None = Field(default=None, description="Encrypted content for multi-turn.") # ============================================================================= @@ -515,15 +413,9 @@ class ImageGenerationCallParam(BaseModel): """An image generation call for multi-turn conversations.""" id: str = Field(..., description="The unique ID.") - result: str | None = Field( - default=None, description="The generated image encoded in base64." - ) - status: Literal["in_progress", "completed", "generating", "failed"] = Field( - ..., description="The status." - ) - type: Literal["image_generation_call"] = Field( - ..., description="Always 'image_generation_call'." - ) + result: str | None = Field(default=None, description="The generated image encoded in base64.") + status: Literal["in_progress", "completed", "generating", "failed"] = Field(..., description="The status.") + type: Literal["image_generation_call"] = Field(..., description="Always 'image_generation_call'.") # ============================================================================= @@ -578,12 +470,8 @@ class FunctionToolParam(BaseModel): default=None, description="Whether to enforce strict parameter validation. Default true.", ) - type: Literal["function"] = Field( - ..., description="The type of the function tool. Always 'function'." - ) - description: str | None = Field( - default=None, description="A description of the function." - ) + type: Literal["function"] = Field(..., description="The type of the function tool. 
Always 'function'.") + description: str | None = Field(default=None, description="A description of the function.") class FileSearchToolParam(BaseModel): @@ -611,30 +499,22 @@ class WebSearchPreviewToolParam(BaseModel): search_context_size: Literal["low", "medium", "high"] | None = Field( default=None, description="Search context size." ) - user_location: WebSearchPreviewUserLocation | None = Field( - default=None, description="User location." - ) + user_location: WebSearchPreviewUserLocation | None = Field(default=None, description="User location.") class CodeInterpreterContainerAuto(BaseModel): type: Literal["auto"] = Field(..., description="Always 'auto'.") file_ids: list[str] | None = Field(default=None, description="Uploaded files.") - memory_limit: Literal["1g", "4g", "16g", "64g"] | None = Field( - default=None, description="Memory limit." - ) + memory_limit: Literal["1g", "4g", "16g", "64g"] | None = Field(default=None, description="Memory limit.") class CodeInterpreterToolParam(BaseModel): type: Literal["code_interpreter"] = Field(..., description="The type of the tool.") - container: str | CodeInterpreterContainerAuto = Field( - ..., description="The code interpreter container." - ) + container: str | CodeInterpreterContainerAuto = Field(..., description="The code interpreter container.") class ComputerToolParam(BaseModel): - type: Literal["computer_use_preview"] = Field( - ..., description="The type of the tool." - ) + type: Literal["computer_use_preview"] = Field(..., description="The type of the tool.") model_config = ConfigDict(extra="allow") @@ -657,40 +537,22 @@ class ImageGenerationToolParam(BaseModel): background: Literal["transparent", "opaque", "auto"] | None = Field( default=None, description="The background of the generated image." ) - input_fidelity: Literal["high", "low"] | None = Field( - default=None, description="Input fidelity." 
- ) - input_image_mask: ImageGenerationInputImageMask | None = Field( - default=None, description="Inpainting mask." - ) - moderation: Literal["auto", "low"] | None = Field( - default=None, description="Moderation level." - ) - output_compression: int | None = Field( - default=None, description="Compression level." - ) - output_format: Literal["png", "webp", "jpeg"] | None = Field( - default=None, description="Output format." - ) - partial_images: int | None = Field( - default=None, description="Partial images for streaming (0-3)." - ) + input_fidelity: Literal["high", "low"] | None = Field(default=None, description="Input fidelity.") + input_image_mask: ImageGenerationInputImageMask | None = Field(default=None, description="Inpainting mask.") + moderation: Literal["auto", "low"] | None = Field(default=None, description="Moderation level.") + output_compression: int | None = Field(default=None, description="Compression level.") + output_format: Literal["png", "webp", "jpeg"] | None = Field(default=None, description="Output format.") + partial_images: int | None = Field(default=None, description="Partial images for streaming (0-3).") class McpAllowedToolsFilter(BaseModel): read_only: bool | None = Field(default=None, description="Tool is read-only.") - tool_names: list[str] | None = Field( - default=None, description="Allowed tool names." - ) + tool_names: list[str] | None = Field(default=None, description="Allowed tool names.") class McpApprovalFilter(BaseModel): - always: McpAllowedToolsFilter | None = Field( - default=None, description="Always require approval." - ) - never: McpAllowedToolsFilter | None = Field( - default=None, description="Never require approval." 
- ) + always: McpAllowedToolsFilter | None = Field(default=None, description="Always require approval.") + never: McpAllowedToolsFilter | None = Field(default=None, description="Never require approval.") class McpToolParam(BaseModel): @@ -700,19 +562,13 @@ class McpToolParam(BaseModel): server_label: str = Field(..., description="Label for the MCP server.") server_url: str | None = Field(default=None, description="URL for the MCP server.") connector_id: str | None = Field(default=None, description="Service connector ID.") - allowed_tools: list[str] | McpAllowedToolsFilter | None = Field( - default=None, description="Allowed tools filter." - ) + allowed_tools: list[str] | McpAllowedToolsFilter | None = Field(default=None, description="Allowed tools filter.") require_approval: Literal["always", "never"] | McpApprovalFilter | None = Field( default=None, description="Approval requirement." ) authorization: str | None = Field(default=None, description="OAuth access token.") - headers: dict[str, str] | None = Field( - default=None, description="Custom HTTP headers." - ) - server_description: str | None = Field( - default=None, description="MCP server description." - ) + headers: dict[str, str] | None = Field(default=None, description="Custom HTTP headers.") + server_description: str | None = Field(default=None, description="MCP server description.") class LocalShellToolParam(BaseModel): @@ -729,9 +585,7 @@ class CustomToolParam(BaseModel): type: Literal["custom"] = Field(..., description="The type of the tool.") name: str = Field(..., description="The custom tool name.") description: str | None = Field(default=None, description="Tool description.") - format: dict[str, Any] | None = Field( - default=None, description="Input format specification." 
- ) + format: dict[str, Any] | None = Field(default=None, description="Input format specification.") class ApplyPatchToolParam(BaseModel): @@ -764,29 +618,20 @@ class ToolChoiceNoneParam(BaseModel): class ToolChoiceAutoParam(BaseModel): - type: Literal["auto"] = Field( - ..., description="The model will automatically decide whether to use tools." - ) + type: Literal["auto"] = Field(..., description="The model will automatically decide whether to use tools.") class ToolChoiceRequiredParam(BaseModel): - type: Literal["required"] = Field( - ..., description="The model must use at least one tool." - ) + type: Literal["required"] = Field(..., description="The model must use at least one tool.") class ToolChoiceFunctionParam(BaseModel): - type: Literal["function"] = Field( - ..., description="The model will use the specified function." - ) + type: Literal["function"] = Field(..., description="The model will use the specified function.") name: str = Field(..., description="The name of the function to use.") ToolChoiceParam = Annotated[ - ToolChoiceNoneParam - | ToolChoiceAutoParam - | ToolChoiceRequiredParam - | ToolChoiceFunctionParam, + ToolChoiceNoneParam | ToolChoiceAutoParam | ToolChoiceRequiredParam | ToolChoiceFunctionParam, Field(discriminator="type"), ] @@ -798,9 +643,7 @@ class ToolChoiceFunctionParam(BaseModel): class TextConfigJSONSchemaParam(BaseModel): type: Literal["json_schema"] = Field(..., description="Structured JSON output.") name: str = Field(..., description="The name of the schema.") - description: str | None = Field( - default=None, description="A description of the schema." 
- ) + description: str | None = Field(default=None, description="A description of the schema.") schema_: dict[str, Any] = Field(..., alias="schema", description="The JSON schema.") strict: bool | None = Field( default=None, @@ -825,9 +668,7 @@ class TextConfigTextParam(BaseModel): class ResponseTextConfigParam(BaseModel): - format: TextFormatParam | None = Field( - default=None, description="The format of the text response." - ) + format: TextFormatParam | None = Field(default=None, description="The format of the text response.") # ============================================================================= @@ -944,11 +785,9 @@ class ResponsesRequest(BaseModel): default=None, description="A stable identifier used to help detect users violating usage policies.", ) - service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = ( - Field( - default=None, - description="Specifies the processing type used for serving the request.", - ) + service_tier: Literal["auto", "default", "flex", "scale", "priority"] | None = Field( + default=None, + description="Specifies the processing type used for serving the request.", ) store: bool | None = Field( default=None, @@ -958,9 +797,7 @@ class ResponsesRequest(BaseModel): default=None, description="If set to true, the model response data will be streamed.", ) - stream_options: StreamOptionsParam | None = Field( - default=None, description="Options for streaming responses." - ) + stream_options: StreamOptionsParam | None = Field(default=None, description="Options for streaming responses.") temperature: float | None = Field( default=None, description="Sampling temperature (0-2). 
Higher values make output more random.", @@ -969,9 +806,7 @@ class ResponsesRequest(BaseModel): default=None, description="Configuration options for a text response from the model.", ) - tool_choice: ( - Literal["none", "auto", "required"] | ToolChoiceFunctionParam | None - ) = Field( + tool_choice: Literal["none", "auto", "required"] | ToolChoiceFunctionParam | None = Field( default=None, description="How the model should select which tool to use." ) tools: list[ToolParam] | None = Field( From 05b1cc5a2361eb3d6aa044a5d7c10335ec01aaf0 Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 20:28:30 +0000 Subject: [PATCH 04/11] responses api and streaming support --- pyproject.toml | 20 ++++----- src/sequrity/control/_stream.py | 16 ++++--- src/sequrity/control/resources/chat.py | 46 ++++++++++++++------- src/sequrity/control/resources/messages.py | 46 ++++++++++++++------- src/sequrity/control/resources/responses.py | 44 +++++++++++++------- src/sequrity/types/messages/stream.py | 28 +++++++------ src/sequrity/types/responses/stream.py | 42 ++++++++++--------- uv.lock | 6 ++- 8 files changed, 155 insertions(+), 93 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index c69d241..9d6d3ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,11 +9,7 @@ { name="Cheng Zhang", email="cheng@sequrity.ai" }, ] requires-python=">=3.11" - dependencies=[ - "httpx>=0.28.1", - "lark>=1.3.1", - "pydantic>=2.11.9", - ] + dependencies=["httpx>=0.28.1", "lark>=1.3.1", "pydantic>=2.11.9"] dynamic=["version"] classifiers=[ "Development Status :: 4 - Beta", @@ -26,13 +22,13 @@ "Topic :: Software Development :: Libraries :: Python Modules", ] -[project.optional-dependencies] - openai=["openai>=1.0.0", "openai-agents>=0.1.0"] - langchain=["langchain-openai>=1.1.7", "langgraph>=1.0.7"] + [project.optional-dependencies] + openai=["openai>=1.0.0", "openai-agents>=0.1.0"] + langchain=["langchain-openai>=1.1.7", "langgraph>=1.0.7"] -[project.urls] - 
Homepage="https://sequrity.ai" - Repository="https://github.com/sequrity-ai/sequrity-api" + [project.urls] + Homepage="https://sequrity.ai" + Repository="https://github.com/sequrity-ai/sequrity-api" [build-system] requires=["hatchling", "hatch-vcs"] @@ -46,7 +42,7 @@ [dependency-groups] dev=["openai>=1.107.3", "pytest>=9.0.2", "pytest-asyncio>=0.24.0", "ruff>=0.14.13", "ty"] - langgraph=["langgraph>=1.0.6"] + langgraph=["langgraph>=1.0.6", "langchain-openai>=1.1.7"] agents=["openai-agents>=0.1.0"] docs=["griffe-pydantic>=1.2.0", "mike>=2.1", "mkdocs>=1.6", "mkdocs-material>=9.5", "mkdocstrings[python]>=0.28"] examples=["ipykernel<7.0.0", "rich>=14.2.0"] diff --git a/src/sequrity/control/_stream.py b/src/sequrity/control/_stream.py index bd36632..8c41c83 100644 --- a/src/sequrity/control/_stream.py +++ b/src/sequrity/control/_stream.py @@ -3,13 +3,19 @@ from __future__ import annotations import json -from typing import AsyncIterator, Generic, Iterator, TypeVar +from typing import Any, AsyncIterator, Generic, Iterator, TypeVar import httpx from pydantic import TypeAdapter _T = TypeVar("_T") +# TypeAdapter accepts type[T], Annotated types, and other special forms. +# We use Any here so callers can pass discriminated-union TypeAlias values +# (e.g. Annotated[Union[...], Field(discriminator="type")]) which are not +# strictly type[T] but are valid TypeAdapter inputs. +_ChunkSpec = Any + class SyncStream(Generic[_T]): """Wraps an httpx streaming response, parses SSE lines, and yields typed chunks. 
@@ -31,12 +37,12 @@ class SyncStream(Generic[_T]): def __init__( self, response: httpx.Response, - chunk_type: type[_T], + chunk_type: _ChunkSpec, *, session_id: str | None = None, ) -> None: self._response = response - self._adapter = TypeAdapter(chunk_type) + self._adapter: TypeAdapter[_T] = TypeAdapter(chunk_type) self.session_id = session_id def __iter__(self) -> Iterator[_T]: @@ -69,12 +75,12 @@ class AsyncStream(Generic[_T]): def __init__( self, response: httpx.Response, - chunk_type: type[_T], + chunk_type: _ChunkSpec, *, session_id: str | None = None, ) -> None: self._response = response - self._adapter = TypeAdapter(chunk_type) + self._adapter: TypeAdapter[_T] = TypeAdapter(chunk_type) self.session_id = session_id async def __aiter__(self) -> AsyncIterator[_T]: diff --git a/src/sequrity/control/resources/chat.py b/src/sequrity/control/resources/chat.py index 37175bc..ca208f8 100644 --- a/src/sequrity/control/resources/chat.py +++ b/src/sequrity/control/resources/chat.py @@ -131,19 +131,27 @@ def create( endpoint_type=endpoint_type, ) - sequrity_kwargs = dict( + if stream: + response = self._transport.stream_request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + return SyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) + + response = self._transport.request( + url=url, + payload=payload, llm_api_key=llm_api_key, features=features, security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, ) - - if stream: - response = self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) - return SyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) - - response = self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ChatCompletionResponse.model_validate(response.json()) 
result.session_id = response.headers.get("X-Session-ID") return result @@ -217,7 +225,7 @@ async def create( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, - session_id: str | None | _NotGiven = None, + session_id: str | None | _NotGiven = NOT_GIVEN, ) -> ChatCompletionResponse | AsyncStream[ChatCompletionChunk]: """Async variant of :meth:`ChatResource.create`.""" payload = ChatCompletionRequest.model_validate( @@ -240,19 +248,27 @@ async def create( endpoint_type=endpoint_type, ) - sequrity_kwargs = dict( + if stream: + response = await self._transport.stream_request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + return AsyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) + + response = await self._transport.request( + url=url, + payload=payload, llm_api_key=llm_api_key, features=features, security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, ) - - if stream: - response = await self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) - return AsyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) - - response = await self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ChatCompletionResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result diff --git a/src/sequrity/control/resources/messages.py b/src/sequrity/control/resources/messages.py index dfb7a89..040d2bf 100644 --- a/src/sequrity/control/resources/messages.py +++ b/src/sequrity/control/resources/messages.py @@ -175,19 +175,27 @@ def create( endpoint_type=endpoint_type, ) - sequrity_kwargs = dict( + if stream: + 
response = self._transport.stream_request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + return SyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = self._transport.request( + url=url, + payload=payload, llm_api_key=llm_api_key, features=features, security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, ) - - if stream: - response = self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) - return SyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) - - response = self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = AnthropicMessageResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result @@ -282,7 +290,7 @@ async def create( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, - session_id: str | None | _NotGiven = None, + session_id: str | None | _NotGiven = NOT_GIVEN, ) -> AnthropicMessageResponse | AsyncStream[AnthropicStreamEvent]: """Async variant of :meth:`MessagesResource.create`.""" payload = AnthropicMessageRequest.model_validate( @@ -312,19 +320,27 @@ async def create( endpoint_type=endpoint_type, ) - sequrity_kwargs = dict( + if stream: + response = await self._transport.stream_request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + return AsyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = await self._transport.request( + url=url, + payload=payload, 
llm_api_key=llm_api_key, features=features, security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, ) - - if stream: - response = await self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) - return AsyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) - - response = await self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = AnthropicMessageResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result diff --git a/src/sequrity/control/resources/responses.py b/src/sequrity/control/resources/responses.py index 5135741..18d37c8 100644 --- a/src/sequrity/control/resources/responses.py +++ b/src/sequrity/control/resources/responses.py @@ -220,19 +220,27 @@ def create( endpoint_type=endpoint_type, ) - sequrity_kwargs = dict( + if stream: + response = self._transport.stream_request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + return SyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = self._transport.request( + url=url, + payload=payload, llm_api_key=llm_api_key, features=features, security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, ) - - if stream: - response = self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) - return SyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) - - response = self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ResponsesResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result @@ -393,19 +401,27 @@ async def create( endpoint_type=endpoint_type, ) - sequrity_kwargs = dict( + if 
stream: + response = await self._transport.stream_request( + url=url, + payload=payload, + llm_api_key=llm_api_key, + features=features, + security_policy=security_policy, + fine_grained_config=fine_grained_config, + session_id=session_id, + ) + return AsyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) + + response = await self._transport.request( + url=url, + payload=payload, llm_api_key=llm_api_key, features=features, security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, ) - - if stream: - response = await self._transport.stream_request(url=url, payload=payload, **sequrity_kwargs) - return AsyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) - - response = await self._transport.request(url=url, payload=payload, **sequrity_kwargs) result = ResponsesResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") return result diff --git a/src/sequrity/types/messages/stream.py b/src/sequrity/types/messages/stream.py index 9fa2586..e1293fb 100644 --- a/src/sequrity/types/messages/stream.py +++ b/src/sequrity/types/messages/stream.py @@ -7,7 +7,9 @@ each carrying different data. 
""" -from typing import Annotated, Any, Literal +from typing import Annotated, Any, Literal, Union + +from typing import TypeAlias from pydantic import BaseModel, ConfigDict, Field @@ -43,8 +45,8 @@ class CitationsDelta(BaseModel): model_config = ConfigDict(extra="allow") -type AnthropicContentDelta = Annotated[ - TextDelta | InputJSONDelta | ThinkingDelta | SignatureDelta | CitationsDelta, +AnthropicContentDelta: TypeAlias = Annotated[ + Union[TextDelta, InputJSONDelta, ThinkingDelta, SignatureDelta, CitationsDelta], Field(discriminator="type"), ] @@ -95,8 +97,8 @@ class ServerToolUseBlockStart(BaseModel): model_config = ConfigDict(extra="allow") -type AnthropicContentBlockStart = Annotated[ - TextBlockStart | ThinkingBlockStart | RedactedThinkingBlockStart | ToolUseBlockStart | ServerToolUseBlockStart, +AnthropicContentBlockStart: TypeAlias = Annotated[ + Union[TextBlockStart, ThinkingBlockStart, RedactedThinkingBlockStart, ToolUseBlockStart, ServerToolUseBlockStart], Field(discriminator="type"), ] @@ -185,12 +187,14 @@ class RawMessageStopEvent(BaseModel): # Union Type # ============================================================================= -type AnthropicStreamEvent = Annotated[ - RawMessageStartEvent - | RawContentBlockStartEvent - | RawContentBlockDeltaEvent - | RawContentBlockStopEvent - | RawMessageDeltaEvent - | RawMessageStopEvent, +AnthropicStreamEvent: TypeAlias = Annotated[ + Union[ + RawMessageStartEvent, + RawContentBlockStartEvent, + RawContentBlockDeltaEvent, + RawContentBlockStopEvent, + RawMessageDeltaEvent, + RawMessageStopEvent, + ], Field(discriminator="type"), ] diff --git a/src/sequrity/types/responses/stream.py b/src/sequrity/types/responses/stream.py index 11c63d8..b738121 100644 --- a/src/sequrity/types/responses/stream.py +++ b/src/sequrity/types/responses/stream.py @@ -10,7 +10,9 @@ - Reasoning: reasoning_summary_text delta/done, reasoning_summary_part added/done """ -from typing import Annotated, Any, Literal +from typing import 
Annotated, Any, Literal, Union + +from typing import TypeAlias from pydantic import BaseModel, ConfigDict, Field @@ -249,23 +251,25 @@ class ResponseReasoningSummaryTextDoneEvent(BaseModel): # Union Type # ============================================================================= -type OpenAiResponseStreamEvent = Annotated[ - ResponseCreatedEvent - | ResponseInProgressEvent - | ResponseCompletedEvent - | ResponseFailedEvent - | ResponseIncompleteEvent - | ResponseOutputItemAddedEvent - | ResponseOutputItemDoneEvent - | ResponseContentPartAddedEvent - | ResponseContentPartDoneEvent - | ResponseTextDeltaEvent - | ResponseTextDoneEvent - | ResponseFunctionCallArgumentsDeltaEvent - | ResponseFunctionCallArgumentsDoneEvent - | ResponseReasoningSummaryPartAddedEvent - | ResponseReasoningSummaryPartDoneEvent - | ResponseReasoningSummaryTextDeltaEvent - | ResponseReasoningSummaryTextDoneEvent, +OpenAiResponseStreamEvent: TypeAlias = Annotated[ + Union[ + ResponseCreatedEvent, + ResponseInProgressEvent, + ResponseCompletedEvent, + ResponseFailedEvent, + ResponseIncompleteEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryPartDoneEvent, + ResponseReasoningSummaryTextDeltaEvent, + ResponseReasoningSummaryTextDoneEvent, + ], Field(discriminator="type"), ] diff --git a/uv.lock b/uv.lock index 0207003..8b7b2e2 100644 --- a/uv.lock +++ b/uv.lock @@ -2140,6 +2140,7 @@ examples = [ { name = "rich" }, ] langgraph = [ + { name = "langchain-openai" }, { name = "langgraph" }, ] @@ -2175,7 +2176,10 @@ examples = [ { name = "ipykernel", specifier = "<7.0.0" }, { name = "rich", specifier = ">=14.2.0" }, ] -langgraph = [{ name = "langgraph", specifier = ">=1.0.6" }] +langgraph = [ + { name = 
"langchain-openai", specifier = ">=1.1.7" }, + { name = "langgraph", specifier = ">=1.0.6" }, +] [[package]] name = "six" From 435db0186d9ef6c3152a6ac43e08b864100a9c5f Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 20:36:38 +0000 Subject: [PATCH 05/11] remove custom_instructions --- docs/control/reference/rest_api/headers/security_config.md | 2 -- docs/release_notes.md | 1 - 2 files changed, 3 deletions(-) diff --git a/docs/control/reference/rest_api/headers/security_config.md b/docs/control/reference/rest_api/headers/security_config.md index 899bf56..ea1868b 100644 --- a/docs/control/reference/rest_api/headers/security_config.md +++ b/docs/control/reference/rest_api/headers/security_config.md @@ -43,7 +43,6 @@ This header is **optional** and can be used in Headers-Only Mode to fine-tune se "query_include_tool_calls": null, "query_include_tool_args": null, "query_include_tool_results": null, - "custom_instructions": null }, "rllm": { "flavor": null, @@ -290,7 +289,6 @@ Planning LLM prompt overrides: | `query_include_tool_calls` | `boolean` | `null` | Whether to include upstream tool calls in inlined query. | | `query_include_tool_args` | `boolean` | `null` | Whether to include arguments of upstream tool calls. | | `query_include_tool_results` | `boolean` | `null` | Whether to include results of upstream tool calls. | -| `custom_instructions` | `string` | `null` | Custom instructions text appended to the planning LLM prompt. 
| ### `prompt.rllm` diff --git a/docs/release_notes.md b/docs/release_notes.md index 6c91f2e..8257c65 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -30,7 +30,6 @@ hide: - Added `tool_result_transform` FSM override (`"none"` or `"codex"`) for stripping Codex CLI metadata - Added `history_mismatch_policy` FSM override (`"reject"`, `"restart_turn"`, `"continue"`) for handling message history divergence in stateless mode (dual-LLM only) - - Added `custom_instructions` prompt override for appending custom text to the planning LLM prompt - Added `stream_thoughts` response format override for streaming model thinking process **Improvements** From 1692b5ba57a5b3ac8a4c51e7e2a498ab2aed3fd7 Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 20:50:21 +0000 Subject: [PATCH 06/11] add custom headers --- src/sequrity/control/_transport.py | 20 +++++++++++++++++-- src/sequrity/control/resources/chat.py | 8 ++++++++ .../control/resources/langgraph/__init__.py | 2 ++ .../control/resources/langgraph/_runner.py | 6 +++++- src/sequrity/control/resources/messages.py | 10 ++++++++++ src/sequrity/control/resources/policy.py | 5 +++++ src/sequrity/control/resources/responses.py | 10 ++++++++++ 7 files changed, 58 insertions(+), 3 deletions(-) diff --git a/src/sequrity/control/_transport.py b/src/sequrity/control/_transport.py index 2c180e5..a111aaa 100644 --- a/src/sequrity/control/_transport.py +++ b/src/sequrity/control/_transport.py @@ -67,6 +67,7 @@ def _build_headers( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> dict[str, str]: eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) eff_features = _resolve(features, self._config.features) @@ -74,7 +75,7 @@ def _build_headers( eff_config = _resolve(fine_grained_config, 
self._config.fine_grained_config) eff_session = _resolve(session_id, None) - return build_sequrity_headers( + headers = build_sequrity_headers( api_key=self._api_key, llm_api_key=eff_llm_key, features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, @@ -82,6 +83,9 @@ def _build_headers( config=eff_config.dump_for_headers(mode="json_str") if eff_config else None, session_id=eff_session, ) + if custom_headers: + headers.update(custom_headers) + return headers def _track_session(self, response: httpx.Response) -> None: new_session = response.headers.get("X-Session-ID") @@ -100,6 +104,7 @@ def request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """POST *payload* as JSON to *url* with merged Sequrity headers. @@ -116,6 +121,7 @@ def request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) try: @@ -139,6 +145,7 @@ def stream_request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """Open a streaming POST request. 
@@ -156,6 +163,7 @@ def stream_request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) request = self._http.build_request("POST", url, json=payload, headers=headers) @@ -211,6 +219,7 @@ def _build_headers( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> dict[str, str]: eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) eff_features = _resolve(features, self._config.features) @@ -218,7 +227,7 @@ def _build_headers( eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) eff_session = _resolve(session_id, self._session_id) - return build_sequrity_headers( + headers = build_sequrity_headers( api_key=self._api_key, llm_api_key=eff_llm_key, features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, @@ -226,6 +235,9 @@ def _build_headers( config=eff_config.dump_for_headers(mode="json_str") if eff_config else None, session_id=eff_session, ) + if custom_headers: + headers.update(custom_headers) + return headers def _track_session(self, response: httpx.Response) -> None: new_session = response.headers.get("X-Session-ID") @@ -242,6 +254,7 @@ async def request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """Async variant of :meth:`ControlSyncTransport.request`.""" headers = self._build_headers( @@ -250,6 +263,7 @@ async def request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) try: @@ -273,6 +287,7 @@ async def stream_request( 
security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """Open an async streaming POST request. @@ -290,6 +305,7 @@ async def stream_request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) request = self._http.build_request("POST", url, json=payload, headers=headers) diff --git a/src/sequrity/control/resources/chat.py b/src/sequrity/control/resources/chat.py index ca208f8..9f5ec60 100644 --- a/src/sequrity/control/resources/chat.py +++ b/src/sequrity/control/resources/chat.py @@ -40,6 +40,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> SyncStream[ChatCompletionChunk]: ... @overload @@ -62,6 +63,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse: ... def create( @@ -85,6 +87,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse | SyncStream[ChatCompletionChunk]: """Send a chat completion request through Sequrity's secure orchestrator. 
@@ -140,6 +143,7 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) return SyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) @@ -151,6 +155,7 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) result = ChatCompletionResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") @@ -226,6 +231,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse | AsyncStream[ChatCompletionChunk]: """Async variant of :meth:`ChatResource.create`.""" payload = ChatCompletionRequest.model_validate( @@ -257,6 +263,7 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) return AsyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) @@ -268,6 +275,7 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) result = ChatCompletionResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") diff --git a/src/sequrity/control/resources/langgraph/__init__.py b/src/sequrity/control/resources/langgraph/__init__.py index 4da889c..c33b7cd 100644 --- a/src/sequrity/control/resources/langgraph/__init__.py +++ b/src/sequrity/control/resources/langgraph/__init__.py @@ -37,6 +37,7 @@ def run( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, provider: LlmServiceProvider | 
LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> dict: """Compile and execute a LangGraph StateGraph with Sequrity security. @@ -75,4 +76,5 @@ def run( security_policy=security_policy, fine_grained_config=fine_grained_config, provider=provider, + custom_headers=custom_headers, ) diff --git a/src/sequrity/control/resources/langgraph/_runner.py b/src/sequrity/control/resources/langgraph/_runner.py index 9d48d5a..42012bf 100644 --- a/src/sequrity/control/resources/langgraph/_runner.py +++ b/src/sequrity/control/resources/langgraph/_runner.py @@ -234,6 +234,7 @@ def run_graph_sync( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, provider: LlmServiceProvider | LlmServiceProviderStr | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> dict: """Execute a LangGraph StateGraph through Sequrity's secure runtime. 
@@ -307,7 +308,7 @@ def run_graph_sync( eff_policy = _resolve(security_policy, transport._config.security_policy) def _build_headers(session_id: str | None = None) -> dict[str, str]: - return build_sequrity_headers( + headers = build_sequrity_headers( api_key=transport._api_key, llm_api_key=eff_llm_key, features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, @@ -315,6 +316,9 @@ def _build_headers(session_id: str | None = None) -> dict[str, str]: config=eff_fine_grained.dump_for_headers(mode="json_str") if eff_fine_grained else None, session_id=session_id, ) + if custom_headers: + headers.update(custom_headers) + return headers current_state = initial_state.copy() loop_fn = _run_messages_loop if rest_api_type == RestApiType.MESSAGES else _run_chat_completions_loop diff --git a/src/sequrity/control/resources/messages.py b/src/sequrity/control/resources/messages.py index 040d2bf..c715f2d 100644 --- a/src/sequrity/control/resources/messages.py +++ b/src/sequrity/control/resources/messages.py @@ -56,6 +56,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> SyncStream[AnthropicStreamEvent]: ... @overload @@ -85,6 +86,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse: ... def create( @@ -115,6 +117,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse | SyncStream[AnthropicStreamEvent]: """Send an Anthropic Messages API request through Sequrity. 
@@ -184,6 +187,7 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) return SyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -195,6 +199,7 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) result = AnthropicMessageResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") @@ -234,6 +239,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> AsyncStream[AnthropicStreamEvent]: ... @overload @@ -263,6 +269,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse: ... 
async def create( @@ -291,6 +298,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse | AsyncStream[AnthropicStreamEvent]: """Async variant of :meth:`MessagesResource.create`.""" payload = AnthropicMessageRequest.model_validate( @@ -329,6 +337,7 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) return AsyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -340,6 +349,7 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) result = AnthropicMessageResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") diff --git a/src/sequrity/control/resources/policy.py b/src/sequrity/control/resources/policy.py index 71b5248..1dc0b7c 100644 --- a/src/sequrity/control/resources/policy.py +++ b/src/sequrity/control/resources/policy.py @@ -22,6 +22,7 @@ def generate( request: PolicyGenRequest | dict, *, llm_api_key: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> PolicyGenResponse: """Generate a SQRT security policy from a natural language description. @@ -30,6 +31,7 @@ def generate( or a raw dict. The request must include a ``type`` discriminator field to select the tool format variant. llm_api_key: Optional LLM provider API key override. + custom_headers: Optional extra HTTP headers to include in the request. Returns: Parsed ``PolicyGenResponse`` with generated policies and usage info. 
@@ -52,6 +54,7 @@ def generate( security_policy=None, fine_grained_config=None, session_id=None, + custom_headers=custom_headers, ) return PolicyGenResponse.model_validate(response.json()) @@ -68,6 +71,7 @@ async def generate( request: PolicyGenRequest | dict, *, llm_api_key: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> PolicyGenResponse: """Async variant of :meth:`PolicyResource.generate`.""" if isinstance(request, dict): @@ -85,6 +89,7 @@ async def generate( features=None, security_policy=None, fine_grained_config=None, + custom_headers=custom_headers, ) return PolicyGenResponse.model_validate(response.json()) diff --git a/src/sequrity/control/resources/responses.py b/src/sequrity/control/resources/responses.py index 18d37c8..4ce9e8d 100644 --- a/src/sequrity/control/resources/responses.py +++ b/src/sequrity/control/resources/responses.py @@ -65,6 +65,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> SyncStream[OpenAiResponseStreamEvent]: ... @overload @@ -103,6 +104,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse: ... def create( @@ -142,6 +144,7 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse | SyncStream[OpenAiResponseStreamEvent]: """Send a Responses API request through Sequrity's secure orchestrator. 
@@ -229,6 +232,7 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) return SyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -240,6 +244,7 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) result = ResponsesResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") @@ -288,6 +293,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> AsyncStream[OpenAiResponseStreamEvent]: ... @overload @@ -326,6 +332,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse: ... 
async def create( @@ -363,6 +370,7 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse | AsyncStream[OpenAiResponseStreamEvent]: """Async variant of :meth:`ResponsesResource.create`.""" payload = ResponsesRequest.model_validate( @@ -410,6 +418,7 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) return AsyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -421,6 +430,7 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + custom_headers=custom_headers, ) result = ResponsesResponse.model_validate(response.json()) result.session_id = response.headers.get("X-Session-ID") From 160730bbbd2605a4525923ac6b4a65e1024aa9ec Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Thu, 26 Feb 2026 20:54:39 +0000 Subject: [PATCH 07/11] upgrade risky deps --- uv.lock | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/uv.lock b/uv.lock index 8b7b2e2..76a5ac5 100644 --- a/uv.lock +++ b/uv.lock @@ -734,7 +734,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.2.8" +version = "1.2.16" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonpatch" }, @@ -746,28 +746,28 @@ dependencies = [ { name = "typing-extensions" }, { name = "uuid-utils" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/75/cc/55bf57b83cbc164cbf84cbf0c5e4fb640d673546af131db70797b97b125b/langchain_core-1.2.8.tar.gz", hash = "sha256:76d933c3f4cfd8484d8131c39bf25f562e2df4d0d5fe3218e05ff773210713b6", size = 814506, upload-time = "2026-02-02T15:35:33.056Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/2e/a7/4c992456dae89a8704afec03e3c2a0149ccc5f29c1cbdd5f4aa77628e921/langchain_core-1.2.16.tar.gz", hash = "sha256:055a4bfe7d62f4ac45ed49fd759ee2e6bdd15abf998fbeea695fda5da2de6413", size = 835286, upload-time = "2026-02-25T16:27:30.551Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/d4/37fef9639b701c1fb1eea9e68447b72d86852ca3dc3253cdfd9c0afe228d/langchain_core-1.2.8-py3-none-any.whl", hash = "sha256:c732301272d63cfbcd75d114540257678627878f11b87046241272a25ba12ea7", size = 495753, upload-time = "2026-02-02T15:35:31.284Z" }, + { url = "https://files.pythonhosted.org/packages/2d/a1/57d5feaa11dc2ebb40f3bc3d7bf4294b6703e152e56edea9d4c622475a6a/langchain_core-1.2.16-py3-none-any.whl", hash = "sha256:2768add9aa97232a7712580f678e0ba045ee1036c71fe471355be0434fcb6e30", size = 502219, upload-time = "2026-02-25T16:27:29.379Z" }, ] [[package]] name = "langchain-openai" -version = "1.1.7" +version = "1.1.10" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = "openai" }, { name = "tiktoken" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/38/b7/30bfc4d1b658a9ee524bcce3b0b2ec9c45a11c853a13c4f0c9da9882784b/langchain_openai-1.1.7.tar.gz", hash = "sha256:f5ec31961ed24777548b63a5fe313548bc6e0eb9730d6552b8c6418765254c81", size = 1039134, upload-time = "2026-01-07T19:44:59.728Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/0f/01147f842499338ae3b0dd0a351fb83006d9ed623cf3a999bd68ba5bbe2d/langchain_openai-1.1.10.tar.gz", hash = "sha256:ca6fae7cf19425acc81814efed59c7d205ec9a1f284fd1d08aae9bda85d6501b", size = 1059755, upload-time = "2026-02-17T18:03:44.506Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/a1/50e7596aca775d8c3883eceeaf47489fac26c57c1abe243c00174f715a8a/langchain_openai-1.1.7-py3-none-any.whl", hash = "sha256:34e9cd686aac1a120d6472804422792bf8080a2103b5d21ee450c9e42d053815", size = 84753, upload-time = 
"2026-01-07T19:44:58.629Z" }, + { url = "https://files.pythonhosted.org/packages/72/17/3785cbcdc81c451179247e4176d2697879cb4f45ab2c59d949ca574e072d/langchain_openai-1.1.10-py3-none-any.whl", hash = "sha256:d91b2c09e9fbc70f7af45345d3aa477744962d41c73a029beb46b4f83b824827", size = 87205, upload-time = "2026-02-17T18:03:43.502Z" }, ] [[package]] name = "langgraph" -version = "1.0.7" +version = "1.0.9" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, @@ -777,9 +777,9 @@ dependencies = [ { name = "pydantic" }, { name = "xxhash" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/72/5b/f72655717c04e33d3b62f21b166dc063d192b53980e9e3be0e2a117f1c9f/langgraph-1.0.7.tar.gz", hash = "sha256:0cfdfee51e6e8cfe503ecc7367c73933437c505b03fa10a85c710975c8182d9a", size = 497098, upload-time = "2026-01-22T16:57:47.303Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/63/69373a6721f30026ffa462a62084b11ed4bb5a201d1672366e13a89532f3/langgraph-1.0.9.tar.gz", hash = "sha256:feac2729faba7d3c325bef76f240d7d7f66b02d2cbf4fdb1ed7d0cc83f963651", size = 502800, upload-time = "2026-02-19T18:19:45.228Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/0e/fe80144e3e4048e5d19ccdb91ac547c1a7dc3da8dbd1443e210048194c14/langgraph-1.0.7-py3-none-any.whl", hash = "sha256:9d68e8f8dd8f3de2fec45f9a06de05766d9b075b78fb03171779893b7a52c4d2", size = 157353, upload-time = "2026-01-22T16:57:45.997Z" }, + { url = "https://files.pythonhosted.org/packages/23/a2/562a6c2430085c2c29b23c1e1d12233bf41a64e9a9832eda7573af3666cf/langgraph-1.0.9-py3-none-any.whl", hash = "sha256:bce0d1f3e9a20434215a2a818395a58aedfc11c87bd6b52706c0db5c05ec44ec", size = 158150, upload-time = "2026-02-19T18:19:43.913Z" }, ] [[package]] @@ -797,15 +797,15 @@ wheels = [ [[package]] name = "langgraph-prebuilt" -version = "1.0.7" +version = "1.0.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "langchain-core" }, { name = 
"langgraph-checkpoint" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/59/711aecd1a50999456850dc328f3cad72b4372d8218838d8d5326f80cb76f/langgraph_prebuilt-1.0.7.tar.gz", hash = "sha256:38e097e06de810de4d0e028ffc0e432bb56d1fb417620fb1dfdc76c5e03e4bf9", size = 163692, upload-time = "2026-01-22T16:45:22.801Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/06/dd61a5c2dce009d1b03b1d56f2a85b3127659fdddf5b3be5d8f1d60820fb/langgraph_prebuilt-1.0.8.tar.gz", hash = "sha256:0cd3cf5473ced8a6cd687cc5294e08d3de57529d8dd14fdc6ae4899549efcf69", size = 164442, upload-time = "2026-02-19T18:14:39.083Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/47/49/5e37abb3f38a17a3487634abc2a5da87c208cc1d14577eb8d7184b25c886/langgraph_prebuilt-1.0.7-py3-none-any.whl", hash = "sha256:e14923516504405bb5edc3977085bc9622c35476b50c1808544490e13871fe7c", size = 35324, upload-time = "2026-01-22T16:45:21.784Z" }, + { url = "https://files.pythonhosted.org/packages/dc/41/ec966424ad3f2ed3996d24079d3342c8cd6c0bd0653c12b2a917a685ec6c/langgraph_prebuilt-1.0.8-py3-none-any.whl", hash = "sha256:d16a731e591ba4470f3e313a319c7eee7dbc40895bcf15c821f985a3522a7ce0", size = 35648, upload-time = "2026-02-19T18:14:37.611Z" }, ] [[package]] @@ -1149,7 +1149,7 @@ wheels = [ [[package]] name = "openai" -version = "2.16.0" +version = "2.24.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -1161,14 +1161,14 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b1/6c/e4c964fcf1d527fdf4739e7cc940c60075a4114d50d03871d5d5b1e13a88/openai-2.16.0.tar.gz", hash = "sha256:42eaa22ca0d8ded4367a77374104d7a2feafee5bd60a107c3c11b5243a11cd12", size = 629649, upload-time = "2026-01-27T23:28:02.579Z" } +sdist = { url = "https://files.pythonhosted.org/packages/55/13/17e87641b89b74552ed408a92b231283786523edddc95f3545809fab673c/openai-2.24.0.tar.gz", hash = 
"sha256:1e5769f540dbd01cb33bc4716a23e67b9d695161a734aff9c5f925e2bf99a673", size = 658717, upload-time = "2026-02-24T20:02:07.958Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/16/83/0315bf2cfd75a2ce8a7e54188e9456c60cec6c0cf66728ed07bd9859ff26/openai-2.16.0-py3-none-any.whl", hash = "sha256:5f46643a8f42899a84e80c38838135d7038e7718333ce61396994f887b09a59b", size = 1068612, upload-time = "2026-01-27T23:28:00.356Z" }, + { url = "https://files.pythonhosted.org/packages/c9/30/844dc675ee6902579b8eef01ed23917cc9319a1c9c0c14ec6e39340c96d0/openai-2.24.0-py3-none-any.whl", hash = "sha256:fed30480d7d6c884303287bde864980a4b137b60553ffbcf9ab4a233b7a73d94", size = 1120122, upload-time = "2026-02-24T20:02:05.669Z" }, ] [[package]] name = "openai-agents" -version = "0.7.0" +version = "0.10.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "griffe" }, @@ -1179,9 +1179,9 @@ dependencies = [ { name = "types-requests" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f8/a2/63a5ff78d89fa0861fe461a7b91d2123315115dcbf2c3fdab051b99185e5/openai_agents-0.7.0.tar.gz", hash = "sha256:5a283e02ee0d7c0d869421de9918691711bf19d1b1dc4d2840548335f2d24de6", size = 2169530, upload-time = "2026-01-23T00:06:35.746Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4b/8f/21c2be52daefb8adeb6649ffcbee8bac6e8e22be05afd832d70876b085c4/openai_agents-0.10.2.tar.gz", hash = "sha256:195b0d81bb0f4b142c4b9153f2a54c1d332ca024a03e8e24b945f1cff8532ebd", size = 2439958, upload-time = "2026-02-26T08:06:45.942Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/92/9cbbdd604f858056d4e4f105a1b99779128bae61b6a3681db0f035ef73b4/openai_agents-0.7.0-py3-none-any.whl", hash = "sha256:4446935a65d3bb1c2c1cd0546b1bc286ced9dde0adba947ab390b2e74802aa49", size = 288537, upload-time = "2026-01-23T00:06:33.78Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/2c/6c5c607da44307e49f3107be307a794eea9245e1fac9e666e7d522e866d4/openai_agents-0.10.2-py3-none-any.whl", hash = "sha256:8bf8bbd16cdba02e9c63c193ff2811b09d7094c5c39a4e2daa49a7037b840bc1", size = 404190, upload-time = "2026-02-26T08:06:43.952Z" }, ] [[package]] From 404d0750bf2da5ee9e200f052742f71216f7d014 Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Fri, 27 Feb 2026 01:03:09 +0000 Subject: [PATCH 08/11] custom header overrides --- src/sequrity/control/_transport.py | 56 ++++- src/sequrity/control/resources/chat.py | 41 +++- src/sequrity/control/resources/messages.py | 39 +++- src/sequrity/control/resources/responses.py | 39 +++- src/sequrity/control/types/headers.py | 88 ++++++-- test/control/test_header_overrides.py | 223 ++++++++++++++++++++ 6 files changed, 454 insertions(+), 32 deletions(-) create mode 100644 test/control/test_header_overrides.py diff --git a/src/sequrity/control/_transport.py b/src/sequrity/control/_transport.py index a111aaa..a7c4f92 100644 --- a/src/sequrity/control/_transport.py +++ b/src/sequrity/control/_transport.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar import httpx @@ -67,6 +67,9 @@ def _build_headers( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> dict[str, str]: eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) @@ -75,12 +78,18 @@ def _build_headers( eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) eff_session = _resolve(session_id, None) + features_str = ( + eff_features.dump_for_headers(mode="json_str", 
overrides=feature_overrides) if eff_features else None + ) + policy_str = eff_policy.dump_for_headers(mode="json_str", overrides=policy_overrides) if eff_policy else None + config_str = eff_config.dump_for_headers(mode="json_str", overrides=config_overrides) if eff_config else None + headers = build_sequrity_headers( api_key=self._api_key, llm_api_key=eff_llm_key, - features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, - policy=eff_policy.dump_for_headers(mode="json_str") if eff_policy else None, - config=eff_config.dump_for_headers(mode="json_str") if eff_config else None, + features=features_str, + policy=policy_str, + config=config_str, session_id=eff_session, ) if custom_headers: @@ -104,6 +113,9 @@ def request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """POST *payload* as JSON to *url* with merged Sequrity headers. @@ -121,6 +133,9 @@ def request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) @@ -145,6 +160,9 @@ def stream_request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """Open a streaming POST request. 
@@ -163,6 +181,9 @@ def stream_request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) @@ -219,6 +240,9 @@ def _build_headers( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> dict[str, str]: eff_llm_key = _resolve(llm_api_key, self._config.llm_api_key) @@ -227,12 +251,18 @@ def _build_headers( eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) eff_session = _resolve(session_id, self._session_id) + features_str = ( + eff_features.dump_for_headers(mode="json_str", overrides=feature_overrides) if eff_features else None + ) + policy_str = eff_policy.dump_for_headers(mode="json_str", overrides=policy_overrides) if eff_policy else None + config_str = eff_config.dump_for_headers(mode="json_str", overrides=config_overrides) if eff_config else None + headers = build_sequrity_headers( api_key=self._api_key, llm_api_key=eff_llm_key, - features=eff_features.dump_for_headers(mode="json_str") if eff_features else None, - policy=eff_policy.dump_for_headers(mode="json_str") if eff_policy else None, - config=eff_config.dump_for_headers(mode="json_str") if eff_config else None, + features=features_str, + policy=policy_str, + config=config_str, session_id=eff_session, ) if custom_headers: @@ -254,6 +284,9 @@ async def request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + 
feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """Async variant of :meth:`ControlSyncTransport.request`.""" @@ -263,6 +296,9 @@ async def request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) @@ -287,6 +323,9 @@ async def stream_request( security_policy: SecurityPolicyHeader | None | _NotGiven = NOT_GIVEN, fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> httpx.Response: """Open an async streaming POST request. 
@@ -305,6 +344,9 @@ async def stream_request( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) diff --git a/src/sequrity/control/resources/chat.py b/src/sequrity/control/resources/chat.py index 9f5ec60..89da738 100644 --- a/src/sequrity/control/resources/chat.py +++ b/src/sequrity/control/resources/chat.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Literal, overload +from typing import Any, Literal, overload from ..._sentinel import NOT_GIVEN, _NotGiven from ...types.chat_completion.request import ChatCompletionRequest, Message, ReasoningEffort, ResponseFormat, Tool @@ -40,6 +40,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> SyncStream[ChatCompletionChunk]: ... @@ -63,6 +66,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse: ... 
@@ -87,6 +93,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse | SyncStream[ChatCompletionChunk]: """Send a chat completion request through Sequrity's secure orchestrator. @@ -109,6 +118,13 @@ def create( fine_grained_config: Fine-grained config override. endpoint_type: Endpoint type override. session_id: Explicit session ID override. + feature_overrides: Dict to deep-merge into the serialized ``X-Features`` + header JSON. Allows adding or overriding fields without loosening + Pydantic validation on :class:`FeaturesHeader`. + policy_overrides: Dict to deep-merge into the serialized ``X-Policy`` + header JSON. + config_overrides: Dict to deep-merge into the serialized ``X-Config`` + header JSON. 
Returns: ``ChatCompletionResponse`` when ``stream`` is ``False``/``None``, @@ -143,6 +159,9 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) return SyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) @@ -155,6 +174,9 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) result = ChatCompletionResponse.model_validate(response.json()) @@ -188,6 +210,10 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, + custom_headers: dict[str, str] | None = None, ) -> AsyncStream[ChatCompletionChunk]: ... @overload @@ -210,6 +236,10 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, + custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse: ... 
async def create( @@ -231,6 +261,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ChatCompletionResponse | AsyncStream[ChatCompletionChunk]: """Async variant of :meth:`ChatResource.create`.""" @@ -263,6 +296,9 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) return AsyncStream(response, ChatCompletionChunk, session_id=response.headers.get("X-Session-ID")) @@ -275,6 +311,9 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) result = ChatCompletionResponse.model_validate(response.json()) diff --git a/src/sequrity/control/resources/messages.py b/src/sequrity/control/resources/messages.py index c715f2d..4249c89 100644 --- a/src/sequrity/control/resources/messages.py +++ b/src/sequrity/control/resources/messages.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Literal, overload +from typing import Any, Literal, overload from ..._sentinel import NOT_GIVEN, _NotGiven from ...types.enums import LlmServiceProvider, LlmServiceProviderStr, RestApiType @@ -56,6 +56,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = 
None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> SyncStream[AnthropicStreamEvent]: ... @@ -86,6 +89,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse: ... @@ -117,6 +123,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse | SyncStream[AnthropicStreamEvent]: """Send an Anthropic Messages API request through Sequrity. @@ -146,6 +155,13 @@ def create( fine_grained_config: Fine-grained config override. endpoint_type: Endpoint type override. session_id: Explicit session ID override. + feature_overrides: Dict to deep-merge into the serialized ``X-Features`` + header JSON. Allows adding or overriding fields without loosening + Pydantic validation on :class:`FeaturesHeader`. + policy_overrides: Dict to deep-merge into the serialized ``X-Policy`` + header JSON. + config_overrides: Dict to deep-merge into the serialized ``X-Config`` + header JSON. 
Returns: ``AnthropicMessageResponse`` when ``stream`` is ``False``/``None``, @@ -187,6 +203,9 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) return SyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -199,6 +218,9 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) result = AnthropicMessageResponse.model_validate(response.json()) @@ -239,6 +261,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> AsyncStream[AnthropicStreamEvent]: ... @@ -269,6 +294,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse: ... 
@@ -298,6 +326,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> AnthropicMessageResponse | AsyncStream[AnthropicStreamEvent]: """Async variant of :meth:`MessagesResource.create`.""" @@ -337,6 +368,9 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) return AsyncStream(response, AnthropicStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -349,6 +383,9 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) result = AnthropicMessageResponse.model_validate(response.json()) diff --git a/src/sequrity/control/resources/responses.py b/src/sequrity/control/resources/responses.py index 4ce9e8d..93ffbb8 100644 --- a/src/sequrity/control/resources/responses.py +++ b/src/sequrity/control/resources/responses.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Literal, overload +from typing import Any, Literal, overload from ..._sentinel import NOT_GIVEN, _NotGiven from ...types.enums import LlmServiceProvider, LlmServiceProviderStr, RestApiType @@ -65,6 +65,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + 
policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> SyncStream[OpenAiResponseStreamEvent]: ... @@ -104,6 +107,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse: ... @@ -144,6 +150,9 @@ def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse | SyncStream[OpenAiResponseStreamEvent]: """Send a Responses API request through Sequrity's secure orchestrator. @@ -182,6 +191,13 @@ def create( fine_grained_config: Fine-grained config override. endpoint_type: Endpoint type override. session_id: Explicit session ID override. + feature_overrides: Dict to deep-merge into the serialized ``X-Features`` + header JSON. Allows adding or overriding fields without loosening + Pydantic validation on :class:`FeaturesHeader`. + policy_overrides: Dict to deep-merge into the serialized ``X-Policy`` + header JSON. + config_overrides: Dict to deep-merge into the serialized ``X-Config`` + header JSON. 
Returns: ``ResponsesResponse`` when ``stream`` is ``False``/``None``, @@ -232,6 +248,9 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) return SyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -244,6 +263,9 @@ def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) result = ResponsesResponse.model_validate(response.json()) @@ -293,6 +315,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> AsyncStream[OpenAiResponseStreamEvent]: ... @@ -332,6 +357,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse: ... 
@@ -370,6 +398,9 @@ async def create( fine_grained_config: FineGrainedConfigHeader | None | _NotGiven = NOT_GIVEN, endpoint_type: str | _NotGiven = NOT_GIVEN, session_id: str | None | _NotGiven = NOT_GIVEN, + feature_overrides: dict[str, Any] | None = None, + policy_overrides: dict[str, Any] | None = None, + config_overrides: dict[str, Any] | None = None, custom_headers: dict[str, str] | None = None, ) -> ResponsesResponse | AsyncStream[OpenAiResponseStreamEvent]: """Async variant of :meth:`ResponsesResource.create`.""" @@ -418,6 +449,9 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) return AsyncStream(response, OpenAiResponseStreamEvent, session_id=response.headers.get("X-Session-ID")) @@ -430,6 +464,9 @@ async def create( security_policy=security_policy, fine_grained_config=fine_grained_config, session_id=session_id, + feature_overrides=feature_overrides, + policy_overrides=policy_overrides, + config_overrides=config_overrides, custom_headers=custom_headers, ) result = ResponsesResponse.model_validate(response.json()) diff --git a/src/sequrity/control/types/headers.py b/src/sequrity/control/types/headers.py index 4ad5c16..e8d5430 100644 --- a/src/sequrity/control/types/headers.py +++ b/src/sequrity/control/types/headers.py @@ -6,10 +6,22 @@ from __future__ import annotations -from typing import Literal, TypeAlias, overload +import json +from typing import Any, Literal, TypeAlias, overload from pydantic import BaseModel, ConfigDict, Field + +def _deep_merge(base: dict[str, Any], overrides: dict[str, Any]) -> dict[str, Any]: + """Recursively merge *overrides* into *base* (mutates *base*). 
Override values win.""" + for key, value in overrides.items(): + if key in base and isinstance(base[key], dict) and isinstance(value, dict): + _deep_merge(base[key], value) + else: + base[key] = value + return base + + # --------------------------------------------------------------------------- # X-Features header (FeaturesHeader) # --------------------------------------------------------------------------- @@ -78,16 +90,28 @@ class FeaturesHeader(BaseModel): ) @overload - def dump_for_headers(self, mode: Literal["json_str"] = ...) -> str: ... + def dump_for_headers(self, mode: Literal["json_str"] = ..., *, overrides: dict[str, Any] | None = ...) -> str: ... @overload - def dump_for_headers(self, mode: Literal["json"]) -> dict: ... - - def dump_for_headers(self, mode: Literal["json", "json_str"] = "json_str") -> dict | str: - """Serialize for use as the ``X-Features`` HTTP header value.""" + def dump_for_headers(self, mode: Literal["json"], *, overrides: dict[str, Any] | None = ...) -> dict: ... + + def dump_for_headers( + self, mode: Literal["json", "json_str"] = "json_str", *, overrides: dict[str, Any] | None = None + ) -> dict | str: + """Serialize for use as the ``X-Features`` HTTP header value. + + Args: + mode: Output format — ``"json"`` for a dict, ``"json_str"`` for a JSON string. + overrides: Optional dict to deep-merge into the serialized output. + Allows adding or overriding fields not defined on the model + without loosening Pydantic validation. + """ + data = self.model_dump(mode="json", exclude_none=True) + if overrides: + _deep_merge(data, overrides) if mode == "json": - return self.model_dump(mode="json", exclude_none=True) + return data elif mode == "json_str": - return self.model_dump_json(exclude_none=True) + return json.dumps(data) else: raise ValueError(f"Invalid mode: {mode}. 
Must be 'json' or 'json_str'.") @@ -255,16 +279,26 @@ class SecurityPolicyHeader(BaseModel): presets: InternalPolicyPresets | None = Field(default=None, description="Internal policy presets configuration.") @overload - def dump_for_headers(self, mode: Literal["json_str"] = ...) -> str: ... + def dump_for_headers(self, mode: Literal["json_str"] = ..., *, overrides: dict[str, Any] | None = ...) -> str: ... @overload - def dump_for_headers(self, mode: Literal["json"]) -> dict: ... - - def dump_for_headers(self, mode: Literal["json", "json_str"] = "json_str") -> dict | str: - """Serialize for use as the ``X-Policy`` HTTP header value.""" + def dump_for_headers(self, mode: Literal["json"], *, overrides: dict[str, Any] | None = ...) -> dict: ... + + def dump_for_headers( + self, mode: Literal["json", "json_str"] = "json_str", *, overrides: dict[str, Any] | None = None + ) -> dict | str: + """Serialize for use as the ``X-Policy`` HTTP header value. + + Args: + mode: Output format — ``"json"`` for a dict, ``"json_str"`` for a JSON string. + overrides: Optional dict to deep-merge into the serialized output. + """ + data = self.model_dump(mode="json", exclude_none=True) + if overrides: + _deep_merge(data, overrides) if mode == "json": - return self.model_dump(mode="json", exclude_none=True) + return data elif mode == "json_str": - return self.model_dump_json(exclude_none=True) + return json.dumps(data) else: raise ValueError(f"Unsupported mode for dump_for_headers: {mode}") @@ -596,16 +630,26 @@ class FineGrainedConfigHeader(BaseModel): ) @overload - def dump_for_headers(self, mode: Literal["json_str"] = ...) -> str: ... + def dump_for_headers(self, mode: Literal["json_str"] = ..., *, overrides: dict[str, Any] | None = ...) -> str: ... @overload - def dump_for_headers(self, mode: Literal["json"]) -> dict: ... 
- - def dump_for_headers(self, mode: Literal["json", "json_str"] = "json_str") -> dict | str: - """Serialize for use as the ``X-Config`` HTTP header value.""" + def dump_for_headers(self, mode: Literal["json"], *, overrides: dict[str, Any] | None = ...) -> dict: ... + + def dump_for_headers( + self, mode: Literal["json", "json_str"] = "json_str", *, overrides: dict[str, Any] | None = None + ) -> dict | str: + """Serialize for use as the ``X-Config`` HTTP header value. + + Args: + mode: Output format — ``"json"`` for a dict, ``"json_str"`` for a JSON string. + overrides: Optional dict to deep-merge into the serialized output. + """ + data = self.model_dump(mode="json", exclude_none=True) + if overrides: + _deep_merge(data, overrides) if mode == "json": - return self.model_dump(mode="json", exclude_none=True) + return data elif mode == "json_str": - return self.model_dump_json(exclude_none=True) + return json.dumps(data) else: raise ValueError(f"Unsupported mode for dump_for_headers: {mode}") diff --git a/test/control/test_header_overrides.py b/test/control/test_header_overrides.py new file mode 100644 index 0000000..d2837be --- /dev/null +++ b/test/control/test_header_overrides.py @@ -0,0 +1,223 @@ +"""Unit tests for header override merging in dump_for_headers. + +These tests verify that the ``overrides`` parameter on +``FeaturesHeader``, ``SecurityPolicyHeader``, and ``FineGrainedConfigHeader`` +correctly deep-merges extra/override fields into the serialized output +without requiring ``extra="allow"`` on the Pydantic models. 
+""" + +from __future__ import annotations + +import json + +import pytest + +from sequrity.control.types.headers import ( + FeaturesHeader, + FineGrainedConfigHeader, + SecurityPolicyHeader, + _deep_merge, +) + + +# --------------------------------------------------------------------------- +# _deep_merge helper +# --------------------------------------------------------------------------- + + +class TestDeepMerge: + """Unit tests for the recursive _deep_merge utility.""" + + def test_flat_merge(self): + base = {"a": 1, "b": 2} + result = _deep_merge(base, {"b": 99, "c": 3}) + assert result == {"a": 1, "b": 99, "c": 3} + + def test_nested_merge(self): + base = {"outer": {"a": 1, "b": 2}} + result = _deep_merge(base, {"outer": {"b": 99, "c": 3}}) + assert result == {"outer": {"a": 1, "b": 99, "c": 3}} + + def test_override_replaces_non_dict_with_dict(self): + base = {"key": "string_value"} + result = _deep_merge(base, {"key": {"nested": True}}) + assert result == {"key": {"nested": True}} + + def test_override_replaces_dict_with_non_dict(self): + base = {"key": {"nested": True}} + result = _deep_merge(base, {"key": "flat_value"}) + assert result == {"key": "flat_value"} + + def test_deeply_nested(self): + base = {"a": {"b": {"c": 1, "d": 2}}} + result = _deep_merge(base, {"a": {"b": {"c": 99, "e": 3}}}) + assert result == {"a": {"b": {"c": 99, "d": 2, "e": 3}}} + + def test_empty_override(self): + base = {"a": 1} + result = _deep_merge(base, {}) + assert result == {"a": 1} + + def test_mutates_base(self): + base = {"a": 1} + _deep_merge(base, {"b": 2}) + assert base == {"a": 1, "b": 2} + + +# --------------------------------------------------------------------------- +# FeaturesHeader overrides +# --------------------------------------------------------------------------- + + +class TestFeaturesHeaderOverrides: + """Test dump_for_headers(overrides=...) 
on FeaturesHeader.""" + + def test_no_overrides_unchanged(self): + header = FeaturesHeader.single_llm(toxicity_filter=True) + without = json.loads(header.dump_for_headers()) + with_none = json.loads(header.dump_for_headers(overrides=None)) + assert without == with_none + + def test_add_new_field(self): + header = FeaturesHeader.single_llm() + result = json.loads(header.dump_for_headers(overrides={"custom_field": "value"})) + assert result["agent_arch"] == "single-llm" + assert result["custom_field"] == "value" + + def test_override_existing_field(self): + header = FeaturesHeader.single_llm() + result = json.loads(header.dump_for_headers(overrides={"agent_arch": "dual-llm"})) + assert result["agent_arch"] == "dual-llm" + + def test_add_nested_custom_entry(self): + header = FeaturesHeader.dual_llm(toxicity_filter=True) + overrides = { + "content_classifiers": [ + {"name": "custom_classifier", "threshold": 0.8}, + ], + } + result = json.loads(header.dump_for_headers(overrides=overrides)) + # Override replaces the list entirely + assert len(result["content_classifiers"]) == 1 + assert result["content_classifiers"][0]["name"] == "custom_classifier" + + def test_json_mode_returns_dict(self): + header = FeaturesHeader.single_llm() + result = header.dump_for_headers(mode="json", overrides={"extra": True}) + assert isinstance(result, dict) + assert result["extra"] is True + + def test_json_str_mode_returns_string(self): + header = FeaturesHeader.single_llm() + result = header.dump_for_headers(mode="json_str", overrides={"extra": True}) + assert isinstance(result, str) + parsed = json.loads(result) + assert parsed["extra"] is True + + def test_pydantic_validation_still_strict(self): + """extra='forbid' still blocks unknown fields at construction time.""" + with pytest.raises(Exception): # ValidationError + FeaturesHeader(agent_arch="single-llm", bogus_field=123) + + +# --------------------------------------------------------------------------- +# SecurityPolicyHeader 
overrides +# --------------------------------------------------------------------------- + + +class TestSecurityPolicyHeaderOverrides: + """Test dump_for_headers(overrides=...) on SecurityPolicyHeader.""" + + def test_no_overrides_unchanged(self): + header = SecurityPolicyHeader.single_llm() + without = json.loads(header.dump_for_headers()) + with_none = json.loads(header.dump_for_headers(overrides=None)) + assert without == with_none + + def test_add_new_top_level_field(self): + header = SecurityPolicyHeader.single_llm() + result = json.loads(header.dump_for_headers(overrides={"custom_policy": "enabled"})) + assert result["mode"] == "standard" + assert result["custom_policy"] == "enabled" + + def test_override_nested_presets(self): + header = SecurityPolicyHeader.dual_llm(default_allow=True) + result = json.loads(header.dump_for_headers(overrides={"presets": {"default_allow": False}})) + assert result["presets"]["default_allow"] is False + # Other preset fields should still be present + assert "enable_non_executable_memory" in result["presets"] + + def test_add_custom_nested_entry(self): + header = SecurityPolicyHeader.single_llm() + result = json.loads(header.dump_for_headers(overrides={"presets": {"custom_preset": {"level": "high"}}})) + assert result["presets"]["custom_preset"] == {"level": "high"} + + def test_override_mode(self): + header = SecurityPolicyHeader.single_llm(mode="standard") + result = json.loads(header.dump_for_headers(overrides={"mode": "strict"})) + assert result["mode"] == "strict" + + def test_json_mode_with_overrides(self): + header = SecurityPolicyHeader.dual_llm() + result = header.dump_for_headers(mode="json", overrides={"extra_key": 42}) + assert isinstance(result, dict) + assert result["extra_key"] == 42 + + +# --------------------------------------------------------------------------- +# FineGrainedConfigHeader overrides +# --------------------------------------------------------------------------- + + +class 
TestFineGrainedConfigHeaderOverrides: + """Test dump_for_headers(overrides=...) on FineGrainedConfigHeader.""" + + def test_no_overrides_unchanged(self): + header = FineGrainedConfigHeader.dual_llm() + without = json.loads(header.dump_for_headers()) + with_none = json.loads(header.dump_for_headers(overrides=None)) + assert without == with_none + + def test_override_fsm_field(self): + header = FineGrainedConfigHeader.dual_llm(max_n_turns=5) + result = json.loads(header.dump_for_headers(overrides={"fsm": {"max_n_turns": 20}})) + assert result["fsm"]["max_n_turns"] == 20 + # Other FSM fields preserved + assert "disable_rllm" in result["fsm"] + + def test_add_custom_fsm_entry(self): + header = FineGrainedConfigHeader.single_llm() + result = json.loads(header.dump_for_headers(overrides={"fsm": {"custom_setting": "enabled"}})) + assert result["fsm"]["custom_setting"] == "enabled" + # Original fields still present + assert result["fsm"]["max_n_turns"] == 50 + + def test_add_new_top_level_section(self): + header = FineGrainedConfigHeader.single_llm() + result = json.loads(header.dump_for_headers(overrides={"custom_section": {"key": "value"}})) + assert result["custom_section"] == {"key": "value"} + assert "fsm" in result + + def test_override_response_format(self): + header = FineGrainedConfigHeader.dual_llm(include_program=True) + result = json.loads( + header.dump_for_headers(overrides={"response_format": {"include_program": False, "custom_format": True}}) + ) + assert result["response_format"]["include_program"] is False + assert result["response_format"]["custom_format"] is True + + def test_deeply_nested_prompt_override(self): + header = FineGrainedConfigHeader.dual_llm(pllm_debug_info_level="minimal") + result = json.loads(header.dump_for_headers(overrides={"prompt": {"pllm": {"debug_info_level": "extra"}}})) + assert result["prompt"]["pllm"]["debug_info_level"] == "extra" + + def test_json_mode_with_overrides(self): + header = FineGrainedConfigHeader.dual_llm() + 
result = header.dump_for_headers(mode="json", overrides={"fsm": {"max_n_turns": 99}}) + assert isinstance(result, dict) + assert result["fsm"]["max_n_turns"] == 99 + + def test_pydantic_validation_still_strict(self): + """extra='forbid' still blocks unknown fields at construction time.""" + with pytest.raises(Exception): # ValidationError + FineGrainedConfigHeader(fsm=None, bogus_field=True) From 7a291d350d6730585f775a070473a1912b1b9e6a Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Fri, 27 Feb 2026 01:11:13 +0000 Subject: [PATCH 09/11] custom header overrides and docs --- .../sequrity_client/headers/config_header.md | 1 + .../headers/features_header.md | 2 +- .../sequrity_client/headers/policy_header.md | 1 + src/sequrity/control/types/headers.py | 19 +++++++++++++++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/control/reference/sequrity_client/headers/config_header.md b/docs/control/reference/sequrity_client/headers/config_header.md index 3c13032..86184ee 100644 --- a/docs/control/reference/sequrity_client/headers/config_header.md +++ b/docs/control/reference/sequrity_client/headers/config_header.md @@ -6,6 +6,7 @@ Advanced configuration for session behavior, prompt settings, and response forma options: show_root_heading: true show_source: true + members: ["single_llm", "dual_llm", "dump_for_headers"] --- diff --git a/docs/control/reference/sequrity_client/headers/features_header.md b/docs/control/reference/sequrity_client/headers/features_header.md index 2831f39..32d16ef 100644 --- a/docs/control/reference/sequrity_client/headers/features_header.md +++ b/docs/control/reference/sequrity_client/headers/features_header.md @@ -4,7 +4,7 @@ options: show_root_heading: true show_source: true - members: ["single_llm", "dual_llm"] + members: ["single_llm", "dual_llm", "dump_for_headers"] ::: sequrity.control.types.headers.TaggerConfig options: diff --git a/docs/control/reference/sequrity_client/headers/policy_header.md 
b/docs/control/reference/sequrity_client/headers/policy_header.md index 3ea5577..c65f15c 100644 --- a/docs/control/reference/sequrity_client/headers/policy_header.md +++ b/docs/control/reference/sequrity_client/headers/policy_header.md @@ -6,6 +6,7 @@ Configure security policies using SQRT policy language. options: show_root_heading: true show_source: true + members: ["single_llm", "dual_llm", "dump_for_headers"] ::: sequrity.control.types.headers.PolicyCode options: diff --git a/src/sequrity/control/types/headers.py b/src/sequrity/control/types/headers.py index e8d5430..e77ffe3 100644 --- a/src/sequrity/control/types/headers.py +++ b/src/sequrity/control/types/headers.py @@ -642,6 +642,25 @@ def dump_for_headers( Args: mode: Output format — ``"json"`` for a dict, ``"json_str"`` for a JSON string. overrides: Optional dict to deep-merge into the serialized output. + Nested dicts are merged recursively; non-dict values replace + existing ones. This lets you inject fields the Pydantic model + doesn't define while keeping ``extra="forbid"`` validation at + construction time. 
+ + Example: + ```python + config = FineGrainedConfigHeader.dual_llm(max_n_turns=5) + header_json = config.dump_for_headers(overrides={ + "fsm": { + "max_n_turns": 20, # override existing + "custom_beta_flag": True, # add custom field that is not defined on the model + }, + "prompt": { + "pllm": {"debug_info_level": "extra"}, # nested override + }, + "experimental_section": {"key": "value"}, # new top-level key + }) + ``` """ data = self.model_dump(mode="json", exclude_none=True) if overrides: From ba3a243d3fd0ab14925fb6b3b5b2adaed0ecefde Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Fri, 27 Feb 2026 12:24:49 +0000 Subject: [PATCH 10/11] fix default session id --- src/sequrity/control/_transport.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sequrity/control/_transport.py b/src/sequrity/control/_transport.py index a7c4f92..af4ba57 100644 --- a/src/sequrity/control/_transport.py +++ b/src/sequrity/control/_transport.py @@ -76,7 +76,7 @@ def _build_headers( eff_features = _resolve(features, self._config.features) eff_policy = _resolve(security_policy, self._config.security_policy) eff_config = _resolve(fine_grained_config, self._config.fine_grained_config) - eff_session = _resolve(session_id, None) + eff_session = _resolve(session_id, self._session_id) features_str = ( eff_features.dump_for_headers(mode="json_str", overrides=feature_overrides) if eff_features else None From a0b1270f0a87ca0c33737464a0fc5085df61c3ed Mon Sep 17 00:00:00 2001 From: Cheng Zhang Date: Fri, 27 Feb 2026 12:26:37 +0000 Subject: [PATCH 11/11] fix version: v0.4.1 --- docs/release_notes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/release_notes.md b/docs/release_notes.md index 8257c65..61d6d7a 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -5,7 +5,7 @@ hide: # Release Notes -## v0.5 +## v0.4.1 `time: 2026-02-25` @@ -13,7 +13,7 @@ hide: | ---------|---------| | Control API | `094afd3174b700f104cab612d32e3f54ad1b152c` 
| -??? info "v0.5 Release Notes" +??? info "v0.4.1 Release Notes" **New Features**