diff --git a/docs/config.html b/docs/config.html
index 26187bb9..4c0d066e 100644
--- a/docs/config.html
+++ b/docs/config.html
@@ -988,6 +988,12 @@
LlamaStackConfiguration
Path to configuration file used when Llama Stack is run in library
mode |
+
+ | timeout |
+ integer |
+ Timeout in seconds for requests to Llama Stack service. Default is
+180 seconds (3 minutes) to accommodate long-running RAG queries. |
+
ModelContextProtocolServer
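
For illustration, a llama_stack block that sets the new timeout option might look like the sketch below. It is only a hypothetical instance of the LlamaStackConfiguration schema documented above; the URL and API key are placeholders and the rest of the configuration file is omitted.

    "llama_stack": {
      "url": "http://localhost:8321",
      "api_key": "placeholder-api-key",
      "use_as_library_client": false,
      "timeout": 300
    }
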
diff --git a/docs/config.json b/docs/config.json
index f9f1e023..124c005a 100644
--- a/docs/config.json
+++ b/docs/config.json
@@ -6,6 +6,40 @@
},
"components": {
"schemas": {
+ "A2AStateConfiguration": {
+ "additionalProperties": false,
+ "description": "A2A protocol persistent state configuration.\n\nConfigures how A2A task state and context-to-conversation mappings are\nstored. For multi-worker deployments, use SQLite or PostgreSQL to ensure\nstate is shared across all workers.\n\nIf no configuration is provided, in-memory storage is used (default).\nThis is suitable for single-worker deployments but state will be lost\non restarts and not shared across workers.\n\nAttributes:\n sqlite: SQLite database configuration for A2A state storage.\n postgres: PostgreSQL database configuration for A2A state storage.",
+ "properties": {
+ "sqlite": {
+ "anyOf": [
+ {
+ "$ref": "#/components/schemas/SQLiteDatabaseConfiguration"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "default": null,
+ "description": "SQLite database configuration for A2A state storage.",
+ "title": "SQLite configuration"
+ },
+ "postgres": {
+ "anyOf": [
+ {
+ "$ref": "#/components/schemas/PostgreSQLDatabaseConfiguration"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "default": null,
+ "description": "PostgreSQL database configuration for A2A state storage.",
+ "title": "PostgreSQL configuration"
+ }
+ },
+ "title": "A2AStateConfiguration",
+ "type": "object"
+ },
"APIKeyTokenConfiguration": {
"additionalProperties": false,
"description": "API Key Token configuration.",
@@ -78,7 +112,11 @@
"get_config",
"info",
"model_override",
- "rlsapi_v1_infer"
+ "rlsapi_v1_infer",
+ "a2a_agent_card",
+ "a2a_task_execution",
+ "a2a_message",
+ "a2a_jsonrpc"
],
"title": "Action",
"type": "string"
@@ -97,6 +135,12 @@
"title": "Skip Tls Verification",
"type": "boolean"
},
+ "skip_for_health_probes": {
+ "default": false,
+ "description": "Skip authorization for readiness and liveness probes",
+ "title": "Skip authorization for probes",
+ "type": "boolean"
+ },
"k8s_cluster_api": {
"type": "string",
"nullable": true,
@@ -162,6 +206,43 @@
"title": "AuthorizationConfiguration",
"type": "object"
},
+ "AzureEntraIdConfiguration": {
+ "additionalProperties": false,
+ "description": "Microsoft Entra ID authentication attributes for Azure.",
+ "properties": {
+ "tenant_id": {
+ "format": "password",
+ "title": "Tenant Id",
+ "type": "string",
+ "writeOnly": true
+ },
+ "client_id": {
+ "format": "password",
+ "title": "Client Id",
+ "type": "string",
+ "writeOnly": true
+ },
+ "client_secret": {
+ "format": "password",
+ "title": "Client Secret",
+ "type": "string",
+ "writeOnly": true
+ },
+ "scope": {
+ "default": "https://cognitiveservices.azure.com/.default",
+ "description": "Azure Cognitive Services scope for token requests. Override only if using a different Azure service.",
+ "title": "Token scope",
+ "type": "string"
+ }
+ },
+ "required": [
+ "tenant_id",
+ "client_id",
+ "client_secret"
+ ],
+ "title": "AzureEntraIdConfiguration",
+ "type": "object"
+ },
"ByokRag": {
"additionalProperties": false,
"description": "BYOK (Bring Your Own Knowledge) RAG configuration.",
@@ -346,10 +427,45 @@
"title": "BYOK RAG configuration",
"type": "array"
},
+ "a2a_state": {
+ "$ref": "#/components/schemas/A2AStateConfiguration",
+ "description": "Configuration for A2A protocol persistent state storage.",
+ "title": "A2A state configuration"
+ },
"quota_handlers": {
"$ref": "#/components/schemas/QuotaHandlersConfiguration",
"description": "Quota handlers configuration",
"title": "Quota handlers"
+ },
+ "azure_entra_id": {
+ "anyOf": [
+ {
+ "$ref": "#/components/schemas/AzureEntraIdConfiguration"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "default": null
+ },
+ "splunk": {
+ "anyOf": [
+ {
+ "$ref": "#/components/schemas/SplunkConfiguration"
+ },
+ {
+ "type": "null"
+ }
+ ],
+ "default": null,
+ "description": "Splunk HEC configuration for sending telemetry events.",
+ "title": "Splunk configuration"
+ },
+ "deployment_environment": {
+ "default": "development",
+ "description": "Deployment environment name (e.g., 'development', 'staging', 'production'). Used in telemetry events.",
+ "title": "Deployment environment",
+ "type": "string"
}
},
"required": [
@@ -466,6 +582,18 @@
"default": null,
"title": "System Prompt"
},
+ "agent_card_path": {
+ "type": "string",
+ "nullable": true,
+ "default": null,
+ "title": "Agent Card Path"
+ },
+ "agent_card_config": {
+ "type": "object",
+ "nullable": true,
+ "default": null,
+ "title": "Agent Card Config"
+ },
"custom_profile": {
"anyOf": [
{
@@ -713,6 +841,21 @@
"description": "URL of the MCP server",
"title": "MCP server URL",
"type": "string"
+ },
+ "authorization_headers": {
+ "additionalProperties": {
+ "type": "string"
+ },
+ "description": "Headers to send to the MCP server. The map contains the header name and the path to a file containing the header value (secret). There are 2 special cases: 1. Usage of the kubernetes token in the header. To specify this use a string 'kubernetes' instead of the file path. 2. Usage of the client provided token in the header. To specify this use a string 'client' instead of the file path.",
+ "title": "Authorization headers",
+ "type": "object"
+ },
+ "timeout": {
+ "type": "integer",
+ "nullable": true,
+ "default": null,
+ "description": "Timeout in seconds for requests to the MCP server. If not specified, the default timeout from Llama Stack will be used. Note: This field is reserved for future use when Llama Stack adds timeout support.",
+ "title": "Request timeout"
}
},
"required": [
@@ -900,6 +1043,20 @@
"minimum": 0,
"title": "Period",
"type": "integer"
+ },
+ "database_reconnection_count": {
+ "default": 10,
+ "description": "Database reconnection count on startup. When database for quota is not available on startup, the service tries to reconnect N times with specified delay.",
+ "minimum": 0,
+ "title": "Database reconnection count on startup",
+ "type": "integer"
+ },
+ "database_reconnection_delay": {
+ "default": 1,
+ "description": "Database reconnection delay specified in seconds. When database for quota is not available on startup, the service tries to reconnect N times with specified delay.",
+ "minimum": 0,
+ "title": "Database reconnection delay",
+ "type": "integer"
}
},
"title": "QuotaSchedulerConfiguration",
@@ -953,6 +1110,13 @@
"title": "Port",
"type": "integer"
},
+ "base_url": {
+ "type": "string",
+ "nullable": true,
+ "default": null,
+ "description": "Externally reachable base URL for the service; needed for A2A support.",
+ "title": "Base URL"
+ },
"auth_enabled": {
"default": false,
"description": "Enables the authentication subsystem",
@@ -992,6 +1156,60 @@
"title": "ServiceConfiguration",
"type": "object"
},
+ "SplunkConfiguration": {
+ "additionalProperties": false,
+ "description": "Splunk HEC (HTTP Event Collector) configuration.\n\nSplunk HEC allows sending events directly to Splunk over HTTP/HTTPS.\nThis configuration is used to send telemetry events for inference\nrequests to the corporate Splunk deployment.\n\nUseful resources:\n\n - [Splunk HEC Docs](https://docs.splunk.com/Documentation/SplunkCloud)\n - [About HEC](https://docs.splunk.com/Documentation/Splunk/latest/Data)",
+ "properties": {
+ "enabled": {
+ "default": false,
+ "description": "Enable or disable Splunk HEC integration.",
+ "title": "Enabled",
+ "type": "boolean"
+ },
+ "url": {
+ "type": "string",
+ "nullable": true,
+ "default": null,
+ "description": "Splunk HEC endpoint URL.",
+ "title": "HEC URL"
+ },
+ "token_path": {
+ "type": "string",
+ "nullable": true,
+ "default": null,
+ "description": "Path to file containing the Splunk HEC authentication token.",
+ "title": "Token path"
+ },
+ "index": {
+ "type": "string",
+ "nullable": true,
+ "default": null,
+ "description": "Target Splunk index for events.",
+ "title": "Index"
+ },
+ "source": {
+ "default": "lightspeed-stack",
+ "description": "Event source identifier.",
+ "title": "Source",
+ "type": "string"
+ },
+ "timeout": {
+ "default": 5,
+ "description": "HTTP timeout in seconds for HEC requests.",
+ "minimum": 0,
+ "title": "Timeout",
+ "type": "integer"
+ },
+ "verify_ssl": {
+ "default": true,
+ "description": "Whether to verify SSL certificates for HEC endpoint.",
+ "title": "Verify SSL",
+ "type": "boolean"
+ }
+ },
+ "title": "SplunkConfiguration",
+ "type": "object"
+ },
"TLSConfiguration": {
"additionalProperties": false,
"description": "TLS configuration.\n\nTransport Layer Security (TLS) is a cryptographic protocol designed to\nprovide communications security over a computer network, such as the\nInternet. The protocol is widely used in applications such as email,\ninstant messaging, and voice over IP, but its use in securing HTTPS remains\nthe most publicly visible.\n\nUseful resources:\n\n - [FastAPI HTTPS Deployment](https://fastapi.tiangolo.com/deployment/https/)\n - [Transport Layer Security Overview](https://en.wikipedia.org/wiki/Transport_Layer_Security)\n - [What is TLS](https://www.ssltrust.eu/learning/ssl/transport-layer-security-tls)",
diff --git a/docs/config.md b/docs/config.md
index e6d461e0..1c5dd1cf 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -345,6 +345,7 @@ Useful resources:
| api_key | string | API key to access Llama Stack service |
| use_as_library_client | boolean | When set to true Llama Stack will be used in library mode, not in server mode (default) |
| library_client_config_path | string | Path to configuration file used when Llama Stack is run in library mode |
+| timeout | integer | Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries. |
## ModelContextProtocolServer
diff --git a/docs/config.puml b/docs/config.puml
index 250c4a4b..bec2d1f2 100644
--- a/docs/config.puml
+++ b/docs/config.puml
@@ -1,5 +1,12 @@
@startuml classes
set namespaceSeparator none
+class "A2AStateConfiguration" as src.models.config.A2AStateConfiguration {
+ config
+ postgres : Optional[PostgreSQLDatabaseConfiguration]
+ sqlite : Optional[SQLiteDatabaseConfiguration]
+ storage_type
+ check_a2a_state_configuration() -> Self
+}
class "APIKeyTokenConfiguration" as src.models.config.APIKeyTokenConfiguration {
api_key
}
@@ -20,12 +27,19 @@ class "AuthenticationConfiguration" as src.models.config.AuthenticationConfigura
module : str
rh_identity_config : Optional[RHIdentityConfiguration]
rh_identity_configuration
+ skip_for_health_probes : bool
skip_tls_verification : bool
check_authentication_model() -> Self
}
class "AuthorizationConfiguration" as src.models.config.AuthorizationConfiguration {
access_rules : list[AccessRule]
}
+class "AzureEntraIdConfiguration" as src.models.config.AzureEntraIdConfiguration {
+ client_id
+ client_secret
+ scope : str
+ tenant_id
+}
class "ByokRag" as src.models.config.ByokRag {
db_path
embedding_dimension
@@ -42,20 +56,25 @@ class "CORSConfiguration" as src.models.config.CORSConfiguration {
check_cors_configuration() -> Self
}
class "Configuration" as src.models.config.Configuration {
+ a2a_state
authentication
authorization : Optional[AuthorizationConfiguration]
+ azure_entra_id : Optional[AzureEntraIdConfiguration]
byok_rag : list[ByokRag]
conversation_cache
customization : Optional[Customization]
database
+ deployment_environment : str
inference
llama_stack
mcp_servers : list[ModelContextProtocolServer]
name : str
quota_handlers
service
+ splunk : Optional[SplunkConfiguration]
user_data_collection
- dump(filename: str) -> None
+ dump(filename: str | Path) -> None
+ validate_mcp_auth_headers() -> Self
}
class "ConfigurationBase" as src.models.config.ConfigurationBase {
model_config
@@ -64,7 +83,7 @@ class "ConversationHistoryConfiguration" as src.models.config.ConversationHistor
memory : Optional[InMemoryCacheConfig]
postgres : Optional[PostgreSQLDatabaseConfiguration]
sqlite : Optional[SQLiteDatabaseConfiguration]
- type : Literal['noop', 'memory', 'sqlite', 'postgres'] | None
+ type : Optional[Literal['noop', 'memory', 'sqlite', 'postgres']]
check_cache_configuration() -> Self
}
class "CustomProfile" as src.models.config.CustomProfile {
@@ -73,6 +92,8 @@ class "CustomProfile" as src.models.config.CustomProfile {
get_prompts() -> dict[str, str]
}
class "Customization" as src.models.config.Customization {
+ agent_card_config : Optional[dict[str, Any]]
+ agent_card_path : Optional[FilePath]
custom_profile : Optional[CustomProfile]
disable_query_system_prompt : bool
profile_path : Optional[str]
@@ -121,14 +142,19 @@ class "JwtRoleRule" as src.models.config.JwtRoleRule {
class "LlamaStackConfiguration" as src.models.config.LlamaStackConfiguration {
api_key : Optional[SecretStr]
library_client_config_path : Optional[str]
+ timeout
url : Optional[str]
use_as_library_client : Optional[bool]
check_llama_stack_model() -> Self
}
class "ModelContextProtocolServer" as src.models.config.ModelContextProtocolServer {
+ authorization_headers : dict[str, str]
name : str
provider_id : str
+ resolved_authorization_headers
+ timeout : Optional[PositiveInt]
url : str
+ resolve_auth_headers() -> Self
}
class "PostgreSQLDatabaseConfiguration" as src.models.config.PostgreSQLDatabaseConfiguration {
ca_cert_path : Optional[FilePath]
@@ -170,6 +196,7 @@ class "SQLiteDatabaseConfiguration" as src.models.config.SQLiteDatabaseConfigura
class "ServiceConfiguration" as src.models.config.ServiceConfiguration {
access_log : bool
auth_enabled : bool
+ base_url : Optional[str]
color_log : bool
cors
host : str
@@ -178,6 +205,16 @@ class "ServiceConfiguration" as src.models.config.ServiceConfiguration {
workers
check_service_configuration() -> Self
}
+class "SplunkConfiguration" as src.models.config.SplunkConfiguration {
+ enabled : bool
+ index : Optional[str]
+ source : str
+ timeout
+ token_path : Optional[FilePath]
+ url : Optional[str]
+ verify_ssl : bool
+ check_splunk_configuration() -> Self
+}
class "TLSConfiguration" as src.models.config.TLSConfiguration {
tls_certificate_path : Optional[FilePath]
tls_key_password : Optional[FilePath]
@@ -191,10 +228,12 @@ class "UserDataCollection" as src.models.config.UserDataCollection {
transcripts_storage : Optional[str]
check_storage_location_is_set_when_needed() -> Self
}
+src.models.config.A2AStateConfiguration --|> src.models.config.ConfigurationBase
src.models.config.APIKeyTokenConfiguration --|> src.models.config.ConfigurationBase
src.models.config.AccessRule --|> src.models.config.ConfigurationBase
src.models.config.AuthenticationConfiguration --|> src.models.config.ConfigurationBase
src.models.config.AuthorizationConfiguration --|> src.models.config.ConfigurationBase
+src.models.config.AzureEntraIdConfiguration --|> src.models.config.ConfigurationBase
src.models.config.ByokRag --|> src.models.config.ConfigurationBase
src.models.config.CORSConfiguration --|> src.models.config.ConfigurationBase
src.models.config.Configuration --|> src.models.config.ConfigurationBase
@@ -215,8 +254,10 @@ src.models.config.QuotaSchedulerConfiguration --|> src.models.config.Configurati
src.models.config.RHIdentityConfiguration --|> src.models.config.ConfigurationBase
src.models.config.SQLiteDatabaseConfiguration --|> src.models.config.ConfigurationBase
src.models.config.ServiceConfiguration --|> src.models.config.ConfigurationBase
+src.models.config.SplunkConfiguration --|> src.models.config.ConfigurationBase
src.models.config.TLSConfiguration --|> src.models.config.ConfigurationBase
src.models.config.UserDataCollection --|> src.models.config.ConfigurationBase
+src.models.config.A2AStateConfiguration --* src.models.config.Configuration : a2a_state
src.models.config.AuthenticationConfiguration --* src.models.config.Configuration : authentication
src.models.config.CORSConfiguration --* src.models.config.ServiceConfiguration : cors
src.models.config.ConversationHistoryConfiguration --* src.models.config.Configuration : conversation_cache
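
To illustrate the AzureEntraIdConfiguration class added above, an azure_entra_id block conforming to its schema might look like this sketch; both IDs and the secret are placeholders, and the scope line simply restates the documented default.

    "azure_entra_id": {
      "tenant_id": "00000000-0000-0000-0000-000000000000",
      "client_id": "00000000-0000-0000-0000-000000000000",
      "client_secret": "placeholder-client-secret",
      "scope": "https://cognitiveservices.azure.com/.default"
    }
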
diff --git a/docs/openapi.json b/docs/openapi.json
index 693ff1b9..f329d4ed 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -3664,7 +3664,7 @@
"rlsapi-v1"
],
"summary": "Infer Endpoint",
- "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.",
+ "description": "Handle rlsapi v1 /infer requests for stateless inference.\n\nThis endpoint serves requests from the RHEL Lightspeed Command Line Assistant (CLA).\n\nAccepts a question with optional context (stdin, attachments, terminal output,\nsystem info) and returns an LLM-generated response.\n\nArgs:\n infer_request: The inference request containing question and context.\n request: The FastAPI request object for accessing headers and state.\n background_tasks: FastAPI background tasks for async Splunk event sending.\n auth: Authentication tuple from the configured auth provider.\n\nReturns:\n RlsapiV1InferResponse containing the generated response text and request ID.\n\nRaises:\n HTTPException: 503 if the LLM service is unavailable.",
"operationId": "infer_endpoint_v1_infer_post",
"requestBody": {
"content": {
@@ -4290,7 +4290,7 @@
],
"summary": "Handle A2A Jsonrpc",
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
- "operationId": "handle_a2a_jsonrpc_a2a_get",
+ "operationId": "handle_a2a_jsonrpc_a2a_post",
"responses": {
"200": {
"description": "Successful Response",
@@ -4308,7 +4308,7 @@
],
"summary": "Handle A2A Jsonrpc",
"description": "Handle A2A JSON-RPC requests following the A2A protocol specification.\n\nThis endpoint uses the DefaultRequestHandler from the A2A SDK to handle\nall JSON-RPC requests including message/send, message/stream, etc.\n\nThe A2A SDK application is created per-request to include authentication\ncontext while still leveraging FastAPI's authorization middleware.\n\nAutomatically detects streaming requests (message/stream JSON-RPC method)\nand returns a StreamingResponse to enable real-time chunk delivery.\n\nArgs:\n request: FastAPI request object\n auth: Authentication tuple\n mcp_headers: MCP headers for context propagation\n\nReturns:\n JSON-RPC response or streaming response",
- "operationId": "handle_a2a_jsonrpc_a2a_get",
+ "operationId": "handle_a2a_jsonrpc_a2a_post",
"responses": {
"200": {
"description": "Successful Response",
@@ -6957,6 +6957,13 @@
],
"title": "Llama Stack configuration path",
"description": "Path to configuration file used when Llama Stack is run in library mode"
+ },
+ "timeout": {
+ "type": "integer",
+ "exclusiveMinimum": 0.0,
+ "title": "Request timeout",
+ "description": "Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries.",
+ "default": 180
}
},
"additionalProperties": false,
diff --git a/docs/openapi.md b/docs/openapi.md
index f576011e..74315b24 100644
--- a/docs/openapi.md
+++ b/docs/openapi.md
@@ -3200,6 +3200,8 @@ system info) and returns an LLM-generated response.
Args:
infer_request: The inference request containing question and context.
+ request: The FastAPI request object for accessing headers and state.
+ background_tasks: FastAPI background tasks for async Splunk event sending.
auth: Authentication tuple from the configured auth provider.
Returns:
@@ -4860,6 +4862,7 @@ Useful resources:
| api_key | | API key to access Llama Stack service |
| use_as_library_client | | When set to true Llama Stack will be used in library mode, not in server mode (default) |
| library_client_config_path | | Path to configuration file used when Llama Stack is run in library mode |
+| timeout | integer | Timeout in seconds for requests to Llama Stack service. Default is 180 seconds (3 minutes) to accommodate long-running RAG queries. |
## MCPClientAuthOptionsResponse