From 0c7b999770b3930d394b7c9e5e7e91704510f80a Mon Sep 17 00:00:00 2001 From: Syed Hashmi Date: Mon, 2 Mar 2026 19:15:35 -0600 Subject: [PATCH 1/6] fix: make upstream timeout configurable, default to 300s --- cli/planoai/utils.py | 4 ++-- config/plano_config_schema.yaml | 4 ++++ crates/common/src/configuration.rs | 1 + crates/common/src/consts.rs | 8 ++++---- crates/prompt_gateway/src/http_context.rs | 9 +++++++-- crates/prompt_gateway/src/stream_context.rs | 20 ++++++++++++++++---- 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/cli/planoai/utils.py b/cli/planoai/utils.py index 171006f14..a4076660d 100644 --- a/cli/planoai/utils.py +++ b/cli/planoai/utils.py @@ -92,7 +92,7 @@ def convert_legacy_listeners( "type": "model_listener", "port": 12000, "address": "0.0.0.0", - "timeout": "30s", + "timeout": "300s", "model_providers": model_providers or [], } @@ -101,7 +101,7 @@ def convert_legacy_listeners( "type": "prompt_listener", "port": 10000, "address": "0.0.0.0", - "timeout": "30s", + "timeout": "300s", } # Handle None case diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index b63cb8244..4fa4c1333 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -265,6 +265,10 @@ properties: type: boolean use_agent_orchestrator: type: boolean + upstream_timeout_ms: + type: integer + minimum: 1000 + description: "Timeout in milliseconds for outbound upstream calls from WASM filters (tool endpoints, function calling, default prompt targets). Default is 300000 (300s)." upstream_connect_timeout: type: string description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'." 
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index f4e2b7b41..3afcecbf7 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -84,6 +84,7 @@ pub struct Overrides { pub prompt_target_intent_matching_threshold: Option, pub optimize_context_window: Option, pub use_agent_orchestrator: Option, + pub upstream_timeout_ms: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] diff --git a/crates/common/src/consts.rs b/crates/common/src/consts.rs index cafc8e809..81e28e866 100644 --- a/crates/common/src/consts.rs +++ b/crates/common/src/consts.rs @@ -3,10 +3,10 @@ pub const SYSTEM_ROLE: &str = "system"; pub const USER_ROLE: &str = "user"; pub const TOOL_ROLE: &str = "tool"; pub const ASSISTANT_ROLE: &str = "assistant"; -pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const API_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds -pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 30000; // 30 seconds +pub const ARCH_FC_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds +pub const DEFAULT_TARGET_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds +pub const API_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds +pub const MODEL_SERVER_REQUEST_TIMEOUT_MS: u64 = 300_000; // 300 seconds pub const MODEL_SERVER_NAME: &str = "bright_staff"; pub const ARCH_ROUTING_HEADER: &str = "x-arch-llm-provider"; pub const MESSAGES_KEY: &str = "messages"; diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index e3d00b3ff..e33b2a4c5 100644 --- a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -205,7 +205,12 @@ impl HttpContext for StreamContext { info!("on_http_request_body: sending request to model server"); debug!("request body: {}", json_data); - let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string(); + let timeout_ms = if 
let Some(overrides) = self.overrides.as_ref() { + overrides.upstream_timeout_ms.unwrap_or(MODEL_SERVER_REQUEST_TIMEOUT_MS) + } else { + MODEL_SERVER_REQUEST_TIMEOUT_MS + }; + let timeout_str = timeout_ms.to_string(); let mut headers = vec![ (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), @@ -230,7 +235,7 @@ impl HttpContext for StreamContext { headers, Some(json_data.as_bytes()), vec![], - Duration::from_secs(5), + Duration::from_millis(timeout_ms), ); if let Some(content) = self.user_prompt.as_ref().unwrap().content.as_ref() { diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 8ff44d522..173fcaf5b 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -171,7 +171,14 @@ impl StreamContext { callout_context.request_body.messages.clone(), ); let arch_messages_json = serde_json::to_string(&params).unwrap(); - let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string(); + let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { + overrides + .upstream_timeout_ms + .unwrap_or(DEFAULT_TARGET_REQUEST_TIMEOUT_MS) + } else { + DEFAULT_TARGET_REQUEST_TIMEOUT_MS + }; + let timeout_str = timeout_ms.to_string(); let mut headers = vec![ (":method", "POST"), @@ -193,7 +200,7 @@ impl StreamContext { headers, Some(arch_messages_json.as_bytes()), vec![], - Duration::from_secs(5), + Duration::from_millis(timeout_ms), ); callout_context.response_handler_type = ResponseHandlerType::DefaultTarget; callout_context.prompt_target_name = Some(default_prompt_target.name.clone()); @@ -422,7 +429,12 @@ impl StreamContext { debug!("on_http_call_response: api call body {:?}", api_call_body); - let timeout_str = API_REQUEST_TIMEOUT_MS.to_string(); + let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { + overrides.upstream_timeout_ms.unwrap_or(API_REQUEST_TIMEOUT_MS) + } else { + API_REQUEST_TIMEOUT_MS + }; + let timeout_str = timeout_ms.to_string(); let 
http_method_str = http_method.to_string(); let mut headers: HashMap<_, _> = [ @@ -457,7 +469,7 @@ impl StreamContext { headers.into_iter().collect(), api_call_body.as_deref().map(|s| s.as_bytes()), vec![], - Duration::from_secs(5), + Duration::from_millis(timeout_ms), ); info!( From 00bd11061e53582d85f568729c58e73731bcdc1d Mon Sep 17 00:00:00 2001 From: Syed Hashmi Date: Wed, 4 Mar 2026 18:53:32 -0600 Subject: [PATCH 2/6] fix: make upstream timeout configurable and increase default to 300s (#787) Hardcoded 30s timeouts in envoy config caused premature termination of long-running LLM requests (tool-use, agentic workflows). Make timeouts configurable via upstream_timeout_ms override and default to 300s. --- cli/planoai/config_generator.py | 9 +++++++++ config/envoy.template.yaml | 6 +++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index 522968c94..fcbae0e4d 100644 --- a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -466,6 +466,15 @@ def validate_and_render_schema(): "upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt" ) + upstream_timeout_ms = overrides.get("upstream_timeout_ms") + if upstream_timeout_ms is not None: + timeout_s = f"{int(upstream_timeout_ms) // 1000}s" + llm_gateway["timeout"] = timeout_s + prompt_gateway["timeout"] = timeout_s + for listener in listeners: + if listener.get("type") == "agent" and "timeout" not in listener: + listener["timeout"] = timeout_s + data = { "prompt_gateway_listener": prompt_gateway, "llm_gateway_listener": llm_gateway, diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index a780c3f1b..409801672 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -336,7 +336,7 @@ static_resources: auto_host_rewrite: true prefix_rewrite: "/agents/" cluster: bright_staff - timeout: {{ listener.timeout | default('30s') }} + timeout: {{ listener.timeout | default('300s') }} 
http_filters: - name: envoy.filters.http.compressor typed_config: @@ -517,12 +517,12 @@ static_resources: route: auto_host_rewrite: true cluster: {{ llm_cluster_name }} - timeout: 300s + timeout: {{ llm_gateway_listener.timeout }} {% if llm_gateway_listener.max_retries %} retry_policy: retry_on: "5xx,connect-failure,refused-stream,reset,retriable-status-codes" num_retries: {{ llm_gateway_listener.max_retries }} - per_try_timeout: 30s + per_try_timeout: {{ llm_gateway_listener.timeout }} retriable_status_codes: [429, 500, 502, 503, 504] retry_back_off: base_interval: 0.5s From 3e182894a2b3e8e8633d553c87b6c148377c0a48 Mon Sep 17 00:00:00 2001 From: Syed Hashmi Date: Mon, 16 Mar 2026 16:46:49 -0700 Subject: [PATCH 3/6] use per-listener timeout config instead of separate upstream_timeout override --- config/envoy.template.yaml | 4 ++-- crates/prompt_gateway/src/http_context.rs | 6 +----- crates/prompt_gateway/src/stream_context.rs | 14 ++------------ 3 files changed, 5 insertions(+), 19 deletions(-) diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index 409801672..7415bba51 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -257,7 +257,7 @@ static_resources: route: auto_host_rewrite: true cluster: bright_staff - timeout: 300s + timeout: {{ listener.timeout | default('300s') }} {% for cluster_name, cluster in plano_clusters.items() %} - match: prefix: "/" @@ -268,7 +268,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ cluster_name }} - timeout: 300s + timeout: {{ listener.timeout | default('300s') }} {% endfor %} http_filters: - name: envoy.filters.http.router diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index e33b2a4c5..2b0ca4331 100644 --- a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -205,11 +205,7 @@ impl HttpContext for StreamContext { info!("on_http_request_body: sending request to model server"); 
debug!("request body: {}", json_data); - let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { - overrides.upstream_timeout_ms.unwrap_or(MODEL_SERVER_REQUEST_TIMEOUT_MS) - } else { - MODEL_SERVER_REQUEST_TIMEOUT_MS - }; + let timeout_ms = MODEL_SERVER_REQUEST_TIMEOUT_MS; let timeout_str = timeout_ms.to_string(); let mut headers = vec![ diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 173fcaf5b..6166e09bf 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -171,13 +171,7 @@ impl StreamContext { callout_context.request_body.messages.clone(), ); let arch_messages_json = serde_json::to_string(&params).unwrap(); - let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { - overrides - .upstream_timeout_ms - .unwrap_or(DEFAULT_TARGET_REQUEST_TIMEOUT_MS) - } else { - DEFAULT_TARGET_REQUEST_TIMEOUT_MS - }; + let timeout_ms = DEFAULT_TARGET_REQUEST_TIMEOUT_MS; let timeout_str = timeout_ms.to_string(); let mut headers = vec![ @@ -429,11 +423,7 @@ impl StreamContext { debug!("on_http_call_response: api call body {:?}", api_call_body); - let timeout_ms = if let Some(overrides) = self.overrides.as_ref() { - overrides.upstream_timeout_ms.unwrap_or(API_REQUEST_TIMEOUT_MS) - } else { - API_REQUEST_TIMEOUT_MS - }; + let timeout_ms = API_REQUEST_TIMEOUT_MS; let timeout_str = timeout_ms.to_string(); let http_method_str = http_method.to_string(); From 125af57837e4920649f2a42afd4a285219f95c6b Mon Sep 17 00:00:00 2001 From: Syed Hashmi Date: Mon, 16 Mar 2026 16:50:36 -0700 Subject: [PATCH 4/6] remove upstream_timeout_ms from config, schema, and rust struct --- cli/planoai/config_generator.py | 9 --------- config/plano_config_schema.yaml | 4 ---- crates/common/src/configuration.rs | 1 - 3 files changed, 14 deletions(-) diff --git a/cli/planoai/config_generator.py b/cli/planoai/config_generator.py index fcbae0e4d..522968c94 100644 --- 
a/cli/planoai/config_generator.py +++ b/cli/planoai/config_generator.py @@ -466,15 +466,6 @@ def validate_and_render_schema(): "upstream_tls_ca_path", "/etc/ssl/certs/ca-certificates.crt" ) - upstream_timeout_ms = overrides.get("upstream_timeout_ms") - if upstream_timeout_ms is not None: - timeout_s = f"{int(upstream_timeout_ms) // 1000}s" - llm_gateway["timeout"] = timeout_s - prompt_gateway["timeout"] = timeout_s - for listener in listeners: - if listener.get("type") == "agent" and "timeout" not in listener: - listener["timeout"] = timeout_s - data = { "prompt_gateway_listener": prompt_gateway, "llm_gateway_listener": llm_gateway, diff --git a/config/plano_config_schema.yaml b/config/plano_config_schema.yaml index 4fa4c1333..b63cb8244 100644 --- a/config/plano_config_schema.yaml +++ b/config/plano_config_schema.yaml @@ -265,10 +265,6 @@ properties: type: boolean use_agent_orchestrator: type: boolean - upstream_timeout_ms: - type: integer - minimum: 1000 - description: "Timeout in milliseconds for outbound upstream calls from WASM filters (tool endpoints, function calling, default prompt targets). Default is 300000 (300s)." upstream_connect_timeout: type: string description: "Connect timeout for upstream provider clusters (e.g., '5s', '10s'). Default is '5s'." 
diff --git a/crates/common/src/configuration.rs b/crates/common/src/configuration.rs index 3afcecbf7..f4e2b7b41 100644 --- a/crates/common/src/configuration.rs +++ b/crates/common/src/configuration.rs @@ -84,7 +84,6 @@ pub struct Overrides { pub prompt_target_intent_matching_threshold: Option, pub optimize_context_window: Option, pub use_agent_orchestrator: Option, - pub upstream_timeout_ms: Option, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] From 95486db92a5c7ab82efc4ca9aa5dbc0d08eb6178 Mon Sep 17 00:00:00 2001 From: Syed Hashmi Date: Mon, 16 Mar 2026 16:52:35 -0700 Subject: [PATCH 5/6] revert prompt_gateway changes --- crates/prompt_gateway/src/http_context.rs | 5 ++--- crates/prompt_gateway/src/stream_context.rs | 10 ++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/prompt_gateway/src/http_context.rs b/crates/prompt_gateway/src/http_context.rs index 2b0ca4331..e3d00b3ff 100644 --- a/crates/prompt_gateway/src/http_context.rs +++ b/crates/prompt_gateway/src/http_context.rs @@ -205,8 +205,7 @@ impl HttpContext for StreamContext { info!("on_http_request_body: sending request to model server"); debug!("request body: {}", json_data); - let timeout_ms = MODEL_SERVER_REQUEST_TIMEOUT_MS; - let timeout_str = timeout_ms.to_string(); + let timeout_str = MODEL_SERVER_REQUEST_TIMEOUT_MS.to_string(); let mut headers = vec![ (ARCH_UPSTREAM_HOST_HEADER, MODEL_SERVER_NAME), @@ -231,7 +230,7 @@ impl HttpContext for StreamContext { headers, Some(json_data.as_bytes()), vec![], - Duration::from_millis(timeout_ms), + Duration::from_secs(5), ); if let Some(content) = self.user_prompt.as_ref().unwrap().content.as_ref() { diff --git a/crates/prompt_gateway/src/stream_context.rs b/crates/prompt_gateway/src/stream_context.rs index 6166e09bf..8ff44d522 100644 --- a/crates/prompt_gateway/src/stream_context.rs +++ b/crates/prompt_gateway/src/stream_context.rs @@ -171,8 +171,7 @@ impl StreamContext { callout_context.request_body.messages.clone(), ); 
let arch_messages_json = serde_json::to_string(&params).unwrap(); - let timeout_ms = DEFAULT_TARGET_REQUEST_TIMEOUT_MS; - let timeout_str = timeout_ms.to_string(); + let timeout_str = DEFAULT_TARGET_REQUEST_TIMEOUT_MS.to_string(); let mut headers = vec![ (":method", "POST"), @@ -194,7 +193,7 @@ impl StreamContext { headers, Some(arch_messages_json.as_bytes()), vec![], - Duration::from_millis(timeout_ms), + Duration::from_secs(5), ); callout_context.response_handler_type = ResponseHandlerType::DefaultTarget; callout_context.prompt_target_name = Some(default_prompt_target.name.clone()); @@ -423,8 +422,7 @@ impl StreamContext { debug!("on_http_call_response: api call body {:?}", api_call_body); - let timeout_ms = API_REQUEST_TIMEOUT_MS; - let timeout_str = timeout_ms.to_string(); + let timeout_str = API_REQUEST_TIMEOUT_MS.to_string(); let http_method_str = http_method.to_string(); let mut headers: HashMap<_, _> = [ @@ -459,7 +457,7 @@ impl StreamContext { headers.into_iter().collect(), api_call_body.as_deref().map(|s| s.as_bytes()), vec![], - Duration::from_millis(timeout_ms), + Duration::from_secs(5), ); info!( From 8cb3471531285490a88fefcd03d07d1c94d41285 Mon Sep 17 00:00:00 2001 From: Syed Hashmi Date: Mon, 16 Mar 2026 17:00:02 -0700 Subject: [PATCH 6/6] fix: use prompt_gateway_listener.timeout for outbound_api_traffic listener --- config/envoy.template.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config/envoy.template.yaml b/config/envoy.template.yaml index 7415bba51..8e5b414b3 100644 --- a/config/envoy.template.yaml +++ b/config/envoy.template.yaml @@ -257,7 +257,7 @@ static_resources: route: auto_host_rewrite: true cluster: bright_staff - timeout: {{ listener.timeout | default('300s') }} + timeout: {{ prompt_gateway_listener.timeout | default('300s') }} {% for cluster_name, cluster in plano_clusters.items() %} - match: prefix: "/" @@ -268,7 +268,7 @@ static_resources: route: auto_host_rewrite: true cluster: {{ cluster_name }} - timeout: 
{{ listener.timeout | default('300s') }} + timeout: {{ prompt_gateway_listener.timeout | default('300s') }} {% endfor %} http_filters: - name: envoy.filters.http.router