Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions cli/planoai/config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,41 @@ def validate_and_render_schema():
f"Invalid opentracing_grpc_endpoint {opentracing_grpc_endpoint}, path must be empty"
)

# Validate the optional `routing.policy_provider` section: `url` (required),
# `ttl_seconds` (optional positive integer) and `headers` (optional mapping of
# string -> string). Every malformed value raises Exception with a message
# naming the offending key, so the user never sees a bare AttributeError or
# TypeError from this section.
# `or {}` also covers a `routing:` key that is present but null.
routing = config_yaml.get("routing") or {}
if not isinstance(routing, dict):
    raise Exception("routing must be an object")

policy_provider = routing.get("policy_provider")
if policy_provider:
    # Guard the .get() calls below against a scalar or list value.
    if not isinstance(policy_provider, dict):
        raise Exception("routing.policy_provider must be an object")

    policy_url = policy_provider.get("url")
    if not policy_url:
        raise Exception(
            "routing.policy_provider.url is required when policy_provider is set"
        )
    # A non-string url (e.g. a bare number) cannot be parsed; report it as an
    # invalid URL rather than failing on the "$" membership test.
    if not isinstance(policy_url, str):
        raise Exception(
            f"Invalid routing.policy_provider.url {policy_url}, must be a valid http/https URL"
        )
    # Expand environment-variable references before validating; a reference
    # that stays unexpanded is rejected by the scheme/hostname check below.
    if "$" in policy_url:
        policy_url = os.path.expandvars(policy_url)
    policy_url_result = urlparse(policy_url)
    if (
        policy_url_result.scheme not in ["http", "https"]
        or not policy_url_result.hostname
    ):
        # Report the value as written in the config, not the expanded form.
        raise Exception(
            f"Invalid routing.policy_provider.url {policy_provider.get('url')}, must be a valid http/https URL"
        )

    ttl_seconds = policy_provider.get("ttl_seconds")
    if ttl_seconds is not None:
        # bool is a subclass of int in Python, so exclude it explicitly.
        if isinstance(ttl_seconds, bool) or not isinstance(ttl_seconds, int):
            raise Exception(
                "routing.policy_provider.ttl_seconds must be an integer"
            )
        if ttl_seconds <= 0:
            raise Exception(
                "routing.policy_provider.ttl_seconds must be greater than 0"
            )

    headers = policy_provider.get("headers")
    if headers is not None:
        if not isinstance(headers, dict):
            raise Exception("routing.policy_provider.headers must be an object")
        for key, value in headers.items():
            if not isinstance(key, str) or not isinstance(value, str):
                raise Exception(
                    "routing.policy_provider.headers must contain string keys and string values"
                )


llms_with_endpoint = []
llms_with_endpoint_cluster_names = set()
updated_model_providers = []
Expand Down
17 changes: 17 additions & 0 deletions config/plano_config_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -411,10 +411,27 @@ properties:
routing:
type: object
properties:
model_provider:
type: string
llm_provider:
type: string
model:
type: string
policy_provider:
type: object
properties:
url:
type: string
headers:
type: object
additionalProperties:
type: string
ttl_seconds:
type: integer
minimum: 1
additionalProperties: false
required:
- url
additionalProperties: false
state_storage:
type: object
Expand Down
9 changes: 8 additions & 1 deletion crates/brightstaff/src/handlers/llm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use std::sync::Arc;
use tokio::sync::RwLock;
use tracing::{debug, info, info_span, warn, Instrument};

use crate::handlers::policy_provider::PolicyProviderClient;
use crate::handlers::router_chat::router_chat_get_upstream_model;
use crate::handlers::utils::{
create_streaming_response, truncate_message, ObservableStreamProcessor,
Expand All @@ -34,9 +35,11 @@ use crate::tracing::{

use common::errors::BrightStaffError;

#[allow(clippy::too_many_arguments)]
pub async fn llm_chat(
request: Request<hyper::body::Incoming>,
router_service: Arc<RouterService>,
policy_provider: Option<Arc<PolicyProviderClient>>,
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
Expand Down Expand Up @@ -73,6 +76,7 @@ pub async fn llm_chat(
llm_chat_inner(
request,
router_service,
policy_provider,
full_qualified_llm_provider_url,
model_aliases,
llm_providers,
Expand All @@ -90,6 +94,7 @@ pub async fn llm_chat(
async fn llm_chat_inner(
request: Request<hyper::body::Incoming>,
router_service: Arc<RouterService>,
policy_provider: Option<Arc<PolicyProviderClient>>,
full_qualified_llm_provider_url: String,
model_aliases: Arc<Option<HashMap<String, ModelAlias>>>,
llm_providers: Arc<RwLock<LlmProviders>>,
Expand Down Expand Up @@ -134,7 +139,7 @@ async fn llm_chat_inner(
);

// Extract routing_policy from request body if present
let (chat_request_bytes, inline_routing_policy) =
let (chat_request_bytes, inline_routing_policy, policy_id) =
match crate::handlers::routing_service::extract_routing_policy(&raw_bytes, false) {
Ok(result) => result,
Err(err) => {
Expand Down Expand Up @@ -355,6 +360,8 @@ async fn llm_chat_inner(
&request_path,
&request_id,
inline_routing_policy,
policy_id,
policy_provider,
)
.await
}
Expand Down
1 change: 1 addition & 0 deletions crates/brightstaff/src/handlers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub mod jsonrpc;
pub mod llm;
pub mod models;
pub mod pipeline_processor;
pub mod policy_provider;
pub mod response_handler;
pub mod router_chat;
pub mod routing_service;
Expand Down
Loading
Loading