Skip to content
102 changes: 102 additions & 0 deletions config/plano_config_schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ properties:
- 0.1-beta
- 0.2.0
- v0.3.0
- v0.4.0

agents:
type: array
Expand Down Expand Up @@ -470,6 +471,107 @@ properties:
additionalProperties: false
required:
- jailbreak
routing_preferences:
type: array
items:
type: object
properties:
name:
type: string
description:
type: string
models:
type: array
items:
type: string
minItems: 1
selection_policy:
type: object
properties:
prefer:
type: string
enum:
- cheapest
- fastest
- random
- none
additionalProperties: false
required:
- prefer
additionalProperties: false
required:
- name
- description
- models
- selection_policy

model_metrics_sources:
type: array
items:
oneOf:
- type: object
properties:
type:
type: string
const: cost_metrics
url:
type: string
refresh_interval:
type: integer
minimum: 1
auth:
type: object
properties:
type:
type: string
enum:
- bearer
token:
type: string
required:
- type
- token
additionalProperties: false
required:
- type
- url
additionalProperties: false
- type: object
properties:
type:
type: string
const: prometheus_metrics
url:
type: string
query:
type: string
refresh_interval:
type: integer
minimum: 1
description: "Refresh interval in seconds"
required:
- type
- url
- query
additionalProperties: false
- type: object
properties:
type:
type: string
const: digitalocean_pricing
refresh_interval:
type: integer
minimum: 1
description: "Refresh interval in seconds"
model_aliases:
type: object
description: "Map DO catalog keys (lowercase(creator)/model_id) to Plano model names used in routing_preferences. Example: 'openai/openai-gpt-oss-120b: openai/gpt-4o'"
additionalProperties:
type: string
required:
- type
additionalProperties: false

additionalProperties: false
required:
- version
Expand Down
26 changes: 12 additions & 14 deletions crates/brightstaff/src/handlers/llm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ async fn llm_chat_inner(
temperature,
tool_names,
user_message_preview,
inline_routing_policy,
inline_routing_preferences,
client_api,
provider_id,
} = parsed;
Expand Down Expand Up @@ -261,7 +261,7 @@ async fn llm_chat_inner(
&traceparent,
&request_path,
&request_id,
inline_routing_policy,
inline_routing_preferences,
)
.await
}
Expand Down Expand Up @@ -323,7 +323,7 @@ struct PreparedRequest {
temperature: Option<f32>,
tool_names: Option<Vec<String>>,
user_message_preview: Option<String>,
inline_routing_policy: Option<Vec<common::configuration::ModelUsagePreference>>,
inline_routing_preferences: Option<Vec<common::configuration::TopLevelRoutingPreference>>,
client_api: Option<SupportedAPIsFromClient>,
provider_id: hermesllm::ProviderId,
}
Expand Down Expand Up @@ -352,16 +352,14 @@ async fn parse_and_validate_request(
"request body received"
);

// Extract routing_policy from request body if present
let (chat_request_bytes, inline_routing_policy) =
crate::handlers::routing_service::extract_routing_policy(&raw_bytes, false).map_err(
|err| {
warn!(error = %err, "failed to parse request JSON");
let mut r = Response::new(full(format!("Failed to parse request: {}", err)));
*r.status_mut() = StatusCode::BAD_REQUEST;
r
},
)?;
// Extract routing_preferences from request body if present
let (chat_request_bytes, inline_routing_preferences) =
crate::handlers::routing_service::extract_routing_policy(&raw_bytes).map_err(|err| {
warn!(error = %err, "failed to parse request JSON");
let mut r = Response::new(full(format!("Failed to parse request: {}", err)));
*r.status_mut() = StatusCode::BAD_REQUEST;
r
})?;

let api_type = SupportedAPIsFromClient::from_endpoint(request_path).ok_or_else(|| {
warn!(path = %request_path, "unsupported endpoint");
Expand Down Expand Up @@ -439,7 +437,7 @@ async fn parse_and_validate_request(
temperature,
tool_names,
user_message_preview,
inline_routing_policy,
inline_routing_preferences,
client_api,
provider_id,
})
Expand Down
36 changes: 11 additions & 25 deletions crates/brightstaff/src/handlers/llm/model_selection.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use common::configuration::ModelUsagePreference;
use common::configuration::TopLevelRoutingPreference;
use hermesllm::clients::endpoints::SupportedUpstreamAPIs;
use hermesllm::{ProviderRequest, ProviderRequestType};
use hermesllm::ProviderRequestType;
use hyper::StatusCode;
use std::sync::Arc;
use tracing::{debug, info, warn};
Expand All @@ -10,7 +10,10 @@ use crate::streaming::truncate_message;
use crate::tracing::routing;

/// Outcome of routing a chat request to an upstream model.
pub struct RoutingResult {
    /// Primary model to use (first in the ranked list).
    pub model_name: String,
    /// Full ranked list — use subsequent entries as fallbacks on 429/5xx.
    pub models: Vec<String>,
    /// Name of the matched routing preference; `None` when no route matched
    /// (in which case `model_name` is the sentinel "none" — see the fallback path below).
    pub route_name: Option<String>,
}

Expand Down Expand Up @@ -39,11 +42,8 @@ pub async fn router_chat_get_upstream_model(
traceparent: &str,
request_path: &str,
request_id: &str,
inline_usage_preferences: Option<Vec<ModelUsagePreference>>,
inline_routing_preferences: Option<Vec<TopLevelRoutingPreference>>,
) -> Result<RoutingResult, RoutingError> {
// Clone metadata for routing before converting (which consumes client_request)
let routing_metadata = client_request.metadata().clone();

// Convert to ChatCompletionsRequest for routing (regardless of input type)
let chat_request = match ProviderRequestType::try_from((
client_request,
Expand Down Expand Up @@ -78,22 +78,6 @@ pub async fn router_chat_get_upstream_model(
"router request"
);

// Use inline preferences if provided, otherwise fall back to metadata extraction
let usage_preferences: Option<Vec<ModelUsagePreference>> = if inline_usage_preferences.is_some()
{
inline_usage_preferences
} else {
let usage_preferences_str: Option<String> =
routing_metadata.as_ref().and_then(|metadata| {
metadata
.get("plano_preference_config")
.map(|value| value.to_string())
});
usage_preferences_str
.as_ref()
.and_then(|s| serde_yaml::from_str(s).ok())
};

// Prepare log message with latest message from chat request
let latest_message_for_log = chat_request
.messages
Expand All @@ -107,7 +91,6 @@ pub async fn router_chat_get_upstream_model(
let latest_message_for_log = truncate_message(&latest_message_for_log, 50);

info!(
has_usage_preferences = usage_preferences.is_some(),
path = %request_path,
latest_message = %latest_message_for_log,
"processing router request"
Expand All @@ -121,7 +104,7 @@ pub async fn router_chat_get_upstream_model(
.determine_route(
&chat_request.messages,
traceparent,
usage_preferences,
inline_routing_preferences,
request_id,
)
.await;
Expand All @@ -132,10 +115,12 @@ pub async fn router_chat_get_upstream_model(

match routing_result {
Ok(route) => match route {
Some((route_name, model_name)) => {
Some((route_name, ranked_models)) => {
let model_name = ranked_models.first().cloned().unwrap_or_default();
current_span.record("route.selected_model", model_name.as_str());
Ok(RoutingResult {
model_name,
models: ranked_models,
route_name: Some(route_name),
})
}
Expand All @@ -147,6 +132,7 @@ pub async fn router_chat_get_upstream_model(

Ok(RoutingResult {
model_name: "none".to_string(),
models: vec!["none".to_string()],
route_name: None,
})
}
Expand Down
Loading
Loading