Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pcrDev.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"HashAlgorithm": "Sha384 { ... }",
"PCR0": "e7c17ca4d532cc072adf3275f389d38e569e9eca6b2126e8bbf44a232be08e760f4609ed1229eab95d30f96beea5b26f",
"PCR0": "3d9321e733a46639d8ae761f14abd8da63fdcc1b9f21deba24daea1f2d45e80aeec5a32a80cbdb1a2e3d82051300d99f",
"PCR1": "f004075c672258b499f8e88d59701031a3b451f65c7de60c81d09da2b0799272675481ec390527594dd7069cb7de59d7",
"PCR2": "2a0bae73ea3695a197d8632b6b51bf1e56a10c3afb644464d8893557cd10efacca12716635d2a30040015a6396fb1278"
"PCR2": "0779b8da6c8f7991a647b66470f86b8e709b96cf1abfb809d7939b4b5e12adc22ce25cc56c48aae421ad298e7c55bab9"
}
7 changes: 7 additions & 0 deletions pcrDevHistory.json
Original file line number Diff line number Diff line change
Expand Up @@ -467,5 +467,12 @@
"PCR2": "2a0bae73ea3695a197d8632b6b51bf1e56a10c3afb644464d8893557cd10efacca12716635d2a30040015a6396fb1278",
"timestamp": 1767041480,
"signature": "xDOixutPlcvdAnfkcmdgj3eyDP+5tGgAvqj0irHvPC/QVZtI7XzpQl6QL4uW0ed4MDppE82AhAufykz1QIeYZ9oEzvHjv+CIP3QmveekJlZEMNBdHlw6fq1jTxMAH+jC"
},
{
"PCR0": "3d9321e733a46639d8ae761f14abd8da63fdcc1b9f21deba24daea1f2d45e80aeec5a32a80cbdb1a2e3d82051300d99f",
"PCR1": "f004075c672258b499f8e88d59701031a3b451f65c7de60c81d09da2b0799272675481ec390527594dd7069cb7de59d7",
"PCR2": "0779b8da6c8f7991a647b66470f86b8e709b96cf1abfb809d7939b4b5e12adc22ce25cc56c48aae421ad298e7c55bab9",
"timestamp": 1767208875,
"signature": "bqmQxASWSsssZ2mDgKN9kuTKuC9xnKZyvtvFaPoQ48a2iX0lmH97gzmBCahW0XUWDZkkAPUZTiq7rxtzZfmxYdwuyM/DgKdgh5cqWRv18Lm/Q6QL/LFjwCKwZgfxQ5zd"
}
]
4 changes: 2 additions & 2 deletions pcrProd.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"HashAlgorithm": "Sha384 { ... }",
"PCR0": "9b19a90e826870ba2009ca05a5863fbcd08ab567cf410451e76f682060484fe476d694765d357c72f0096733e40ca080",
"PCR0": "287c9c5a49a2a15fd4fb9a0fd8a0824592426aa403a5a235f501a01549239f61bf2cd80e523ef5f4f24acf18f9e1261c",
"PCR1": "f004075c672258b499f8e88d59701031a3b451f65c7de60c81d09da2b0799272675481ec390527594dd7069cb7de59d7",
"PCR2": "385f62c3a47c42239782369010c0783a2e2871590e2daf7fddb25fa8f4f6247c7f50d2213b0e95975510e3dca307f934"
"PCR2": "50f5d5f6dac30558e51df0710f3c9f94b64c08d0bd1626aa9a2b91ce0f8960131b7ed184326cfbc3ff68f27b0755e47e"
}
7 changes: 7 additions & 0 deletions pcrProdHistory.json
Original file line number Diff line number Diff line change
Expand Up @@ -467,5 +467,12 @@
"PCR2": "385f62c3a47c42239782369010c0783a2e2871590e2daf7fddb25fa8f4f6247c7f50d2213b0e95975510e3dca307f934",
"timestamp": 1767041498,
"signature": "5OO3eq8ROMEOe0K9jvdVDvG4rRHX/YT7dix+0txR06pjRuv3jfgz0fNkxjdmkzeSQQqfV8qvYjAJWFI3Z18fG4BOJco6OV4xjaL2+OFK58JDI7+VMgfkUAhbmulxX7RD"
},
{
"PCR0": "287c9c5a49a2a15fd4fb9a0fd8a0824592426aa403a5a235f501a01549239f61bf2cd80e523ef5f4f24acf18f9e1261c",
"PCR1": "f004075c672258b499f8e88d59701031a3b451f65c7de60c81d09da2b0799272675481ec390527594dd7069cb7de59d7",
"PCR2": "50f5d5f6dac30558e51df0710f3c9f94b64c08d0bd1626aa9a2b91ce0f8960131b7ed184326cfbc3ff68f27b0755e47e",
"timestamp": 1767208909,
"signature": "abH5kzpLjCTAA7vW7YdsLifDMVzvjWj1LpLiJGmH7aezleuO21iBSgVS9hBZj+nJHwugNMO8PN/YG9EjF02Vftz6b7M9DtxizzJc9iYwxH9A77Gm1pTQOebeNuOYxgLw"
}
]
2 changes: 2 additions & 0 deletions src/proxy_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ impl ProxyRouter {
routes.insert("qwen3-coder-480b".to_string(), tinfoil_route.clone());
routes.insert("qwen3-vl-30b".to_string(), tinfoil_route.clone());
routes.insert("kimi-k2-thinking".to_string(), tinfoil_route.clone());
routes.insert("nomic-embed-text".to_string(), tinfoil_route.clone());

// Continuum-only models
let continuum_route = ModelRoute {
Expand Down Expand Up @@ -181,6 +182,7 @@ impl ProxyRouter {
"qwen3-coder-480b",
"qwen3-vl-30b",
"kimi-k2-thinking",
"nomic-embed-text",
]
} else {
// Without Tinfoil: only Continuum models
Expand Down
190 changes: 190 additions & 0 deletions src/web/openai.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,24 @@ fn default_transcription_response_format() -> String {
"json".to_string()
}

/// Request structure for embeddings endpoints.
///
/// This is the payload type handed to `decrypt_request` for the
/// `/v1/embeddings` route. The handler re-serializes it to JSON and forwards it
/// to the upstream provider, so the field names and the `skip_serializing_if`
/// attributes below determine what the provider receives.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
struct EmbeddingRequest {
/// Content to embed. Kept as raw JSON because it may take more than one shape;
/// the handler rejects blank strings, empty arrays, and any other JSON type.
input: serde_json::Value, // string or array of strings
/// Model identifier used to look up the provider route; falls back to
/// `default_embedding_model()` when the client omits the field.
#[serde(default = "default_embedding_model")]
model: String,
/// Optional; left out of the forwarded JSON when `None`.
/// NOTE(review): presumably mirrors the OpenAI `encoding_format` parameter — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
encoding_format: Option<String>,
/// Optional; left out of the forwarded JSON when `None`.
/// NOTE(review): presumably the requested embedding size — confirm which
/// values the provider accepts (the type permits negative numbers).
#[serde(skip_serializing_if = "Option::is_none")]
dimensions: Option<i32>,
/// Optional; left out of the forwarded JSON when `None`.
/// NOTE(review): presumably a caller-supplied end-user identifier — confirm.
#[serde(skip_serializing_if = "Option::is_none")]
user: Option<String>,
}

/// Serde fallback that supplies the model name when an embeddings request
/// does not specify `model`.
fn default_embedding_model() -> String {
    String::from("nomic-embed-text")
}

// ============================================================================
// Centralized Billing Architecture - New Types
// ============================================================================
Expand Down Expand Up @@ -200,6 +218,13 @@ pub fn router(app_state: Arc<AppState>) -> Router<()> {
decrypt_request::<TranscriptionRequest>,
)),
)
.route(
"/v1/embeddings",
post(proxy_embeddings).layer(axum::middleware::from_fn_with_state(
app_state.clone(),
decrypt_request::<EmbeddingRequest>,
)),
)
.with_state(app_state)
}

Expand Down Expand Up @@ -1533,6 +1558,171 @@ async fn proxy_tts(
encrypt_response(&state, &session_id, &audio_response).await
}

/// Returns `true` when an embeddings `input` value carries nothing to embed.
///
/// A string is empty when it is blank after trimming, an array when it has no
/// elements. Every other JSON type is treated as invalid and reported as empty.
fn embedding_input_is_empty(input: &Value) -> bool {
    match input {
        Value::String(text) => text.trim().is_empty(),
        Value::Array(items) => items.is_empty(),
        _ => true,
    }
}

/// Extracts the billable prompt-token count from a provider embeddings response.
///
/// Returns `0` when `usage.prompt_tokens` is missing or not an integer.
/// The value is clamped to `0..=i32::MAX` before narrowing so that an
/// out-of-range number reported by the provider saturates instead of wrapping
/// into an unrelated (possibly small positive) billing amount.
fn billable_prompt_tokens(response: &Value) -> i32 {
    let raw = response
        .get("usage")
        .and_then(|usage| usage.get("prompt_tokens"))
        .and_then(Value::as_i64)
        .unwrap_or(0);

    // The cast is lossless: the clamp guarantees the value fits in an i32.
    raw.clamp(0, i64::from(i32::MAX)) as i32
}

/// Handles `POST /v1/embeddings`: forwards the decrypted request to the
/// provider configured for the requested model, publishes a usage event for
/// the prompt tokens consumed, and returns the provider's JSON encrypted for
/// the caller's session.
///
/// # Errors
///
/// * `ApiError::Unauthorized` - the caller is a guest and is not confirmed as
///   paid (not paid, billing lookup failed, or no billing client configured).
/// * `ApiError::BadRequest` - the input is empty/invalid, or the model has no
///   entry in the routing table.
/// * `ApiError::InternalServerError` - the request could not be built or sent,
///   timed out, the provider answered with a non-success status, or its
///   response body could not be read or parsed.
async fn proxy_embeddings(
    State(state): State<Arc<AppState>>,
    _headers: HeaderMap,
    axum::Extension(session_id): axum::Extension<Uuid>,
    axum::Extension(user): axum::Extension<User>,
    axum::Extension(auth_method): axum::Extension<AuthMethod>,
    axum::Extension(embedding_request): axum::Extension<EmbeddingRequest>,
) -> Result<Json<EncryptedResponse<Value>>, ApiError> {
    debug!("Entering proxy_embeddings function");

    // Check if guest user is allowed (paid guests are allowed, free guests are not)
    if user.is_guest() {
        if let Some(billing_client) = &state.billing_client {
            match billing_client.is_user_paid(user.uuid).await {
                Ok(true) => {
                    debug!("Paid guest user allowed for embeddings: {}", user.uuid);
                }
                Ok(false) => {
                    error!(
                        "Free guest user attempted to use embeddings feature: {}",
                        user.uuid
                    );
                    return Err(ApiError::Unauthorized);
                }
                Err(e) => {
                    // Fail closed: an unverifiable guest is treated as unpaid.
                    error!("Billing check failed for guest user {}: {}", user.uuid, e);
                    return Err(ApiError::Unauthorized);
                }
            }
        } else {
            error!(
                "Guest user attempted to use embeddings without billing client: {}",
                user.uuid
            );
            return Err(ApiError::Unauthorized);
        }
    }

    // Validate input is not empty
    if embedding_input_is_empty(&embedding_request.input) {
        error!("Input is empty or invalid");
        return Err(ApiError::BadRequest);
    }

    // Get the model route configuration
    let route = match state.proxy_router.get_model_route(&embedding_request.model) {
        Some(r) => r,
        None => {
            error!(
                "Model '{}' not found in routing table",
                embedding_request.model
            );
            return Err(ApiError::BadRequest);
        }
    };

    // Create a new hyper client
    let https = HttpsConnector::new();
    let client = Client::builder()
        .pool_idle_timeout(Duration::from_secs(30))
        .pool_max_idle_per_host(10)
        .build::<_, HyperBody>(https);

    // Build request body
    let request_body = serde_json::to_string(&embedding_request).map_err(|e| {
        error!("Failed to serialize embedding request: {:?}", e);
        ApiError::InternalServerError
    })?;

    // Build request to provider
    let endpoint = format!("{}/v1/embeddings", route.primary.base_url);
    let mut req = Request::builder()
        .method("POST")
        .uri(&endpoint)
        .header("Content-Type", "application/json");

    // Only attach credentials when the route actually has a non-empty key.
    if let Some(api_key) = &route.primary.api_key {
        if !api_key.is_empty() {
            req = req.header("Authorization", format!("Bearer {}", api_key));
        }
    }

    let req = req.body(HyperBody::from(request_body)).map_err(|e| {
        error!("Failed to create request: {:?}", e);
        ApiError::InternalServerError
    })?;

    // Send request with timeout
    let res = timeout(
        Duration::from_secs(REQUEST_TIMEOUT_SECS),
        client.request(req),
    )
    .await
    .map_err(|_| {
        error!(
            "Embeddings request timed out after {}s",
            REQUEST_TIMEOUT_SECS
        );
        ApiError::InternalServerError
    })?
    .map_err(|e| {
        error!("Failed to send embeddings request: {:?}", e);
        ApiError::InternalServerError
    })?;

    if !res.status().is_success() {
        let status = res.status();
        // Best effort: log the provider's error body, or just the status if
        // the body cannot be read.
        let body_bytes = to_bytes(res.into_body()).await.ok();
        let error_msg = body_bytes
            .map(|b| String::from_utf8_lossy(&b).to_string())
            .unwrap_or_else(|| status.to_string());
        error!(
            "Embeddings proxy returned non-success status: {} - {}",
            status, error_msg
        );
        return Err(ApiError::InternalServerError);
    }

    // Parse response
    let body_bytes = to_bytes(res.into_body()).await.map_err(|e| {
        error!("Failed to read embeddings response body: {:?}", e);
        ApiError::InternalServerError
    })?;

    let response_json: Value = serde_json::from_slice(&body_bytes).map_err(|e| {
        error!("Failed to parse embeddings response: {:?}", e);
        ApiError::InternalServerError
    })?;

    // Handle billing - embeddings only have prompt_tokens (no completion_tokens)
    let prompt_tokens = billable_prompt_tokens(&response_json);
    if prompt_tokens > 0 {
        let billing_context = BillingContext::new(auth_method, embedding_request.model.clone());
        let embedding_usage = CompletionUsage {
            prompt_tokens,
            completion_tokens: 0, // Embeddings don't have completion tokens
        };
        publish_usage_event_internal(
            &state,
            &user,
            &billing_context,
            embedding_usage,
            &route.primary.provider_name,
        )
        .await;
    }

    debug!("Exiting proxy_embeddings function");

    // Encrypt and return the response
    encrypt_response(&state, &session_id, &response_json).await
}

/// Helper function to try a provider once
async fn try_provider(
client: &Client<HttpsConnector<hyper::client::HttpConnector>, HyperBody>,
Expand Down
Binary file modified tinfoil-proxy/dist/tinfoil-proxy
Binary file not shown.
Loading