From cc22169955470c1a044df0cc7bef37374f04c30e Mon Sep 17 00:00:00 2001 From: JustAGhosT Date: Sun, 15 Mar 2026 02:53:34 +0200 Subject: [PATCH] feat(telemetry): add OpenTelemetry support for request-to-token attribution - Add otel callback to LiteLLM success/failure callbacks - Add OTEL env vars: OTEL_SERVICE_NAME, OTEL_TRACER_NAME, OTEL_EXPORTER_OTLP_ENDPOINT - Add new variables: otel_exporter_endpoint, otel_service_name - Update planning doc with implementation details and upstream requirements --- docs/planning/request_to_token_attribution.md | 61 +++++++++++++++---- infra/modules/aigateway_aca/main.tf | 39 ++++++++++-- infra/modules/aigateway_aca/variables.tf | 13 ++++ 3 files changed, 96 insertions(+), 17 deletions(-) diff --git a/docs/planning/request_to_token_attribution.md b/docs/planning/request_to_token_attribution.md index 9e3674d..5d51ea8 100644 --- a/docs/planning/request_to_token_attribution.md +++ b/docs/planning/request_to_token_attribution.md @@ -29,20 +29,37 @@ Document the current architecture: ## What Can Be Done in This Repo -### Phase 1: LiteLLM Custom Callback (Core Implementation) +### Phase 1: OpenTelemetry Integration (Core Implementation) -**Files to modify:** +**Implementation Approach:** -- `infra/modules/aigateway_aca/main.tf` - Add custom callback config -- Create new custom callback module +Instead of a custom callback (which requires a custom LiteLLM image), we're using LiteLLM's built-in OpenTelemetry support. This provides structured traces with token telemetry out of the box. 
-**Approach:** Use LiteLLM's CustomLogger class or success_callback to emit structured token telemetry +**Changes Made:** -**Challenge:** LiteLLM runs as a container image - need to either: +- Added `otel` to LiteLLM `success_callback` and `failure_callback` in `infra/modules/aigateway_aca/main.tf` +- Added OTEL environment variables: + - `OTEL_SERVICE_NAME` - service name for traces + - `OTEL_TRACER_NAME` - tracer name + - `OTEL_EXPORTER_OTLP_ENDPOINT` - OTLP collector endpoint + - `OTEL_EXPORTER_OTLP_PROTOCOL` - protocol (http/json) +- Added new variables in `infra/modules/aigateway_aca/variables.tf`: + - `otel_exporter_endpoint` - OTLP collector URL + - `otel_service_name` - custom service name -1. Build a custom image with callback baked in -2. Use environment variables + config-based callback -3. Add a sidecar container +**How It Works:** + +LiteLLM's OTEL callback automatically emits spans with: + +- Model name, provider, deployment +- Token usage (prompt_tokens, completion_tokens, total_tokens) +- Duration +- Request/response metadata + +**Files Modified:** + +- `infra/modules/aigateway_aca/main.tf` - Added OTEL callback and env vars +- `infra/modules/aigateway_aca/variables.tf` - Added OTEL configuration variables ### Phase 2: Correlation ID Propagation @@ -59,7 +76,27 @@ Document the current architecture: ### 1. cognitive-mesh (Upstream Caller) -Required: Must pass correlation headers when calling gateway: +**Required:** Must pass correlation headers when calling gateway. 
There are two methods: + +**Method A: Via Request Metadata (Recommended)** +Pass correlation IDs in the request body `metadata` field: + +```json +{ + "model": "gpt-5.3-codex", + "messages": [{ "role": "user", "content": "Hello" }], + "metadata": { + "request_id": "req_123", + "session_id": "sess_456", + "workflow": "manual_orchestration", + "stage": "writer", + "endpoint": "/api/manual-orchestration/sessions/start", + "user_id": "user_abc" + } +} +``` + +**Method B: Via HTTP Headers** - x-request-id - x-session-id @@ -68,9 +105,11 @@ Required: Must pass correlation headers when calling gateway: - x-stage-name - x-user-id +_Note: Method B requires additional LiteLLM configuration or middleware._ + ### 2. pvc-costops-analytics (Downstream Analytics) -Required: KQL queries and dashboards to: +**Required:** KQL queries and dashboards to: - Join requests table to token events by operation_Id/request_id - Aggregate rollups by endpoint, workflow, stage, model, deployment diff --git a/infra/modules/aigateway_aca/main.tf b/infra/modules/aigateway_aca/main.tf index 53e2fca..412723c 100644 --- a/infra/modules/aigateway_aca/main.tf +++ b/infra/modules/aigateway_aca/main.tf @@ -41,6 +41,7 @@ locals { # Features enabled here: # - JSON structured logging → Log Analytics Workspace via Container Apps stdout # - Prometheus /metrics endpoint (built-in, no extra infra) + # - OpenTelemetry tracing for request-to-token attribution # - Langfuse tracing (when both langfuse_public_key and langfuse_secret_key are provided) # - Redis semantic caching (when enable_redis_cache = true) # - Global budget / rate limits (when set above 0) @@ -73,16 +74,13 @@ locals { num_retries: 2 json_logs: true # Prometheus /metrics: token usage, latency and error rate at /metrics + # OpenTelemetry: structured traces with token telemetry for attribution success_callback: - prometheus - %{if var.langfuse_public_key != "" && var.langfuse_secret_key != ""~} - - langfuse - %{endif} + - otel failure_callback: - 
prometheus - %{if var.langfuse_public_key != "" && var.langfuse_secret_key != ""~} - - langfuse - %{endif} + - otel %{if var.enable_redis_cache~} # Redis: deduplicate identical requests to reduce Azure OpenAI token spend cache: true @@ -402,6 +400,35 @@ resource "azurerm_container_app" "ca" { } } + # OpenTelemetry configuration for request-to-token attribution + # OTEL_EXPORTER_OTLP_ENDPOINT should be set to your OTLP collector URL + # e.g., https://your-collector.eastus.azure.com:4318 + env { + name = "OTEL_SERVICE_NAME" + value = "${var.otel_service_name}-${var.env}" + } + + env { + name = "OTEL_TRACER_NAME" + value = "litellm" + } + + dynamic "env" { + for_each = var.otel_exporter_endpoint != "" ? [var.otel_exporter_endpoint] : [] + content { + name = "OTEL_EXPORTER_OTLP_ENDPOINT" + value = env.value + } + } + + dynamic "env" { + for_each = var.otel_exporter_endpoint != "" ? ["http/json"] : [] + content { + name = "OTEL_EXPORTER_OTLP_PROTOCOL" + value = env.value + } + } + # LiteLLM commonly listens on 4000; set port as needed } } diff --git a/infra/modules/aigateway_aca/variables.tf b/infra/modules/aigateway_aca/variables.tf index d5a5d87..377b408 100644 --- a/infra/modules/aigateway_aca/variables.tf +++ b/infra/modules/aigateway_aca/variables.tf @@ -211,3 +211,16 @@ variable "tpm_limit" { description = "Global tokens-per-minute cap across all API keys (0 = no limit)." default = 0 } + +# OpenTelemetry configuration for request-to-token attribution +variable "otel_exporter_endpoint" { + type = string + description = "OpenTelemetry OTLP exporter endpoint (e.g., https://collector.example.com:4318). Leave empty to disable OTEL tracing." + default = "" +} + +variable "otel_service_name" { + type = string + description = "Base OpenTelemetry service name for traces; the deployment environment is appended as a suffix (e.g., ai-gateway-dev)." + default = "ai-gateway" +}