diff --git a/internal/cmd/root.go b/internal/cmd/root.go index 8ad7df2a..fb262b66 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -354,6 +354,10 @@ func run(cmd *cobra.Command, args []string) error { } }() + // Apply W3C parent context from configured traceId/spanId (spec §4.1.3.6). + // This links the gateway process lifetime span into a pre-existing trace when provided. + ctx = tracing.ParentContext(ctx, tracingCfg) + if tracingProvider.Tracer() != nil { // Log what InitProvider actually resolved (config already has env var defaults merged via CLI flags) endpoint := "" diff --git a/internal/config/config_core.go b/internal/config/config_core.go index c9fc0385..d98f7a9f 100644 --- a/internal/config/config_core.go +++ b/internal/config/config_core.go @@ -118,10 +118,16 @@ type GatewayConfig struct { // Example values: "copilot-swe-agent[bot]", "my-org-bot[bot]" TrustedBots []string `toml:"trusted_bots" json:"trusted_bots,omitempty"` - // Tracing holds OpenTelemetry OTLP tracing configuration. + // Tracing holds OpenTelemetry OTLP tracing configuration (legacy TOML key). + // New configurations should use the opentelemetry key (spec §4.1.3.6). // When Endpoint is set, traces are exported to the specified OTLP endpoint. // When omitted or Endpoint is empty, a noop tracer is used (zero overhead). Tracing *TracingConfig `toml:"tracing" json:"tracing,omitempty"` + + // Opentelemetry holds OpenTelemetry OTLP tracing configuration per spec §4.1.3.6. + // This key takes precedence over the legacy tracing key when both are present. + // MUST use an HTTPS endpoint when configured. + Opentelemetry *TracingConfig `toml:"opentelemetry" json:"opentelemetry,omitempty"` } // HTTPKeepaliveInterval returns the keepalive interval as a time.Duration. @@ -349,6 +355,21 @@ func LoadFromFile(path string) (*Config, error) { return nil, err } + // Merge opentelemetry key into tracing when present (spec §4.1.3.6). 
+ // opentelemetry takes precedence over the legacy tracing key. + if cfg.Gateway.Opentelemetry != nil { + cfg.Gateway.Tracing = cfg.Gateway.Opentelemetry + cfg.Gateway.Opentelemetry = nil + // Expand ${VAR} expressions in tracing fields before validation. + if err := expandTracingVariables(cfg.Gateway.Tracing); err != nil { + return nil, err + } + // Validate HTTPS endpoint requirement for the opentelemetry section + if err := validateOpenTelemetryConfig(cfg.Gateway.Tracing, true); err != nil { + return nil, err + } + } + // Apply core gateway defaults applyGatewayDefaults(cfg.Gateway) diff --git a/internal/config/config_stdin.go b/internal/config/config_stdin.go index a5865f60..9907820b 100644 --- a/internal/config/config_stdin.go +++ b/internal/config/config_stdin.go @@ -32,14 +32,33 @@ type StdinConfig struct { // StdinGatewayConfig represents gateway configuration in stdin JSON format. // Uses pointers for optional fields to distinguish between unset and zero values. type StdinGatewayConfig struct { - Port *int `json:"port,omitempty"` - APIKey string `json:"apiKey,omitempty"` - Domain string `json:"domain,omitempty"` - StartupTimeout *int `json:"startupTimeout,omitempty"` - ToolTimeout *int `json:"toolTimeout,omitempty"` - KeepaliveInterval *int `json:"keepaliveInterval,omitempty"` - PayloadDir string `json:"payloadDir,omitempty"` - TrustedBots []string `json:"trustedBots,omitempty"` + Port *int `json:"port,omitempty"` + APIKey string `json:"apiKey,omitempty"` + Domain string `json:"domain,omitempty"` + StartupTimeout *int `json:"startupTimeout,omitempty"` + ToolTimeout *int `json:"toolTimeout,omitempty"` + KeepaliveInterval *int `json:"keepaliveInterval,omitempty"` + PayloadDir string `json:"payloadDir,omitempty"` + TrustedBots []string `json:"trustedBots,omitempty"` + OpenTelemetry *StdinOpenTelemetryConfig `json:"opentelemetry,omitempty"` +} + +// StdinOpenTelemetryConfig represents the OpenTelemetry configuration in stdin JSON format (spec §4.1.3.6). 
+type StdinOpenTelemetryConfig struct { + // Endpoint is the OTLP/HTTP collector URL. MUST be HTTPS. Supports ${VAR} expansion. + Endpoint string `json:"endpoint"` + + // Headers are HTTP headers for export requests (e.g. auth tokens). Values support ${VAR}. + Headers map[string]string `json:"headers,omitempty"` + + // TraceID is the parent trace ID (32-char lowercase hex, W3C format). Supports ${VAR}. + TraceID string `json:"traceId,omitempty"` + + // SpanID is the parent span ID (16-char lowercase hex, W3C format). Ignored without TraceID. Supports ${VAR}. + SpanID string `json:"spanId,omitempty"` + + // ServiceName is the service.name resource attribute. Default: "mcp-gateway". + ServiceName string `json:"serviceName,omitempty"` } // StdinGuardConfig represents a guard configuration in stdin JSON format. diff --git a/internal/config/config_tracing.go b/internal/config/config_tracing.go index bf87bcca..a6ad6832 100644 --- a/internal/config/config_tracing.go +++ b/internal/config/config_tracing.go @@ -15,27 +15,48 @@ const DefaultTracingServiceName = "mcp-gateway" // - OTEL_EXPORTER_OTLP_ENDPOINT — overrides Endpoint // - OTEL_SERVICE_NAME — overrides ServiceName // -// Example TOML: +// Example TOML (spec §4.1.3.6, using the opentelemetry section): // -// [gateway.tracing] -// endpoint = "http://localhost:4318" +// [gateway.opentelemetry] +// endpoint = "https://otel-collector.example.com" // service_name = "mcp-gateway" -// sample_rate = 1.0 +// trace_id = "4bf92f3577b34da6a3ce929d0e0e4736" +// span_id = "00f067aa0ba902b7" +// +// [gateway.opentelemetry.headers] +// Authorization = "Bearer ${OTEL_TOKEN}" type TracingConfig struct { // Endpoint is the OTLP HTTP endpoint to export traces to. - // Example: "http://localhost:4318" (Jaeger, Grafana Tempo, Honeycomb, etc.) + // When using the opentelemetry section (spec §4.1.3.6), this MUST be an HTTPS URL. // If empty, tracing is disabled and a noop tracer is used. 
Endpoint string `toml:"endpoint" json:"endpoint,omitempty"` + // Headers are HTTP headers sent with every OTLP export request (e.g. auth tokens). + // Header values support ${VAR} variable expansion (expanded at config load time). + Headers map[string]string `toml:"headers" json:"headers,omitempty"` + + // TraceID is an optional W3C trace ID (32-char lowercase hex) used to construct the + // parent traceparent header, linking gateway spans into a pre-existing trace. + // Supports ${VAR} variable expansion (expanded at config load time). + // Must be 32 lowercase hex characters and must not be all zeros. + TraceID string `toml:"trace_id" json:"traceId,omitempty"` + + // SpanID is an optional W3C span ID (16-char lowercase hex) paired with TraceID + // to construct the parent traceparent header. Ignored when TraceID is absent. + // Supports ${VAR} variable expansion (expanded at config load time). + // Must be 16 lowercase hex characters and must not be all zeros. + SpanID string `toml:"span_id" json:"spanId,omitempty"` + // ServiceName is the service name reported in traces. // Defaults to "mcp-gateway". - ServiceName string `toml:"service_name" json:"service_name,omitempty"` + ServiceName string `toml:"service_name" json:"serviceName,omitempty"` // SampleRate controls the fraction of traces that are sampled and exported. // Valid range: 0.0 (no sampling) to 1.0 (sample everything). // Defaults to 1.0 (100% sampling). // Uses a pointer so that 0.0 can be distinguished from "unset". - SampleRate *float64 `toml:"sample_rate" json:"sample_rate,omitempty"` + // Note: SampleRate is a gateway extension field not present in spec §4.1.3.6. + SampleRate *float64 `toml:"sample_rate" json:"sampleRate,omitempty"` } // GetSampleRate returns the configured sample rate, defaulting to 1.0 if unset. @@ -55,4 +76,25 @@ func init() { } } }) + + // Register stdin converter for the opentelemetry gateway config field (spec §4.1.3.6). 
+ RegisterStdinConverter(func(cfg *Config, stdinCfg *StdinConfig) { + if stdinCfg.Gateway == nil || stdinCfg.Gateway.OpenTelemetry == nil { + return + } + otel := stdinCfg.Gateway.OpenTelemetry + if cfg.Gateway == nil { + cfg.Gateway = &GatewayConfig{} + } + cfg.Gateway.Tracing = &TracingConfig{ + Endpoint: otel.Endpoint, + Headers: otel.Headers, + TraceID: otel.TraceID, + SpanID: otel.SpanID, + ServiceName: otel.ServiceName, + } + if cfg.Gateway.Tracing.ServiceName == "" { + cfg.Gateway.Tracing.ServiceName = DefaultTracingServiceName + } + }) } diff --git a/internal/config/config_tracing_test.go b/internal/config/config_tracing_test.go new file mode 100644 index 00000000..cebf1147 --- /dev/null +++ b/internal/config/config_tracing_test.go @@ -0,0 +1,282 @@ +// Package config provides configuration loading and parsing. +// This file contains compliance tests for OpenTelemetry configuration per spec §4.1.3.6 +// (MCP Gateway Specification v1.11.0). +// +// Test IDs correspond to the compliance matrix in the issue: +// - T-OTEL-001: Gateway starts when opentelemetry is omitted +// - T-OTEL-002: Gateway starts with valid HTTPS endpoint +// - T-OTEL-003: Reject missing endpoint when opentelemetry is present +// - T-OTEL-004: Reject non-HTTPS endpoint +// - T-OTEL-005: TracingConfig carries required fields (headers, traceId, spanId) +// - T-OTEL-006: Headers are preserved in TracingConfig +// - T-OTEL-007: Valid traceId + spanId pass validation +// - T-OTEL-008: traceId-only is valid (spanId optional) +// - T-OTEL-009: spanId without traceId logs warning but does not fail +// - T-OTEL-010: serviceName defaults to "mcp-gateway" +package config + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// T-OTEL-001: Gateway starts when the opentelemetry section is omitted. +// No error should be produced when TracingConfig is nil. 
+func TestOTEL001_NoOpenTelemetryConfig_NoError(t *testing.T) { + err := validateOpenTelemetryConfig(nil, true) + require.NoError(t, err, "T-OTEL-001: nil config must not produce an error") +} + +// T-OTEL-002: Gateway starts (validates) with a valid HTTPS endpoint. +func TestOTEL002_ValidHTTPSEndpoint_NoError(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + ServiceName: "mcp-gateway", + } + err := validateOpenTelemetryConfig(cfg, true) + require.NoError(t, err, "T-OTEL-002: valid HTTPS endpoint must be accepted") +} + +// T-OTEL-003: Reject missing endpoint when the opentelemetry section is present. +func TestOTEL003_MissingEndpoint_Error(t *testing.T) { + cfg := &TracingConfig{ + ServiceName: "mcp-gateway", + // Endpoint intentionally absent + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "T-OTEL-003: missing endpoint must be rejected") + assert.Contains(t, err.Error(), "endpoint", "error must mention the missing field") +} + +// T-OTEL-004: Reject non-HTTPS endpoint. +func TestOTEL004_NonHTTPSEndpoint_Error(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "http://otel-collector.example.com", // HTTP, not HTTPS + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "T-OTEL-004: non-HTTPS endpoint must be rejected") + assert.Contains(t, err.Error(), "HTTPS", "error must mention the HTTPS requirement") +} + +// T-OTEL-005: TracingConfig struct carries all required spec §4.1.3.6 fields. 
+func TestOTEL005_TracingConfigFields(t *testing.T) { + headers := map[string]string{"Authorization": "Bearer token"} + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + Headers: headers, + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "00f067aa0ba902b7", + ServiceName: "my-service", + } + + assert.Equal(t, "https://otel-collector.example.com", cfg.Endpoint) + assert.Equal(t, headers, cfg.Headers) + assert.Equal(t, "4bf92f3577b34da6a3ce929d0e0e4736", cfg.TraceID) + assert.Equal(t, "00f067aa0ba902b7", cfg.SpanID) + assert.Equal(t, "my-service", cfg.ServiceName) +} + +// T-OTEL-006: Headers are preserved in TracingConfig when configured. +func TestOTEL006_HeadersPreserved(t *testing.T) { + headers := map[string]string{ + "Authorization": "Bearer my-token", + "X-Custom": "value", + } + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + Headers: headers, + } + + err := validateOpenTelemetryConfig(cfg, true) + require.NoError(t, err) + assert.Equal(t, headers, cfg.Headers, "T-OTEL-006: headers must be preserved unchanged") +} + +// T-OTEL-007: Valid W3C traceId (32-char lowercase hex) + spanId (16-char lowercase hex) pass validation. +func TestOTEL007_ValidTraceIDAndSpanID_NoError(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "00f067aa0ba902b7", + } + err := validateOpenTelemetryConfig(cfg, true) + require.NoError(t, err, "T-OTEL-007: valid traceId+spanId must be accepted") +} + +// T-OTEL-007b: Invalid traceId (wrong length) must be rejected. 
+func TestOTEL007b_InvalidTraceID_Error(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "4bf92f35", // too short + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "T-OTEL-007b: invalid traceId must be rejected") + assert.Contains(t, err.Error(), "traceId") +} + +// T-OTEL-007c: Invalid spanId (wrong length) must be rejected. +func TestOTEL007c_InvalidSpanID_Error(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "00f067aa", // too short + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "T-OTEL-007c: invalid spanId must be rejected") + assert.Contains(t, err.Error(), "spanId") +} + +// T-OTEL-007d: Uppercase hex in traceId must be rejected (must be lowercase). +func TestOTEL007d_UppercaseTraceID_Error(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "4BF92F3577B34DA6A3CE929D0E0E4736", // uppercase + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "T-OTEL-007d: uppercase traceId must be rejected") +} + +// T-OTEL-008: traceId alone (without spanId) is valid — spanId is optional. +func TestOTEL008_TraceIDOnlyIsValid(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + // SpanID intentionally absent + } + err := validateOpenTelemetryConfig(cfg, true) + require.NoError(t, err, "T-OTEL-008: traceId without spanId must be accepted") +} + +// T-OTEL-009: spanId without traceId must NOT fail validation (warning only). 
+func TestOTEL009_SpanIDWithoutTraceID_NoError(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + SpanID: "00f067aa0ba902b7", + // TraceID intentionally absent + } + err := validateOpenTelemetryConfig(cfg, true) + require.NoError(t, err, "T-OTEL-009: spanId without traceId must produce a warning but not an error") +} + +// T-OTEL-010: serviceName defaults to "mcp-gateway" when not specified. +// Tests the actual registered defaults setter applied via applyDefaults. +func TestOTEL010_ServiceNameDefaults(t *testing.T) { + // Test the constant + assert.Equal(t, "mcp-gateway", DefaultTracingServiceName, "T-OTEL-010: DefaultTracingServiceName must be 'mcp-gateway'") + + // Test that the defaults setter correctly applies the default service name + cfg := &Config{ + Gateway: &GatewayConfig{ + Tracing: &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + // ServiceName intentionally absent + }, + }, + } + applyDefaults(cfg) + assert.Equal(t, "mcp-gateway", cfg.Gateway.Tracing.ServiceName, + "T-OTEL-010: default serviceName must be 'mcp-gateway' after applyDefaults") +} + +// TestValidateOpenTelemetryConfig_UnexpandedVarExpressions verifies that unexpanded +// ${VAR} expressions are rejected by validation. In practice, expandTracingVariables +// (TOML path) or ExpandRawJSONVariables (stdin JSON path) expand vars before validation, +// so unexpanded expressions should never reach the validator in normal flow. +func TestValidateOpenTelemetryConfig_UnexpandedVarExpressions(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "${TRACE_ID}", + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "Unexpanded variable expressions must fail hex validation") + assert.Contains(t, err.Error(), "traceId") +} + +// TestExpandTracingVariables verifies that ${VAR} expressions in tracing config +// fields are expanded from environment variables. 
+func TestExpandTracingVariables(t *testing.T) { + t.Setenv("TEST_OTEL_ENDPOINT", "https://otel.example.com") + t.Setenv("TEST_TRACE_ID", "4bf92f3577b34da6a3ce929d0e0e4736") + t.Setenv("TEST_SPAN_ID", "00f067aa0ba902b7") + t.Setenv("TEST_AUTH_TOKEN", "Bearer secret-token") + + cfg := &TracingConfig{ + Endpoint: "${TEST_OTEL_ENDPOINT}", + TraceID: "${TEST_TRACE_ID}", + SpanID: "${TEST_SPAN_ID}", + Headers: map[string]string{"Authorization": "${TEST_AUTH_TOKEN}"}, + } + + err := expandTracingVariables(cfg) + require.NoError(t, err) + + assert.Equal(t, "https://otel.example.com", cfg.Endpoint) + assert.Equal(t, "4bf92f3577b34da6a3ce929d0e0e4736", cfg.TraceID) + assert.Equal(t, "00f067aa0ba902b7", cfg.SpanID) + assert.Equal(t, "Bearer secret-token", cfg.Headers["Authorization"]) + + // After expansion, validation should pass + err = validateOpenTelemetryConfig(cfg, true) + require.NoError(t, err, "Expanded config should pass validation") +} + +// TestExpandTracingVariables_UndefinedVar verifies that an undefined variable +// in tracing config causes an error during expansion. +func TestExpandTracingVariables_UndefinedVar(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "${UNDEFINED_OTEL_ENDPOINT_XYZZY}", + } + err := expandTracingVariables(cfg) + require.Error(t, err, "Undefined variable must cause expansion error") +} + +// TestValidateOpenTelemetryConfig_AllZeroTraceID verifies that an all-zero traceId +// is rejected per W3C Trace Context specification. +func TestValidateOpenTelemetryConfig_AllZeroTraceID(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "00000000000000000000000000000000", + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "All-zero traceId must be rejected per W3C Trace Context") + assert.Contains(t, err.Error(), "all zeros") +} + +// TestValidateOpenTelemetryConfig_AllZeroSpanID verifies that an all-zero spanId +// is rejected per W3C Trace Context specification. 
+func TestValidateOpenTelemetryConfig_AllZeroSpanID(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "0000000000000000", + } + err := validateOpenTelemetryConfig(cfg, true) + require.Error(t, err, "All-zero spanId must be rejected per W3C Trace Context") + assert.Contains(t, err.Error(), "all zeros") +} + +// TestGetSampleRate_NewFields verifies that the new fields don't affect GetSampleRate. +func TestGetSampleRate_NewFields(t *testing.T) { + rate := 0.5 + cfg := &TracingConfig{ + Endpoint: "https://otel-collector.example.com", + Headers: map[string]string{"Authorization": "Bearer tok"}, + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "00f067aa0ba902b7", + ServiceName: "my-service", + SampleRate: &rate, + } + assert.InDelta(t, 0.5, cfg.GetSampleRate(), 0.001) +} + +// TestValidateOpenTelemetryConfig_NonEnforcing verifies that when enforceHTTPS is false, +// a non-HTTPS endpoint is allowed (backward compat with legacy tracing section). +func TestValidateOpenTelemetryConfig_NonEnforcing(t *testing.T) { + cfg := &TracingConfig{ + Endpoint: "http://localhost:4318", // HTTP is OK in legacy mode + } + err := validateOpenTelemetryConfig(cfg, false) + require.NoError(t, err, "Non-enforcing mode should accept HTTP endpoints for backward compat") +} diff --git a/internal/config/schema/mcp-gateway-config.schema.json b/internal/config/schema/mcp-gateway-config.schema.json index 0c7119e6..43afc480 100644 --- a/internal/config/schema/mcp-gateway-config.schema.json +++ b/internal/config/schema/mcp-gateway-config.schema.json @@ -1,346 +1,420 @@ { - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://docs.github.com/gh-aw/schemas/mcp-gateway-config.schema.json", - "title": "MCP Gateway Configuration", - "description": "Configuration schema for the Model Context Protocol (MCP) Gateway as defined in the MCP Gateway Specification v1.0.0. 
The gateway provides transparent HTTP access to multiple MCP servers with protocol translation, server isolation, and authentication capabilities.", - "type": "object", - "properties": { - "mcpServers": { - "type": "object", - "description": "Map of MCP server configurations. Each key is a unique server identifier, and the value is the server configuration.", - "additionalProperties": { - "$ref": "#/definitions/mcpServerConfig" - } - }, - "gateway": { - "$ref": "#/definitions/gatewayConfig", - "description": "Gateway-specific configuration for the MCP Gateway service." - }, - "customSchemas": { - "type": "object", - "description": "Map of custom server type names to JSON Schema URLs for validation. Custom types enable extensibility for specialized MCP server implementations. Keys are type names (must not be 'stdio' or 'http'), values are HTTPS URLs pointing to JSON Schema definitions, or empty strings to skip validation.", - "patternProperties": { - "^(?!stdio$|http$)[a-z][a-z0-9-]*$": { - "oneOf": [ - { - "type": "string", - "format": "uri", - "pattern": "^https://.+" - }, - { - "type": "string", - "enum": [""] + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://docs.github.com/gh-aw/schemas/mcp-gateway-config.schema.json", + "title": "MCP Gateway Configuration", + "description": "Configuration schema for the Model Context Protocol (MCP) Gateway. The gateway provides transparent HTTP access to multiple MCP servers with protocol translation, server isolation, and authentication capabilities.", + "type": "object", + "properties": { + "mcpServers": { + "type": "object", + "description": "Map of MCP server configurations. 
Each key is a unique server identifier, and the value is the server configuration.", + "additionalProperties": { + "$ref": "#/definitions/mcpServerConfig" } - ] - } - }, - "additionalProperties": false - } - }, - "required": ["mcpServers", "gateway"], - "additionalProperties": false, - "definitions": { - "mcpServerConfig": { - "type": "object", - "description": "Configuration for an individual MCP server. Supports stdio servers, HTTP servers, and custom server types registered via customSchemas. Per MCP Gateway Specification section 4.1.4, custom types enable extensibility for specialized MCP server implementations.", - "oneOf": [ - { - "$ref": "#/definitions/stdioServerConfig" - }, - { - "$ref": "#/definitions/httpServerConfig" - }, - { - "$ref": "#/definitions/customServerConfig" - } - ] - }, - "stdioServerConfig": { - "type": "object", - "description": "Configuration for a containerized stdio-based MCP server. The gateway communicates with the server via standard input/output streams. Per MCP Gateway Specification section 3.2.1, all stdio servers MUST be containerized - direct command execution is not supported.", - "properties": { - "type": { - "type": "string", - "enum": ["stdio"], - "description": "Transport type for the MCP server. For containerized servers, use 'stdio'.", - "default": "stdio" - }, - "container": { - "type": "string", - "description": "Container image for the MCP server (e.g., 'ghcr.io/example/mcp-server:latest'). This field is required for stdio servers per MCP Gateway Specification section 4.1.2.", - "minLength": 1, - "pattern": "^[a-zA-Z0-9][a-zA-Z0-9./_-]*(:([a-zA-Z0-9._-]+|latest))?$" - }, - "entrypoint": { - "type": "string", - "description": "Optional entrypoint override for the container, equivalent to 'docker run --entrypoint'. If not specified, the container's default entrypoint is used.", - "minLength": 1 - }, - "entrypointArgs": { - "type": "array", - "description": "Arguments passed to the container entrypoint. 
These are executed inside the container after the entrypoint command.", - "items": { - "type": "string" - }, - "default": [] }, - "mounts": { - "type": "array", - "description": "Volume mounts for the container. Format: 'source:dest' or 'source:dest:mode' where mode is 'ro' (read-only) or 'rw' (read-write). Example: '/host/data:/container/data:ro'", - "items": { - "type": "string", - "pattern": "^[^:]+:[^:]+(:(ro|rw))?$" - }, - "default": [] + "gateway": { + "$ref": "#/definitions/gatewayConfig", + "description": "Gateway-specific configuration for the MCP Gateway service." }, - "env": { - "type": "object", - "description": "Environment variables for the server process. Values may contain variable expressions using '${VARIABLE_NAME}' syntax, which will be resolved from the process environment.", - "additionalProperties": { - "type": "string" - }, - "default": {} - }, - "args": { - "type": "array", - "description": "Additional Docker runtime arguments passed before the container image (e.g., '--network', 'host').", - "items": { - "type": "string" - }, - "default": [] - }, - "tools": { - "type": "array", - "description": "Tool filter for the MCP server. Use ['*'] to allow all tools, or specify a list of tool names to allow. This field is passed through to agent configurations.", - "items": { - "type": "string" - }, - "default": ["*"] + "customSchemas": { + "type": "object", + "description": "Map of custom server type names to JSON Schema URLs for validation. Custom types enable extensibility for specialized MCP server implementations. 
Keys are type names (must not be 'stdio' or 'http'), values are HTTPS URLs pointing to JSON Schema definitions, or empty strings to skip validation.", + "patternProperties": { + "^(?!stdio$|http$)[a-z][a-z0-9-]*$": { + "oneOf": [ + { + "type": "string", + "format": "uri", + "pattern": "^https://.+" + }, + { + "type": "string", + "enum": [ + "" + ] + } + ] + } + }, + "additionalProperties": false } - }, - "required": ["container"], - "additionalProperties": false }, - "httpServerConfig": { - "type": "object", - "description": "Configuration for an HTTP-based MCP server. The gateway forwards requests directly to the specified HTTP endpoint.", - "properties": { - "type": { - "type": "string", - "enum": ["http"], - "description": "Transport type for the MCP server. For HTTP servers, use 'http'." - }, - "url": { - "type": "string", - "description": "HTTP endpoint URL for the MCP server (e.g., 'https://api.example.com/mcp'). This field is required for HTTP servers per MCP Gateway Specification section 4.1.2.", - "format": "uri", - "pattern": "^https?://.+", - "minLength": 1 + "required": [ + "mcpServers", + "gateway" + ], + "additionalProperties": false, + "definitions": { + "mcpServerConfig": { + "type": "object", + "description": "Configuration for an individual MCP server. Supports stdio servers, HTTP servers, and custom server types registered via customSchemas. Per MCP Gateway Specification section 4.1.4, custom types enable extensibility for specialized MCP server implementations.", + "oneOf": [ + { + "$ref": "#/definitions/stdioServerConfig" + }, + { + "$ref": "#/definitions/httpServerConfig" + }, + { + "$ref": "#/definitions/customServerConfig" + } + ] }, - "headers": { - "type": "object", - "description": "HTTP headers to include in requests to the external HTTP MCP server. Commonly used for authentication to the external server (e.g., Authorization: 'Bearer ${API_TOKEN}' for servers that require Bearer tokens). 
Note: This is for authenticating to external HTTP servers, not for gateway client authentication. Values may contain variable expressions using '${VARIABLE_NAME}' syntax.", - "additionalProperties": { - "type": "string" - }, - "default": {} + "stdioServerConfig": { + "type": "object", + "description": "Configuration for a containerized stdio-based MCP server. The gateway communicates with the server via standard input/output streams. Per MCP Gateway Specification section 3.2.1, all stdio servers MUST be containerized - direct command execution is not supported.", + "properties": { + "type": { + "type": "string", + "enum": [ + "stdio" + ], + "description": "Transport type for the MCP server. For containerized servers, use 'stdio'.", + "default": "stdio" + }, + "container": { + "type": "string", + "description": "Container image for the MCP server (e.g., 'ghcr.io/example/mcp-server:latest'). This field is required for stdio servers per MCP Gateway Specification section 4.1.2.", + "minLength": 1, + "pattern": "^[a-zA-Z0-9][a-zA-Z0-9./_-]*(:([a-zA-Z0-9._-]+|latest))?$" + }, + "entrypoint": { + "type": "string", + "description": "Optional entrypoint override for the container, equivalent to 'docker run --entrypoint'. If not specified, the container's default entrypoint is used.", + "minLength": 1 + }, + "entrypointArgs": { + "type": "array", + "description": "Arguments passed to the container entrypoint. These are executed inside the container after the entrypoint command.", + "items": { + "type": "string" + }, + "default": [] + }, + "mounts": { + "type": "array", + "description": "Volume mounts for the container. Format: 'source:dest' or 'source:dest:mode' where mode is 'ro' (read-only) or 'rw' (read-write). Example: '/host/data:/container/data:ro'", + "items": { + "type": "string", + "pattern": "^[^:]+:[^:]+(:(ro|rw))?$" + }, + "default": [] + }, + "env": { + "type": "object", + "description": "Environment variables for the server process. 
Values may contain variable expressions using '${VARIABLE_NAME}' syntax, which will be resolved from the process environment.", + "additionalProperties": { + "type": "string" + }, + "default": {} + }, + "args": { + "type": "array", + "description": "Additional Docker runtime arguments passed before the container image (e.g., '--network', 'host').", + "items": { + "type": "string" + }, + "default": [] + }, + "tools": { + "type": "array", + "description": "Tool filter for the MCP server. Use ['*'] to allow all tools, or specify a list of tool names to allow. This field is passed through to agent configurations.", + "items": { + "type": "string" + }, + "default": [ + "*" + ] + } + }, + "required": [ + "container" + ], + "additionalProperties": false }, - "tools": { - "type": "array", - "description": "Tool filter for the MCP server. Use ['*'] to allow all tools, or specify a list of tool names to allow. This field is passed through to agent configurations.", - "items": { - "type": "string" - }, - "default": ["*"] + "httpServerConfig": { + "type": "object", + "description": "Configuration for an HTTP-based MCP server. The gateway forwards requests directly to the specified HTTP endpoint.", + "properties": { + "type": { + "type": "string", + "enum": [ + "http" + ], + "description": "Transport type for the MCP server. For HTTP servers, use 'http'." + }, + "url": { + "type": "string", + "description": "HTTP endpoint URL for the MCP server (e.g., 'https://api.example.com/mcp'). This field is required for HTTP servers per MCP Gateway Specification section 4.1.2.", + "format": "uri", + "pattern": "^https?://.+", + "minLength": 1 + }, + "headers": { + "type": "object", + "description": "HTTP headers to include in requests to the external HTTP MCP server. Commonly used for authentication to the external server (e.g., Authorization: 'Bearer ${API_TOKEN}' for servers that require Bearer tokens). 
Note: This is for authenticating to external HTTP servers, not for gateway client authentication. Values may contain variable expressions using '${VARIABLE_NAME}' syntax.", + "additionalProperties": { + "type": "string" + }, + "default": {} + }, + "tools": { + "type": "array", + "description": "Tool filter for the MCP server. Use ['*'] to allow all tools, or specify a list of tool names to allow. This field is passed through to agent configurations.", + "items": { + "type": "string" + }, + "default": [ + "*" + ] + }, + "env": { + "type": "object", + "description": "Environment variables to pass through for variable resolution. Values may contain variable expressions using '${VARIABLE_NAME}' syntax, which will be resolved from the process environment.", + "additionalProperties": { + "type": "string" + }, + "default": {} + }, + "guard-policies": { + "type": "object", + "description": "Guard policies for access control at the MCP gateway level. The structure of guard policies is server-specific. For GitHub MCP server, see the GitHub guard policy schema. For other servers (Jira, WorkIQ), different policy schemas will apply.", + "additionalProperties": true + } + }, + "required": [ + "type", + "url" + ], + "additionalProperties": false }, - "env": { - "type": "object", - "description": "Environment variables to pass through for variable resolution. Values may contain variable expressions using '${VARIABLE_NAME}' syntax, which will be resolved from the process environment.", - "additionalProperties": { - "type": "string" - }, - "default": {} + "customServerConfig": { + "type": "object", + "description": "Configuration for a custom MCP server type. Custom types must be registered in customSchemas with a JSON Schema URL. The configuration is validated against the registered schema. 
Per MCP Gateway Specification section 4.1.4, this enables extensibility for specialized MCP server implementations.", + "properties": { + "type": { + "type": "string", + "pattern": "^(?!stdio$|http$)[a-z][a-z0-9-]*$", + "description": "Custom server type name. Must not be 'stdio' or 'http'. Must be registered in customSchemas." + } + }, + "required": [ + "type" + ], + "additionalProperties": true }, - "guard-policies": { - "type": "object", - "description": "Guard policies for access control at the MCP gateway level. The structure of guard policies is server-specific. For GitHub MCP server, see the GitHub guard policy schema. For other servers (Jira, WorkIQ), different policy schemas will apply.", - "additionalProperties": true - } - }, - "required": ["type", "url"], - "additionalProperties": false - }, - "customServerConfig": { - "type": "object", - "description": "Configuration for a custom MCP server type. Custom types must be registered in customSchemas with a JSON Schema URL. The configuration is validated against the registered schema. Per MCP Gateway Specification section 4.1.4, this enables extensibility for specialized MCP server implementations.", - "properties": { - "type": { - "type": "string", - "pattern": "^(?!stdio$|http$)[a-z][a-z0-9-]*$", - "description": "Custom server type name. Must not be 'stdio' or 'http'. Must be registered in customSchemas." + "gatewayConfig": { + "type": "object", + "description": "Gateway-specific configuration for the MCP Gateway service.", + "properties": { + "port": { + "oneOf": [ + { + "type": "integer", + "minimum": 1, + "maximum": 65535 + }, + { + "type": "string", + "pattern": "^\\$\\{[A-Z_][A-Z0-9_]*\\}$" + } + ], + "description": "HTTP server port for the gateway. The gateway exposes endpoints at http://{domain}:{port}/. Can be an integer (1-65535) or a variable expression like '${MCP_GATEWAY_PORT}'." + }, + "apiKey": { + "type": "string", + "description": "API key for authentication. 
When configured, clients must include 'Authorization: ' header in all RPC requests (the API key is used directly without Bearer or other scheme prefix). Per MCP Gateway Specification section 7.1, the authorization header format is 'Authorization: ' where the API key is the complete header value. API keys must not be logged in plaintext per section 7.2.", + "minLength": 1 + }, + "domain": { + "oneOf": [ + { + "type": "string", + "enum": [ + "localhost", + "host.docker.internal" + ] + }, + { + "type": "string", + "pattern": "^\\$\\{[A-Z_][A-Z0-9_]*\\}$" + } + ], + "description": "Gateway domain for constructing URLs. Use 'localhost' for local development or 'host.docker.internal' when the gateway runs in a container and needs to access the host. Can also be a variable expression like '${MCP_GATEWAY_DOMAIN}'." + }, + "startupTimeout": { + "type": "integer", + "description": "Server startup timeout in seconds. The gateway enforces this timeout when initializing containerized stdio servers.", + "minimum": 1, + "default": 30 + }, + "toolTimeout": { + "type": "integer", + "description": "Tool invocation timeout in seconds. The gateway enforces this timeout for individual tool/method calls to MCP servers.", + "minimum": 1, + "default": 60 + }, + "payloadDir": { + "type": "string", + "description": "Directory path for storing large payload JSON files for authenticated clients. MUST be an absolute path: Unix paths start with '/', Windows paths start with a drive letter followed by ':\\'. Relative paths, empty strings, and paths that don't follow these conventions are not allowed.", + "minLength": 1, + "pattern": "^(/|[A-Za-z]:\\\\)" + }, + "payloadSizeThreshold": { + "type": "integer", + "description": "Size threshold in bytes for writing payloads to files instead of inlining them in the response. Payloads larger than this threshold are written to files in payloadDir. 
Defaults to 524288 (512KB) if not specified.", + "minimum": 1 + }, + "payloadPathPrefix": { + "type": "string", + "description": "Optional path prefix for payload file paths as seen from within agent containers. Use this when the payload directory is mounted at a different path inside the container than on the host.", + "minLength": 1 + }, + "trustedBots": { + "type": "array", + "description": "Additional trusted bot identity strings passed to the gateway and merged with its built-in internal trusted identity list. This field is additive and cannot remove entries from the gateway's built-in list. Typically GitHub bot usernames such as 'github-actions[bot]' or 'copilot-swe-agent[bot]'.", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1 + }, + "opentelemetry": { + "type": "object", + "description": "OpenTelemetry OTLP tracing configuration (spec \u00a74.1.3.6). When present, endpoint is required and MUST be HTTPS. Enables distributed tracing of MCP tool invocations.", + "properties": { + "endpoint": { + "type": "string", + "description": "OTLP/HTTP collector URL. MUST use HTTPS (e.g., \"https://otel-collector.example.com\"). Supports ${VAR} expansion.", + "minLength": 1, + "pattern": "^(https://.+|\\$\\{[A-Za-z_][A-Za-z0-9_]*\\})$" + }, + "headers": { + "type": "object", + "description": "HTTP headers sent with every OTLP export request (e.g. authentication tokens). Values support ${VAR} expansion.", + "additionalProperties": { + "type": "string" + } + }, + "traceId": { + "type": "string", + "description": "Parent trace ID in W3C format (32-char lowercase hex). Used to link gateway spans into a pre-existing trace. Supports ${VAR} expansion.", + "pattern": "^([0-9a-f]{32}|\\$\\{[A-Za-z_][A-Za-z0-9_]*\\})$" + }, + "spanId": { + "type": "string", + "description": "Parent span ID in W3C format (16-char lowercase hex). Paired with traceId to construct the traceparent header. Ignored when traceId is absent. 
Supports ${VAR} expansion.", + "pattern": "^([0-9a-f]{16}|\\$\\{[A-Za-z_][A-Za-z0-9_]*\\})$" + }, + "serviceName": { + "type": "string", + "description": "service.name resource attribute for all emitted spans. Defaults to \"mcp-gateway\".", + "minLength": 1 + } + }, + "required": [ + "endpoint" + ], + "additionalProperties": false + } + }, + "required": [ + "port", + "domain", + "apiKey" + ], + "additionalProperties": false } - }, - "required": ["type"], - "additionalProperties": true }, - "gatewayConfig": { - "type": "object", - "description": "Gateway-specific configuration for the MCP Gateway service.", - "properties": { - "port": { - "oneOf": [ - { - "type": "integer", - "minimum": 1, - "maximum": 65535 + "examples": [ + { + "mcpServers": { + "github": { + "container": "ghcr.io/github/github-mcp-server:latest", + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" + } + } }, - { - "type": "string", - "pattern": "^\\$\\{[A-Z_][A-Z0-9_]*\\}$" + "gateway": { + "port": 8080, + "domain": "localhost", + "apiKey": "gateway-secret-token" } - ], - "description": "HTTP server port for the gateway. The gateway exposes endpoints at http://{domain}:{port}/. Can be an integer (1-65535) or a variable expression like '${MCP_GATEWAY_PORT}'." - }, - "apiKey": { - "type": "string", - "description": "API key for authentication. When configured, clients must include 'Authorization: ' header in all RPC requests (the API key is used directly without Bearer or other scheme prefix). Per MCP Gateway Specification section 7.1, the authorization header format is 'Authorization: ' where the API key is the complete header value. 
API keys must not be logged in plaintext per section 7.2.", - "minLength": 1 }, - "domain": { - "oneOf": [ - { - "type": "string", - "enum": ["localhost", "host.docker.internal"] + { + "mcpServers": { + "data-server": { + "container": "ghcr.io/example/data-mcp:latest", + "entrypoint": "/custom/entrypoint.sh", + "entrypointArgs": [ + "--config", + "/app/config.json" + ], + "mounts": [ + "/host/data:/container/data:ro", + "/host/config:/container/config:rw" + ], + "type": "stdio" + } }, - { - "type": "string", - "pattern": "^\\$\\{[A-Z_][A-Z0-9_]*\\}$" + "gateway": { + "port": 8080, + "domain": "localhost", + "startupTimeout": 60, + "toolTimeout": 120 } - ], - "description": "Gateway domain for constructing URLs. Use 'localhost' for local development or 'host.docker.internal' when the gateway runs in a container and needs to access the host. Can also be a variable expression like '${MCP_GATEWAY_DOMAIN}'." - }, - "startupTimeout": { - "type": "integer", - "description": "Server startup timeout in seconds. The gateway enforces this timeout when initializing containerized stdio servers.", - "minimum": 1, - "default": 30 - }, - "toolTimeout": { - "type": "integer", - "description": "Tool invocation timeout in seconds. The gateway enforces this timeout for individual tool/method calls to MCP servers.", - "minimum": 1, - "default": 60 - }, - "payloadDir": { - "type": "string", - "description": "Directory path for storing large payload JSON files for authenticated clients. MUST be an absolute path: Unix paths start with '/', Windows paths start with a drive letter followed by ':\\'. Relative paths, empty strings, and paths that don't follow these conventions are not allowed.", - "minLength": 1, - "pattern": "^(/|[A-Za-z]:\\\\)" }, - "payloadSizeThreshold": { - "type": "integer", - "description": "Size threshold in bytes for writing payloads to files instead of inlining them in the response. Payloads larger than this threshold are written to files in payloadDir. 
Defaults to 524288 (512KB) if not specified.", - "minimum": 1 - }, - "payloadPathPrefix": { - "type": "string", - "description": "Optional path prefix for payload file paths as seen from within agent containers. Use this when the payload directory is mounted at a different path inside the container than on the host.", - "minLength": 1 - }, - "trustedBots": { - "type": "array", - "description": "Additional trusted bot identity strings passed to the gateway and merged with its built-in internal trusted identity list. This field is additive and cannot remove entries from the gateway's built-in list. Typically GitHub bot usernames such as 'github-actions[bot]' or 'copilot-swe-agent[bot]'.", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1 - } - }, - "required": ["port", "domain", "apiKey"], - "additionalProperties": false - } - }, - "examples": [ - { - "mcpServers": { - "github": { - "container": "ghcr.io/github/github-mcp-server:latest", - "env": { - "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}" - } - } - }, - "gateway": { - "port": 8080, - "domain": "localhost", - "apiKey": "gateway-secret-token" - } - }, - { - "mcpServers": { - "data-server": { - "container": "ghcr.io/example/data-mcp:latest", - "entrypoint": "/custom/entrypoint.sh", - "entrypointArgs": ["--config", "/app/config.json"], - "mounts": ["/host/data:/container/data:ro", "/host/config:/container/config:rw"], - "type": "stdio" - } - }, - "gateway": { - "port": 8080, - "domain": "localhost", - "startupTimeout": 60, - "toolTimeout": 120 - } - }, - { - "mcpServers": { - "local-server": { - "container": "ghcr.io/example/python-mcp:latest", - "entrypointArgs": ["--config", "/app/config.json"], - "type": "stdio" + { + "mcpServers": { + "local-server": { + "container": "ghcr.io/example/python-mcp:latest", + "entrypointArgs": [ + "--config", + "/app/config.json" + ], + "type": "stdio" + }, + "remote-server": { + "type": "http", + "url": "https://api.example.com/mcp", + "headers": { + 
"Authorization": "Bearer ${API_TOKEN}" + } + } + }, + "gateway": { + "port": 8080, + "domain": "localhost", + "apiKey": "gateway-secret-token" + } }, - "remote-server": { - "type": "http", - "url": "https://api.example.com/mcp", - "headers": { - "Authorization": "Bearer ${API_TOKEN}" - } - } - }, - "gateway": { - "port": 8080, - "domain": "localhost", - "apiKey": "gateway-secret-token" - } - }, - { - "mcpServers": { - "mcp-scripts-server": { - "type": "safeinputs", - "tools": { - "greet-user": { - "description": "Greet a user by name", - "inputs": { - "name": { - "type": "string", - "required": true + { + "mcpServers": { + "mcp-scripts-server": { + "type": "safeinputs", + "tools": { + "greet-user": { + "description": "Greet a user by name", + "inputs": { + "name": { + "type": "string", + "required": true + } + }, + "script": "return { message: `Hello, ${name}!` };" + } + } } - }, - "script": "return { message: `Hello, ${name}!` };" + }, + "gateway": { + "port": 8080, + "domain": "localhost", + "apiKey": "gateway-secret-token" + }, + "customSchemas": { + "safeinputs": "https://docs.github.com/gh-aw/schemas/mcp-scripts-config.schema.json" } - } } - }, - "gateway": { - "port": 8080, - "domain": "localhost", - "apiKey": "gateway-secret-token" - }, - "customSchemas": { - "safeinputs": "https://docs.github.com/gh-aw/schemas/mcp-scripts-config.schema.json" - } - } - ] + ] } diff --git a/internal/config/validation.go b/internal/config/validation.go index 36439fb1..c0b137c2 100644 --- a/internal/config/validation.go +++ b/internal/config/validation.go @@ -18,6 +18,15 @@ type ValidationError = rules.ValidationError // Variable expression pattern: ${VARIABLE_NAME} var varExprPattern = regexp.MustCompile(`\$\{([A-Za-z_][A-Za-z0-9_]*)\}`) +// W3C trace context patterns (spec §4.1.3.6) +var ( + traceIDPattern = regexp.MustCompile(`^[0-9a-f]{32}$`) + spanIDPattern = regexp.MustCompile(`^[0-9a-f]{16}$`) + // W3C Trace Context forbids all-zero trace/span IDs. 
+ allZeroTraceID = regexp.MustCompile(`^0{32}$`) + allZeroSpanID = regexp.MustCompile(`^0{16}$`) +) + var logValidation = logger.New("config:validation") // logValidateServerStart logs the beginning of server config validation. @@ -108,6 +117,49 @@ func expandEnvVariables(env map[string]string, serverName string) (map[string]st return result, nil } +// expandTracingVariables expands ${VAR} expressions in TracingConfig fields. +// This is called for TOML-loaded configs before validation, mirroring the +// stdin JSON path where ExpandRawJSONVariables handles expansion. +func expandTracingVariables(cfg *TracingConfig) error { + if cfg == nil { + return nil + } + + if cfg.Endpoint != "" { + expanded, err := expandVariables(cfg.Endpoint, "gateway.opentelemetry.endpoint") + if err != nil { + return err + } + cfg.Endpoint = expanded + } + + if cfg.TraceID != "" { + expanded, err := expandVariables(cfg.TraceID, "gateway.opentelemetry.traceId") + if err != nil { + return err + } + cfg.TraceID = expanded + } + + if cfg.SpanID != "" { + expanded, err := expandVariables(cfg.SpanID, "gateway.opentelemetry.spanId") + if err != nil { + return err + } + cfg.SpanID = expanded + } + + for key, value := range cfg.Headers { + expanded, err := expandVariables(value, fmt.Sprintf("gateway.opentelemetry.headers.%s", key)) + if err != nil { + return err + } + cfg.Headers[key] = expanded + } + + return nil +} + // validateMounts validates mount specifications using centralized rules func validateMounts(mounts []string, jsonPath string) error { for i, mount := range mounts { @@ -417,6 +469,20 @@ func validateGatewayConfig(gateway *StdinGatewayConfig) error { return err } + // Validate OpenTelemetry config per spec §4.1.3.6 when present + if gateway.OpenTelemetry != nil { + tracingCfg := &TracingConfig{ + Endpoint: gateway.OpenTelemetry.Endpoint, + Headers: gateway.OpenTelemetry.Headers, + TraceID: gateway.OpenTelemetry.TraceID, + SpanID: gateway.OpenTelemetry.SpanID, + ServiceName: 
gateway.OpenTelemetry.ServiceName, + } + if err := validateOpenTelemetryConfig(tracingCfg, true); err != nil { + return err + } + } + logValidation.Print("Gateway config validation passed") return nil } @@ -463,3 +529,76 @@ func validateTOMLStdioContainerization(servers map[string]*ServerConfig) error { logValidation.Print("TOML stdio containerization validation passed") return nil } + +// validateOpenTelemetryConfig validates OpenTelemetry configuration per spec §4.1.3.6. +// When enforceHTTPS is true (i.e. the config came from the opentelemetry section), +// the endpoint is required and MUST use HTTPS. +// traceId and spanId are validated as W3C hex strings when they contain no unexpanded ${VAR}. +func validateOpenTelemetryConfig(cfg *TracingConfig, enforceHTTPS bool) error { + if cfg == nil { + return nil + } + + logValidation.Print("Validating OpenTelemetry configuration (spec §4.1.3.6)") + + // endpoint is required when opentelemetry section is present + if enforceHTTPS && cfg.Endpoint == "" { + return rules.MissingRequired("endpoint", "opentelemetry", "gateway.opentelemetry.endpoint", + "Provide an HTTPS OTLP endpoint (e.g., \"https://otel-collector.example.com\")") + } + + // endpoint MUST be HTTPS (spec §4.1.3.6) + if enforceHTTPS && cfg.Endpoint != "" { + if !strings.HasPrefix(cfg.Endpoint, "https://") { + logValidation.Printf("Non-HTTPS endpoint in opentelemetry config: %s", cfg.Endpoint) + return rules.InvalidValue("endpoint", + fmt.Sprintf("opentelemetry endpoint must use HTTPS, got '%s'", cfg.Endpoint), + "gateway.opentelemetry.endpoint", + "Use an HTTPS URL (e.g., \"https://otel-collector.example.com\")") + } + } + + // Validate traceId: must be a 32-char lowercase hex string, not all-zero + if cfg.TraceID != "" { + if !traceIDPattern.MatchString(cfg.TraceID) { + logValidation.Printf("Invalid traceId format: %s", cfg.TraceID) + return rules.InvalidValue("traceId", + fmt.Sprintf("traceId must be a 32-character lowercase hexadecimal string, got '%s'", 
cfg.TraceID), + "gateway.opentelemetry.traceId", + "Provide a valid W3C trace ID (32 lowercase hex chars, e.g., \"4bf92f3577b34da6a3ce929d0e0e4736\")") + } + if allZeroTraceID.MatchString(cfg.TraceID) { + logValidation.Printf("All-zero traceId rejected per W3C Trace Context: %s", cfg.TraceID) + return rules.InvalidValue("traceId", + "traceId must not be all zeros (W3C Trace Context forbids an all-zero trace-id)", + "gateway.opentelemetry.traceId", + "Provide a non-zero W3C trace ID (e.g., \"4bf92f3577b34da6a3ce929d0e0e4736\")") + } + } + + // Validate spanId: must be a 16-char lowercase hex string, not all-zero + if cfg.SpanID != "" { + if !spanIDPattern.MatchString(cfg.SpanID) { + logValidation.Printf("Invalid spanId format: %s", cfg.SpanID) + return rules.InvalidValue("spanId", + fmt.Sprintf("spanId must be a 16-character lowercase hexadecimal string, got '%s'", cfg.SpanID), + "gateway.opentelemetry.spanId", + "Provide a valid W3C span ID (16 lowercase hex chars, e.g., \"00f067aa0ba902b7\")") + } + if allZeroSpanID.MatchString(cfg.SpanID) { + logValidation.Printf("All-zero spanId rejected per W3C Trace Context: %s", cfg.SpanID) + return rules.InvalidValue("spanId", + "spanId must not be all zeros (W3C Trace Context forbids an all-zero span-id)", + "gateway.opentelemetry.spanId", + "Provide a non-zero W3C span ID (e.g., \"00f067aa0ba902b7\")") + } + } + + // spanId without traceId is meaningless — log a warning but do not fail + if cfg.SpanID != "" && cfg.TraceID == "" { + logValidation.Print("Warning: opentelemetry spanId is set without traceId; spanId will be ignored") + } + + logValidation.Print("OpenTelemetry config validation passed") + return nil +} diff --git a/internal/server/unified.go b/internal/server/unified.go index a34b316f..a1fbb5b8 100644 --- a/internal/server/unified.go +++ b/internal/server/unified.go @@ -378,14 +378,22 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName logUnified.Printf("callBackendTool: 
serverID=%s, toolName=%s, args=%+v", serverID, toolName, args) // Start an OTEL span for the full tool call lifecycle (spans all phases 0–6) - ctx, toolSpan := tracing.Tracer().Start(ctx, "gateway.tool_call", + // Attribute names follow MCP Gateway Specification §4.1.3.6 + ctx, toolSpan := tracing.Tracer().Start(ctx, "mcp.tool_call", oteltrace.WithAttributes( - attribute.String("tool.name", toolName), - attribute.String("server.id", serverID), + attribute.String("mcp.server", serverID), + attribute.String("mcp.method", "tools/call"), + attribute.String("mcp.tool", toolName), ), oteltrace.WithSpanKind(oteltrace.SpanKindInternal), ) - defer toolSpan.End() + // httpStatusCode tracks the conceptual HTTP status of the proxied response (spec §4.1.3.6). + // It starts at 200 and is updated to 500 (error) or 403 (access denied) before each exit. + httpStatusCode := 200 + defer func() { + toolSpan.SetAttributes(attribute.Int("http.status_code", httpStatusCode)) + toolSpan.End() + }() // Get guard for this backend g := us.guardRegistry.Get(serverID) @@ -401,6 +409,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName // Initialize policy-driven guard session state (label_agent) before first guarded call. 
enforcementMode, err := us.ensureGuardInitialized(ctx, sessionID, serverID, g, backendCaller) if err != nil { + httpStatusCode = 500 return newErrorCallToolResult(fmt.Errorf("guard session initialization failed: %w", err)) } @@ -427,6 +436,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName resource, operation, err := g.LabelResource(ctx, toolName, args, backendCaller, us.capabilities) if err != nil { log.Printf("[DIFC] Guard labeling failed: %v", err) + httpStatusCode = 500 return newErrorCallToolResult(fmt.Errorf("guard labeling failed: %w", err)) } @@ -452,6 +462,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName detailedErr := difc.FormatViolationError(result, agentLabels.Secrecy, agentLabels.Integrity, resource) toolSpan.RecordError(detailedErr) toolSpan.SetStatus(codes.Error, "access denied: "+result.Reason) + httpStatusCode = 403 return &sdk.CallToolResult{ Content: []sdk.Content{ &sdk.TextContent{ @@ -478,6 +489,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName if err != nil { execSpan.RecordError(err) execSpan.SetStatus(codes.Error, err.Error()) + httpStatusCode = 500 return newErrorCallToolResult(err) } @@ -493,6 +505,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName labeledData, err = g.LabelResponse(ctx, toolName, backendResult, backendCaller, us.capabilities) if err != nil { log.Printf("[DIFC] Response labeling failed: %v", err) + httpStatusCode = 500 return newErrorCallToolResult(fmt.Errorf("response labeling failed: %w", err)) } } else { @@ -517,6 +530,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName filtered.GetFilteredCount(), filtered.TotalCount) blockErr := fmt.Errorf("DIFC policy violation: %d of %d items in response are not accessible to agent %s", filtered.GetFilteredCount(), filtered.TotalCount, agentID) + httpStatusCode = 403 return &sdk.CallToolResult{ Content: 
[]sdk.Content{ &sdk.TextContent{ @@ -536,12 +550,14 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName // Convert filtered data to result finalResult, err = filtered.ToResult() if err != nil { + httpStatusCode = 500 return newErrorCallToolResult(fmt.Errorf("failed to convert filtered data: %w", err)) } } else { // Simple labeled data - already passed coarse-grained check finalResult, err = labeledData.ToResult() if err != nil { + httpStatusCode = 500 return newErrorCallToolResult(fmt.Errorf("failed to convert labeled data: %w", err)) } } @@ -570,6 +586,7 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName // Convert finalResult to SDK CallToolResult format callResult, err := mcp.ConvertToCallToolResult(finalResult) if err != nil { + httpStatusCode = 500 return newErrorCallToolResult(fmt.Errorf("failed to convert result: %w", err)) } diff --git a/internal/tracing/provider.go b/internal/tracing/provider.go index 7152dfd6..14ea2aa9 100644 --- a/internal/tracing/provider.go +++ b/internal/tracing/provider.go @@ -19,6 +19,8 @@ package tracing import ( "context" + "crypto/rand" + "encoding/hex" "fmt" "time" @@ -91,6 +93,78 @@ func resolveSampleRate(cfg *config.TracingConfig) float64 { return config.DefaultTracingSampleRate } +// resolveHeaders returns the configured OTLP export headers (or nil). +func resolveHeaders(cfg *config.TracingConfig) map[string]string { + if cfg != nil && len(cfg.Headers) > 0 { + return cfg.Headers + } + return nil +} + +// resolveParentContext builds a context carrying the W3C remote parent span context +// from the configured traceId and spanId (spec §4.1.3.6). +// If traceId is absent, or either ID is malformed, the original context is returned unchanged. +// A missing spanId is replaced with a random span ID so the traceparent is still valid. 
+func resolveParentContext(ctx context.Context, cfg *config.TracingConfig) context.Context { + if cfg == nil || cfg.TraceID == "" { + return ctx + } + + traceIDBytes, err := hex.DecodeString(cfg.TraceID) + if err != nil || len(traceIDBytes) != 16 { + logTracing.Printf("Warning: invalid traceId '%s'; skipping W3C parent context", cfg.TraceID) + return ctx + } + var traceID trace.TraceID + copy(traceID[:], traceIDBytes) + + var spanID trace.SpanID + if cfg.SpanID != "" { + spanIDBytes, err := hex.DecodeString(cfg.SpanID) + if err != nil || len(spanIDBytes) != 8 { + logTracing.Printf("Warning: invalid spanId '%s'; generating a random span ID", cfg.SpanID) + // Fall through to generate a random span ID below + } else { + copy(spanID[:], spanIDBytes) + } + } + + // When spanId is all-zeros (absent or invalid), generate a random span ID. + // A valid SpanContext requires a non-zero SpanID (W3C Trace Context spec). + // T-OTEL-008: when only traceId is provided, a random spanId is generated. + if spanID == (trace.SpanID{}) { + generatedID, genErr := generateRandomSpanID() + if genErr != nil { + logTracing.Printf("Warning: failed to generate random span ID: %v; skipping W3C parent context", genErr) + return ctx + } + spanID = generatedID + logTracing.Printf("Generated random spanId for W3C parent context") + } + + sc := trace.NewSpanContext(trace.SpanContextConfig{ + TraceID: traceID, + SpanID: spanID, + TraceFlags: trace.FlagsSampled, + Remote: true, + }) + if !sc.IsValid() { + logTracing.Printf("Warning: constructed parent SpanContext is not valid; skipping W3C parent context") + return ctx + } + logTracing.Printf("W3C parent context resolved: traceId=%s, spanId=%s", traceID, spanID) + return trace.ContextWithRemoteSpanContext(ctx, sc) +} + +// generateRandomSpanID creates a cryptographically random 8-byte span ID. 
+func generateRandomSpanID() (trace.SpanID, error) { + var id trace.SpanID + if _, err := rand.Read(id[:]); err != nil { + return id, fmt.Errorf("failed to generate random span ID: %w", err) + } + return id, nil +} + // registerPropagator installs the global W3C TraceContext + Baggage propagator. // This enables incoming traceparent/tracestate headers to be extracted so that // agent-initiated traces are continued rather than fragmented. @@ -132,11 +206,20 @@ func InitProvider(ctx context.Context, cfg *config.TracingConfig) (*Provider, er logTracing.Printf("Initializing OTLP tracing: endpoint=%s, service=%s, sampleRate=%.2f", endpoint, serviceName, sampleRate) - // Build OTLP HTTP exporter with 10s timeout - exporter, err := otlptracehttp.New(ctx, + // Build OTLP HTTP exporter options + exporterOpts := []otlptracehttp.Option{ otlptracehttp.WithEndpointURL(endpoint), - otlptracehttp.WithTimeout(10*time.Second), - ) + otlptracehttp.WithTimeout(10 * time.Second), + } + + // Apply configured headers (spec §4.1.3.6: headers sent with every OTLP export request) + if headers := resolveHeaders(cfg); headers != nil { + logTracing.Printf("Applying %d OTLP export header(s)", len(headers)) + exporterOpts = append(exporterOpts, otlptracehttp.WithHeaders(headers)) + } + + // Build OTLP HTTP exporter with 10s timeout + exporter, err := otlptracehttp.New(ctx, exporterOpts...) if err != nil { return nil, fmt.Errorf("failed to create OTLP trace exporter: %w", err) } @@ -191,3 +274,10 @@ func InitProvider(ctx context.Context, cfg *config.TracingConfig) (*Provider, er func Tracer() trace.Tracer { return otel.Tracer(instrumentationName) } + +// ParentContext returns a context carrying the W3C remote parent span context +// from the configured traceId and spanId (spec §4.1.3.6). +// Exported for use at startup to build the root span's parent context. 
+func ParentContext(ctx context.Context, cfg *config.TracingConfig) context.Context { + return resolveParentContext(ctx, cfg) +} diff --git a/internal/tracing/provider_test.go b/internal/tracing/provider_test.go index a4a72b45..f29f6f04 100644 --- a/internal/tracing/provider_test.go +++ b/internal/tracing/provider_test.go @@ -2,6 +2,7 @@ package tracing_test import ( "context" + "fmt" "net/http" "net/http/httptest" "testing" @@ -311,3 +312,145 @@ func TestWrapHTTPHandler_GeneratesRootSpan(t *testing.T) { assert.True(t, capturedSpanCtx.IsValid(), "should have a valid span context even without traceparent") assert.False(t, capturedSpanCtx.IsRemote(), "span should not be marked remote — it is a local root span") } + +// TestInitProvider_WithHeaders verifies that OTLP export headers are forwarded +// to the collector. A channel synchronises with the test HTTP server so the +// assertion is deterministic rather than timing-dependent. +func TestInitProvider_WithHeaders(t *testing.T) { + ctx := context.Background() + + // Channel signals when the test server receives an export request. + received := make(chan string, 1) + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + select { + case received <- r.Header.Get("Authorization"): + default: + } + w.WriteHeader(http.StatusOK) + })) + defer ts.Close() + + cfg := &config.TracingConfig{ + Endpoint: ts.URL, + Headers: map[string]string{"Authorization": "Bearer test-token"}, + } + + provider, err := tracing.InitProvider(ctx, cfg) + require.NoError(t, err) + require.NotNil(t, provider) + + // Create and end a span to trigger an export attempt. + tr := provider.Tracer() + _, span := tr.Start(ctx, "header-test-span") + span.End() + + // Shutdown flushes the batch processor, ensuring the export is sent. + shutdownCtx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + _ = provider.Shutdown(shutdownCtx) + + // Wait for the export request with a timeout. 
+ select { + case auth := <-received: + assert.Equal(t, "Bearer test-token", auth, + "Authorization header must be forwarded to the OTLP collector") + case <-time.After(3 * time.Second): + t.Fatal("timed out waiting for OTLP export request — headers test is non-deterministic") + } +} + +// TestParentContext_WithValidTraceIDAndSpanID verifies that ParentContext builds a valid +// remote span context when both traceId and spanId are provided. +func TestParentContext_WithValidTraceIDAndSpanID(t *testing.T) { + ctx := context.Background() + + // Initialize noop provider to set up the W3C propagator globally + provider, err := tracing.InitProvider(ctx, nil) + require.NoError(t, err) + defer provider.Shutdown(ctx) + + cfg := &config.TracingConfig{ + Endpoint: "https://example.com", + TraceID: "4bf92f3577b34da6a3ce929d0e0e4736", + SpanID: "00f067aa0ba902b7", + } + + parentCtx := tracing.ParentContext(ctx, cfg) + + // The context must be enriched (different from background context) + assert.NotEqual(t, ctx, parentCtx, "ParentContext must return an enriched context") + + // Verify the remote span context contains the correct traceId and spanId + // by extracting it from the context and checking via propagation round-trip. + prop := otel.GetTextMapPropagator() + carrier := propagation.MapCarrier{} + prop.Inject(parentCtx, carrier) + traceparent := carrier["traceparent"] + require.NotEmpty(t, traceparent, "W3C traceparent must be present after injection") + + // traceparent format: 00-{traceId}-{spanId}-{flags} + assert.Contains(t, traceparent, "4bf92f3577b34da6a3ce929d0e0e4736", + "traceparent must contain the configured traceId") + assert.Contains(t, traceparent, "00f067aa0ba902b7", + "traceparent must contain the configured spanId") +} + +// TestParentContext_WithTraceIDOnly verifies that ParentContext works when only traceId is provided. 
+func TestParentContext_WithTraceIDOnly(t *testing.T) {
+	// Only the trace ID is configured; SpanID is deliberately left unset.
+	traceOnly := &config.TracingConfig{
+		TraceID: "4bf92f3577b34da6a3ce929d0e0e4736",
+	}
+
+	base := context.Background()
+	derived := tracing.ParentContext(base, traceOnly)
+	// The returned context is expected to differ from the one passed in.
+	const msg = "ParentContext with traceId only must return an enriched context"
+	assert.NotEqual(t, base, derived, msg)
+}
+
+// TestParentContext_NoConfig checks that a nil config leaves the supplied context unchanged.
+func TestParentContext_NoConfig(t *testing.T) {
+	base := context.Background()
+	const msg = "ParentContext with nil config must return the original context unchanged"
+	assert.Equal(t, base, tracing.ParentContext(base, nil), msg)
+}
+
+// TestParentContext_EmptyTraceID checks that a spanId without a traceId leaves the context unchanged.
+func TestParentContext_EmptyTraceID(t *testing.T) {
+	spanOnly := &config.TracingConfig{
+		SpanID: "00f067aa0ba902b7", // TraceID deliberately not set
+	}
+	base := context.Background()
+	derived := tracing.ParentContext(base, spanOnly)
+	assert.Equal(t, base, derived, "ParentContext without traceId must return the original context unchanged")
+}
+
+// TestParentContext_InvalidTraceID checks that a malformed traceId leaves the context unchanged.
+func TestParentContext_InvalidTraceID(t *testing.T) {
+	malformed := &config.TracingConfig{
+		TraceID: "not-valid-hex",
+	}
+	base := context.Background()
+	derived := tracing.ParentContext(base, malformed)
+	assert.Equal(t, base, derived, "ParentContext with invalid traceId must return original context")
+}
+
+// TestInitProvider_WithTraceIDAndSpanID verifies that InitProvider succeeds with traceId/spanId config.
+func TestInitProvider_WithTraceIDAndSpanID(t *testing.T) {
+	endpoint := fmt.Sprintf("http://localhost:%d", 14320)
+	tracingCfg := &config.TracingConfig{
+		Endpoint: endpoint,
+		TraceID:  "4bf92f3577b34da6a3ce929d0e0e4736",
+		SpanID:   "00f067aa0ba902b7",
+	}
+
+	base := context.Background()
+	p, err := tracing.InitProvider(base, tracingCfg)
+	require.NoError(t, err)
+	require.NotNil(t, p)
+	// Best-effort shutdown bounded to 100ms; the returned error is intentionally discarded.
+	stopCtx, stop := context.WithTimeout(base, 100*time.Millisecond)
+	defer stop()
+	_ = p.Shutdown(stopCtx)
+}