diff --git a/go.mod b/go.mod index 847fde3a..0c9db388 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( github.com/BurntSushi/toml v1.6.0 github.com/modelcontextprotocol/go-sdk v1.4.1 github.com/spf13/cobra v1.10.2 - golang.org/x/term v0.38.0 + golang.org/x/term v0.41.0 ) require ( @@ -14,11 +14,21 @@ require ( github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 github.com/stretchr/testify v1.11.1 github.com/tetratelabs/wazero v1.11.0 + go.opentelemetry.io/otel v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 + go.opentelemetry.io/otel/sdk v1.43.0 + go.opentelemetry.io/otel/trace v1.43.0 ) require ( + github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/google/jsonschema-go v0.4.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/itchyny/timefmt-go v0.1.7 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect @@ -26,7 +36,17 @@ require ( github.com/segmentio/encoding v0.5.4 // indirect github.com/spf13/pflag v1.0.9 // indirect github.com/yosida95/uritemplate/v3 v3.0.2 // indirect - golang.org/x/oauth2 v0.34.0 // indirect - golang.org/x/sys v0.40.0 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect + go.opentelemetry.io/otel/metric v1.43.0 // indirect + go.opentelemetry.io/proto/otlp v1.10.0 // indirect + golang.org/x/net v0.52.0 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.42.0 // indirect + golang.org/x/text v0.35.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/genproto/googleapis/rpc 
v0.0.0-20260401024825-9d38bb4040a9 // indirect + google.golang.org/grpc v1.80.0 // indirect + google.golang.org/protobuf v1.36.11 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 3bc3cca1..da2d956b 100644 --- a/go.sum +++ b/go.sum @@ -1,24 +1,45 @@ github.com/BurntSushi/toml v1.6.0 h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= +github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod 
h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/jsonschema-go v0.4.2 h1:tmrUohrwoLZZS/P3x7ex0WAVknEkBZM46iALbcqoRA8= github.com/google/jsonschema-go v0.4.2/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/itchyny/gojq v0.12.18 h1:gFGHyt/MLbG9n6dqnvlliiya2TaMMh6FFaR2b1H6Drc= github.com/itchyny/gojq v0.12.18/go.mod h1:4hPoZ/3lN9fDL1D+aK7DY1f39XZpY9+1Xpjz8atrEkg= github.com/itchyny/timefmt-go v0.1.7 h1:xyftit9Tbw+Dc/huSSPJaEmX1TVL8lw5vxjJLK4GMMA= github.com/itchyny/timefmt-go v0.1.7/go.mod h1:5E46Q+zj7vbTgWY8o5YkMeYb4I6GeWLFnetPy5oBrAI= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/modelcontextprotocol/go-sdk v1.4.1 h1:M4x9GyIPj+HoIlHNGpK2hq5o3BFhC+78PkEaldQRphc= github.com/modelcontextprotocol/go-sdk v1.4.1/go.mod h1:Bo/mS87hPQqHSRkMv4dQq1XCu6zv4INdXnFZabkNU6s= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod 
h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4= github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY= @@ -36,16 +57,51 @@ github.com/tetratelabs/wazero v1.11.0 h1:+gKemEuKCTevU4d7ZTzlsvgd1uaToIDtlQlmNbw github.com/tetratelabs/wazero v1.11.0/go.mod h1:eV28rsN8Q+xwjogd7f4/Pp4xFxO7uOGbLcD/LzB1wiU= github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/otel v1.43.0 h1:mYIM03dnh5zfN7HautFE4ieIig9amkNANT+xcVxAj9I= +go.opentelemetry.io/otel v1.43.0/go.mod h1:JuG+u74mvjvcm8vj8pI5XiHy1zDeoCS2LB1spIq7Ay0= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bTWkw0ICGcOLCAI5l6zsD1j20k= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 h1:3iZJKlCZufyRzPzlQhUIWVmfltrXuGyfjREgGP3UUjc= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0/go.mod h1:/G+nUPfhq2e+qiXMGxMwumDrP5jtzU+mWN7/sjT2rak= +go.opentelemetry.io/otel/metric v1.43.0 h1:d7638QeInOnuwOONPp4JAOGfbCEpYb+K6DVWvdxGzgM= +go.opentelemetry.io/otel/metric v1.43.0/go.mod h1:RDnPtIxvqlgO8GRW18W6Z/4P462ldprJtfxHxyKd2PY= +go.opentelemetry.io/otel/sdk v1.43.0 h1:pi5mE86i5rTeLXqoF/hhiBtUNcrAGHLKQdhg4h4V9Dg= +go.opentelemetry.io/otel/sdk v1.43.0/go.mod h1:P+IkVU3iWukmiit/Yf9AWvpyRDlUeBaRg6Y+C58QHzg= +go.opentelemetry.io/otel/sdk/metric v1.43.0 h1:S88dyqXjJkuBNLeMcVPRFXpRw2fuwdvfCGLEo89fDkw= 
+go.opentelemetry.io/otel/sdk/metric v1.43.0/go.mod h1:C/RJtwSEJ5hzTiUz5pXF1kILHStzb9zFlIEe85bhj6A= +go.opentelemetry.io/otel/trace v1.43.0 h1:BkNrHpup+4k4w+ZZ86CZoHHEkohws8AY+WTX09nk+3A= +go.opentelemetry.io/otel/trace v1.43.0/go.mod h1:/QJhyVBUUswCphDVxq+8mld+AvhXZLhe+8WVFxiFff0= +go.opentelemetry.io/proto/otlp v1.10.0 h1:IQRWgT5srOCYfiWnpqUYz9CVmbO8bFmKcwYxpuCSL2g= +go.opentelemetry.io/proto/otlp v1.10.0/go.mod h1:/CV4QoCR/S9yaPj8utp3lvQPoqMtxXdzn7ozvvozVqk= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= -golang.org/x/oauth2 v0.34.0 h1:hqK/t4AKgbqWkdkcAeI8XLmbK+4m4G5YeQRrmiotGlw= -golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= -golang.org/x/sys v0.40.0 h1:DBZZqJ2Rkml6QMQsZywtnjnnGvHza6BTfYFWY9kjEWQ= -golang.org/x/sys v0.40.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= -golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= -golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc= -golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= +golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= +golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= 
+golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= +golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= +golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= +gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= +google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= +google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:4Hqkh8ycfw05ld/3BWL7rJOSfebL2Q+DVDeRgYgxUU8= +google.golang.org/grpc v1.80.0 h1:Xr6m2WmWZLETvUNvIUmeD5OAagMw3FiKmMlTdViWsHM= +google.golang.org/grpc v1.80.0/go.mod h1:ho/dLnxwi3EDJA4Zghp7k2Ec1+c2jqup0bFkw07bwF4= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/cmd/flags_tracing.go b/internal/cmd/flags_tracing.go new file mode 100644 index 
00000000..69270fbe --- /dev/null +++ b/internal/cmd/flags_tracing.go @@ -0,0 +1,39 @@ +package cmd + +// Tracing-related flags for OpenTelemetry OTLP trace export. + +import ( + "github.com/github/gh-aw-mcpg/internal/config" + "github.com/github/gh-aw-mcpg/internal/envutil" + "github.com/spf13/cobra" +) + +// Tracing flag variables +var ( + otlpEndpoint string + otlpServiceName string + otlpSampleRate float64 +) + +func init() { + RegisterFlag(func(cmd *cobra.Command) { + cmd.Flags().StringVar(&otlpEndpoint, "otlp-endpoint", getDefaultOTLPEndpoint(), + "OTLP HTTP endpoint for trace export (e.g. http://localhost:4318). Defaults from OTEL_EXPORTER_OTLP_ENDPOINT when set. Tracing is disabled when empty.") + cmd.Flags().StringVar(&otlpServiceName, "otlp-service-name", getDefaultOTLPServiceName(), + "Service name reported in traces. Defaults from OTEL_SERVICE_NAME when set.") + cmd.Flags().Float64Var(&otlpSampleRate, "otlp-sample-rate", config.DefaultTracingSampleRate, + "Fraction of traces to sample and export (0.0–1.0). Default 1.0 samples everything.") + }) +} + +// getDefaultOTLPEndpoint returns the OTLP endpoint, checking OTEL_EXPORTER_OTLP_ENDPOINT +// environment variable first, then falling back to empty (disabled). +func getDefaultOTLPEndpoint() string { + return envutil.GetEnvString("OTEL_EXPORTER_OTLP_ENDPOINT", "") +} + +// getDefaultOTLPServiceName returns the OTLP service name, checking OTEL_SERVICE_NAME +// environment variable first, then falling back to the default. 
+func getDefaultOTLPServiceName() string { + return envutil.GetEnvString("OTEL_SERVICE_NAME", config.DefaultTracingServiceName) +} diff --git a/internal/cmd/proxy.go b/internal/cmd/proxy.go index 0f43ded5..cf3ce5d5 100644 --- a/internal/cmd/proxy.go +++ b/internal/cmd/proxy.go @@ -1,6 +1,7 @@ package cmd import ( + "context" "crypto/tls" "fmt" "log" @@ -10,10 +11,13 @@ import ( "os/signal" "path/filepath" "syscall" + "time" + "github.com/github/gh-aw-mcpg/internal/config" "github.com/github/gh-aw-mcpg/internal/difc" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/github/gh-aw-mcpg/internal/proxy" + "github.com/github/gh-aw-mcpg/internal/tracing" "github.com/spf13/cobra" ) @@ -21,17 +25,20 @@ var logProxyCmd = logger.New("cmd:proxy") // Proxy subcommand flag variables var ( - proxyGuardWasm string - proxyPolicy string - proxyToken string - proxyListen string - proxyLogDir string - proxyDIFCMode string - proxyAPIURL string - proxyTLS bool - proxyTLSDir string - proxyTrustedBots []string - proxyTrustedUsers []string + proxyGuardWasm string + proxyPolicy string + proxyToken string + proxyListen string + proxyLogDir string + proxyDIFCMode string + proxyAPIURL string + proxyTLS bool + proxyTLSDir string + proxyTrustedBots []string + proxyTrustedUsers []string + proxyOTLPEndpoint string + proxyOTLPService string + proxyOTLPSampleRate float64 ) func init() { @@ -104,6 +111,12 @@ Local usage: cmd.Flags().StringVar(&proxyTLSDir, "tls-dir", "", "Directory for TLS certificates (default: /proxy-tls)") cmd.Flags().StringSliceVar(&proxyTrustedBots, "trusted-bots", nil, "Additional trusted bot usernames (comma-separated, extends built-in list)") cmd.Flags().StringSliceVar(&proxyTrustedUsers, "trusted-users", nil, "User logins that receive approved integrity (comma-separated)") + cmd.Flags().StringVar(&proxyOTLPEndpoint, "otlp-endpoint", getDefaultOTLPEndpoint(), + "OTLP HTTP endpoint for trace export (e.g. http://localhost:4318). 
Tracing is disabled when empty.") + cmd.Flags().StringVar(&proxyOTLPService, "otlp-service-name", getDefaultOTLPServiceName(), + "Service name reported in traces.") + cmd.Flags().Float64Var(&proxyOTLPSampleRate, "otlp-sample-rate", config.DefaultTracingSampleRate, + "Fraction of traces to sample and export (0.0–1.0).") // Only require --guard-wasm when no baked-in guard is available if defaultGuard == "" { @@ -137,6 +150,35 @@ func runProxy(cmd *cobra.Command, args []string) error { logger.LogInfo("startup", "MCPG Proxy starting: listen=%s, guard=%s, mode=%s, tls=%v", proxyListen, proxyGuardWasm, proxyDIFCMode, proxyTLS) + // Initialize OpenTelemetry tracer provider for the proxy server. + // When no endpoint is configured, a noop provider is used (zero overhead). + var tracingCfg *config.TracingConfig + if proxyOTLPEndpoint != "" { + tracingCfg = &config.TracingConfig{ + Endpoint: proxyOTLPEndpoint, + ServiceName: proxyOTLPService, + SampleRate: &proxyOTLPSampleRate, + } + } + tracingProvider, err := tracing.InitProvider(ctx, tracingCfg) + if err != nil { + log.Printf("Warning: failed to initialize tracing provider: %v", err) + tracingProvider, _ = tracing.InitProvider(ctx, nil) + } + defer func() { + shutdownCtx, cancelTracing := context.WithTimeout(context.Background(), 5*time.Second) + defer cancelTracing() + if err := tracingProvider.Shutdown(shutdownCtx); err != nil { + log.Printf("Warning: tracing provider shutdown error: %v", err) + } + }() + if tracingCfg != nil { + log.Printf("OpenTelemetry tracing enabled for proxy: endpoint=%s, service=%s", proxyOTLPEndpoint, proxyOTLPService) + logger.LogInfo("startup", "OpenTelemetry tracing enabled for proxy: endpoint=%s, service=%s", proxyOTLPEndpoint, proxyOTLPService) + } else { + log.Printf("OpenTelemetry tracing disabled for proxy (no --otlp-endpoint configured)") + } + // Resolve GitHub token (optional — proxy forwards client auth by default) token := proxyToken if token == "" { diff --git 
a/internal/cmd/root.go b/internal/cmd/root.go
index 3db309b2..9835285d 100644
--- a/internal/cmd/root.go
+++ b/internal/cmd/root.go
@@ -21,6 +21,7 @@ import (
 	"github.com/github/gh-aw-mcpg/internal/difc"
 	"github.com/github/gh-aw-mcpg/internal/logger"
 	"github.com/github/gh-aw-mcpg/internal/server"
+	"github.com/github/gh-aw-mcpg/internal/tracing"
 	"github.com/github/gh-aw-mcpg/internal/version"
 	"github.com/spf13/cobra"
 )
@@ -312,6 +313,80 @@ func run(cmd *cobra.Command, args []string) error {
 		logger.LogInfoMd("startup", "Generated temporary random API key (spec §7.3)")
 	}
 
+	// Apply tracing flags: CLI flags override config values.
+	// Merge CLI/env tracing settings into gateway config.
+	// ensureTracingCfg allocates the gateway and tracing sections on demand so the
+	// merge below never dereferences a nil cfg.Gateway (the code further down
+	// already treats a nil gateway section as possible).
+	ensureTracingCfg := func() *config.TracingConfig {
+		if cfg.Gateway == nil {
+			cfg.Gateway = &config.GatewayConfig{}
+		}
+		if cfg.Gateway.Tracing == nil {
+			cfg.Gateway.Tracing = &config.TracingConfig{}
+		}
+		return cfg.Gateway.Tracing
+	}
+	if otlpEndpoint != "" || cmd.Flags().Changed("otlp-endpoint") {
+		ensureTracingCfg().Endpoint = otlpEndpoint
+	}
+	if cmd.Flags().Changed("otlp-service-name") {
+		ensureTracingCfg().ServiceName = otlpServiceName
+	}
+	if cmd.Flags().Changed("otlp-sample-rate") {
+		ensureTracingCfg().SampleRate = &otlpSampleRate
+	}
+	// A tracing section created above has no service name yet; fall back to the
+	// flag default (OTEL_SERVICE_NAME or the built-in name) so the exporter and
+	// the startup log agree on the reported service.
+	if cfg.Gateway != nil && cfg.Gateway.Tracing != nil && cfg.Gateway.Tracing.ServiceName == "" {
+		cfg.Gateway.Tracing.ServiceName = otlpServiceName
+	}
+
+	// Initialize OpenTelemetry tracer provider.
+	// When no endpoint is configured, a noop provider is used (zero overhead).
+	var tracingCfg *config.TracingConfig
+	if cfg.Gateway != nil {
+		tracingCfg = cfg.Gateway.Tracing
+	}
+	tracingProvider, err := tracing.InitProvider(ctx, tracingCfg)
+	tracingInitFailed := err != nil
+	if tracingInitFailed {
+		log.Printf("Warning: failed to initialize tracing provider: %v", err)
+		logger.LogWarn("startup", "Failed to initialize tracing provider: %v", err)
+		// Non-fatal: continue without tracing
+		tracingProvider, _ = tracing.InitProvider(ctx, nil)
+	}
+	defer func() {
+		shutdownCtxTracing, cancelTracing := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancelTracing()
+		if err := tracingProvider.Shutdown(shutdownCtxTracing); err != nil {
+			log.Printf("Warning: tracing provider shutdown error: %v", err)
+		}
+	}()
+
+	// Skip the status line when initialization failed: the warning above already
+	// explains that the gateway fell back to the noop provider.
+	if !tracingInitFailed && tracingProvider.Tracer() != nil {
+		// Log what InitProvider actually resolved (config already has env var defaults merged via CLI flags)
+		endpoint := ""
+		sampleRate := config.DefaultTracingSampleRate
+		serviceName := config.DefaultTracingServiceName
+		if tracingCfg != nil {
+			endpoint = tracingCfg.Endpoint
+			sampleRate = tracingCfg.GetSampleRate()
+			serviceName = tracingCfg.ServiceName
+		}
+		if endpoint != "" {
+			log.Printf("OpenTelemetry tracing enabled: endpoint=%s, service=%s, sampleRate=%.2f",
+				endpoint, serviceName, sampleRate)
+			logger.LogInfoMd("startup", "OpenTelemetry tracing enabled: endpoint=%s, service=%s",
+				endpoint, serviceName)
+		} else {
+			log.Printf("OpenTelemetry tracing disabled (no OTLP endpoint configured)")
+		}
+	}
+
 	// Create unified MCP server (backend for both modes)
 	unifiedServer, err := server.NewUnified(ctx, cfg)
 	if err != nil {
diff --git a/internal/config/config_core.go b/internal/config/config_core.go
index ead6b046..c9fc0385 100644
--- a/internal/config/config_core.go
+++ b/internal/config/config_core.go
@@ -117,6 +117,11 @@ type GatewayConfig struct {
 	// bot list and is purely additive (it cannot remove built-in trusted bots).
// Example values: "copilot-swe-agent[bot]", "my-org-bot[bot]" TrustedBots []string `toml:"trusted_bots" json:"trusted_bots,omitempty"` + + // Tracing holds OpenTelemetry OTLP tracing configuration. + // When Endpoint is set, traces are exported to the specified OTLP endpoint. + // When omitted or Endpoint is empty, a noop tracer is used (zero overhead). + Tracing *TracingConfig `toml:"tracing" json:"tracing,omitempty"` } // HTTPKeepaliveInterval returns the keepalive interval as a time.Duration. diff --git a/internal/config/config_tracing.go b/internal/config/config_tracing.go new file mode 100644 index 00000000..bf87bcca --- /dev/null +++ b/internal/config/config_tracing.go @@ -0,0 +1,58 @@ +// Package config provides configuration loading and parsing. +// This file defines the tracing configuration for OpenTelemetry OTLP export. +package config + +// DefaultTracingSampleRate is the default sample rate for tracing (100% sampling). +const DefaultTracingSampleRate = 1.0 + +// DefaultTracingServiceName is the default service name for tracing. +const DefaultTracingServiceName = "mcp-gateway" + +// TracingConfig holds OpenTelemetry tracing configuration. +// Tracing is disabled when Endpoint is empty. +// +// Configuration can also be provided via standard OTEL environment variables: +// - OTEL_EXPORTER_OTLP_ENDPOINT — overrides Endpoint +// - OTEL_SERVICE_NAME — overrides ServiceName +// +// Example TOML: +// +// [gateway.tracing] +// endpoint = "http://localhost:4318" +// service_name = "mcp-gateway" +// sample_rate = 1.0 +type TracingConfig struct { + // Endpoint is the OTLP HTTP endpoint to export traces to. + // Example: "http://localhost:4318" (Jaeger, Grafana Tempo, Honeycomb, etc.) + // If empty, tracing is disabled and a noop tracer is used. + Endpoint string `toml:"endpoint" json:"endpoint,omitempty"` + + // ServiceName is the service name reported in traces. + // Defaults to "mcp-gateway". 
+ ServiceName string `toml:"service_name" json:"service_name,omitempty"` + + // SampleRate controls the fraction of traces that are sampled and exported. + // Valid range: 0.0 (no sampling) to 1.0 (sample everything). + // Defaults to 1.0 (100% sampling). + // Uses a pointer so that 0.0 can be distinguished from "unset". + SampleRate *float64 `toml:"sample_rate" json:"sample_rate,omitempty"` +} + +// GetSampleRate returns the configured sample rate, defaulting to 1.0 if unset. +func (c *TracingConfig) GetSampleRate() float64 { + if c == nil || c.SampleRate == nil { + return DefaultTracingSampleRate + } + return *c.SampleRate +} + +func init() { + // Register default setter for Tracing config + RegisterDefaults(func(cfg *Config) { + if cfg.Gateway != nil && cfg.Gateway.Tracing != nil { + if cfg.Gateway.Tracing.ServiceName == "" { + cfg.Gateway.Tracing.ServiceName = DefaultTracingServiceName + } + } + }) +} diff --git a/internal/proxy/handler.go b/internal/proxy/handler.go index 537a4dc3..a94a29e7 100644 --- a/internal/proxy/handler.go +++ b/internal/proxy/handler.go @@ -9,11 +9,16 @@ import ( "log" "net/http" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + oteltrace "go.opentelemetry.io/otel/trace" + "github.com/github/gh-aw-mcpg/internal/difc" "github.com/github/gh-aw-mcpg/internal/guard" "github.com/github/gh-aw-mcpg/internal/httputil" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/github/gh-aw-mcpg/internal/strutil" + "github.com/github/gh-aw-mcpg/internal/tracing" ) var logHandler = logger.New("proxy:handler") @@ -125,6 +130,16 @@ func (h *proxyHandler) handleWithDIFC(w http.ResponseWriter, r *http.Request, pa s := h.server backend := &restBackendCaller{server: s, clientAuth: r.Header.Get("Authorization")} + // Start a DIFC pipeline span covering all phases for this request + ctx, difcSpan := tracing.Tracer().Start(ctx, "proxy.difc_pipeline", + oteltrace.WithAttributes( + attribute.String("tool.name", toolName), + 
attribute.String("http.path", path), + ), + oteltrace.WithSpanKind(oteltrace.SpanKindInternal), + ) + defer difcSpan.End() + if !s.guardInitialized { log.Printf("[proxy] WARNING: guard not initialized, blocking request") http.Error(w, "proxy enforcement not configured", http.StatusServiceUnavailable) @@ -159,6 +174,7 @@ func (h *proxyHandler) handleWithDIFC(w http.ResponseWriter, r *http.Request, pa } else { // Write blocked logHandler.Printf("[DIFC] Phase 2: BLOCKED %s %s — %s", r.Method, path, evalResult.Reason) + difcSpan.SetStatus(codes.Error, "access denied: "+evalResult.Reason) writeDIFCForbidden(w, fmt.Sprintf("DIFC policy violation: %s", evalResult.Reason)) return } @@ -168,11 +184,23 @@ func (h *proxyHandler) handleWithDIFC(w http.ResponseWriter, r *http.Request, pa clientAuth := r.Header.Get("Authorization") var resp *http.Response var respBody []byte + + fwdCtx, fwdSpan := tracing.Tracer().Start(ctx, "proxy.backend.forward", + oteltrace.WithAttributes( + attribute.String("http.path", path), + attribute.String("tool.name", toolName), + ), + oteltrace.WithSpanKind(oteltrace.SpanKindClient), + ) if graphQLBody != nil { - resp, respBody = h.forwardAndReadBody(w, ctx, http.MethodPost, "/graphql", bytes.NewReader(graphQLBody), "application/json", clientAuth) + resp, respBody = h.forwardAndReadBody(w, fwdCtx, http.MethodPost, "/graphql", bytes.NewReader(graphQLBody), "application/json", clientAuth) } else { - resp, respBody = h.forwardAndReadBody(w, ctx, r.Method, path, nil, "", clientAuth) + resp, respBody = h.forwardAndReadBody(w, fwdCtx, r.Method, path, nil, "", clientAuth) + } + if resp != nil { + fwdSpan.SetAttributes(attribute.Int("http.status_code", resp.StatusCode)) } + fwdSpan.End() if resp == nil { return } diff --git a/internal/proxy/proxy.go b/internal/proxy/proxy.go index b3f1e8bc..581e1619 100644 --- a/internal/proxy/proxy.go +++ b/internal/proxy/proxy.go @@ -21,6 +21,7 @@ import ( "github.com/github/gh-aw-mcpg/internal/difc" 
"github.com/github/gh-aw-mcpg/internal/guard" "github.com/github/gh-aw-mcpg/internal/logger" + "github.com/github/gh-aw-mcpg/internal/tracing" ) var logProxy = logger.New("proxy:proxy") @@ -264,8 +265,10 @@ func (s *Server) initGuardPolicy(ctx context.Context, policyJSON string, trusted } // Handler returns an http.Handler for the proxy server. +// Every request is wrapped with an OTEL "proxy.request" span so the full +// proxy lifecycle (DIFC pipeline + GitHub API round-trip) appears in traces. func (s *Server) Handler() http.Handler { - return &proxyHandler{server: s} + return tracing.WrapHTTPHandler(&proxyHandler{server: s}, "proxy.request") } // restBackendCaller translates guard CallTool requests into GitHub REST API diff --git a/internal/server/http_helpers.go b/internal/server/http_helpers.go index b919520b..540b97f4 100644 --- a/internal/server/http_helpers.go +++ b/internal/server/http_helpers.go @@ -8,12 +8,18 @@ import ( "net/http" "time" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/propagation" + oteltrace "go.opentelemetry.io/otel/trace" + "github.com/github/gh-aw-mcpg/internal/auth" "github.com/github/gh-aw-mcpg/internal/guard" "github.com/github/gh-aw-mcpg/internal/httputil" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/github/gh-aw-mcpg/internal/logger/sanitize" "github.com/github/gh-aw-mcpg/internal/mcp" + "github.com/github/gh-aw-mcpg/internal/tracing" ) var logHelpers = logger.New("server:helpers") @@ -180,11 +186,47 @@ func setupSessionCallback(r *http.Request, backendID string) (string, bool) { return sessionID, true } +// WithOTELTracing wraps an http.Handler with an OpenTelemetry span for each request. +// The span covers the full HTTP handler lifecycle and includes session ID, HTTP path, +// and method as span attributes. The span context is propagated into the request context +// so that nested spans (e.g. tool call spans) are automatically parented to it. 
+// +// Incoming W3C traceparent/tracestate headers are extracted so that an +// agent-originated trace is continued; if no such headers are present a fresh +// root span (and new trace ID) is created automatically. +func WithOTELTracing(next http.Handler, tag string) http.Handler { + t := tracing.Tracer() + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Extract incoming W3C trace context (traceparent / tracestate). + // If the headers are absent the returned ctx is unchanged and OTEL + // will generate a fresh trace ID when the span is started. + ctx := otel.GetTextMapPropagator().Extract(r.Context(), propagation.HeaderCarrier(r.Header)) + + ctx, span := t.Start(ctx, "gateway.request", + oteltrace.WithAttributes( + attribute.String("http.method", r.Method), + attribute.String("http.path", r.URL.Path), + attribute.String("gateway.tag", tag), + ), + oteltrace.WithSpanKind(oteltrace.SpanKindServer), + ) + defer span.End() + + req := r.WithContext(ctx) + next.ServeHTTP(w, req) + + // Add session ID after request handling, once the session has been attached + sessionID := SessionIDFromContext(req.Context()) + span.SetAttributes(attribute.String("session.id", auth.TruncateSessionID(sessionID))) + }) +} + // wrapWithMiddleware applies the standard middleware stack to an SDK handler. // The middleware is applied in the following order (per spec): -// 1. SDK logging (WithSDKLogging) - Detailed JSON-RPC translation debugging -// 2. Shutdown check (rejectIfShutdown) - Spec 5.1.3: Reject requests during shutdown -// 3. Auth (applyAuthIfConfigured) - Spec 7.1: API key authentication if configured +// 1. OTEL tracing (WithOTELTracing) - OpenTelemetry span for the request +// 2. SDK logging (WithSDKLogging) - Detailed JSON-RPC translation debugging +// 3. Shutdown check (rejectIfShutdown) - Spec 5.1.3: Reject requests during shutdown +// 4. 
Auth (applyAuthIfConfigured) - Spec 7.1: API key authentication if configured // // This ensures consistent middleware ordering across both routed and unified server modes. func wrapWithMiddleware(handler http.Handler, logTag string, unifiedServer *UnifiedServer, apiKey string) http.HandlerFunc { @@ -198,8 +240,11 @@ func wrapWithMiddleware(handler http.Handler, logTag string, unifiedServer *Unif shutdownHandler := rejectIfShutdown(unifiedServer, loggedHandler, "server:"+logTag) // Apply auth middleware if API key is configured (spec 7.1) - finalHandler := applyAuthIfConfigured(apiKey, shutdownHandler.ServeHTTP) + authedHandler := applyAuthIfConfigured(apiKey, shutdownHandler.ServeHTTP) + + // Wrap with OTEL tracing span (outermost, so it covers auth + shutdown + logging) + tracingHandler := WithOTELTracing(authedHandler, logTag) logHelpers.Printf("Middleware wrapping complete: logTag=%s", logTag) - return finalHandler + return tracingHandler.ServeHTTP } diff --git a/internal/server/unified.go b/internal/server/unified.go index 1df1d201..a34b316f 100644 --- a/internal/server/unified.go +++ b/internal/server/unified.go @@ -12,12 +12,17 @@ import ( "sync" "time" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/codes" + oteltrace "go.opentelemetry.io/otel/trace" + "github.com/github/gh-aw-mcpg/internal/config" "github.com/github/gh-aw-mcpg/internal/difc" "github.com/github/gh-aw-mcpg/internal/guard" "github.com/github/gh-aw-mcpg/internal/launcher" "github.com/github/gh-aw-mcpg/internal/logger" "github.com/github/gh-aw-mcpg/internal/mcp" + "github.com/github/gh-aw-mcpg/internal/tracing" "github.com/github/gh-aw-mcpg/internal/version" sdk "github.com/modelcontextprotocol/go-sdk/mcp" ) @@ -372,6 +377,16 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName log.Printf("Calling tool on %s: %s with DIFC enforcement", serverID, toolName) logUnified.Printf("callBackendTool: serverID=%s, toolName=%s, args=%+v", serverID, 
toolName, args)
 
+	// Start an OTEL span for the full tool call lifecycle (spans all phases 0–6)
+	ctx, toolSpan := tracing.Tracer().Start(ctx, "gateway.tool_call",
+		oteltrace.WithAttributes(
+			attribute.String("tool.name", toolName),
+			attribute.String("server.id", serverID),
+		),
+		oteltrace.WithSpanKind(oteltrace.SpanKindInternal),
+	)
+	defer toolSpan.End()
+
 	// Get guard for this backend
 	g := us.guardRegistry.Get(serverID)
 	sessionID := us.getSessionID(ctx)
@@ -435,6 +450,8 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName
 			// Non-read operation - block the request
 			log.Printf("[DIFC] Access DENIED for agent %s to %s: %s", agentID, resource.Description, result.Reason)
 			detailedErr := difc.FormatViolationError(result, agentLabels.Secrecy, agentLabels.Integrity, resource)
+			toolSpan.RecordError(detailedErr)
+			toolSpan.SetStatus(codes.Error, "access denied: "+result.Reason)
 			return &sdk.CallToolResult{
 				Content: []sdk.Content{
 					&sdk.TextContent{
@@ -449,8 +466,21 @@ func (us *UnifiedServer) callBackendTool(ctx context.Context, serverID, toolName
 	}
 
 	// **Phase 3: Execute the backend call**
-	backendResult, err := executeBackendToolCall(ctx, us.launcher, serverID, sessionID, toolName, args)
+	// The execute span is ended explicitly as soon as the backend call returns
+	// (not deferred) so its duration covers only Phase 3, not the later phases.
+	execCtx, execSpan := tracing.Tracer().Start(ctx, "gateway.backend.execute",
+		oteltrace.WithAttributes(
+			attribute.String("tool.name", toolName),
+			attribute.String("server.id", serverID),
+		),
+		oteltrace.WithSpanKind(oteltrace.SpanKindClient),
+	)
+	backendResult, err := executeBackendToolCall(execCtx, us.launcher, serverID, sessionID, toolName, args)
 	if err != nil {
+		execSpan.RecordError(err)
+		execSpan.SetStatus(codes.Error, err.Error())
+		execSpan.End()
 		return newErrorCallToolResult(err)
 	}
+	execSpan.End()
 
diff --git a/internal/tracing/http.go b/internal/tracing/http.go
new file mode 100644
index 00000000..d39427a0
--- /dev/null
+++ b/internal/tracing/http.go
@@ -0,0 +1,46 @@
+// Package tracing provides OpenTelemetry OTLP trace export for the MCP
Gateway.
+// This file provides HTTP handler wrapping helpers.
+package tracing
+
+import (
+	"net/http"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/propagation"
+	oteltrace "go.opentelemetry.io/otel/trace"
+)
+
+// WrapHTTPHandler returns a handler that runs next inside an OpenTelemetry
+// server-kind span called spanName. Each span carries http.method and
+// http.path for the request, followed by any extraAttrs supplied here.
+//
+// The parent context is taken from the request's W3C traceparent/tracestate
+// headers through the globally registered propagator, so a trace started by
+// the calling agent continues; without those headers a new root span starts.
+//
+// Low-level helper intended for the MCP gateway middleware and the GitHub API
+// proxy. Session-level attributes (e.g. session.id) are not added here; pass
+// them as extraAttrs or enrich the context in the caller.
+func WrapHTTPHandler(next http.Handler, spanName string, extraAttrs ...attribute.KeyValue) http.Handler {
+	tracer := Tracer()
+	serve := func(w http.ResponseWriter, r *http.Request) {
+		// Pull the remote parent (traceparent / tracestate) out of the headers.
+		// With no such headers the context comes back unchanged and the span
+		// started below becomes a root span with a fresh trace ID.
+		carrier := propagation.HeaderCarrier(r.Header)
+		parentCtx := otel.GetTextMapPropagator().Extract(r.Context(), carrier)
+		spanAttrs := append([]attribute.KeyValue{
+			attribute.String("http.method", r.Method),
+			attribute.String("http.path", r.URL.Path),
+		}, extraAttrs...)
+		spanCtx, span := tracer.Start(parentCtx, spanName,
+			oteltrace.WithSpanKind(oteltrace.SpanKindServer),
+			oteltrace.WithAttributes(spanAttrs...),
+		)
+		defer span.End()
+
+		next.ServeHTTP(w, r.WithContext(spanCtx))
+	}
+	return http.HandlerFunc(serve)
+}
diff --git a/internal/tracing/provider.go b/internal/tracing/provider.go
new file mode 100644
index 00000000..7152dfd6
--- /dev/null
+++ b/internal/tracing/provider.go
@@ -0,0 +1,193 @@
+// Package tracing provides OpenTelemetry OTLP trace export for the MCP Gateway.
+//
+// When an OTLP endpoint is configured (via config or OTEL_EXPORTER_OTLP_ENDPOINT),
+// this package initializes a real tracer provider that exports spans over HTTP.
+// When no endpoint is configured, a noop tracer provider is used, adding zero overhead.
+//
+// Usage:
+//
+//	tp, err := tracing.InitProvider(ctx, cfg.Gateway.Tracing)
+//	if err != nil {
+//		return err
+//	}
+//	defer tp.Shutdown(ctx)
+//
+// Once initialized, obtain a tracer with:
+//
+//	tracer := otel.Tracer("github.com/github/gh-aw-mcpg")
+package tracing
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
+	"go.opentelemetry.io/otel/propagation"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
+	"go.opentelemetry.io/otel/trace"
+	"go.opentelemetry.io/otel/trace/noop"
+
+	"github.com/github/gh-aw-mcpg/internal/config"
+	"github.com/github/gh-aw-mcpg/internal/logger"
+)
+
+const instrumentationName = "github.com/github/gh-aw-mcpg"
+
+var logTracing = logger.New("tracing:provider")
+
+// Provider wraps an OpenTelemetry TracerProvider and provides a Shutdown method.
+type Provider struct {
+	tp     trace.TracerProvider
+	sdk    *sdktrace.TracerProvider // non-nil only when OTLP is configured
+	tracer trace.Tracer
+}
+
+// Tracer returns the tracer for the MCP gateway instrumentation scope.
+func (p *Provider) Tracer() trace.Tracer {
+	return p.tracer
+}
+
+// Shutdown flushes and shuts down the tracer provider.
+// For noop providers this is a no-op. Must be called on application exit.
+func (p *Provider) Shutdown(ctx context.Context) error {
+	if p.sdk != nil {
+		return p.sdk.Shutdown(ctx)
+	}
+	return nil
+}
+
+// resolveEndpoint returns the OTLP endpoint from config, or "" when cfg is nil.
+// CLI flags set the config value using env vars as defaults, so config already
+// reflects the correct precedence: CLI flag > env var > config file.
+func resolveEndpoint(cfg *config.TracingConfig) string {
+	if cfg != nil {
+		return cfg.Endpoint
+	}
+	return ""
+}
+
+// resolveServiceName returns the service name from config, falling back to
+// config.DefaultTracingServiceName when cfg is nil or the name is empty.
+func resolveServiceName(cfg *config.TracingConfig) string {
+	if cfg != nil && cfg.ServiceName != "" {
+		return cfg.ServiceName
+	}
+	return config.DefaultTracingServiceName
+}
+
+// resolveSampleRate returns the sample rate from config (defaults to 1.0).
+// Valid configured values are in the range [0.0, 1.0], where 0.0 disables sampling.
+// A nil cfg yields config.DefaultTracingSampleRate, matching the nil handling of
+// resolveEndpoint and resolveServiceName; out-of-range values are logged and
+// replaced by the same default.
+func resolveSampleRate(cfg *config.TracingConfig) float64 {
+	// InitProvider accepts a nil config, so do not rely on GetSampleRate
+	// tolerating a nil receiver.
+	if cfg == nil {
+		return config.DefaultTracingSampleRate
+	}
+
+	rate := cfg.GetSampleRate()
+	if rate >= 0.0 && rate <= 1.0 {
+		return rate
+	}
+
+	logTracing.Printf("Warning: invalid tracing sample rate %.4f; using default %.2f", rate, config.DefaultTracingSampleRate)
+	return config.DefaultTracingSampleRate
+}
+
+// registerPropagator installs the global W3C TraceContext + Baggage propagator.
+// This enables incoming traceparent/tracestate headers to be extracted so that
+// agent-initiated traces are continued rather than fragmented.
+func registerPropagator() {
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	))
+}
+
+// InitProvider initializes the global OpenTelemetry tracer provider.
+// When endpoint is empty, a noop provider is installed (zero overhead).
+// When endpoint is configured, an OTLP/HTTP exporter is created and the SDK
+// tracer provider is registered as the global provider.
+//
+// In both cases a W3C TraceContext propagator is registered globally so that
+// incoming traceparent/tracestate headers are honoured by all HTTP middleware.
+//
+// The returned Provider must be shut down on application exit to flush buffered spans.
+// An error is returned only when the OTLP exporter cannot be created; a failure
+// to build the resource is logged and tracing continues with a service-name-only
+// resource.
+func InitProvider(ctx context.Context, cfg *config.TracingConfig) (*Provider, error) {
+	endpoint := resolveEndpoint(cfg)
+	serviceName := resolveServiceName(cfg)
+	sampleRate := resolveSampleRate(cfg)
+
+	// Always register the W3C propagator so that incoming traceparent headers
+	// are extracted, even when tracing is disabled (noop spans are still
+	// parented correctly if propagation is later enabled upstream).
+	registerPropagator()
+
+	if endpoint == "" {
+		logTracing.Printf("Tracing disabled: no OTLP endpoint configured")
+		noopTP := noop.NewTracerProvider()
+		otel.SetTracerProvider(noopTP)
+		return &Provider{
+			tp:     noopTP,
+			tracer: noopTP.Tracer(instrumentationName),
+		}, nil
+	}
+
+	logTracing.Printf("Initializing OTLP tracing: endpoint=%s, service=%s, sampleRate=%.2f", endpoint, serviceName, sampleRate)
+
+	// Build OTLP HTTP exporter with 10s timeout
+	exporter, err := otlptracehttp.New(ctx,
+		otlptracehttp.WithEndpointURL(endpoint),
+		otlptracehttp.WithTimeout(10*time.Second),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create OTLP trace exporter: %w", err)
+	}
+
+	// Build resource with service name, process PID and host attributes
+	res, err := resource.New(ctx,
+		resource.WithAttributes(
+			semconv.ServiceName(serviceName),
+		),
+		resource.WithProcessPID(),
+		resource.WithHost(),
+	)
+	if err != nil {
+		// Non-fatal: keep the service name so exported spans stay attributable
+		logTracing.Printf("Warning: failed to create OTEL resource: %v", err)
+		res = resource.NewSchemaless(semconv.ServiceName(serviceName))
+	}
+
+	// Select sampler based on configured rate
+	var sampler sdktrace.Sampler
+	switch {
+	case sampleRate >= 1.0:
+		sampler = sdktrace.AlwaysSample()
+	case sampleRate <= 0.0:
+		sampler = sdktrace.NeverSample()
+	default:
+		sampler = sdktrace.TraceIDRatioBased(sampleRate)
+	}
+
+	sdkTP := sdktrace.NewTracerProvider(
+		sdktrace.WithBatcher(exporter),
+		sdktrace.WithResource(res),
+		sdktrace.WithSampler(sampler),
+	)
+
+	// Register as the global provider so instrumented libraries pick it up
+	otel.SetTracerProvider(sdkTP)
+
+	tracer := sdkTP.Tracer(instrumentationName)
+	logTracing.Printf("OTLP tracing initialized successfully")
+
+	return &Provider{
+		tp:     sdkTP,
+		sdk:    sdkTP,
+		tracer: tracer,
+	}, nil
+}
+
+// Tracer returns the global MCP gateway tracer.
+// This is a convenience wrapper around otel.Tracer for packages that don't
+// hold a reference to the Provider.
+func Tracer() trace.Tracer {
+	return otel.Tracer(instrumentationName)
+}
diff --git a/internal/tracing/provider_test.go b/internal/tracing/provider_test.go
new file mode 100644
index 00000000..a4a72b45
--- /dev/null
+++ b/internal/tracing/provider_test.go
@@ -0,0 +1,313 @@
+package tracing_test
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/propagation"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	"go.opentelemetry.io/otel/sdk/trace/tracetest"
+	"go.opentelemetry.io/otel/trace"
+	"go.opentelemetry.io/otel/trace/noop"
+
+	"github.com/github/gh-aw-mcpg/internal/config"
+	"github.com/github/gh-aw-mcpg/internal/tracing"
+)
+
+func ptrFloat64(v float64) *float64 { return &v }
+
+func TestInitProvider_NoEndpoint_ReturnsNoopProvider(t *testing.T) {
+	ctx := context.Background()
+
+	// With nil config (no endpoint), should return a noop provider
+	provider, err := tracing.InitProvider(ctx, nil)
+	require.NoError(t, err)
+	require.NotNil(t, provider)
+
+	// Noop provider must shut down cleanly
+	assert.NoError(t, provider.Shutdown(ctx))
+
+	// The global
provider should be a noop provider
+	assert.IsType(t, noop.NewTracerProvider(), otel.GetTracerProvider())
+}
+
+func TestInitProvider_EmptyEndpoint_ReturnsNoopProvider(t *testing.T) {
+	ctx := context.Background()
+
+	// An explicitly empty endpoint takes the same noop path as a nil config.
+	prov, err := tracing.InitProvider(ctx, &config.TracingConfig{
+		Endpoint:    "",
+		ServiceName: "test-service",
+		SampleRate:  ptrFloat64(1.0),
+	})
+	require.NoError(t, err)
+	require.NotNil(t, prov)
+
+	assert.NoError(t, prov.Shutdown(ctx))
+}
+
+func TestInitProvider_WithEndpoint_ReturnsSdkProvider(t *testing.T) {
+	ctx := context.Background()
+
+	// Nothing listens on this port; the test expects exporter construction to
+	// succeed anyway, so the provider is still initialized.
+	prov, err := tracing.InitProvider(ctx, &config.TracingConfig{
+		Endpoint:    "http://localhost:14318",
+		ServiceName: "test-service",
+		SampleRate:  ptrFloat64(1.0),
+	})
+	require.NoError(t, err)
+	require.NotNil(t, prov)
+
+	assert.NotNil(t, prov.Tracer())
+
+	// Bound the flush so the test cannot hang on the unreachable endpoint.
+	// A flush error is acceptable here; only a panic would be a failure.
+	flushCtx, stop := context.WithTimeout(ctx, 100*time.Millisecond)
+	defer stop()
+	_ = prov.Shutdown(flushCtx)
+}
+
+func TestTracer_ReturnsNonNil(t *testing.T) {
+	ctx := context.Background()
+
+	// Install the noop global provider before asking for the tracer.
+	prov, err := tracing.InitProvider(ctx, nil)
+	require.NoError(t, err)
+	defer prov.Shutdown(ctx)
+
+	assert.NotNil(t, tracing.Tracer())
+}
+
+func TestInitProvider_SampleRateZero_UsesNeverSampler(t *testing.T) {
+	ctx := context.Background()
+
+	prov, err := tracing.InitProvider(ctx, &config.TracingConfig{
+		Endpoint:    "http://localhost:14318",
+		ServiceName: "test-service",
+		SampleRate:  ptrFloat64(0.0), // never sample
+	})
+	require.NoError(t, err)
+	require.NotNil(t, prov)
+
+	// A rate of 0.0 must yield spans that are neither sampled nor recording.
+	_, span := prov.Tracer().Start(ctx, "test-span")
+	assert.False(t, span.SpanContext().IsSampled(), "span should NOT be sampled with rate 0.0")
+	assert.False(t, span.IsRecording(), "span should NOT be recording with rate 0.0")
+	span.End()
+
+	flushCtx, stop := context.WithTimeout(ctx, 100*time.Millisecond)
+	defer stop()
+	_ = prov.Shutdown(flushCtx)
+}
+
+func TestInitProvider_SampleRatePartial_UsesRatioSampler(t *testing.T) {
+	ctx := context.Background()
+
+	// 50% sampling: only successful initialization is checked here.
+	prov, err := tracing.InitProvider(ctx, &config.TracingConfig{
+		Endpoint:    "http://localhost:14318",
+		ServiceName: "test-service",
+		SampleRate:  ptrFloat64(0.5),
+	})
+	require.NoError(t, err)
+	require.NotNil(t, prov)
+
+	flushCtx, stop := context.WithTimeout(ctx, 100*time.Millisecond)
+	defer stop()
+	_ = prov.Shutdown(flushCtx)
+}
+
+func TestInitProvider_SampleRateOne_UsesAlwaysSampler(t *testing.T) {
+	ctx := context.Background()
+
+	prov, err := tracing.InitProvider(ctx, &config.TracingConfig{
+		Endpoint:    "http://localhost:14318",
+		ServiceName: "test-service",
+		SampleRate:  ptrFloat64(1.0), // always sample
+	})
+	require.NoError(t, err)
+	require.NotNil(t, prov)
+
+	// A rate of 1.0 must yield spans that are both sampled and recording.
+	_, span := prov.Tracer().Start(ctx, "test-span")
+	assert.True(t, span.SpanContext().IsSampled(), "span should be sampled with rate 1.0")
+	assert.True(t, span.IsRecording(), "span should be recording with rate 1.0")
+	span.End()
+
+	flushCtx, stop := context.WithTimeout(ctx, 100*time.Millisecond)
+	defer stop()
+	_ = prov.Shutdown(flushCtx)
+}
+
+func TestInitProvider_SampleRateNil_DefaultsToAlwaysSample(t *testing.T) {
+	ctx := context.Background()
+
+	// SampleRate is left nil (unset); the default is expected to sample everything.
+	prov, err := tracing.InitProvider(ctx, &config.TracingConfig{
+		Endpoint:    "http://localhost:14318",
+		ServiceName: "test-service",
+	})
+	require.NoError(t, err)
+	require.NotNil(t, prov)
+
+	_, span := prov.Tracer().Start(ctx, "test-span")
+	assert.True(t, span.SpanContext().IsSampled(), "span should be sampled with default rate")
+	assert.True(t, span.IsRecording(), "span should be recording with default rate")
+	span.End()
+
+	flushCtx, stop := context.WithTimeout(ctx, 100*time.Millisecond)
+	defer stop()
+	_ = prov.Shutdown(flushCtx)
+}
+
+// TestInitProvider_GlobalPropagatorRegistration checks that InitProvider installs
+// the W3C TraceContext propagator globally, by injecting a span context into
+// request headers and extracting it again through the global propagator.
+func TestInitProvider_GlobalPropagatorRegistration(t *testing.T) {
+	ctx := context.Background()
+
+	// The noop path (nil config) must register the propagator as well.
+	prov, err := tracing.InitProvider(ctx, nil)
+	require.NoError(t, err)
+	defer prov.Shutdown(ctx)
+
+	globalProp := otel.GetTextMapPropagator()
+	require.NotNil(t, globalProp)
+
+	// A local always-sampling SDK provider supplies a real span context.
+	localTP := sdktrace.NewTracerProvider(
+		sdktrace.WithSpanProcessor(sdktrace.NewSimpleSpanProcessor(tracetest.NewInMemoryExporter())),
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+	)
+
+	_, parent := localTP.Tracer("test").Start(ctx, "parent")
+	wantSpanCtx := parent.SpanContext()
+	parent.End()
+
+	// Inject the parent context into a carrier and copy it onto a request,
+	// simulating an incoming call that carries a traceparent header.
+	headers := propagation.MapCarrier{}
+	globalProp.Inject(trace.ContextWithSpanContext(ctx, wantSpanCtx), headers)
+
+	req := httptest.NewRequest(http.MethodGet, "/mcp", nil)
+	for key, val := range headers {
+		req.Header.Set(key, val)
+	}
+
+	// Extraction must recover the same trace ID from the request headers.
+	gotCtx := otel.GetTextMapPropagator().Extract(req.Context(), propagation.HeaderCarrier(req.Header))
+	gotSpanCtx := trace.SpanFromContext(gotCtx).SpanContext()
+
+	assert.Equal(t, wantSpanCtx.TraceID(), gotSpanCtx.TraceID(),
+		"extracted trace ID must match the injected parent trace ID")
+}
+
+// TestWrapHTTPHandler_ContinuesRemoteTrace checks that WrapHTTPHandler honours an
+// incoming traceparent header: the span seen by the wrapped handler must share
+// the remote parent's trace ID.
+func TestWrapHTTPHandler_ContinuesRemoteTrace(t *testing.T) {
+	ctx := context.Background()
+
+	// InitProvider registers the W3C propagator globally.
+	prov, err := tracing.InitProvider(ctx, nil)
+	require.NoError(t, err)
+	defer prov.Shutdown(ctx)
+
+	// Swap in a recording SDK provider for the duration of the test.
+	recordingTP := sdktrace.NewTracerProvider(
+		sdktrace.WithSpanProcessor(sdktrace.NewSimpleSpanProcessor(tracetest.NewInMemoryExporter())),
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+	)
+	otel.SetTracerProvider(recordingTP)
+	defer otel.SetTracerProvider(noop.NewTracerProvider())
+
+	// The "agent" span plays the remote parent whose context arrives in headers.
+	_, agentSpan := recordingTP.Tracer("test").Start(ctx, "agent-span")
+	wantTraceID := agentSpan.SpanContext().TraceID()
+	agentSpan.End()
+
+	headers := propagation.MapCarrier{}
+	otel.GetTextMapPropagator().Inject(
+		trace.ContextWithSpanContext(ctx, agentSpan.SpanContext()),
+		headers,
+	)
+
+	req := httptest.NewRequest(http.MethodPost, "/mcp", nil)
+	for key, val := range headers {
+		req.Header.Set(key, val)
+	}
+	rec := httptest.NewRecorder()
+
+	// Record the span context the wrapped handler observes.
+	var seen trace.SpanContext
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		seen = trace.SpanFromContext(r.Context()).SpanContext()
+	})
+
+	tracing.WrapHTTPHandler(inner, "test.request").ServeHTTP(rec, req)
+
+	assert.Equal(t, wantTraceID, seen.TraceID(),
+		"handler span should share the parent's trace ID when traceparent is present")
+}
+
+// TestWrapHTTPHandler_GeneratesRootSpan checks that a request without a
+// traceparent header still gets a valid, locally created span context.
+func TestWrapHTTPHandler_GeneratesRootSpan(t *testing.T) {
+	ctx := context.Background()
+
+	prov, err := tracing.InitProvider(ctx, nil)
+	require.NoError(t, err)
+	defer prov.Shutdown(ctx)
+
+	recordingTP := sdktrace.NewTracerProvider(
+		sdktrace.WithSpanProcessor(sdktrace.NewSimpleSpanProcessor(tracetest.NewInMemoryExporter())),
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+	)
+	otel.SetTracerProvider(recordingTP)
+	defer otel.SetTracerProvider(noop.NewTracerProvider())
+
+	// No trace headers are set on this request.
+	req := httptest.NewRequest(http.MethodGet, "/mcp", nil)
+	rec := httptest.NewRecorder()
+
+	var seen trace.SpanContext
+	inner := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		seen = trace.SpanFromContext(r.Context()).SpanContext()
+	})
+
+	tracing.WrapHTTPHandler(inner, "test.request").ServeHTTP(rec, req)
+
+	assert.True(t, seen.IsValid(), "should have a valid span context even without traceparent")
+	assert.False(t, seen.IsRemote(), "span should not be marked remote — it is a local root span")
+}