Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
24 changes: 24 additions & 0 deletions .claude/agents/k8s-discovery.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
name: k8s-discovery
description: Validates Kubernetes DNS discovery patterns — CoreDNS integration, service mesh compatibility, in-cluster SVCB resolution.
---

You are a Kubernetes DNS discovery specialist for AICR.

## Scope

AICR agents run inside Kubernetes clusters and use DNS SVCB lookups to discover peer agents. CoreDNS serves as the cluster DNS resolver.

## Validation Checks

1. **SVCB resolution**: `_{agent-name}._{protocol}._agents.{namespace}.svc.cluster.local` resolves correctly
2. **CoreDNS integration**: SVCB records served from ConfigMap-backed zone data
3. **Index records**: `_index._agents.{namespace}.svc.cluster.local` returns agent inventory
4. **Service mesh**: connect-class (65406) and connect-meta (65407) map to K8s NetworkPolicy or service mesh config
5. **Pod identity**: Agent identity derived from K8s ServiceAccount for OpenShell authorization
6. **Namespace isolation**: Cross-namespace agent discovery respects RBAC and network policies
7. **DNS caching**: TTL handling ensures stale agent records don't persist after pod termination

## CoreDNS Plugin Pipeline

Query → kubernetes plugin → response
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ require (
github.com/kyverno/pkg/ext v0.0.0-20250303002756-48769d003e55 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/miekg/dns v1.1.72
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHP
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/miekg/dns v1.1.72 h1:vhmr+TF2A3tuoGNkLDFK9zi36F2LS+hKTRW0Uf8kbzI=
github.com/miekg/dns v1.1.72/go.mod h1:+EuEPhdHOsfk6Wk5TT2CzssZdqkmFhf8r+aVyDEToIs=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
Expand Down
137 changes: 137 additions & 0 deletions pkg/api/agents.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package api

import (
"context"
"net/http"
"regexp"

"log/slog"

"github.com/NVIDIA/aicr/pkg/defaults"
"github.com/NVIDIA/aicr/pkg/discovery"
"github.com/NVIDIA/aicr/pkg/errors"
"github.com/NVIDIA/aicr/pkg/openshell"
"github.com/NVIDIA/aicr/pkg/serializer"
"github.com/NVIDIA/aicr/pkg/server"
)

// dnsNameRegex matches a lowercase DNS domain name: one or more dot-separated
// labels with an optional trailing dot. A label is 1-63 characters drawn from
// [a-z0-9_-] and may not begin with a hyphen. The pattern does not bound the
// overall length; callers enforce the 253-character limit separately.
var dnsNameRegex = regexp.MustCompile(
	`^[a-z0-9_][a-z0-9_\-]{0,62}(\.[a-z0-9_][a-z0-9_\-]{0,62})*\.?$`,
)

// agentResponse represents a single agent in the /v1/agents JSON response.
// handleAgents builds one value per discovery record.
type agentResponse struct {
// Name is copied from the discovery record's Name.
Name string `json:"name"`
// Protocol is the discovery record's Protocol converted to a plain string.
Protocol string `json:"protocol"`
// Endpoint is copied from the discovery record's Endpoint.
Endpoint string `json:"endpoint"`
// Port is copied from the discovery record's Port.
Port uint16 `json:"port"`
// Priority is copied from the discovery record's Priority.
Priority uint16 `json:"priority"`
// Params is the subset of the record's parameters exposed through the API.
// NOTE(review): encoding/json's omitempty never treats a struct value as
// empty, so "params" is always emitted (as {} when every field is blank).
// Confirm whether that is intended.
Params agentParamsSummary `json:"params,omitempty"`
// PolicyResult is left nil, and therefore omitted from the JSON, unless a
// policy check returned a non-nil result for this agent.
PolicyResult *policyResultSummary `json:"policyResult,omitempty"`
}

// agentParamsSummary is a subset of SvcParams relevant for the API response.
// Every field is omitted from the JSON when empty.
type agentParamsSummary struct {
// Realm is copied from the record's Params.Realm.
Realm string `json:"realm,omitempty"`
// BAP is copied from the record's Params.BAP.
BAP []string `json:"bap,omitempty"`
// Policy is copied from the record's Params.Policy. handleAgents only runs
// a policy check for records where this value is non-empty.
Policy string `json:"policy,omitempty"`
// ConnectClass is copied from the record's Params.ConnectClass.
ConnectClass string `json:"connectClass,omitempty"`
}

// policyResultSummary is the OpenShell policy evaluation result for an agent.
type policyResultSummary struct {
// Allowed mirrors the Allowed flag of the policy check result.
Allowed bool `json:"allowed"`
// Violations holds one "<rule>: <detail>" string per violation reported by
// the policy check; it is omitted from the JSON when there are none.
Violations []string `json:"violations,omitempty"`
}

// handleAgents builds the handler for the agent listing endpoint. The handler
// accepts only GET, resolves the target domain from the "domain" query
// parameter (falling back to defaultDomain when it is absent or empty),
// validates it, discovers every agent in that domain and writes them as JSON
// together with the domain and a count.
//
// When guard is non-nil, each record that names a policy is additionally
// checked and the outcome is attached to that agent's entry. A check error is
// only logged (fail-open); the agent is still returned. With a nil guard no
// policy evaluation takes place.
func handleAgents(disc *discovery.Discoverer, guard *openshell.Guard, defaultDomain string) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		// Anything other than GET is answered with 405 plus an Allow header.
		if r.Method != http.MethodGet {
			w.Header().Set("Allow", http.MethodGet)
			server.WriteError(w, r, http.StatusMethodNotAllowed, errors.ErrCodeMethodNotAllowed,
				"Method not allowed", false, nil)
			return
		}

		ctx, cancel := context.WithTimeout(r.Context(), defaults.DiscoveryHandlerTimeout)
		defer cancel()

		// An explicit, non-empty query parameter overrides the default domain.
		domain := defaultDomain
		if requested := r.URL.Query().Get("domain"); requested != "" {
			domain = requested
		}

		// Reject over-long or malformed names before any lookup is attempted.
		if wellFormed := len(domain) <= 253 && dnsNameRegex.MatchString(domain); !wellFormed {
			server.WriteError(w, r, http.StatusBadRequest, errors.ErrCodeInvalidRequest,
				"invalid domain parameter", false, map[string]any{"domain": domain})
			return
		}

		records, err := disc.DiscoverAll(ctx, domain)
		if err != nil {
			server.WriteErrorFromErr(w, r, err, "agent discovery failed", nil)
			return
		}

		// One output entry per record, filled in place.
		agents := make([]agentResponse, len(records))
		for i := range records {
			rec := &records[i]
			entry := &agents[i]

			entry.Name = rec.Name
			entry.Protocol = string(rec.Protocol)
			entry.Endpoint = rec.Endpoint
			entry.Port = rec.Port
			entry.Priority = rec.Priority
			entry.Params = agentParamsSummary{
				Realm:        rec.Params.Realm,
				BAP:          rec.Params.BAP,
				Policy:       rec.Params.Policy,
				ConnectClass: rec.Params.ConnectClass,
			}

			// Policy evaluation needs both a configured guard and a policy
			// named on the record.
			if guard == nil || rec.Params.Policy == "" {
				continue
			}

			result, checkErr := guard.Check(ctx, rec)
			if checkErr != nil {
				slog.Debug("openshell policy fetch error (fail-open)",
					"agent", rec.Name, "error", checkErr)
			}
			if result == nil {
				continue
			}

			verdict := &policyResultSummary{Allowed: result.Allowed}
			for _, violation := range result.Violations {
				verdict.Violations = append(verdict.Violations, violation.Rule+": "+violation.Detail)
			}
			entry.PolicyResult = verdict
		}

		serializer.RespondJSON(w, http.StatusOK, map[string]any{
			"domain": domain,
			"agents": agents,
			"count":  len(agents),
		})
	}
}
213 changes: 213 additions & 0 deletions pkg/api/agents_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Copyright (c) 2026, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package api

import (
"encoding/json"
"fmt"
"net"
"net/http"
"net/http/httptest"
"testing"
"time"

"github.com/NVIDIA/aicr/pkg/discovery"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// startMockDNS launches an in-process UDP DNS server on a loopback port chosen
// by the OS, answering every query with handler, and returns the address it
// listens on. The server is shut down through t.Cleanup.
//
// Duplicated from pkg/discovery/testhelper_test.go because Go test helpers
// cannot be shared across packages without a non-test dependency.
func startMockDNS(t *testing.T, handler dns.HandlerFunc) string {
	t.Helper()

	conn, err := net.ListenPacket("udp", "127.0.0.1:0")
	require.NoError(t, err)

	srv := &dns.Server{PacketConn: conn, Handler: handler}

	// Serve in the background; errors are deliberately ignored in this helper.
	go func() { _ = srv.ActivateAndServe() }()
	t.Cleanup(func() { _ = srv.Shutdown() })

	return conn.LocalAddr().String()
}

// TestHandleAgents exercises the agents handler against a mock DNS server that
// answers every TXT query with a single "aicrd:mcp" entry and every SVCB query
// with one record on port 8080, regardless of the queried name.
func TestHandleAgents(t *testing.T) {
	const agentName = "aicrd"

	handler := func(w dns.ResponseWriter, r *dns.Msg) {
		m := new(dns.Msg)
		m.SetReply(r)

		qname := r.Question[0].Name
		switch r.Question[0].Qtype {
		case dns.TypeTXT:
			m.Answer = []dns.RR{
				&dns.TXT{
					Hdr: dns.RR_Header{Name: qname, Rrtype: dns.TypeTXT, Class: dns.ClassINET, Ttl: 300},
					Txt: []string{agentName + ":mcp"},
				},
			}
		case dns.TypeSVCB:
			m.Answer = []dns.RR{
				&dns.SVCB{
					Hdr:      dns.RR_Header{Name: qname, Rrtype: dns.TypeSVCB, Class: dns.ClassINET, Ttl: 300},
					Priority: 1,
					Target:   fmt.Sprintf("%s.default.svc.cluster.local.", agentName),
					Value: []dns.SVCBKeyValue{
						&dns.SVCBPort{Port: 8080},
					},
				},
			}
		}
		_ = w.WriteMsg(m)
	}

	addr := startMockDNS(t, handler)
	disc := discovery.NewDiscoverer(
		discovery.WithDNSServer(addr),
		discovery.WithDNSTimeout(2*time.Second),
	)

	h := handleAgents(disc, nil, "default.svc.cluster.local.")

	t.Run("GET returns agents", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/v1/agents", nil)
		w := httptest.NewRecorder()

		h(w, req)

		require.Equal(t, http.StatusOK, w.Code)
		assert.Equal(t, "application/json", w.Header().Get("Content-Type"))

		var resp map[string]any
		err := json.Unmarshal(w.Body.Bytes(), &resp)
		require.NoError(t, err)

		assert.Equal(t, "default.svc.cluster.local.", resp["domain"])
		assert.Equal(t, float64(1), resp["count"])

		agents, ok := resp["agents"].([]any)
		require.True(t, ok)
		require.Len(t, agents, 1)

		// Checked assertion: a malformed entry fails the test instead of panicking.
		agent, ok := agents[0].(map[string]any)
		require.True(t, ok, "agent entry should be a JSON object")
		assert.Equal(t, agentName, agent["name"])
		assert.Equal(t, "mcp", agent["protocol"])
		assert.Equal(t, float64(8080), agent["port"])
	})

	t.Run("custom domain via query param", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodGet, "/v1/agents?domain=custom.svc.cluster.local.", nil)
		w := httptest.NewRecorder()

		h(w, req)

		// Without this check an error payload could slip through unnoticed.
		require.Equal(t, http.StatusOK, w.Code)

		var resp map[string]any
		err := json.Unmarshal(w.Body.Bytes(), &resp)
		require.NoError(t, err)
		assert.Equal(t, "custom.svc.cluster.local.", resp["domain"])
	})

	t.Run("POST returns method not allowed", func(t *testing.T) {
		req := httptest.NewRequest(http.MethodPost, "/v1/agents", nil)
		w := httptest.NewRecorder()

		h(w, req)

		assert.Equal(t, http.StatusMethodNotAllowed, w.Code)
		// The handler advertises the only supported method on a 405.
		assert.Equal(t, http.MethodGet, w.Header().Get("Allow"))
	})
}

// TestHandleAgentsEmptyIndex checks that when the DNS server answers every
// query with NXDOMAIN the handler still responds 200 with a JSON body whose
// count is zero.
func TestHandleAgentsEmptyIndex(t *testing.T) {
	nxdomain := func(w dns.ResponseWriter, r *dns.Msg) {
		reply := new(dns.Msg)
		reply.SetRcode(r, dns.RcodeNameError)
		_ = w.WriteMsg(reply)
	}

	disc := discovery.NewDiscoverer(
		discovery.WithDNSServer(startMockDNS(t, nxdomain)),
		discovery.WithDNSTimeout(2*time.Second),
	)
	h := handleAgents(disc, nil, "empty.svc.cluster.local.")

	rec := httptest.NewRecorder()
	h(rec, httptest.NewRequest(http.MethodGet, "/v1/agents", nil))

	require.Equal(t, http.StatusOK, rec.Code)
	assert.Equal(t, "application/json", rec.Header().Get("Content-Type"))

	var body map[string]any
	require.NoError(t, json.Unmarshal(rec.Body.Bytes(), &body))
	assert.Equal(t, float64(0), body["count"])
}

// TestHandleAgentsInvalidDomain checks that malformed "domain" query values
// are rejected with 400 Bad Request.
func TestHandleAgentsInvalidDomain(t *testing.T) {
	// Validation is expected to fail before any lookup, so the resolver
	// address configured here should never actually be contacted.
	disc := discovery.NewDiscoverer(
		discovery.WithDNSServer("127.0.0.1:1"),
		discovery.WithDNSTimeout(100*time.Millisecond),
	)
	h := handleAgents(disc, nil, "default.svc.cluster.local.")

	cases := []struct {
		name   string
		domain string
	}{
		{name: "path traversal", domain: "../etc/passwd"},
		{name: "uppercase", domain: "Default.SVC.cluster.local"},
		{name: "empty label", domain: "foo..bar.com"},
		{name: "starts with hyphen", domain: "-bad.example.com"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			rec := httptest.NewRecorder()
			h(rec, httptest.NewRequest(http.MethodGet, "/v1/agents?domain="+tc.domain, nil))

			assert.Equal(t, http.StatusBadRequest, rec.Code)
		})
	}
}

// TestDiscoveryDomain checks both sources of the discovery domain: the
// fallback used when AICR_DISCOVERY_DOMAIN is empty, and the explicit override
// when it is set.
func TestDiscoveryDomain(t *testing.T) {
	const envKey = "AICR_DISCOVERY_DOMAIN"

	// Empty override: the result should still be a cluster-local domain
	// (the default is expected when no service-account namespace file exists).
	t.Setenv(envKey, "")
	fallback := discoveryDomain()
	assert.Contains(t, fallback, "svc.cluster.local.")

	// Explicit override: the value is returned as-is.
	t.Setenv(envKey, "custom.example.com.")
	override := discoveryDomain()
	assert.Equal(t, "custom.example.com.", override)
}
Loading