diff --git a/cmd/apiserver/app/config.go b/cmd/apiserver/app/config.go index 5c2f474..088c2fa 100644 --- a/cmd/apiserver/app/config.go +++ b/cmd/apiserver/app/config.go @@ -162,10 +162,7 @@ func NewConfig(opts options.CompletedOptions) (*Config, error) { return } if h, err := crdRuntimeMgr.Runtime(cid, genericConfig.DrainedNotify()); err == nil && h != nil { - h = genericfilters.WithRequestInfo(h, conf.RequestInfoResolver) - h = genericfilters.WithAuditInit(h) - h = serverfilters.WithPanicRecovery(h, conf.RequestInfoResolver) - h.ServeHTTP(w, r) + wrapClusterCRDHandler(h, conf, cid, false).ServeHTTP(w, r) return } klog.Errorf("mc.crdRuntime unresolved at kube cluster=%s path=%s", cid, r.URL.Path) @@ -361,13 +358,7 @@ func NewConfig(opts options.CompletedOptions) (*Config, error) { http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable) return true } - // Ensure RequestInfo is computed from the normalized /apis path - // before entering the cluster-scoped CRD runtime handler. - h = genericfilters.WithRequestInfo(h, conf.RequestInfoResolver) - h = withClusterCRDRequestInfoRewrite(h, clusterID) - h = genericfilters.WithAuditInit(h) - h = serverfilters.WithPanicRecovery(h, conf.RequestInfoResolver) - h.ServeHTTP(w, r) + wrapClusterCRDHandler(h, conf, clusterID, false).ServeHTTP(w, r) return true } // Ensure CRDs are also routed through the multicluster handler @@ -480,6 +471,25 @@ func withClusterCRDRequestInfoRewrite(next http.Handler, clusterID string) http. }) } +func wrapClusterCRDHandler(next http.Handler, conf *server.Config, clusterID string, rewriteRequestInfo bool) http.Handler { + if next == nil || conf == nil { + return next + } + h := next + if rewriteRequestInfo { + h = withClusterCRDRequestInfoRewrite(h, clusterID) + } + h = genericfilters.WithAuthorization(h, conf.Authorization.Authorizer, conf.Serializer) + failedHandler := genericfilters.Unauthorized(conf.Serializer) + failedHandler = genericfilters.WithFailedAuthenticationAudit(failedHandler, conf.AuditBackend, conf.AuditPolicyRuleEvaluator) + h = genericfilters.WithAuthentication(h, conf.Authentication.Authenticator, failedHandler, conf.Authentication.APIAudiences, conf.Authentication.RequestHeaderConfig) + // RequestInfo must be available before rewrite/authn/authz wrappers execute. + h = genericfilters.WithRequestInfo(h, conf.RequestInfoResolver) + h = genericfilters.WithAuditInit(h) + h = serverfilters.WithPanicRecovery(h, conf.RequestInfoResolver) + return h +} + func decorateRESTOptionsGetter(server string, getter generic.RESTOptionsGetter, opts mc.Options) generic.RESTOptionsGetter { if _, ok := getter.(mc.RESTOptionsDecorator); ok { klog.Infof("mc.restOptionsGetter server=%s alreadyDecorated=true", server) diff --git a/pkg/multicluster/admission/webhook/generic/versioned_attributes.go b/pkg/multicluster/admission/webhook/generic/versioned_attributes.go new file mode 100644 index 0000000..bfd9534 --- /dev/null +++ b/pkg/multicluster/admission/webhook/generic/versioned_attributes.go @@ -0,0 +1,33 @@ +package generic + +import ( + "k8s.io/apiserver/pkg/admission" + "k8s.io/apiserver/pkg/authentication/user" +) + +// EnsureVersionedAttributesUserInfo guarantees that AdmissionReview construction +// always sees a non-nil user.Info, preventing nil dereferences in upstream +// request builders when attributes are missing user context. +func EnsureVersionedAttributesUserInfo(attr *admission.VersionedAttributes) *admission.VersionedAttributes { + if attr == nil || attr.Attributes == nil || attr.Attributes.GetUserInfo() != nil { + return attr + } + + cloned := *attr + cloned.Attributes = userInfoFallbackAttributes{Attributes: attr.Attributes} + return &cloned +} + +type userInfoFallbackAttributes struct { + admission.Attributes +} + +func (a userInfoFallbackAttributes) GetUserInfo() user.Info { + if a.Attributes == nil { + return &user.DefaultInfo{} + } + if info := a.Attributes.GetUserInfo(); info != nil { + return info + } + return &user.DefaultInfo{} +} diff --git a/pkg/multicluster/admission/webhook/mutating/dispatcher.go b/pkg/multicluster/admission/webhook/mutating/dispatcher.go index b98716b..16120e0 100644 --- a/pkg/multicluster/admission/webhook/mutating/dispatcher.go +++ b/pkg/multicluster/admission/webhook/mutating/dispatcher.go @@ -254,7 +254,7 @@ func (a *mutatingDispatcher) callAttrMutatingHook(ctx context.Context, h *admiss } } - uid, request, response, err := webhookrequest.CreateAdmissionObjects(attr, invocation) + uid, request, response, err := webhookrequest.CreateAdmissionObjects(generic.EnsureVersionedAttributesUserInfo(attr), invocation) if err != nil { return false, &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not create admission objects: %w", err), Status: apierrors.NewBadRequest("error creating admission objects")} } diff --git a/pkg/multicluster/admission/webhook/validating/dispatcher.go b/pkg/multicluster/admission/webhook/validating/dispatcher.go index 8afd335..c208030 100644 --- a/pkg/multicluster/admission/webhook/validating/dispatcher.go +++ b/pkg/multicluster/admission/webhook/validating/dispatcher.go @@ -255,7 +255,7 @@ func (d *validatingDispatcher) callHook(ctx context.Context, h *v1.ValidatingWeb } } - uid, request, response, err := webhookrequest.CreateAdmissionObjects(versionedAttr, invocation) + uid, request, response, err := webhookrequest.CreateAdmissionObjects(generic.EnsureVersionedAttributesUserInfo(versionedAttr), invocation) if err != nil { return &webhookutil.ErrCallingWebhook{WebhookName: h.Name, Reason: fmt.Errorf("could not create admission objects: %w", err), Status: apierrors.NewBadRequest("error creating admission objects")} } diff --git a/pkg/multicluster/auth/dispatcher.go b/pkg/multicluster/auth/dispatcher.go index 8333cdc..410d132 100644 --- a/pkg/multicluster/auth/dispatcher.go +++ b/pkg/multicluster/auth/dispatcher.go @@ -2,7 +2,9 @@ package auth import ( "context" + "fmt" "net/http" + "reflect" "strings" mc "github.com/kplane-dev/apiserver/pkg/multicluster" @@ -48,8 +50,8 @@ func (c *ClusterAuthenticator) AuthenticateRequest(req *http.Request) (*authenti } } useRoot := cid == "" || cid == c.rootCluster - if useRoot && c.root != nil { - return c.root.AuthenticateRequest(req) + if useRoot && !isNil(c.root) { + return authenticateSafely(c.root, req, "root") } if c.resolver == nil { return nil, false, nil @@ -58,10 +60,10 @@ func (c *ClusterAuthenticator) AuthenticateRequest(req *http.Request) (*authenti if err != nil { return nil, false, err } - if authn == nil { + if isNil(authn) { return nil, false, nil } - return authn.AuthenticateRequest(req) + return authenticateSafely(authn, req, cid) } // ClusterAuthorizer dispatches authorization per cluster. @@ -93,27 +95,33 @@ func NewClusterAuthorizer(rootCluster string, root authorizer.Authorizer, rootRe // Authorize dispatches by cluster context. func (c *ClusterAuthorizer) Authorize(ctx context.Context, a authorizer.Attributes) (authorizer.Decision, string, error) { cid := clusterFromContext(ctx) - if cid == "" || (cid == c.rootCluster && c.root != nil) || c.resolver == nil { - if c.root == nil { + if cid == "" || (cid == c.rootCluster && !isNil(c.root)) || c.resolver == nil { + if isNil(c.root) { return authorizer.DecisionNoOpinion, "no root authorizer", nil } - return c.root.Authorize(ctx, a) + if err := validateAttributesForAuthorize(a, "root", authorizerType(c.root)); err != nil { + return authorizer.DecisionDeny, "", err + } + return authorizeSafely(c.root, ctx, a, "root") } authz, _, err := c.resolver.AuthorizerForCluster(cid) if err != nil { - return authorizer.DecisionNoOpinion, "", err + return authorizer.DecisionDeny, "", err } - if authz == nil { + if isNil(authz) { return authorizer.DecisionNoOpinion, "no cluster authorizer", nil } - return authz.Authorize(ctx, a) + if err := validateAttributesForAuthorize(a, cid, authorizerType(authz)); err != nil { + return authorizer.DecisionDeny, "", err + } + return authorizeSafely(authz, ctx, a, cid) } // RulesFor dispatches rule resolution per cluster. func (c *ClusterAuthorizer) RulesFor(ctx context.Context, u user.Info, namespace string) ([]authorizer.ResourceRuleInfo, []authorizer.NonResourceRuleInfo, bool, error) { cid := clusterFromContext(ctx) - if cid == "" || (cid == c.rootCluster && c.rootResolver != nil) || c.resolver == nil { - if c.rootResolver == nil { + if cid == "" || (cid == c.rootCluster && !isNil(c.rootResolver)) || c.resolver == nil { + if isNil(c.rootResolver) { return nil, nil, false, nil } return c.rootResolver.RulesFor(ctx, u, namespace) @@ -122,12 +130,88 @@ func (c *ClusterAuthorizer) RulesFor(ctx context.Context, u user.Info, namespace if err != nil { return nil, nil, false, err } - if resolver == nil { + if isNil(resolver) { return nil, nil, false, nil } return resolver.RulesFor(ctx, u, namespace) } +func isNil(v any) bool { + if v == nil { + return true + } + rv := reflect.ValueOf(v) + switch rv.Kind() { + case reflect.Chan, reflect.Func, reflect.Interface, reflect.Map, reflect.Pointer, reflect.Slice: + return rv.IsNil() + default: + return false + } +} + +func authorizeSafely(authz authorizer.Authorizer, ctx context.Context, a authorizer.Attributes, target string) (decision authorizer.Decision, reason string, err error) { + defer func() { + if r := recover(); r != nil { + decision = authorizer.DecisionDeny + reason = "" + err = fmt.Errorf("authorizer panic for cluster %q (type=%s): %v", target, authorizerType(authz), r) + } + }() + return authz.Authorize(ctx, a) +} + +func validateAttributesForAuthorize(a authorizer.Attributes, clusterID, authzType string) error { + if isNil(a) { + return fmt.Errorf("invalid authorization attributes for cluster %q (authorizer=%s): attributes is nil", clusterID, authzType) + } + u, err := userFromAttributes(a) + if err != nil { + return fmt.Errorf("invalid authorization attributes for cluster %q (authorizer=%s): %w", clusterID, authzType, err) + } + if isNil(u) { + return fmt.Errorf("invalid authorization attributes for cluster %q (authorizer=%s): user is nil", clusterID, authzType) + } + return nil +} + +func userFromAttributes(a authorizer.Attributes) (u user.Info, err error) { + defer func() { + if r := recover(); r != nil { + u = nil + err = fmt.Errorf("GetUser panic: %v", r) + } + }() + return a.GetUser(), nil +} + +func authorizerType(authz authorizer.Authorizer) string { + if isNil(authz) { + return "" + } + return fmt.Sprintf("%T", authz) +} + +func authenticateSafely(authn authenticator.Request, req *http.Request, target string) (resp *authenticator.Response, ok bool, err error) { + defer func() { + if r := recover(); r != nil { + resp = nil + ok = false + err = fmt.Errorf("authenticator panic for cluster %q (type=%T): %v", target, authn, r) + } + }() + resp, ok, err = authn.AuthenticateRequest(req) + if err != nil || !ok { + return resp, ok, err + } + if resp == nil { + return nil, false, fmt.Errorf("invalid authenticator response for cluster %q (type=%T): response is nil", target, authn) + } + if isNil(resp.User) { + return nil, false, fmt.Errorf("invalid authenticator response for cluster %q (type=%T): user is nil", target, authn) + } + return resp, ok, nil +} + func clusterFromContext(ctx context.Context) string { cid, _, _ := mc.FromContext(ctx) return cid diff --git a/pkg/multicluster/auth/dispatcher_test.go b/pkg/multicluster/auth/dispatcher_test.go index fa30345..ea4d588 100644 --- a/pkg/multicluster/auth/dispatcher_test.go +++ b/pkg/multicluster/auth/dispatcher_test.go @@ -24,6 +24,12 @@ func (f *fakeAuthenticator) AuthenticateRequest(*http.Request) (*authenticator.R return &authenticator.Response{User: &user.DefaultInfo{Name: f.name}}, true, nil } +type badUserAuthenticator struct{} + +func (b *badUserAuthenticator) AuthenticateRequest(*http.Request) (*authenticator.Response, bool, error) { + return &authenticator.Response{}, true, nil +} + type fakeAuthorizer struct { name string called *string @@ -43,6 +49,16 @@ func (f *fakeAuthorizer) RulesFor(ctx context.Context, _ user.Info, _ string) ([ return nil, nil, false, nil } +type panicAuthorizer struct{} + +func (p *panicAuthorizer) Authorize(context.Context, authorizer.Attributes) (authorizer.Decision, string, error) { + panic("boom") +} + +func (p *panicAuthorizer) RulesFor(context.Context, user.Info, string) ([]authorizer.ResourceRuleInfo, []authorizer.NonResourceRuleInfo, bool, error) { + return nil, nil, false, nil +} + type fakeResolver struct { authn authenticator.Request authz authorizer.Authorizer @@ -102,9 +118,10 @@ func TestClusterAuthorizerDispatch(t *testing.T) { resolver := &fakeResolver{authz: cluster, ruleResolver: cluster, lastCluster: &lastCluster} dispatch := NewClusterAuthorizer("root", root, root, resolver) + attrs := authorizer.AttributesRecord{User: &user.DefaultInfo{Name: "test-user"}} ctx := mc.WithCluster(context.Background(), "root", false) - _, _, _ = dispatch.Authorize(ctx, authorizer.AttributesRecord{}) + _, _, _ = dispatch.Authorize(ctx, attrs) if called != "root" { t.Fatalf("expected root authorizer, got %q", called) } @@ -112,7 +129,7 @@ func TestClusterAuthorizerDispatch(t *testing.T) { called = "" lastCluster = "" ctx = mc.WithCluster(context.Background(), "c-2", false) - _, _, _ = dispatch.Authorize(ctx, authorizer.AttributesRecord{}) + _, _, _ = dispatch.Authorize(ctx, attrs) if called != "cluster" { t.Fatalf("expected cluster authorizer, got %q", called) } @@ -155,3 +172,94 @@ func TestClusterAuthenticatorUsesTokenHintWithoutClusterContext(t *testing.T) { t.Fatalf("expected resolver cluster c-42, got %q", lastCluster) } } + +func TestClusterAuthenticatorRejectsNilUserResponse(t *testing.T) { + dispatch := NewClusterAuthenticator("root", nil, &fakeResolver{ + authn: &badUserAuthenticator{}, + }) + req := httptest.NewRequest("GET", "http://example", nil) + req = req.WithContext(mc.WithCluster(req.Context(), "c-bad-user", false)) + + resp, ok, err := dispatch.AuthenticateRequest(req) + if err == nil { + t.Fatalf("expected error, got nil") + } + if ok { + t.Fatalf("expected ok=false for invalid auth response") + } + if resp != nil { + t.Fatalf("expected nil response on invalid auth response") + } +} + +func TestClusterAuthorizerTypedNilDoesNotPanic(t *testing.T) { + var typedNilCluster *fakeAuthorizer + dispatch := NewClusterAuthorizer("root", &fakeAuthorizer{name: "root"}, nil, &fakeResolver{ + authz: typedNilCluster, + ruleResolver: typedNilCluster, + }) + attrs := authorizer.AttributesRecord{User: &user.DefaultInfo{Name: "test-user"}} + + ctx := mc.WithCluster(context.Background(), "c-typed-nil", false) + decision, reason, err := dispatch.Authorize(ctx, attrs) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + if decision != authorizer.DecisionNoOpinion { + t.Fatalf("expected DecisionNoOpinion, got %v", decision) + } + if reason != "no cluster authorizer" { + t.Fatalf("expected no cluster authorizer reason, got %q", reason) + } + + _, _, incomplete, err := dispatch.RulesFor(ctx, &user.DefaultInfo{Name: "test"}, "") + if err != nil { + t.Fatalf("expected nil error from RulesFor, got %v", err) + } + if incomplete { + t.Fatalf("expected incomplete=false for missing resolver") + } +} + +func TestClusterAuthorizerRootTypedNilDoesNotPanic(t *testing.T) { + var typedNilRoot *fakeAuthorizer + dispatch := NewClusterAuthorizer("root", typedNilRoot, nil, nil) + + decision, reason, err := dispatch.Authorize(context.Background(), authorizer.AttributesRecord{User: &user.DefaultInfo{Name: "test-user"}}) + if err != nil { + t.Fatalf("expected nil error, got %v", err) + } + if decision != authorizer.DecisionNoOpinion { + t.Fatalf("expected DecisionNoOpinion, got %v", decision) + } + if reason != "no root authorizer" { + t.Fatalf("expected no root authorizer reason, got %q", reason) + } +} + +func TestClusterAuthorizerPanicIsRecovered(t *testing.T) { + dispatch := NewClusterAuthorizer("root", &fakeAuthorizer{name: "root"}, nil, &fakeResolver{ + authz: &panicAuthorizer{}, + }) + attrs := authorizer.AttributesRecord{User: &user.DefaultInfo{Name: "test-user"}} + + ctx := mc.WithCluster(context.Background(), "c-panic", false) + decision, _, err := dispatch.Authorize(ctx, attrs) + if err == nil { + t.Fatalf("expected recovered panic error, got nil") + } + if decision != authorizer.DecisionDeny { + t.Fatalf("expected DecisionDeny on panic, got %v", decision) + } +} + +func TestClusterAuthorizerNilUserDenied(t *testing.T) { + dispatch := NewClusterAuthorizer("root", &fakeAuthorizer{name: "root"}, nil, nil) + decision, _, err := dispatch.Authorize(context.Background(), authorizer.AttributesRecord{}) + if err == nil { + t.Fatalf("expected error for nil user, got nil") + } + if decision != authorizer.DecisionDeny { + t.Fatalf("expected DecisionDeny for nil user, got %v", decision) + } +} diff --git a/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go b/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go index 2e20978..5ecf0a5 100644 --- a/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go +++ b/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go @@ -131,7 +131,23 @@ func (m *CRDRuntimeManager) ServesGroupVersion(clusterID, group, version string, crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) return served, nil } - // No fallback direct API lookup: shared projection is the source of truth. + // Reconcile from shared projection, then briefly wait for projection updates + // to absorb fresh CRD install/update events before returning not served. + if served, ok := m.lookupFromSharedProjection(clusterID, group, version); ok { + r := result(served) + crdServesCacheMiss.WithLabelValues("projection").Inc() + crdServesLookupTotal.WithLabelValues(r).Inc() + crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) + return served, nil + } + if served, ok := m.waitForSharedProjection(clusterID, group, version, 2*time.Second); ok { + r := result(served) + crdServesCacheMiss.WithLabelValues("projection-wait").Inc() + crdServesLookupTotal.WithLabelValues(r).Inc() + crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) + return served, nil + } + r := result(false) crdServesLookupTotal.WithLabelValues(r).Inc() crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) @@ -180,7 +196,7 @@ func (m *CRDRuntimeManager) ensureSharedRuntime(stopCh <-chan struct{}) (runtime go crdServer.GenericAPIServer.RunPostStartHooks(runCtx) entry := runtimeEntry{ - handler: crdServer.GenericAPIServer.Handler.NonGoRestfulMux, + handler: crdServer.GenericAPIServer.Handler.Director, server: crdServer.GenericAPIServer, cancel: cancel, } @@ -261,6 +277,52 @@ func (m *CRDRuntimeManager) lookupFromInformerIndex(clusterID, group, version st return m.servesIndex.Lookup(clusterID, group, version) } +func (m *CRDRuntimeManager) lookupFromSharedProjection(clusterID, group, version string) (bool, bool) { + crds := m.sharedProjection.List(clusterID) + objs := make([]interface{}, 0, len(crds)) + served := false + for _, crd := range crds { + if crd == nil { + continue + } + objs = append(objs, crd) + if !isCRDEstablished(crd) || crd.Spec.Group != group { + continue + } + for _, v := range crd.Spec.Versions { + if v.Served && v.Name == version { + served = true + break + } + } + } + m.rebuildClusterIndex(clusterID, objs) + return served, true +} + +func (m *CRDRuntimeManager) waitForSharedProjection(clusterID, group, version string, timeout time.Duration) (bool, bool) { + if timeout <= 0 { + timeout = 2 * time.Second + } + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + if served, ok := m.lookupFromInformerIndex(clusterID, group, version); ok { + return served, true + } + if served, ok := m.lookupFromSharedProjection(clusterID, group, version); ok && served { + return true, true + } + time.Sleep(25 * time.Millisecond) + } + if served, ok := m.lookupFromInformerIndex(clusterID, group, version); ok { + return served, true + } + if served, ok := m.lookupFromSharedProjection(clusterID, group, version); ok { + return served, true + } + return false, false +} + func (m *CRDRuntimeManager) ensureSharedCRDState(stopCh <-chan struct{}) error { m.mu.Lock() if m.sharedStarted {