diff --git a/cmd/apiserver/app/config.go b/cmd/apiserver/app/config.go
index 793dcad..88e4f28 100644
--- a/cmd/apiserver/app/config.go
+++ b/cmd/apiserver/app/config.go
@@ -322,6 +322,10 @@ func NewConfig(opts options.CompletedOptions) (*Config, error) {
 		ControlPlaneSegment: mcOpts.ControlPlaneSegment,
 		DefaultCluster:      mcOpts.DefaultCluster,
 	})
+	apiExtensions.ExtraConfig.CRDGetter = crdRuntimeMgr.CRDGetterForRequest
+	apiExtensions.ExtraConfig.CRDListerForRequest = crdRuntimeMgr.CRDListerForRequest
+	crdController := mcbootstrap.NewMulticlusterCRDController(crdRuntimeMgr, mcOpts.DefaultCluster)
+	crdController.Start(genericConfig.DrainedNotify())
 	prevOnClusterSelected := mcOpts.OnClusterSelected
 	mcOpts.OnClusterSelected = func(clusterID string) {
 		if prevOnClusterSelected != nil {
@@ -330,7 +334,37 @@ func NewConfig(opts options.CompletedOptions) (*Config, error) {
 		if clusterID == "" || clusterID == mcOpts.DefaultCluster {
 			return
 		}
-		_, _ = crdRuntimeMgr.Runtime(clusterID, genericConfig.DrainedNotify())
+		crdController.EnsureCluster(clusterID)
+	}
+	serveClusterCRD := func(w http.ResponseWriter, r *http.Request, conf *server.Config, clusterID, caller string) bool {
+		group, version, ok := apisGroupVersionFromPath(r.URL.Path)
+		if !ok {
+			return false
+		}
+
+		served, err := crdRuntimeMgr.ServesGroupVersion(clusterID, group, version, genericConfig.DrainedNotify())
+		if err != nil {
+			klog.Errorf("mc.crdRuntime lookup failed at %s cluster=%s path=%s err=%v", caller, clusterID, r.URL.Path, err)
+			http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable)
+			return true
+		}
+		if !served {
+			return false
+		}
+
+		h, err := crdRuntimeMgr.Runtime(clusterID, genericConfig.DrainedNotify())
+		if err != nil || h == nil {
+			klog.Errorf("mc.crdRuntime unresolved at %s cluster=%s path=%s err=%v", caller, clusterID, r.URL.Path, err)
+			http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable)
+			return true
+		}
+		// Ensure RequestInfo is computed from the normalized /apis path
+		// before entering the cluster-scoped CRD runtime handler.
+		h = genericfilters.WithRequestInfo(h, conf.RequestInfoResolver)
+		h = genericfilters.WithAuditInit(h)
+		h = serverfilters.WithPanicRecovery(h, conf.RequestInfoResolver)
+		h.ServeHTTP(w, r)
+		return true
 	}
 	// Ensure CRDs are also routed through the multicluster handler
 	apiExtensions.GenericConfig.BuildHandlerChainFunc = func(h http.Handler, conf *server.Config) http.Handler {
@@ -339,28 +373,7 @@ func NewConfig(opts options.CompletedOptions) (*Config, error) {
 		dispatch := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			cid, _, _ := mc.FromContext(r.Context())
 			if cid != "" && cid != mcOpts.DefaultCluster {
-				if group, version, ok := apisGroupVersionFromPath(r.URL.Path); ok {
-					served, err := crdRuntimeMgr.ServesGroupVersion(cid, group, version, genericConfig.DrainedNotify())
-					if err != nil {
-						klog.Errorf("mc.crdRuntime lookup failed at apiextensions cluster=%s path=%s err=%v", cid, r.URL.Path, err)
-						http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable)
-						return
-					}
-					if !served {
-						base.ServeHTTP(w, r)
-						return
-					}
-					if h, err := crdRuntimeMgr.Runtime(cid, genericConfig.DrainedNotify()); err == nil && h != nil {
-						// Ensure RequestInfo is computed from the normalized /apis path
-						// before entering the cluster-scoped CRD runtime handler.
-						h = genericfilters.WithRequestInfo(h, conf.RequestInfoResolver)
-						h = genericfilters.WithAuditInit(h)
-						h = serverfilters.WithPanicRecovery(h, conf.RequestInfoResolver)
-						h.ServeHTTP(w, r)
-						return
-					}
-					klog.Errorf("mc.crdRuntime unresolved cluster=%s path=%s", cid, r.URL.Path)
-					http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable)
+				if serveClusterCRD(w, r, conf, cid, "apiextensions") {
 					return
 				}
 			}
@@ -396,25 +409,8 @@ func NewConfig(opts options.CompletedOptions) (*Config, error) {
 		dispatch := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 			cid, _, _ := mc.FromContext(r.Context())
 			if cid != "" && cid != mcOpts.DefaultCluster && crdRuntimeMgr != nil {
-				if group, version, ok := apisGroupVersionFromPath(r.URL.Path); ok {
-					served, err := crdRuntimeMgr.ServesGroupVersion(cid, group, version, genericConfig.DrainedNotify())
-					if err != nil {
-						klog.Errorf("mc.crdRuntime lookup failed at aggregator cluster=%s path=%s err=%v", cid, r.URL.Path, err)
-						http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable)
-						return
-					}
-					if served {
-						if h, err := crdRuntimeMgr.Runtime(cid, genericConfig.DrainedNotify()); err == nil && h != nil {
-							h = genericfilters.WithRequestInfo(h, conf.RequestInfoResolver)
-							h = genericfilters.WithAuditInit(h)
-							h = serverfilters.WithPanicRecovery(h, conf.RequestInfoResolver)
-							h.ServeHTTP(w, r)
-							return
-						}
-						klog.Errorf("mc.crdRuntime unresolved at aggregator cluster=%s path=%s", cid, r.URL.Path)
-						http.Error(w, "cluster CRD runtime unavailable", http.StatusServiceUnavailable)
-						return
-					}
-				}
+				if serveClusterCRD(w, r, conf, cid, "aggregator") {
+					return
+				}
 			}
 			base.ServeHTTP(w, r)
diff --git a/pkg/multicluster/admission/namespace/manager.go b/pkg/multicluster/admission/namespace/manager.go
index 1ea560b..3717920 100644
--- a/pkg/multicluster/admission/namespace/manager.go
+++ b/pkg/multicluster/admission/namespace/manager.go
@@ -3,6 +3,7 @@ package namespace
 import (
 	"sync"
 
+	"github.com/kplane-dev/apiserver/pkg/multicluster/scopedinformer"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
@@ -31,10 +32,16 @@ type Manager struct {
 	mu       sync.Mutex
 	clusters map[string]*clusterEnv
+
+	sharedOnce sync.Once
+	sharedErr  error
+	shared     informers.SharedInformerFactory
+	sharedStop <-chan struct{}
+	sharedOwn  chan struct{}
 }
 
 type clusterEnv struct {
-	stopCh <-chan struct{}
+	stopCh chan struct{}
 	cid    string
 
 	clientset kubernetes.Interface
 	informers informers.SharedInformerFactory
@@ -45,9 +52,6 @@ func NewManager(opts Options) *Manager {
 	if opts.ClientPool == nil && opts.BaseLoopbackClientConfig != nil {
 		opts.ClientPool = mc.NewClientPool(opts.BaseLoopbackClientConfig, opts.PathPrefix, opts.ControlPlaneSegment)
 	}
-	if opts.InformerPool == nil && opts.ClientPool != nil {
-		opts.InformerPool = mc.NewInformerPoolFromClientPool(opts.ClientPool, 0, nil)
-	}
 	return &Manager{
 		opts:     opts,
 		clusters: map[string]*clusterEnv{},
@@ -62,24 +66,82 @@ func (m *Manager) envForCluster(clusterID string) (*clusterEnv, error) {
 		return e, nil
 	}
 
-	if m.opts.InformerPool == nil {
+	if m.opts.ClientPool == nil {
 		return nil, mc.ErrMissingClientFactory
 	}
-	cs, inf, stopCh, err := m.opts.InformerPool.Get(clusterID)
+	cs, err := m.opts.ClientPool.KubeClientForCluster(clusterID)
+	if err != nil {
+		return nil, err
+	}
+	scoped, err := m.scopedNamespaceFactory(clusterID)
 	if err != nil {
 		return nil, err
 	}
 
+	stopCh := make(chan struct{})
 	e := &clusterEnv{
 		cid:       clusterID,
 		stopCh:    stopCh,
 		clientset: cs,
-		informers: inf,
+		informers: scoped,
 	}
 
 	// Warm the namespaces informer (used by NamespaceLifecycle).
-	_ = inf.Core().V1().Namespaces().Informer()
-	inf.Start(stopCh)
+	_ = scoped.Core().V1().Namespaces().Informer()
+	scoped.Start(stopCh)
 	m.clusters[clusterID] = e
 	return e, nil
 }
+
+func (m *Manager) scopedNamespaceFactory(clusterID string) (informers.SharedInformerFactory, error) {
+	shared, err := m.ensureSharedFactory()
+	if err != nil {
+		return nil, err
+	}
+	return newScopedFactory(clusterID, mc.DefaultClusterAnnotation, shared), nil
+}
+
+func (m *Manager) ensureSharedFactory() (informers.SharedInformerFactory, error) {
+	m.sharedOnce.Do(func() {
+		if m.opts.BaseLoopbackClientConfig == nil {
+			m.sharedErr = mc.ErrMissingClientFactory
+			return
+		}
+		cs, err := scopedinformer.NewAllClustersKubeClient(m.opts.BaseLoopbackClientConfig)
+		if err != nil {
+			m.sharedErr = err
+			return
+		}
+		factory := informers.NewSharedInformerFactory(cs, 0)
+		if err := factory.Core().V1().Namespaces().Informer().SetTransform(transformNamespaceForShared(mc.DefaultClusterAnnotation)); err != nil {
+			m.sharedErr = err
+			return
+		}
+		if err := scopedinformer.EnsureClusterIndex(factory.Core().V1().Namespaces().Informer(), mc.DefaultClusterAnnotation); err != nil {
+			m.sharedErr = err
+			return
+		}
+		if m.sharedStop == nil {
+			m.sharedOwn = make(chan struct{})
+			m.sharedStop = m.sharedOwn
+		}
+		factory.Start(m.sharedStop)
+		m.shared = factory
+	})
+	if m.sharedErr != nil {
+		return nil, m.sharedErr
+	}
+	return m.shared, nil
+}
+
+// StopCluster is test-oriented cleanup; production can leave informers running.
+func (m *Manager) StopCluster(clusterID string) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	if e, ok := m.clusters[clusterID]; ok {
+		if e.stopCh != nil {
+			close(e.stopCh)
+		}
+		delete(m.clusters, clusterID)
+	}
+}
diff --git a/pkg/multicluster/admission/namespace/scoped_factory.go b/pkg/multicluster/admission/namespace/scoped_factory.go
new file mode 100644
index 0000000..1a615b4
--- /dev/null
+++ b/pkg/multicluster/admission/namespace/scoped_factory.go
@@ -0,0 +1,206 @@
+package namespace
+
+import (
+	"fmt"
+	"reflect"
+	"strings"
+
+	mc "github.com/kplane-dev/apiserver/pkg/multicluster"
+	"github.com/kplane-dev/apiserver/pkg/multicluster/scopedinformer"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/client-go/informers"
+	coreinformers "k8s.io/client-go/informers/core"
+	coreinformersv1 "k8s.io/client-go/informers/core/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	"k8s.io/client-go/tools/cache"
+)
+
+const sharedNamespaceNamePrefix = "__mcns__"
+
+type scopedFactory struct {
+	informers.SharedInformerFactory
+	clusterID       string
+	clusterLabelKey string
+	shared          informers.SharedInformerFactory
+}
+
+func newScopedFactory(clusterID, clusterLabelKey string, shared informers.SharedInformerFactory) informers.SharedInformerFactory {
+	if clusterLabelKey == "" {
+		clusterLabelKey = mc.DefaultClusterAnnotation
+	}
+	return &scopedFactory{
+		SharedInformerFactory: shared,
+		clusterID:             clusterID,
+		clusterLabelKey:       clusterLabelKey,
+		shared:                shared,
+	}
+}
+
+func (f *scopedFactory) Core() coreinformers.Interface { return &scopedCoreGroup{f: f} }
+
+func (f *scopedFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool {
+	out := f.SharedInformerFactory.WaitForCacheSync(stopCh)
+	if out == nil {
+		out = map[reflect.Type]bool{}
+	}
+	out[reflect.TypeOf(&corev1.Namespace{})] = f.shared.Core().V1().Namespaces().Informer().HasSynced()
+	return out
+}
+
+type scopedCoreGroup struct{ f *scopedFactory }
+
+func (g *scopedCoreGroup) V1() coreinformersv1.Interface { return &scopedCoreV1{f: g.f} }
+
+type scopedCoreV1 struct{ f *scopedFactory }
+
+func (v *scopedCoreV1) ComponentStatuses() coreinformersv1.ComponentStatusInformer {
+	return v.f.SharedInformerFactory.Core().V1().ComponentStatuses()
+}
+func (v *scopedCoreV1) ConfigMaps() coreinformersv1.ConfigMapInformer {
+	return v.f.SharedInformerFactory.Core().V1().ConfigMaps()
+}
+func (v *scopedCoreV1) Endpoints() coreinformersv1.EndpointsInformer {
+	return v.f.SharedInformerFactory.Core().V1().Endpoints()
+}
+func (v *scopedCoreV1) Events() coreinformersv1.EventInformer {
+	return v.f.SharedInformerFactory.Core().V1().Events()
+}
+func (v *scopedCoreV1) LimitRanges() coreinformersv1.LimitRangeInformer {
+	return v.f.SharedInformerFactory.Core().V1().LimitRanges()
+}
+func (v *scopedCoreV1) Namespaces() coreinformersv1.NamespaceInformer {
+	base := v.f.shared.Core().V1().Namespaces().Informer()
+	return &scopedNamespaceInformer{
+		informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey),
+		lister:   &scopedNamespaceLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID},
+	}
+}
+func (v *scopedCoreV1) Nodes() coreinformersv1.NodeInformer {
+	return v.f.SharedInformerFactory.Core().V1().Nodes()
+}
+func (v *scopedCoreV1) PersistentVolumes() coreinformersv1.PersistentVolumeInformer {
+	return v.f.SharedInformerFactory.Core().V1().PersistentVolumes()
+}
+func (v *scopedCoreV1) PersistentVolumeClaims() coreinformersv1.PersistentVolumeClaimInformer {
+	return v.f.SharedInformerFactory.Core().V1().PersistentVolumeClaims()
+}
+func (v *scopedCoreV1) Pods() coreinformersv1.PodInformer {
+	return v.f.SharedInformerFactory.Core().V1().Pods()
+}
+func (v *scopedCoreV1) PodTemplates() coreinformersv1.PodTemplateInformer {
+	return v.f.SharedInformerFactory.Core().V1().PodTemplates()
+}
+func (v *scopedCoreV1) ReplicationControllers() coreinformersv1.ReplicationControllerInformer {
+	return v.f.SharedInformerFactory.Core().V1().ReplicationControllers()
+}
+func (v *scopedCoreV1) ResourceQuotas() coreinformersv1.ResourceQuotaInformer {
+	return v.f.SharedInformerFactory.Core().V1().ResourceQuotas()
+}
+func (v *scopedCoreV1) Secrets() coreinformersv1.SecretInformer {
+	return v.f.SharedInformerFactory.Core().V1().Secrets()
+}
+func (v *scopedCoreV1) Services() coreinformersv1.ServiceInformer {
+	return v.f.SharedInformerFactory.Core().V1().Services()
+}
+func (v *scopedCoreV1) ServiceAccounts() coreinformersv1.ServiceAccountInformer {
+	return v.f.SharedInformerFactory.Core().V1().ServiceAccounts()
+}
+
+func newFilteredSharedIndexInformer(shared cache.SharedIndexInformer, clusterID, clusterLabelKey string) cache.SharedIndexInformer {
+	return scopedinformer.NewFilteredSharedIndexInformer(shared, clusterID, clusterLabelKey)
+}
+
+func objectCluster(obj interface{}, clusterLabelKey string) string {
+	return scopedinformer.ObjectCluster(obj, clusterLabelKey)
+}
+
+func filteredByCluster(indexer cache.Indexer, clusterID string) []interface{} {
+	return scopedinformer.FilteredByCluster(indexer, clusterID)
+}
+
+func transformNamespaceForShared(clusterLabelKey string) cache.TransformFunc {
+	return func(obj interface{}) (interface{}, error) {
+		cid := objectCluster(obj, clusterLabelKey)
+		if cid == "" {
+			return obj, nil
+		}
+		ns, ok := obj.(*corev1.Namespace)
+		if !ok {
+			return obj, nil
+		}
+		cp := ns.DeepCopy()
+		cp.Name = encodeSharedNamespaceName(cid, cp.Name)
+		return cp, nil
+	}
+}
+
+func encodeSharedNamespaceName(clusterID, name string) string {
+	if clusterID == "" || name == "" {
+		return name
+	}
+	prefix := sharedNamespaceNamePrefix + clusterID + "__"
+	if strings.HasPrefix(name, prefix) {
+		return name
+	}
+	return prefix + name
+}
+
+func decodeSharedNamespaceName(clusterID, name string) (string, bool) {
+	prefix := sharedNamespaceNamePrefix + clusterID + "__"
+	if strings.HasPrefix(name, prefix) {
+		return strings.TrimPrefix(name, prefix), true
+	}
+	return name, false
+}
+
+type scopedNamespaceInformer struct {
+	informer cache.SharedIndexInformer
+	lister   corelisters.NamespaceLister
+}
+
+func (i *scopedNamespaceInformer) Informer() cache.SharedIndexInformer { return i.informer }
+func (i *scopedNamespaceInformer) Lister() corelisters.NamespaceLister { return i.lister }
+
+type scopedNamespaceLister struct {
+	indexer   cache.Indexer
+	clusterID string
+}
+
+func (l *scopedNamespaceLister) List(sel labels.Selector) (ret []*corev1.Namespace, err error) {
+	if sel == nil {
+		sel = labels.Everything()
+	}
+	for _, it := range filteredByCluster(l.indexer, l.clusterID) {
+		obj, ok := it.(*corev1.Namespace)
+		if !ok {
+			continue
+		}
+		if sel.Matches(labels.Set(obj.Labels)) {
+			cp := obj.DeepCopy()
+			cp.Name, _ = decodeSharedNamespaceName(l.clusterID, cp.Name)
+			ret = append(ret, cp)
+		}
+	}
+	return ret, nil
+}
+
+func (l *scopedNamespaceLister) Get(name string) (*corev1.Namespace, error) {
+	encoded := encodeSharedNamespaceName(l.clusterID, name)
+	for _, it := range filteredByCluster(l.indexer, l.clusterID) {
+		obj, ok := it.(*corev1.Namespace)
+		if !ok {
+			continue
+		}
+		if obj.Name == name || obj.Name == encoded {
+			cp := obj.DeepCopy()
+			cp.Name, _ = decodeSharedNamespaceName(l.clusterID, cp.Name)
+			return cp, nil
+		}
+	}
+	return nil, fmt.Errorf("namespace %q not found", name)
+}
+
+var _ corelisters.NamespaceLister = (*scopedNamespaceLister)(nil)
+var _ coreinformersv1.NamespaceInformer = (*scopedNamespaceInformer)(nil)
+var _ informers.SharedInformerFactory = (*scopedFactory)(nil)
diff --git a/pkg/multicluster/admission/webhook/generic/config_source.go b/pkg/multicluster/admission/webhook/generic/config_source.go
new file mode 100644
index 0000000..c2a39cb
--- /dev/null
+++ b/pkg/multicluster/admission/webhook/generic/config_source.go
@@ -0,0 +1,149 @@
+package generic
+
+import (
+	"fmt"
+	"sort"
+	"sync"
+
+	v1 "k8s.io/api/admissionregistration/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apiserver/pkg/admission/plugin/webhook"
+	"k8s.io/client-go/informers"
+	admissionregistrationlisters "k8s.io/client-go/listers/admissionregistration/v1"
+)
+
+type accessorCacheEntry struct {
+	resourceVersion string
+	accessors       []webhook.WebhookAccessor
+}
+
+type mutatingConfigSource struct {
+	lister    admissionregistrationlisters.MutatingWebhookConfigurationLister
+	hasSynced func() bool
+
+	mu    sync.RWMutex
+	cache map[string]accessorCacheEntry
+}
+
+type validatingConfigSource struct {
+	lister    admissionregistrationlisters.ValidatingWebhookConfigurationLister
+	hasSynced func() bool
+
+	mu    sync.RWMutex
+	cache map[string]accessorCacheEntry
+}
+
+// NewListBackedMutatingWebhookSource returns a webhook source backed by lister reads.
+// Unlike upstream configuration managers, this source does not register per-plugin
+// informer handlers, which avoids O(clusters) listener fanout.
+func NewListBackedMutatingWebhookSource(f informers.SharedInformerFactory) Source {
+	inf := f.Admissionregistration().V1().MutatingWebhookConfigurations()
+	return &mutatingConfigSource{
+		lister:    inf.Lister(),
+		hasSynced: inf.Informer().HasSynced,
+		cache:     map[string]accessorCacheEntry{},
+	}
+}
+
+// NewListBackedValidatingWebhookSource returns a webhook source backed by lister reads.
+// Unlike upstream configuration managers, this source does not register per-plugin
+// informer handlers, which avoids O(clusters) listener fanout.
+func NewListBackedValidatingWebhookSource(f informers.SharedInformerFactory) Source {
+	inf := f.Admissionregistration().V1().ValidatingWebhookConfigurations()
+	return &validatingConfigSource{
+		lister:    inf.Lister(),
+		hasSynced: inf.Informer().HasSynced,
+		cache:     map[string]accessorCacheEntry{},
+	}
+}
+
+func (s *mutatingConfigSource) Webhooks() []webhook.WebhookAccessor {
+	configs, err := s.lister.List(labels.Everything())
+	if err != nil {
+		return nil
+	}
+	sort.SliceStable(configs, func(i, j int) bool { return configs[i].Name < configs[j].Name })
+	total := 0
+	for _, cfg := range configs {
+		total += len(cfg.Webhooks)
+	}
+	out := make([]webhook.WebhookAccessor, 0, total)
+	for _, cfg := range configs {
+		out = append(out, s.accessorsForMutatingConfig(cfg)...)
+	}
+	return out
+}
+
+func (s *mutatingConfigSource) HasSynced() bool {
+	return s.hasSynced()
+}
+
+func (s *mutatingConfigSource) accessorsForMutatingConfig(cfg *v1.MutatingWebhookConfiguration) []webhook.WebhookAccessor {
+	s.mu.RLock()
+	cached, ok := s.cache[cfg.Name]
+	s.mu.RUnlock()
+	if ok && cached.resourceVersion == cfg.ResourceVersion {
+		return cached.accessors
+	}
+	names := map[string]int{}
+	accessors := make([]webhook.WebhookAccessor, 0, len(cfg.Webhooks))
+	for i := range cfg.Webhooks {
+		n := cfg.Webhooks[i].Name
+		uid := fmt.Sprintf("%s/%s/%d", cfg.Name, n, names[n])
+		names[n]++
+		accessors = append(accessors, webhook.NewMutatingWebhookAccessor(uid, cfg.Name, &cfg.Webhooks[i]))
+	}
+	s.mu.Lock()
+	s.cache[cfg.Name] = accessorCacheEntry{
+		resourceVersion: cfg.ResourceVersion,
+		accessors:       accessors,
+	}
+	s.mu.Unlock()
+	return accessors
+}
+
+func (s *validatingConfigSource) Webhooks() []webhook.WebhookAccessor {
+	configs, err := s.lister.List(labels.Everything())
+	if err != nil {
+		return nil
+	}
+	sort.SliceStable(configs, func(i, j int) bool { return configs[i].Name < configs[j].Name })
+	total := 0
+	for _, cfg := range configs {
+		total += len(cfg.Webhooks)
+	}
+	out := make([]webhook.WebhookAccessor, 0, total)
+	for _, cfg := range configs {
+		out = append(out, s.accessorsForValidatingConfig(cfg)...)
+	}
+	return out
+}
+
+func (s *validatingConfigSource) HasSynced() bool {
+	return s.hasSynced()
+}
+
+func (s *validatingConfigSource) accessorsForValidatingConfig(cfg *v1.ValidatingWebhookConfiguration) []webhook.WebhookAccessor {
+	s.mu.RLock()
+	cached, ok := s.cache[cfg.Name]
+	s.mu.RUnlock()
+	if ok && cached.resourceVersion == cfg.ResourceVersion {
+		return cached.accessors
+	}
+	names := map[string]int{}
+	accessors := make([]webhook.WebhookAccessor, 0, len(cfg.Webhooks))
+	for i := range cfg.Webhooks {
+		n := cfg.Webhooks[i].Name
+		uid := fmt.Sprintf("%s/%s/%d", cfg.Name, n, names[n])
+		names[n]++
+		accessors = append(accessors, webhook.NewValidatingWebhookAccessor(uid, cfg.Name, &cfg.Webhooks[i]))
+	}
+	s.mu.Lock()
+	s.cache[cfg.Name] = accessorCacheEntry{
+		resourceVersion: cfg.ResourceVersion,
+		accessors:       accessors,
+	}
+	s.mu.Unlock()
+	return accessors
+}
diff --git a/pkg/multicluster/admission/webhook/manager.go b/pkg/multicluster/admission/webhook/manager.go
index 7c66e8b..d8ae25c 100644
--- a/pkg/multicluster/admission/webhook/manager.go
+++ b/pkg/multicluster/admission/webhook/manager.go
@@ -1,6 +1,7 @@
 package webhook
 
 import (
+	"fmt"
 	"reflect"
 	"sync"
 
@@ -8,8 +9,10 @@ import (
 	clientgoinformers "k8s.io/client-go/informers"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/cache"
 
 	mc "github.com/kplane-dev/apiserver/pkg/multicluster"
+	"github.com/kplane-dev/apiserver/pkg/multicluster/scopedinformer"
 )
 
 type Options struct {
@@ -46,17 +49,22 @@ type Manager struct {
 
 	mu       sync.Mutex
 	clusters map[string]*clusterEnv
+
+	sharedOnce sync.Once
+	sharedErr  error
+	shared     clientgoinformers.SharedInformerFactory
+	sharedStop <-chan struct{}
+	sharedOwn  chan struct{}
+	sharedSync chan struct{}
 }
 
 type clusterEnv struct {
 	cid    string
 	stopCh <-chan struct{}
+	ownCh  chan struct{}
 	synced chan struct{}
 
-	okMu sync.Mutex
-	ok   bool
-
 	clientset kubernetes.Interface
 	informers clientgoinformers.SharedInformerFactory
 
 	serviceResolver *directServiceResolver
@@ -67,9 +75,6 @@ func NewManager(opts Options) *Manager {
 	if opts.ClientPool == nil && opts.BaseLoopbackClientConfig != nil {
 		opts.ClientPool = mc.NewClientPool(opts.BaseLoopbackClientConfig, opts.PathPrefix, opts.ControlPlaneSegment)
 	}
-	if opts.InformerPool == nil && opts.ClientPool != nil {
-		opts.InformerPool = mc.NewInformerPoolFromClientPool(opts.ClientPool, 0, nil)
-	}
 	return &Manager{
 		opts:     opts,
 		clusters: map[string]*clusterEnv{},
@@ -84,46 +89,102 @@ func (m *Manager) envForCluster(clusterID string) (*clusterEnv, error) {
 		return e, nil
 	}
 
-	if m.opts.InformerPool == nil {
+	if m.opts.ClientPool == nil {
 		return nil, mc.ErrMissingClientFactory
 	}
-	cs, inf, stopCh, err := m.opts.InformerPool.Get(clusterID)
+	cs, err := m.opts.ClientPool.KubeClientForCluster(clusterID)
+	if err != nil {
+		return nil, err
+	}
+	scoped, err := m.scopedWebhookFactory(clusterID)
 	if err != nil {
 		return nil, err
 	}
+	stopCh := make(chan struct{})
 
-	sr := newDirectServiceResolver(cs, m.opts.EnableAggregatorRouting, m.opts.Hostname)
+	sr := newDirectServiceResolver(
+		scoped.Core().V1().Services().Lister(),
+		scoped.Discovery().V1().EndpointSlices().Lister(),
+		m.opts.EnableAggregatorRouting,
+		m.opts.Hostname,
+	)
 	e := &clusterEnv{
 		cid:             clusterID,
 		stopCh:          stopCh,
-		synced:          make(chan struct{}),
+		ownCh:           stopCh,
+		synced:          m.sharedSync,
 		clientset:       cs,
-		informers:       inf,
+		informers:       scoped,
 		serviceResolver: sr,
 	}
 
 	// Warm required informers (must happen before Start()).
-	_ = inf.Core().V1().Namespaces().Informer()
-	_ = inf.Core().V1().Services().Informer()
-	_ = inf.Discovery().V1().EndpointSlices().Informer()
-	_ = inf.Admissionregistration().V1().MutatingWebhookConfigurations().Informer()
-	_ = inf.Admissionregistration().V1().ValidatingWebhookConfigurations().Informer()
-	inf.Start(stopCh)
-
-	// Start informers for resources needed by webhook admission.
-	go func() {
-		ok := inf.WaitForCacheSync(e.stopCh)
-		e.okMu.Lock()
-		e.ok = allSynced(ok)
-		e.okMu.Unlock()
-		close(e.synced)
-	}()
+	_ = scoped.Core().V1().Namespaces().Informer()
+	_ = scoped.Core().V1().Services().Informer()
+	_ = scoped.Discovery().V1().EndpointSlices().Informer()
+	_ = scoped.Admissionregistration().V1().MutatingWebhookConfigurations().Informer()
+	_ = scoped.Admissionregistration().V1().ValidatingWebhookConfigurations().Informer()
+	scoped.Start(stopCh)
 
 	m.clusters[clusterID] = e
 	return e, nil
 }
 
+func (m *Manager) scopedWebhookFactory(clusterID string) (clientgoinformers.SharedInformerFactory, error) {
+	shared, err := m.ensureSharedFactory()
+	if err != nil {
+		return nil, err
+	}
+	return newScopedFactory(clusterID, mc.DefaultClusterAnnotation, shared), nil
+}
+
+func (m *Manager) ensureSharedFactory() (clientgoinformers.SharedInformerFactory, error) {
+	m.sharedOnce.Do(func() {
+		if m.opts.BaseLoopbackClientConfig == nil {
+			m.sharedErr = fmt.Errorf("base loopback config is required for shared webhook factory")
+			return
+		}
+		cs, err := scopedinformer.NewAllClustersKubeClient(m.opts.BaseLoopbackClientConfig)
+		if err != nil {
+			m.sharedErr = err
+			return
+		}
+		factory := clientgoinformers.NewSharedInformerFactory(cs, 0)
+		webhookInformers := []cache.SharedIndexInformer{
+			factory.Core().V1().Namespaces().Informer(),
+			factory.Core().V1().Services().Informer(),
+			factory.Discovery().V1().EndpointSlices().Informer(),
+			factory.Admissionregistration().V1().MutatingWebhookConfigurations().Informer(),
+			factory.Admissionregistration().V1().ValidatingWebhookConfigurations().Informer(),
+		}
+		for _, inf := range webhookInformers {
+			if err := scopedinformer.EnsureClusterIndex(inf, mc.DefaultClusterAnnotation); err != nil {
+				m.sharedErr = err
+				return
+			}
+		}
+		if m.sharedStop == nil {
+			m.sharedOwn = make(chan struct{})
+			m.sharedStop = m.sharedOwn
+		}
+		factory.Start(m.sharedStop)
+		// One shared cache-sync signal for all clusters; scoped informers are projections over shared caches.
+		m.sharedSync = make(chan struct{})
+		go func() {
+			ok := factory.WaitForCacheSync(m.sharedStop)
+			if allSynced(ok) {
+				close(m.sharedSync)
+			}
+		}()
+		m.shared = factory
+	})
+	if m.sharedErr != nil {
+		return nil, m.sharedErr
+	}
+	return m.shared, nil
+}
+
 func allSynced(m map[reflect.Type]bool) bool {
 	for _, v := range m {
 		if !v {
@@ -137,10 +198,10 @@ func allSynced(m map[reflect.Type]bool) bool {
 func (m *Manager) StopCluster(clusterID string) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
-	if _, ok := m.clusters[clusterID]; ok {
+	if e, ok := m.clusters[clusterID]; ok {
+		if e.ownCh != nil {
+			close(e.ownCh)
+		}
 		delete(m.clusters, clusterID)
 	}
-	if m.opts.InformerPool != nil {
-		m.opts.InformerPool.StopCluster(clusterID)
-	}
 }
diff --git a/pkg/multicluster/admission/webhook/mutating/plugin.go b/pkg/multicluster/admission/webhook/mutating/plugin.go
index d7d2c5e..b25cbbe 100644
--- a/pkg/multicluster/admission/webhook/mutating/plugin.go
+++ b/pkg/multicluster/admission/webhook/mutating/plugin.go
@@ -22,7 +22,6 @@ import (
 	"github.com/kplane-dev/apiserver/pkg/multicluster/admission/webhook/generic"
 
 	"k8s.io/apiserver/pkg/admission"
-	"k8s.io/apiserver/pkg/admission/configuration"
 )
 
 const (
@@ -54,7 +53,7 @@ func NewMutatingWebhook(configFile io.Reader) (*Plugin, error) {
 	handler := admission.NewHandler(admission.Connect, admission.Create, admission.Delete, admission.Update)
 	p := &Plugin{}
 	var err error
-	p.Webhook, err = generic.NewWebhook(handler, configFile, configuration.NewMutatingWebhookConfigurationManager, newMutatingDispatcher(p))
+	p.Webhook, err = generic.NewWebhook(handler, configFile, generic.NewListBackedMutatingWebhookSource, newMutatingDispatcher(p))
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/multicluster/admission/webhook/resolver.go b/pkg/multicluster/admission/webhook/resolver.go
index c8bc525..a37da89 100644
--- a/pkg/multicluster/admission/webhook/resolver.go
+++ b/pkg/multicluster/admission/webhook/resolver.go
@@ -1,7 +1,6 @@
 package webhook
 
 import (
-	"context"
 	"fmt"
 	"net"
 	"net/url"
@@ -9,20 +8,22 @@ import (
 
 	corev1 "k8s.io/api/core/v1"
 	discoveryv1 "k8s.io/api/discovery/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/client-go/kubernetes"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	discoverylisters "k8s.io/client-go/listers/discovery/v1"
 )
 
 type directServiceResolver struct {
-	cs                    kubernetes.Interface
+	services              corelisters.ServiceLister
+	endpointSlices        discoverylisters.EndpointSliceLister
 	enableEndpointRouting bool
 	hostname              string
 }
 
-func newDirectServiceResolver(cs kubernetes.Interface, enableEndpointRouting bool, hostname string) *directServiceResolver {
+func newDirectServiceResolver(services corelisters.ServiceLister, endpointSlices discoverylisters.EndpointSliceLister, enableEndpointRouting bool, hostname string) *directServiceResolver {
 	return &directServiceResolver{
-		cs:                    cs,
+		services:              services,
+		endpointSlices:        endpointSlices,
 		enableEndpointRouting: enableEndpointRouting,
 		hostname:              hostname,
 	}
@@ -40,8 +41,10 @@ func (r *directServiceResolver) ResolveEndpoint(namespace, name string, port int
 		}
 	}
 
-	ctx := context.Background()
-	svc, err := r.cs.CoreV1().Services(namespace).Get(ctx, name, metav1.GetOptions{})
+	if r.services == nil {
+		return nil, fmt.Errorf("service lister is not configured")
+	}
+	svc, err := r.services.Services(namespace).Get(name)
 	if err != nil {
 		return nil, err
 	}
@@ -54,14 +57,16 @@ func (r *directServiceResolver) ResolveEndpoint(namespace, name string, port int
 		return &url.URL{Scheme: "https", Host: net.JoinHostPort(ip, strconv.Itoa(int(port)))}, nil
 	}
 
+	if r.endpointSlices == nil {
+		return nil, fmt.Errorf("endpointslice lister is not configured")
+	}
 	targetName, targetPort := serviceTargetPort(svc, port)
 
-	selector := labels.Set{discoveryv1.LabelServiceName: name}.AsSelector().String()
-	slices, err := r.cs.DiscoveryV1().EndpointSlices(namespace).List(ctx, metav1.ListOptions{LabelSelector: selector})
+	slices, err := r.endpointSlices.EndpointSlices(namespace).List(labels.SelectorFromSet(labels.Set{discoveryv1.LabelServiceName: name}))
 	if err != nil {
 		return nil, err
 	}
-	if len(slices.Items) == 0 {
+	if len(slices) == 0 {
 		return nil, fmt.Errorf("no endpointslices found for service %q", name)
 	}
@@ -70,8 +75,8 @@ func (r *directServiceResolver) ResolveEndpoint(namespace, name string, port int
 		epPort = targetPort
 	} else if targetName != "" {
 		found := false
-		for i := range slices.Items {
-			for _, p := range slices.Items[i].Ports {
+		for i := range slices {
+			for _, p := range slices[i].Ports {
 				if p.Name != nil && *p.Name == targetName && p.Port != nil {
 					epPort = *p.Port
 					found = true
@@ -84,7 +89,7 @@ func (r *directServiceResolver) ResolveEndpoint(namespace, name string, port int
 		}
 	}
 
-	addr, err := firstEndpointAddress(slices.Items)
+	addr, err := firstEndpointAddress(slices)
 	if err != nil {
 		return nil, err
 	}
@@ -108,7 +113,7 @@ func serviceTargetPort(svc *corev1.Service, servicePort int32) (targetName strin
 	return "", servicePort
 }
 
-func firstEndpointAddress(slices []discoveryv1.EndpointSlice) (string, error) {
+func firstEndpointAddress(slices []*discoveryv1.EndpointSlice) (string, error) {
 	for i := range slices {
 		for _, ep := range slices[i].Endpoints {
 			ready := true
diff --git a/pkg/multicluster/admission/webhook/scoped_factory.go b/pkg/multicluster/admission/webhook/scoped_factory.go
new file mode 100644
index 0000000..9b74036
--- /dev/null
+++ b/pkg/multicluster/admission/webhook/scoped_factory.go
@@ -0,0 +1,509 @@
+package webhook
+
+import (
+	"fmt"
+	"reflect"
+
+	admissionregistrationv1 "k8s.io/api/admissionregistration/v1"
+	corev1 "k8s.io/api/core/v1"
+	discoveryv1 "k8s.io/api/discovery/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"k8s.io/client-go/informers"
+	admissionregistrationinformers "k8s.io/client-go/informers/admissionregistration"
+	admissionregistrationinformersv1 "k8s.io/client-go/informers/admissionregistration/v1"
+	admissionregistrationinformersv1alpha1 "k8s.io/client-go/informers/admissionregistration/v1alpha1"
+	admissionregistrationinformersv1beta1 "k8s.io/client-go/informers/admissionregistration/v1beta1"
+	apiserverinternalinformers "k8s.io/client-go/informers/apiserverinternal"
+	appsinformers "k8s.io/client-go/informers/apps"
+	autoscalinginformers "k8s.io/client-go/informers/autoscaling"
+	batchinformers "k8s.io/client-go/informers/batch"
+	certificatesinformers "k8s.io/client-go/informers/certificates"
+	coordinationinformers "k8s.io/client-go/informers/coordination"
+	coreinformers "k8s.io/client-go/informers/core"
+	coreinformersv1 "k8s.io/client-go/informers/core/v1"
+	discoveryinformers "k8s.io/client-go/informers/discovery"
+	discoveryinformersv1 "k8s.io/client-go/informers/discovery/v1"
+	discoveryinformersv1beta1 "k8s.io/client-go/informers/discovery/v1beta1"
+	eventsinformers "k8s.io/client-go/informers/events"
+	extensionsinformers "k8s.io/client-go/informers/extensions"
+	flowcontrolinformers "k8s.io/client-go/informers/flowcontrol"
+	internalinformers "k8s.io/client-go/informers/internalinterfaces"
+	networkinginformers "k8s.io/client-go/informers/networking"
+	nodeinformers "k8s.io/client-go/informers/node"
+	policyinformers "k8s.io/client-go/informers/policy"
+	rbacinformers "k8s.io/client-go/informers/rbac"
+	resourceinformers "k8s.io/client-go/informers/resource"
+	schedulinginformers "k8s.io/client-go/informers/scheduling"
+	storageinformers "k8s.io/client-go/informers/storage"
+	storagemigrationinformers "k8s.io/client-go/informers/storagemigration"
+	admissionregistrationlisters "k8s.io/client-go/listers/admissionregistration/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	discoverylisters "k8s.io/client-go/listers/discovery/v1"
+	"k8s.io/client-go/tools/cache"
+
+	mc "github.com/kplane-dev/apiserver/pkg/multicluster"
+	"github.com/kplane-dev/apiserver/pkg/multicluster/scopedinformer"
+)
+
+type scopedFactory struct {
+	clusterID       string
+	clusterLabelKey string
+	shared          informers.SharedInformerFactory
+}
+
+func newScopedFactory(clusterID, clusterLabelKey string, shared informers.SharedInformerFactory) informers.SharedInformerFactory {
+	if clusterLabelKey == "" {
+		clusterLabelKey = mc.DefaultClusterAnnotation
+	}
+	return &scopedFactory{
+		clusterID:       clusterID,
+		clusterLabelKey: clusterLabelKey,
+		shared:          shared,
+	}
+}
+
+func (f *scopedFactory) Start(stopCh <-chan struct{}) {
+	_ = stopCh
+}
+
+func (f *scopedFactory) Shutdown() {
+	// shared informers are owned by manager-level lifecycle.
+}
+
+func (f *scopedFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool {
+	out := f.shared.WaitForCacheSync(stopCh)
+	if out == nil {
+		out = map[reflect.Type]bool{}
+	}
+	if f.shared != nil {
+		out[reflect.TypeOf(&corev1.Namespace{})] = f.shared.Core().V1().Namespaces().Informer().HasSynced()
+		out[reflect.TypeOf(&corev1.Service{})] = f.shared.Core().V1().Services().Informer().HasSynced()
+		out[reflect.TypeOf(&discoveryv1.EndpointSlice{})] = f.shared.Discovery().V1().EndpointSlices().Informer().HasSynced()
+		out[reflect.TypeOf(&admissionregistrationv1.MutatingWebhookConfiguration{})] = f.shared.Admissionregistration().V1().MutatingWebhookConfigurations().Informer().HasSynced()
+		out[reflect.TypeOf(&admissionregistrationv1.ValidatingWebhookConfiguration{})] = f.shared.Admissionregistration().V1().ValidatingWebhookConfigurations().Informer().HasSynced()
+	}
+	return out
+}
+
+func (f *scopedFactory) ForResource(resource schema.GroupVersionResource) (informers.GenericInformer, error) {
+	return f.shared.ForResource(resource)
+}
+
+func (f *scopedFactory) InformerFor(obj runtime.Object, newFunc internalinformers.NewInformerFunc) cache.SharedIndexInformer {
+	return f.shared.InformerFor(obj, newFunc)
+}
+
+func (f *scopedFactory) Core() coreinformers.Interface {
+	return &scopedCoreGroup{f: f}
+}
+
+func (f *scopedFactory) Discovery() discoveryinformers.Interface {
+	return &scopedDiscoveryGroup{f: f}
+}
+
+func (f *scopedFactory) Admissionregistration() admissionregistrationinformers.Interface {
+	return &scopedAdmissionregistrationGroup{f: f}
+}
+
+func (f *scopedFactory) Internal() apiserverinternalinformers.Interface { return f.shared.Internal() }
+func (f *scopedFactory) Apps() appsinformers.Interface                  { return f.shared.Apps() }
+func (f *scopedFactory) Autoscaling() autoscalinginformers.Interface    { return f.shared.Autoscaling() }
+func (f *scopedFactory) Batch() batchinformers.Interface                { return f.shared.Batch() }
+func (f *scopedFactory) Certificates() certificatesinformers.Interface {
+	return f.shared.Certificates()
+}
+func (f *scopedFactory) Coordination() coordinationinformers.Interface {
+	return f.shared.Coordination()
+}
+func (f *scopedFactory) Events() eventsinformers.Interface           { return f.shared.Events() }
+func (f *scopedFactory) Extensions() extensionsinformers.Interface   { return f.shared.Extensions() }
+func (f *scopedFactory) Flowcontrol() flowcontrolinformers.Interface { return f.shared.Flowcontrol() }
+func (f *scopedFactory) Networking() networkinginformers.Interface   { return f.shared.Networking() }
+func (f *scopedFactory) Node() nodeinformers.Interface               { return f.shared.Node() }
+func (f *scopedFactory) Policy() policyinformers.Interface           { return f.shared.Policy() }
+func (f *scopedFactory) Rbac() rbacinformers.Interface               { return f.shared.Rbac() }
+func (f *scopedFactory) Resource() resourceinformers.Interface       { return f.shared.Resource() }
+func (f *scopedFactory) Scheduling() schedulinginformers.Interface   { return f.shared.Scheduling() }
+func (f *scopedFactory) Storage() storageinformers.Interface         { return f.shared.Storage() }
+func (f *scopedFactory) Storagemigration() storagemigrationinformers.Interface {
+	return f.shared.Storagemigration()
+}
+ +type scopedCoreGroup struct{ f *scopedFactory } + +func (g *scopedCoreGroup) V1() coreinformersv1.Interface { return &scopedCoreV1{f: g.f} } + +type scopedCoreV1 struct{ f *scopedFactory } + +func (v *scopedCoreV1) ComponentStatuses() coreinformersv1.ComponentStatusInformer { + return v.f.shared.Core().V1().ComponentStatuses() +} +func (v *scopedCoreV1) ConfigMaps() coreinformersv1.ConfigMapInformer { + return v.f.shared.Core().V1().ConfigMaps() +} +func (v *scopedCoreV1) Endpoints() coreinformersv1.EndpointsInformer { + return v.f.shared.Core().V1().Endpoints() +} +func (v *scopedCoreV1) Events() coreinformersv1.EventInformer { + return v.f.shared.Core().V1().Events() +} +func (v *scopedCoreV1) LimitRanges() coreinformersv1.LimitRangeInformer { + return v.f.shared.Core().V1().LimitRanges() +} +func (v *scopedCoreV1) Namespaces() coreinformersv1.NamespaceInformer { + base := v.f.shared.Core().V1().Namespaces().Informer() + return &scopedNamespaceInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedNamespaceLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID}, + } +} +func (v *scopedCoreV1) Nodes() coreinformersv1.NodeInformer { return v.f.shared.Core().V1().Nodes() } +func (v *scopedCoreV1) PersistentVolumes() coreinformersv1.PersistentVolumeInformer { + return v.f.shared.Core().V1().PersistentVolumes() +} +func (v *scopedCoreV1) PersistentVolumeClaims() coreinformersv1.PersistentVolumeClaimInformer { + return v.f.shared.Core().V1().PersistentVolumeClaims() +} +func (v *scopedCoreV1) Pods() coreinformersv1.PodInformer { return v.f.shared.Core().V1().Pods() } +func (v *scopedCoreV1) PodTemplates() coreinformersv1.PodTemplateInformer { + return v.f.shared.Core().V1().PodTemplates() +} +func (v *scopedCoreV1) ReplicationControllers() coreinformersv1.ReplicationControllerInformer { + return v.f.shared.Core().V1().ReplicationControllers() +} +func (v *scopedCoreV1) ResourceQuotas() 
coreinformersv1.ResourceQuotaInformer { + return v.f.shared.Core().V1().ResourceQuotas() +} +func (v *scopedCoreV1) Secrets() coreinformersv1.SecretInformer { + return v.f.shared.Core().V1().Secrets() +} +func (v *scopedCoreV1) Services() coreinformersv1.ServiceInformer { + base := v.f.shared.Core().V1().Services().Informer() + return &scopedServiceInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedServiceLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID}, + } +} +func (v *scopedCoreV1) ServiceAccounts() coreinformersv1.ServiceAccountInformer { + return v.f.shared.Core().V1().ServiceAccounts() +} + +type scopedDiscoveryGroup struct{ f *scopedFactory } + +func (g *scopedDiscoveryGroup) V1() discoveryinformersv1.Interface { + return &scopedDiscoveryV1{f: g.f} +} +func (g *scopedDiscoveryGroup) V1beta1() discoveryinformersv1beta1.Interface { + return g.f.shared.Discovery().V1beta1() +} + +type scopedDiscoveryV1 struct{ f *scopedFactory } + +func (v *scopedDiscoveryV1) EndpointSlices() discoveryinformersv1.EndpointSliceInformer { + base := v.f.shared.Discovery().V1().EndpointSlices().Informer() + return &scopedEndpointSliceInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedEndpointSliceLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID}, + } +} + +type scopedAdmissionregistrationGroup struct{ f *scopedFactory } + +func (g *scopedAdmissionregistrationGroup) V1() admissionregistrationinformersv1.Interface { + return &scopedAdmissionregistrationV1{f: g.f} +} +func (g *scopedAdmissionregistrationGroup) V1alpha1() admissionregistrationinformersv1alpha1.Interface { + return g.f.shared.Admissionregistration().V1alpha1() +} +func (g *scopedAdmissionregistrationGroup) V1beta1() admissionregistrationinformersv1beta1.Interface { + return g.f.shared.Admissionregistration().V1beta1() +} + +type scopedAdmissionregistrationV1 struct{ f 
*scopedFactory } + +func (v *scopedAdmissionregistrationV1) MutatingWebhookConfigurations() admissionregistrationinformersv1.MutatingWebhookConfigurationInformer { + base := v.f.shared.Admissionregistration().V1().MutatingWebhookConfigurations().Informer() + return &scopedMutatingWebhookConfigurationInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedMutatingWebhookConfigurationLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID}, + } +} +func (v *scopedAdmissionregistrationV1) ValidatingWebhookConfigurations() admissionregistrationinformersv1.ValidatingWebhookConfigurationInformer { + base := v.f.shared.Admissionregistration().V1().ValidatingWebhookConfigurations().Informer() + return &scopedValidatingWebhookConfigurationInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedValidatingWebhookConfigurationLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID}, + } +} +func (v *scopedAdmissionregistrationV1) ValidatingAdmissionPolicies() admissionregistrationinformersv1.ValidatingAdmissionPolicyInformer { + return v.f.shared.Admissionregistration().V1().ValidatingAdmissionPolicies() +} +func (v *scopedAdmissionregistrationV1) ValidatingAdmissionPolicyBindings() admissionregistrationinformersv1.ValidatingAdmissionPolicyBindingInformer { + return v.f.shared.Admissionregistration().V1().ValidatingAdmissionPolicyBindings() +} + +func newFilteredSharedIndexInformer(shared cache.SharedIndexInformer, clusterID, clusterLabelKey string) cache.SharedIndexInformer { + return scopedinformer.NewFilteredSharedIndexInformer(shared, clusterID, clusterLabelKey) +} + +func objectCluster(obj interface{}, clusterLabelKey string) string { + return scopedinformer.ObjectCluster(obj, clusterLabelKey) +} + +func filteredByCluster(indexer cache.Indexer, clusterID string) []interface{} { + return scopedinformer.FilteredByCluster(indexer, clusterID) +} + +type 
scopedNamespaceInformer struct { + informer cache.SharedIndexInformer + lister corelisters.NamespaceLister +} + +func (i *scopedNamespaceInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedNamespaceInformer) Lister() corelisters.NamespaceLister { return i.lister } + +type scopedServiceInformer struct { + informer cache.SharedIndexInformer + lister corelisters.ServiceLister +} + +func (i *scopedServiceInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedServiceInformer) Lister() corelisters.ServiceLister { return i.lister } + +type scopedEndpointSliceInformer struct { + informer cache.SharedIndexInformer + lister discoverylisters.EndpointSliceLister +} + +func (i *scopedEndpointSliceInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedEndpointSliceInformer) Lister() discoverylisters.EndpointSliceLister { + return i.lister +} + +type scopedMutatingWebhookConfigurationInformer struct { + informer cache.SharedIndexInformer + lister admissionregistrationlisters.MutatingWebhookConfigurationLister +} + +func (i *scopedMutatingWebhookConfigurationInformer) Informer() cache.SharedIndexInformer { + return i.informer +} +func (i *scopedMutatingWebhookConfigurationInformer) Lister() admissionregistrationlisters.MutatingWebhookConfigurationLister { + return i.lister +} + +type scopedValidatingWebhookConfigurationInformer struct { + informer cache.SharedIndexInformer + lister admissionregistrationlisters.ValidatingWebhookConfigurationLister +} + +func (i *scopedValidatingWebhookConfigurationInformer) Informer() cache.SharedIndexInformer { + return i.informer +} +func (i *scopedValidatingWebhookConfigurationInformer) Lister() admissionregistrationlisters.ValidatingWebhookConfigurationLister { + return i.lister +} + +type scopedNamespaceLister struct { + indexer cache.Indexer + clusterID string +} + +func (l *scopedNamespaceLister) List(sel labels.Selector) (ret []*corev1.Namespace, 
err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Namespace) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedNamespaceLister) Get(name string) (*corev1.Namespace, error) { + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Namespace) + if !ok { + continue + } + if obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("namespace %q not found", name) +} + +type scopedServiceLister struct { + indexer cache.Indexer + clusterID string +} + +func (l *scopedServiceLister) List(sel labels.Selector) (ret []*corev1.Service, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Service) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedServiceLister) Services(namespace string) corelisters.ServiceNamespaceLister { + return &scopedServiceNamespaceLister{parent: l, namespace: namespace} +} + +type scopedServiceNamespaceLister struct { + parent *scopedServiceLister + namespace string +} + +func (l *scopedServiceNamespaceLister) List(sel labels.Selector) (ret []*corev1.Service, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedServiceNamespaceLister) Get(name string) (*corev1.Service, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("service %s/%s not found", l.namespace, name) +} + +type scopedEndpointSliceLister struct { + indexer cache.Indexer + clusterID string +} + +func (l 
*scopedEndpointSliceLister) List(sel labels.Selector) (ret []*discoveryv1.EndpointSlice, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*discoveryv1.EndpointSlice) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedEndpointSliceLister) EndpointSlices(namespace string) discoverylisters.EndpointSliceNamespaceLister { + return &scopedEndpointSliceNamespaceLister{parent: l, namespace: namespace} +} + +type scopedEndpointSliceNamespaceLister struct { + parent *scopedEndpointSliceLister + namespace string +} + +func (l *scopedEndpointSliceNamespaceLister) List(sel labels.Selector) (ret []*discoveryv1.EndpointSlice, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedEndpointSliceNamespaceLister) Get(name string) (*discoveryv1.EndpointSlice, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("endpointslice %s/%s not found", l.namespace, name) +} + +type scopedMutatingWebhookConfigurationLister struct { + indexer cache.Indexer + clusterID string +} + +func (l *scopedMutatingWebhookConfigurationLister) List(sel labels.Selector) (ret []*admissionregistrationv1.MutatingWebhookConfiguration, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*admissionregistrationv1.MutatingWebhookConfiguration) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedMutatingWebhookConfigurationLister) Get(name string) 
(*admissionregistrationv1.MutatingWebhookConfiguration, error) { + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*admissionregistrationv1.MutatingWebhookConfiguration) + if !ok { + continue + } + if obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("mutatingwebhookconfiguration %q not found", name) +} + +type scopedValidatingWebhookConfigurationLister struct { + indexer cache.Indexer + clusterID string +} + +func (l *scopedValidatingWebhookConfigurationLister) List(sel labels.Selector) (ret []*admissionregistrationv1.ValidatingWebhookConfiguration, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*admissionregistrationv1.ValidatingWebhookConfiguration) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} + +func (l *scopedValidatingWebhookConfigurationLister) Get(name string) (*admissionregistrationv1.ValidatingWebhookConfiguration, error) { + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*admissionregistrationv1.ValidatingWebhookConfiguration) + if !ok { + continue + } + if obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("validatingwebhookconfiguration %q not found", name) +} diff --git a/pkg/multicluster/admission/webhook/validating/plugin.go b/pkg/multicluster/admission/webhook/validating/plugin.go index 524f5b6..a851373 100644 --- a/pkg/multicluster/admission/webhook/validating/plugin.go +++ b/pkg/multicluster/admission/webhook/validating/plugin.go @@ -22,7 +22,6 @@ import ( "github.com/kplane-dev/apiserver/pkg/multicluster/admission/webhook/generic" "k8s.io/apiserver/pkg/admission" - "k8s.io/apiserver/pkg/admission/configuration" ) const ( @@ -54,7 +53,7 @@ func NewValidatingAdmissionWebhook(configFile io.Reader) (*Plugin, error) { handler := admission.NewHandler(admission.Connect, 
admission.Create, admission.Delete, admission.Update) p := &Plugin{} var err error - p.Webhook, err = generic.NewWebhook(handler, configFile, configuration.NewValidatingWebhookConfigurationManager, newValidatingDispatcher(p)) + p.Webhook, err = generic.NewWebhook(handler, configFile, generic.NewListBackedValidatingWebhookSource, newValidatingDispatcher(p)) if err != nil { return nil, err } diff --git a/pkg/multicluster/auth/manager.go b/pkg/multicluster/auth/manager.go index f55925e..0738acd 100644 --- a/pkg/multicluster/auth/manager.go +++ b/pkg/multicluster/auth/manager.go @@ -3,21 +3,31 @@ package auth import ( "context" "fmt" + "strings" "sync" mc "github.com/kplane-dev/apiserver/pkg/multicluster" + "github.com/kplane-dev/apiserver/pkg/multicluster/scopedinformer" + rbacv1 "k8s.io/api/rbac/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apiserver/pkg/authentication/authenticator" + "k8s.io/apiserver/pkg/authentication/user" "k8s.io/apiserver/pkg/authorization/authorizer" + "k8s.io/apiserver/pkg/authorization/authorizerfactory" + authzunion "k8s.io/apiserver/pkg/authorization/union" "k8s.io/apiserver/pkg/server/egressselector" utilfeature "k8s.io/apiserver/pkg/util/feature" - clientgoinformers "k8s.io/client-go/informers" + "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" v1 "k8s.io/client-go/listers/core/v1" + corelisters "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" "k8s.io/kubernetes/pkg/controller/serviceaccount" "k8s.io/kubernetes/pkg/features" + rbacregistryvalidation "k8s.io/kubernetes/pkg/registry/rbac/validation" kubeoptions "k8s.io/kubernetes/pkg/kubeapiserver/options" + rbacauthorizer "k8s.io/kubernetes/plugin/pkg/auth/authorizer/rbac" "k8s.io/kubernetes/plugin/pkg/auth/authenticator/token/bootstrap" ) @@ -41,13 +51,20 @@ type Manager struct { mu sync.Mutex clusters map[string]*clusterEnv + + sharedOnce sync.Once + sharedErr error + sharedAuth informers.SharedInformerFactory + rbacStore 
*rbacProjectionStore + sharedStop <-chan struct{} + sharedOwn chan struct{} } type clusterEnv struct { cid string clientset kubernetes.Interface - informers clientgoinformers.SharedInformerFactory + informers informers.SharedInformerFactory authenticator authenticator.Request authorizer authorizer.Authorizer @@ -62,9 +79,6 @@ func NewManager(ctx context.Context, opts Options) *Manager { if opts.ClientPool == nil && opts.BaseLoopbackClientConfig != nil { opts.ClientPool = mc.NewClientPool(opts.BaseLoopbackClientConfig, opts.PathPrefix, opts.ControlPlaneSegment) } - if opts.InformerPool == nil && opts.ClientPool != nil { - opts.InformerPool = mc.NewInformerPoolFromClientPool(opts.ClientPool, 0, nil) - } return &Manager{ ctx: ctx, opts: opts, @@ -94,16 +108,11 @@ func (m *Manager) AuthorizerForCluster(clusterID string) (authorizer.Authorizer, func (m *Manager) StopCluster(clusterID string) { m.mu.Lock() defer m.mu.Unlock() - env, ok := m.clusters[clusterID] + _, ok := m.clusters[clusterID] if !ok { return } delete(m.clusters, clusterID) - if m.opts.InformerPool != nil { - m.opts.InformerPool.StopCluster(clusterID) - } else { - _ = env - } } func (m *Manager) envForCluster(clusterID string) (*clusterEnv, error) { @@ -113,33 +122,59 @@ func (m *Manager) envForCluster(clusterID string) (*clusterEnv, error) { return env, nil } - if m.opts.InformerPool == nil { + if m.opts.ClientPool == nil { m.mu.Unlock() - return nil, fmt.Errorf("loopback informer pool is required for cluster auth") + return nil, fmt.Errorf("loopback client pool is required for cluster auth") } - cs, informers, stopCh, err := m.opts.InformerPool.Get(clusterID) + cs, err := m.opts.ClientPool.KubeClientForCluster(clusterID) if err != nil { m.mu.Unlock() return nil, err } - authn, err := buildAuthenticator(m.ctx, m.opts, cs, informers) - if err != nil { - m.mu.Unlock() - return nil, err + var ( + scopedFactory informers.SharedInformerFactory + authn authenticator.Request + ) + var ( + authz 
authorizer.Authorizer + resolver authorizer.RuleResolver + ) + if m.useSharedRBACAuthorizer() { + listers, err := m.coreListersForCluster(clusterID) + if err != nil { + m.mu.Unlock() + return nil, err + } + authn, err = buildAuthenticatorWithCoreListers(m.ctx, m.opts, cs, listers) + if err != nil { + m.mu.Unlock() + return nil, err + } + authz, resolver, err = m.buildSharedRBACAuthorizerForCluster(clusterID) + } else { + scopedFactory, err = m.scopedAuthFactory(clusterID) + if err != nil { + m.mu.Unlock() + return nil, err + } + authn, err = buildAuthenticator(m.ctx, m.opts, cs, scopedFactory) + if err != nil { + m.mu.Unlock() + return nil, err + } + authz, resolver, err = buildAuthorizer(m.ctx, m.opts, scopedFactory) } - authz, resolver, err := buildAuthorizer(m.ctx, m.opts, informers) if err != nil { m.mu.Unlock() return nil, err } - informers.Start(stopCh) env := &clusterEnv{ cid: clusterID, clientset: cs, - informers: informers, + informers: scopedFactory, authenticator: authn, authorizer: authz, ruleResolver: resolver, @@ -150,7 +185,210 @@ func (m *Manager) envForCluster(clusterID string) (*clusterEnv, error) { return env, nil } -func buildAuthenticator(ctx context.Context, opts Options, clientset kubernetes.Interface, informers clientgoinformers.SharedInformerFactory) (authenticator.Request, error) { +func (m *Manager) scopedAuthFactory(clusterID string) (informers.SharedInformerFactory, error) { + shared, err := m.ensureSharedAuthFactory() + if err != nil { + return nil, err + } + return newScopedFactory(clusterID, mc.DefaultClusterAnnotation, shared, m.rbacStore), nil +} + +func (m *Manager) ensureSharedAuthFactory() (informers.SharedInformerFactory, error) { + m.sharedOnce.Do(func() { + if m.opts.BaseLoopbackClientConfig == nil { + m.sharedErr = fmt.Errorf("base loopback config is required for shared auth factory") + return + } + cs, err := scopedinformer.NewAllClustersKubeClient(m.opts.BaseLoopbackClientConfig) + if err != nil { + m.sharedErr = err + 
return + } + factory := informers.NewSharedInformerFactory(cs, 0) + // Warm and index shared auth-critical informers once. + authInformers := []cache.SharedIndexInformer{ + factory.Core().V1().Secrets().Informer(), + factory.Core().V1().ServiceAccounts().Informer(), + factory.Core().V1().Pods().Informer(), + factory.Core().V1().Nodes().Informer(), + factory.Rbac().V1().Roles().Informer(), + factory.Rbac().V1().RoleBindings().Informer(), + factory.Rbac().V1().ClusterRoles().Informer(), + factory.Rbac().V1().ClusterRoleBindings().Informer(), + } + for _, inf := range authInformers { + if err := scopedinformer.EnsureClusterIndex(inf, mc.DefaultClusterAnnotation); err != nil { + m.sharedErr = err + return + } + } + rbacStore := newRBACProjectionStore(mc.DefaultClusterAnnotation) + if err := registerRBACProjectionHandlers( + rbacStore, + factory.Rbac().V1().Roles().Informer(), + factory.Rbac().V1().RoleBindings().Informer(), + factory.Rbac().V1().ClusterRoles().Informer(), + factory.Rbac().V1().ClusterRoleBindings().Informer(), + ); err != nil { + m.sharedErr = err + return + } + if m.sharedStop == nil { + m.sharedOwn = make(chan struct{}) + m.sharedStop = m.sharedOwn + } + factory.Start(m.sharedStop) + m.sharedAuth = factory + m.rbacStore = rbacStore + }) + if m.sharedErr != nil { + return nil, m.sharedErr + } + return m.sharedAuth, nil +} + +type coreAuthListers struct { + secrets corelisters.SecretLister + serviceAccounts corelisters.ServiceAccountLister + pods corelisters.PodLister + nodes corelisters.NodeLister +} + +func (m *Manager) coreListersForCluster(clusterID string) (*coreAuthListers, error) { + shared, err := m.ensureSharedAuthFactory() + if err != nil { + return nil, err + } + return &coreAuthListers{ + secrets: &scopedSecretLister{ + indexer: shared.Core().V1().Secrets().Informer().GetIndexer(), + clusterID: clusterID, + clusterLabelKey: mc.DefaultClusterAnnotation, + }, + serviceAccounts: &scopedServiceAccountLister{ + indexer: 
shared.Core().V1().ServiceAccounts().Informer().GetIndexer(), + clusterID: clusterID, + clusterLabelKey: mc.DefaultClusterAnnotation, + }, + pods: &scopedPodLister{ + indexer: shared.Core().V1().Pods().Informer().GetIndexer(), + clusterID: clusterID, + clusterLabelKey: mc.DefaultClusterAnnotation, + }, + nodes: &scopedNodeLister{ + indexer: shared.Core().V1().Nodes().Informer().GetIndexer(), + clusterID: clusterID, + clusterLabelKey: mc.DefaultClusterAnnotation, + }, + }, nil +} + +func (m *Manager) useSharedRBACAuthorizer() bool { + if m == nil || m.opts.Authorization == nil { + return false + } + if len(m.opts.Authorization.Modes) != 1 { + return false + } + return strings.EqualFold(m.opts.Authorization.Modes[0], "RBAC") +} + +func (m *Manager) buildSharedRBACAuthorizerForCluster(clusterID string) (authorizer.Authorizer, authorizer.RuleResolver, error) { + if _, err := m.ensureSharedAuthFactory(); err != nil { + return nil, nil, err + } + if m.rbacStore == nil { + return nil, nil, fmt.Errorf("shared RBAC projection store is not initialized") + } + resolver := &clusterAwareRBACDataSource{ + store: m.rbacStore, + defaultCluster: clusterID, + fixedCluster: clusterID, + } + rbacAuthz := rbacauthorizer.New(resolver, resolver, resolver, resolver) + superuser := authorizerfactory.NewPrivilegedGroups(user.SystemPrivilegedGroup) + // Match upstream shape: privileged groups short-circuit before RBAC checks. 
+ return authzunion.New(superuser, rbacAuthz), rbacAuthz, nil +} + +type clusterAwareRBACDataSource struct { + store *rbacProjectionStore + defaultCluster string + fixedCluster string +} + +func (s *clusterAwareRBACDataSource) clusterFromContext(ctx context.Context) string { + if s != nil && s.fixedCluster != "" { + return s.fixedCluster + } + cid, _, _ := mc.FromContext(ctx) + if cid != "" { + return cid + } + if s != nil && s.defaultCluster != "" { + return s.defaultCluster + } + return mc.DefaultClusterName +} + +func (s *clusterAwareRBACDataSource) GetRole(ctx context.Context, namespace, name string) (*rbacv1.Role, error) { + if s == nil || s.store == nil { + return nil, fmt.Errorf("rbac projection store is not initialized") + } + clusterID := s.clusterFromContext(ctx) + for _, role := range s.store.listRoles(clusterID) { + if role == nil { + continue + } + if role.Namespace == namespace && role.Name == name { + return role, nil + } + } + return nil, fmt.Errorf("role %s/%s not found", namespace, name) +} + +func (s *clusterAwareRBACDataSource) ListRoleBindings(ctx context.Context, namespace string) ([]*rbacv1.RoleBinding, error) { + if s == nil || s.store == nil { + return nil, fmt.Errorf("rbac projection store is not initialized") + } + clusterID := s.clusterFromContext(ctx) + items := s.store.listRoleBindings(clusterID) + out := make([]*rbacv1.RoleBinding, 0, len(items)) + for _, rb := range items { + if rb == nil || rb.Namespace != namespace { + continue + } + out = append(out, rb) + } + return out, nil +} + +func (s *clusterAwareRBACDataSource) GetClusterRole(ctx context.Context, name string) (*rbacv1.ClusterRole, error) { + if s == nil || s.store == nil { + return nil, fmt.Errorf("rbac projection store is not initialized") + } + clusterID := s.clusterFromContext(ctx) + role := s.store.getClusterRole(clusterID, name) + if role == nil { + return nil, fmt.Errorf("clusterrole %s not found", name) + } + return role, nil +} + +func (s 
*clusterAwareRBACDataSource) ListClusterRoleBindings(ctx context.Context) ([]*rbacv1.ClusterRoleBinding, error) { + if s == nil || s.store == nil { + return nil, fmt.Errorf("rbac projection store is not initialized") + } + clusterID := s.clusterFromContext(ctx) + return s.store.listClusterRoleBindings(clusterID), nil +} + +var _ rbacregistryvalidation.RoleGetter = (*clusterAwareRBACDataSource)(nil) +var _ rbacregistryvalidation.RoleBindingLister = (*clusterAwareRBACDataSource)(nil) +var _ rbacregistryvalidation.ClusterRoleGetter = (*clusterAwareRBACDataSource)(nil) +var _ rbacregistryvalidation.ClusterRoleBindingLister = (*clusterAwareRBACDataSource)(nil) + +func buildAuthenticator(ctx context.Context, opts Options, clientset kubernetes.Interface, informers informers.SharedInformerFactory) (authenticator.Request, error) { if opts.Authentication == nil { return nil, nil } @@ -199,7 +437,55 @@ func buildAuthenticator(ctx context.Context, opts Options, clientset kubernetes. return authenticator, nil } -func buildAuthorizer(ctx context.Context, opts Options, informers clientgoinformers.SharedInformerFactory) (authorizer.Authorizer, authorizer.RuleResolver, error) { +func buildAuthenticatorWithCoreListers(ctx context.Context, opts Options, clientset kubernetes.Interface, listers *coreAuthListers) (authenticator.Request, error) { + if opts.Authentication == nil { + return nil, nil + } + if listers == nil { + return nil, fmt.Errorf("core auth listers are required") + } + if opts.Authentication.ServiceAccounts != nil && opts.Authentication.ServiceAccounts.OptionalTokenGetter != nil { + return nil, fmt.Errorf("optional token getter requires informer factory path") + } + authConfig, err := opts.Authentication.ToAuthenticationConfig() + if err != nil { + return nil, err + } + + var nodeLister v1.NodeLister + if utilfeature.DefaultFeatureGate.Enabled(features.ServiceAccountTokenNodeBindingValidation) { + nodeLister = listers.nodes + } + authConfig.ServiceAccountTokenGetter = 
serviceaccount.NewGetterFromClient( + clientset, + listers.secrets, + listers.serviceAccounts, + listers.pods, + nodeLister, + ) + authConfig.SecretsWriter = clientset.CoreV1() + + if authConfig.BootstrapToken { + authConfig.BootstrapTokenAuthenticator = bootstrap.NewTokenAuthenticator( + listers.secrets.Secrets(metav1.NamespaceSystem), + ) + } + if opts.EgressSelector != nil { + egressDialer, err := opts.EgressSelector.Lookup(egressselector.ControlPlane.AsNetworkContext()) + if err != nil { + return nil, err + } + authConfig.CustomDial = egressDialer + authConfig.EgressLookup = opts.EgressSelector.Lookup + } + authenticator, _, _, _, err := authConfig.New(ctx) + if err != nil { + return nil, err + } + return authenticator, nil +} + +func buildAuthorizer(ctx context.Context, opts Options, informers informers.SharedInformerFactory) (authorizer.Authorizer, authorizer.RuleResolver, error) { if opts.Authorization == nil { return nil, nil, nil } diff --git a/pkg/multicluster/auth/scoped_factory.go b/pkg/multicluster/auth/scoped_factory.go new file mode 100644 index 0000000..2eb46af --- /dev/null +++ b/pkg/multicluster/auth/scoped_factory.go @@ -0,0 +1,907 @@ +package auth + +import ( + "fmt" + "reflect" + "strings" + "sync" + + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/informers" + admissionregistrationinformers "k8s.io/client-go/informers/admissionregistration" + apiserverinternalinformers "k8s.io/client-go/informers/apiserverinternal" + appsinformers "k8s.io/client-go/informers/apps" + autoscalinginformers "k8s.io/client-go/informers/autoscaling" + batchinformers "k8s.io/client-go/informers/batch" + certificatesinformers "k8s.io/client-go/informers/certificates" + coordinationinformers "k8s.io/client-go/informers/coordination" + coreinformers "k8s.io/client-go/informers/core" + 
coreinformersv1 "k8s.io/client-go/informers/core/v1" + discoveryinformers "k8s.io/client-go/informers/discovery" + eventsinformers "k8s.io/client-go/informers/events" + extensionsinformers "k8s.io/client-go/informers/extensions" + flowcontrolinformers "k8s.io/client-go/informers/flowcontrol" + internalinformers "k8s.io/client-go/informers/internalinterfaces" + networkinginformers "k8s.io/client-go/informers/networking" + nodeinformers "k8s.io/client-go/informers/node" + policyinformers "k8s.io/client-go/informers/policy" + rbacinformers "k8s.io/client-go/informers/rbac" + rbacinformersv1 "k8s.io/client-go/informers/rbac/v1" + rbacinformersv1alpha1 "k8s.io/client-go/informers/rbac/v1alpha1" + rbacinformersv1beta1 "k8s.io/client-go/informers/rbac/v1beta1" + resourceinformers "k8s.io/client-go/informers/resource" + schedulinginformers "k8s.io/client-go/informers/scheduling" + storageinformers "k8s.io/client-go/informers/storage" + storagemigrationinformers "k8s.io/client-go/informers/storagemigration" + corelisters "k8s.io/client-go/listers/core/v1" + rbaclisters "k8s.io/client-go/listers/rbac/v1" + "k8s.io/client-go/tools/cache" + + mc "github.com/kplane-dev/apiserver/pkg/multicluster" + "github.com/kplane-dev/apiserver/pkg/multicluster/scopedinformer" +) + +type scopedFactory struct { + clusterID string + clusterLabelKey string + shared informers.SharedInformerFactory + rbacStore *rbacProjectionStore +} + +func newScopedFactory(clusterID, clusterLabelKey string, shared informers.SharedInformerFactory, rbacStore *rbacProjectionStore) informers.SharedInformerFactory { + if clusterLabelKey == "" { + clusterLabelKey = mc.DefaultClusterAnnotation + } + if rbacStore == nil { + rbacStore = newRBACProjectionStore(clusterLabelKey) + } + return &scopedFactory{ + clusterID: clusterID, + clusterLabelKey: clusterLabelKey, + shared: shared, + rbacStore: rbacStore, + } +} + +func (f *scopedFactory) Start(stopCh <-chan struct{}) { + _ = stopCh +} + +func (f *scopedFactory) 
Shutdown() { + // shared informers are owned by manager-level lifecycle. +} + +func (f *scopedFactory) WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool { + out := f.shared.WaitForCacheSync(stopCh) + if out == nil { + out = map[reflect.Type]bool{} + } + // shared informers are started/synced by manager startup; include scoped essentials. + if f.shared != nil { + out[reflect.TypeOf(&corev1.Secret{})] = f.shared.Core().V1().Secrets().Informer().HasSynced() + out[reflect.TypeOf(&corev1.ServiceAccount{})] = f.shared.Core().V1().ServiceAccounts().Informer().HasSynced() + out[reflect.TypeOf(&corev1.Pod{})] = f.shared.Core().V1().Pods().Informer().HasSynced() + out[reflect.TypeOf(&corev1.Node{})] = f.shared.Core().V1().Nodes().Informer().HasSynced() + out[reflect.TypeOf(&rbacv1.Role{})] = f.shared.Rbac().V1().Roles().Informer().HasSynced() + out[reflect.TypeOf(&rbacv1.RoleBinding{})] = f.shared.Rbac().V1().RoleBindings().Informer().HasSynced() + out[reflect.TypeOf(&rbacv1.ClusterRole{})] = f.shared.Rbac().V1().ClusterRoles().Informer().HasSynced() + out[reflect.TypeOf(&rbacv1.ClusterRoleBinding{})] = f.shared.Rbac().V1().ClusterRoleBindings().Informer().HasSynced() + } + return out +} + +func (f *scopedFactory) ForResource(resource schema.GroupVersionResource) (informers.GenericInformer, error) { + return f.shared.ForResource(resource) +} + +func (f *scopedFactory) InformerFor(obj runtime.Object, newFunc internalinformers.NewInformerFunc) cache.SharedIndexInformer { + return f.shared.InformerFor(obj, newFunc) +} + +func (f *scopedFactory) Core() coreinformers.Interface { + return &scopedCoreGroup{f: f} +} + +func (f *scopedFactory) Rbac() rbacinformers.Interface { + return &scopedRbacGroup{f: f} +} + +func (f *scopedFactory) Admissionregistration() admissionregistrationinformers.Interface { + return f.shared.Admissionregistration() +} +func (f *scopedFactory) Internal() apiserverinternalinformers.Interface { return f.shared.Internal() } +func (f 
*scopedFactory) Apps() appsinformers.Interface { return f.shared.Apps() } +func (f *scopedFactory) Autoscaling() autoscalinginformers.Interface { + return f.shared.Autoscaling() +} +func (f *scopedFactory) Batch() batchinformers.Interface { return f.shared.Batch() } +func (f *scopedFactory) Certificates() certificatesinformers.Interface { + return f.shared.Certificates() +} +func (f *scopedFactory) Coordination() coordinationinformers.Interface { + return f.shared.Coordination() +} +func (f *scopedFactory) Discovery() discoveryinformers.Interface { return f.shared.Discovery() } +func (f *scopedFactory) Events() eventsinformers.Interface { return f.shared.Events() } +func (f *scopedFactory) Extensions() extensionsinformers.Interface { + return f.shared.Extensions() +} +func (f *scopedFactory) Flowcontrol() flowcontrolinformers.Interface { + return f.shared.Flowcontrol() +} +func (f *scopedFactory) Networking() networkinginformers.Interface { + return f.shared.Networking() +} +func (f *scopedFactory) Node() nodeinformers.Interface { return f.shared.Node() } +func (f *scopedFactory) Policy() policyinformers.Interface { return f.shared.Policy() } +func (f *scopedFactory) Resource() resourceinformers.Interface { + return f.shared.Resource() +} +func (f *scopedFactory) Scheduling() schedulinginformers.Interface { + return f.shared.Scheduling() +} +func (f *scopedFactory) Storage() storageinformers.Interface { + return f.shared.Storage() +} +func (f *scopedFactory) Storagemigration() storagemigrationinformers.Interface { + return f.shared.Storagemigration() +} + +type scopedCoreGroup struct{ f *scopedFactory } + +func (g *scopedCoreGroup) V1() coreinformersv1.Interface { + return &scopedCoreV1{f: g.f} +} + +type scopedCoreV1 struct{ f *scopedFactory } + +func (v *scopedCoreV1) ComponentStatuses() coreinformersv1.ComponentStatusInformer { + return v.f.shared.Core().V1().ComponentStatuses() +} +func (v *scopedCoreV1) ConfigMaps() coreinformersv1.ConfigMapInformer { + return 
v.f.shared.Core().V1().ConfigMaps() +} +func (v *scopedCoreV1) Endpoints() coreinformersv1.EndpointsInformer { + return v.f.shared.Core().V1().Endpoints() +} +func (v *scopedCoreV1) Events() coreinformersv1.EventInformer { + return v.f.shared.Core().V1().Events() +} +func (v *scopedCoreV1) LimitRanges() coreinformersv1.LimitRangeInformer { + return v.f.shared.Core().V1().LimitRanges() +} +func (v *scopedCoreV1) Namespaces() coreinformersv1.NamespaceInformer { + return v.f.shared.Core().V1().Namespaces() +} +func (v *scopedCoreV1) Nodes() coreinformersv1.NodeInformer { + base := v.f.shared.Core().V1().Nodes().Informer() + return &scopedNodeInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedNodeLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID, clusterLabelKey: v.f.clusterLabelKey}, + } +} +func (v *scopedCoreV1) PersistentVolumes() coreinformersv1.PersistentVolumeInformer { + return v.f.shared.Core().V1().PersistentVolumes() +} +func (v *scopedCoreV1) PersistentVolumeClaims() coreinformersv1.PersistentVolumeClaimInformer { + return v.f.shared.Core().V1().PersistentVolumeClaims() +} +func (v *scopedCoreV1) Pods() coreinformersv1.PodInformer { + base := v.f.shared.Core().V1().Pods().Informer() + return &scopedPodInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedPodLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID, clusterLabelKey: v.f.clusterLabelKey}, + } +} +func (v *scopedCoreV1) PodTemplates() coreinformersv1.PodTemplateInformer { + return v.f.shared.Core().V1().PodTemplates() +} +func (v *scopedCoreV1) ReplicationControllers() coreinformersv1.ReplicationControllerInformer { + return v.f.shared.Core().V1().ReplicationControllers() +} +func (v *scopedCoreV1) ResourceQuotas() coreinformersv1.ResourceQuotaInformer { + return v.f.shared.Core().V1().ResourceQuotas() +} +func (v *scopedCoreV1) Secrets() 
coreinformersv1.SecretInformer { + base := v.f.shared.Core().V1().Secrets().Informer() + return &scopedSecretInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedSecretLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID, clusterLabelKey: v.f.clusterLabelKey}, + } +} +func (v *scopedCoreV1) Services() coreinformersv1.ServiceInformer { + return v.f.shared.Core().V1().Services() +} +func (v *scopedCoreV1) ServiceAccounts() coreinformersv1.ServiceAccountInformer { + base := v.f.shared.Core().V1().ServiceAccounts().Informer() + return &scopedServiceAccountInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedServiceAccountLister{indexer: base.GetIndexer(), clusterID: v.f.clusterID, clusterLabelKey: v.f.clusterLabelKey}, + } +} + +type scopedRbacGroup struct{ f *scopedFactory } + +func (g *scopedRbacGroup) V1() rbacinformersv1.Interface { + return &scopedRbacV1{f: g.f} +} +func (g *scopedRbacGroup) V1alpha1() rbacinformersv1alpha1.Interface { + return g.f.shared.Rbac().V1alpha1() +} +func (g *scopedRbacGroup) V1beta1() rbacinformersv1beta1.Interface { + return g.f.shared.Rbac().V1beta1() +} + +type scopedRbacV1 struct{ f *scopedFactory } + +func (v *scopedRbacV1) ClusterRoleBindings() rbacinformersv1.ClusterRoleBindingInformer { + base := v.f.shared.Rbac().V1().ClusterRoleBindings().Informer() + return &scopedClusterRoleBindingInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedClusterRoleBindingLister{store: v.f.rbacStore, clusterID: v.f.clusterID}, + } +} +func (v *scopedRbacV1) ClusterRoles() rbacinformersv1.ClusterRoleInformer { + base := v.f.shared.Rbac().V1().ClusterRoles().Informer() + return &scopedClusterRoleInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedClusterRoleLister{store: v.f.rbacStore, clusterID: 
v.f.clusterID}, + } +} +func (v *scopedRbacV1) RoleBindings() rbacinformersv1.RoleBindingInformer { + base := v.f.shared.Rbac().V1().RoleBindings().Informer() + return &scopedRoleBindingInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedRoleBindingLister{store: v.f.rbacStore, clusterID: v.f.clusterID}, + } +} +func (v *scopedRbacV1) Roles() rbacinformersv1.RoleInformer { + base := v.f.shared.Rbac().V1().Roles().Informer() + return &scopedRoleInformer{ + informer: newFilteredSharedIndexInformer(base, v.f.clusterID, v.f.clusterLabelKey), + lister: &scopedRoleLister{store: v.f.rbacStore, clusterID: v.f.clusterID}, + } +} + +func newFilteredSharedIndexInformer(shared cache.SharedIndexInformer, clusterID, clusterLabelKey string) cache.SharedIndexInformer { + return scopedinformer.NewFilteredSharedIndexInformer(shared, clusterID, clusterLabelKey) +} + +func objectCluster(obj interface{}, clusterLabelKey string) string { + return scopedinformer.ObjectCluster(obj, clusterLabelKey) +} + +func filteredByCluster(indexer cache.Indexer, clusterID string) []interface{} { + return scopedinformer.FilteredByCluster(indexer, clusterID) +} + +type rbacProjectionStore struct { + mu sync.RWMutex + clusterLabelKey string + clusterRoles map[string]*rbacv1.ClusterRole + clusterBindings map[string]*rbacv1.ClusterRoleBinding + roles map[string]*rbacv1.Role + roleBindings map[string]*rbacv1.RoleBinding +} + +func newRBACProjectionStore(clusterLabelKey string) *rbacProjectionStore { + if clusterLabelKey == "" { + clusterLabelKey = mc.DefaultClusterAnnotation + } + return &rbacProjectionStore{ + clusterLabelKey: clusterLabelKey, + clusterRoles: map[string]*rbacv1.ClusterRole{}, + clusterBindings: map[string]*rbacv1.ClusterRoleBinding{}, + roles: map[string]*rbacv1.Role{}, + roleBindings: map[string]*rbacv1.RoleBinding{}, + } +} + +func registerRBACProjectionHandlers(store *rbacProjectionStore, roleInf, roleBindingInf, 
clusterRoleInf, clusterRoleBindingInf cache.SharedIndexInformer) error { + register := func(inf cache.SharedIndexInformer, upsert func(interface{}), del func(interface{})) error { + _, err := inf.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: upsert, + UpdateFunc: func(_, newObj interface{}) { upsert(newObj) }, + DeleteFunc: del, + }) + return err + } + if err := register(roleInf, store.upsertRole, store.deleteRole); err != nil { + return err + } + if err := register(roleBindingInf, store.upsertRoleBinding, store.deleteRoleBinding); err != nil { + return err + } + if err := register(clusterRoleInf, store.upsertClusterRole, store.deleteClusterRole); err != nil { + return err + } + if err := register(clusterRoleBindingInf, store.upsertClusterRoleBinding, store.deleteClusterRoleBinding); err != nil { + return err + } + return nil +} + +func (s *rbacProjectionStore) objectKey(obj runtime.Object) string { + acc, err := meta.Accessor(obj) + if err != nil { + return "" + } + clusterID := acc.GetLabels()[s.clusterLabelKey] + if clusterID == "" { + return "" + } + return clusterID + "/" + acc.GetNamespace() + "/" + acc.GetName() +} + +func tombstoneObj(obj interface{}) interface{} { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + return tombstone.Obj + } + return obj +} + +func (s *rbacProjectionStore) upsertClusterRole(obj interface{}) { + cr, ok := tombstoneObj(obj).(*rbacv1.ClusterRole) + if !ok || cr == nil { + return + } + key := s.objectKey(cr) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + s.clusterRoles[key] = cr +} + +func (s *rbacProjectionStore) deleteClusterRole(obj interface{}) { + cr, ok := tombstoneObj(obj).(*rbacv1.ClusterRole) + if !ok || cr == nil { + return + } + key := s.objectKey(cr) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + delete(s.clusterRoles, key) +} + +func (s *rbacProjectionStore) upsertClusterRoleBinding(obj interface{}) { + crb, ok := 
tombstoneObj(obj).(*rbacv1.ClusterRoleBinding) + if !ok || crb == nil { + return + } + key := s.objectKey(crb) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + s.clusterBindings[key] = crb +} + +func (s *rbacProjectionStore) deleteClusterRoleBinding(obj interface{}) { + crb, ok := tombstoneObj(obj).(*rbacv1.ClusterRoleBinding) + if !ok || crb == nil { + return + } + key := s.objectKey(crb) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + delete(s.clusterBindings, key) +} + +func (s *rbacProjectionStore) upsertRole(obj interface{}) { + r, ok := tombstoneObj(obj).(*rbacv1.Role) + if !ok || r == nil { + return + } + key := s.objectKey(r) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + s.roles[key] = r +} + +func (s *rbacProjectionStore) deleteRole(obj interface{}) { + r, ok := tombstoneObj(obj).(*rbacv1.Role) + if !ok || r == nil { + return + } + key := s.objectKey(r) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + delete(s.roles, key) +} + +func (s *rbacProjectionStore) upsertRoleBinding(obj interface{}) { + rb, ok := tombstoneObj(obj).(*rbacv1.RoleBinding) + if !ok || rb == nil { + return + } + key := s.objectKey(rb) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + s.roleBindings[key] = rb +} + +func (s *rbacProjectionStore) deleteRoleBinding(obj interface{}) { + rb, ok := tombstoneObj(obj).(*rbacv1.RoleBinding) + if !ok || rb == nil { + return + } + key := s.objectKey(rb) + if key == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + delete(s.roleBindings, key) +} + +func (s *rbacProjectionStore) listClusterRoles(clusterID string) []*rbacv1.ClusterRole { + s.mu.RLock() + defer s.mu.RUnlock() + prefix := clusterID + "/" + ret := make([]*rbacv1.ClusterRole, 0) + for k, v := range s.clusterRoles { + if strings.HasPrefix(k, prefix) { + ret = append(ret, v) + } + } + return ret +} + +func (s *rbacProjectionStore) getClusterRole(clusterID, name string) 
*rbacv1.ClusterRole { + s.mu.RLock() + defer s.mu.RUnlock() + return s.clusterRoles[clusterID+"//"+name] +} + +func (s *rbacProjectionStore) listClusterRoleBindings(clusterID string) []*rbacv1.ClusterRoleBinding { + s.mu.RLock() + defer s.mu.RUnlock() + prefix := clusterID + "/" + ret := make([]*rbacv1.ClusterRoleBinding, 0) + for k, v := range s.clusterBindings { + if strings.HasPrefix(k, prefix) { + ret = append(ret, v) + } + } + return ret +} + +func (s *rbacProjectionStore) getClusterRoleBinding(clusterID, name string) *rbacv1.ClusterRoleBinding { + s.mu.RLock() + defer s.mu.RUnlock() + return s.clusterBindings[clusterID+"//"+name] +} + +func (s *rbacProjectionStore) listRoles(clusterID string) []*rbacv1.Role { + s.mu.RLock() + defer s.mu.RUnlock() + prefix := clusterID + "/" + ret := make([]*rbacv1.Role, 0) + for k, v := range s.roles { + if strings.HasPrefix(k, prefix) { + ret = append(ret, v) + } + } + return ret +} + +func (s *rbacProjectionStore) listRoleBindings(clusterID string) []*rbacv1.RoleBinding { + s.mu.RLock() + defer s.mu.RUnlock() + prefix := clusterID + "/" + ret := make([]*rbacv1.RoleBinding, 0) + for k, v := range s.roleBindings { + if strings.HasPrefix(k, prefix) { + ret = append(ret, v) + } + } + return ret +} + +// RBAC listers/informers +type scopedClusterRoleInformer struct { + informer cache.SharedIndexInformer + lister rbaclisters.ClusterRoleLister +} + +func (i *scopedClusterRoleInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedClusterRoleInformer) Lister() rbaclisters.ClusterRoleLister { return i.lister } + +type scopedClusterRoleBindingInformer struct { + informer cache.SharedIndexInformer + lister rbaclisters.ClusterRoleBindingLister +} + +func (i *scopedClusterRoleBindingInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedClusterRoleBindingInformer) Lister() rbaclisters.ClusterRoleBindingLister { + return i.lister +} + +type scopedRoleInformer struct { + informer 
cache.SharedIndexInformer + lister rbaclisters.RoleLister +} + +func (i *scopedRoleInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedRoleInformer) Lister() rbaclisters.RoleLister { return i.lister } + +type scopedRoleBindingInformer struct { + informer cache.SharedIndexInformer + lister rbaclisters.RoleBindingLister +} + +func (i *scopedRoleBindingInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedRoleBindingInformer) Lister() rbaclisters.RoleBindingLister { return i.lister } + +type scopedClusterRoleLister struct { + store *rbacProjectionStore + clusterID string +} + +func (l *scopedClusterRoleLister) List(sel labels.Selector) (ret []*rbacv1.ClusterRole, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, obj := range l.store.listClusterRoles(l.clusterID) { + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedClusterRoleLister) Get(name string) (*rbacv1.ClusterRole, error) { + if obj := l.store.getClusterRole(l.clusterID, name); obj != nil { + return obj, nil + } + return nil, fmt.Errorf("clusterrole %q not found", name) +} + +type scopedClusterRoleBindingLister struct { + store *rbacProjectionStore + clusterID string +} + +func (l *scopedClusterRoleBindingLister) List(sel labels.Selector) (ret []*rbacv1.ClusterRoleBinding, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, obj := range l.store.listClusterRoleBindings(l.clusterID) { + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedClusterRoleBindingLister) Get(name string) (*rbacv1.ClusterRoleBinding, error) { + if obj := l.store.getClusterRoleBinding(l.clusterID, name); obj != nil { + return obj, nil + } + return nil, fmt.Errorf("clusterrolebinding %q not found", name) +} + +type scopedRoleLister struct { + store *rbacProjectionStore + clusterID string +} + +func (l 
*scopedRoleLister) List(sel labels.Selector) (ret []*rbacv1.Role, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, obj := range l.store.listRoles(l.clusterID) { + if !sel.Matches(labels.Set(obj.Labels)) { + continue + } + ret = append(ret, obj) + } + return ret, nil +} +func (l *scopedRoleLister) Roles(ns string) rbaclisters.RoleNamespaceLister { + return &scopedRoleNamespaceLister{parent: l, namespace: ns} +} + +type scopedRoleNamespaceLister struct { + parent *scopedRoleLister + namespace string +} + +func (l *scopedRoleNamespaceLister) List(sel labels.Selector) (ret []*rbacv1.Role, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedRoleNamespaceLister) Get(name string) (*rbacv1.Role, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("role %s/%s not found", l.namespace, name) +} + +type scopedRoleBindingLister struct { + store *rbacProjectionStore + clusterID string +} + +func (l *scopedRoleBindingLister) List(sel labels.Selector) (ret []*rbacv1.RoleBinding, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, obj := range l.store.listRoleBindings(l.clusterID) { + if !sel.Matches(labels.Set(obj.Labels)) { + continue + } + ret = append(ret, obj) + } + return ret, nil +} +func (l *scopedRoleBindingLister) RoleBindings(ns string) rbaclisters.RoleBindingNamespaceLister { + return &scopedRoleBindingNamespaceLister{parent: l, namespace: ns} +} + +type scopedRoleBindingNamespaceLister struct { + parent *scopedRoleBindingLister + namespace string +} + +func (l *scopedRoleBindingNamespaceLister) List(sel labels.Selector) (ret []*rbacv1.RoleBinding, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = 
append(ret, obj) + } + } + return ret, nil +} +func (l *scopedRoleBindingNamespaceLister) Get(name string) (*rbacv1.RoleBinding, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("rolebinding %s/%s not found", l.namespace, name) +} + +// Core listers/informers for authn +type scopedSecretInformer struct { + informer cache.SharedIndexInformer + lister corelisters.SecretLister +} + +func (i *scopedSecretInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedSecretInformer) Lister() corelisters.SecretLister { return i.lister } + +type scopedServiceAccountInformer struct { + informer cache.SharedIndexInformer + lister corelisters.ServiceAccountLister +} + +func (i *scopedServiceAccountInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedServiceAccountInformer) Lister() corelisters.ServiceAccountLister { return i.lister } + +type scopedPodInformer struct { + informer cache.SharedIndexInformer + lister corelisters.PodLister +} + +func (i *scopedPodInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedPodInformer) Lister() corelisters.PodLister { return i.lister } + +type scopedNodeInformer struct { + informer cache.SharedIndexInformer + lister corelisters.NodeLister +} + +func (i *scopedNodeInformer) Informer() cache.SharedIndexInformer { return i.informer } +func (i *scopedNodeInformer) Lister() corelisters.NodeLister { return i.lister } + +type scopedSecretLister struct { + indexer cache.Indexer + clusterID string + clusterLabelKey string +} + +func (l *scopedSecretLister) List(sel labels.Selector) (ret []*corev1.Secret, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Secret) + if !ok { + continue + } + if 
sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedSecretLister) Secrets(ns string) corelisters.SecretNamespaceLister { + return &scopedSecretNamespaceLister{parent: l, namespace: ns} +} + +type scopedSecretNamespaceLister struct { + parent *scopedSecretLister + namespace string +} + +func (l *scopedSecretNamespaceLister) List(sel labels.Selector) (ret []*corev1.Secret, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedSecretNamespaceLister) Get(name string) (*corev1.Secret, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("secret %s/%s not found", l.namespace, name) +} + +type scopedServiceAccountLister struct { + indexer cache.Indexer + clusterID string + clusterLabelKey string +} + +func (l *scopedServiceAccountLister) List(sel labels.Selector) (ret []*corev1.ServiceAccount, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.ServiceAccount) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedServiceAccountLister) ServiceAccounts(ns string) corelisters.ServiceAccountNamespaceLister { + return &scopedServiceAccountNamespaceLister{parent: l, namespace: ns} +} + +type scopedServiceAccountNamespaceLister struct { + parent *scopedServiceAccountLister + namespace string +} + +func (l *scopedServiceAccountNamespaceLister) List(sel labels.Selector) (ret []*corev1.ServiceAccount, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l 
*scopedServiceAccountNamespaceLister) Get(name string) (*corev1.ServiceAccount, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("serviceaccount %s/%s not found", l.namespace, name) +} + +type scopedPodLister struct { + indexer cache.Indexer + clusterID string + clusterLabelKey string +} + +func (l *scopedPodLister) List(sel labels.Selector) (ret []*corev1.Pod, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Pod) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedPodLister) Pods(ns string) corelisters.PodNamespaceLister { + return &scopedPodNamespaceLister{parent: l, namespace: ns} +} + +type scopedPodNamespaceLister struct { + parent *scopedPodLister + namespace string +} + +func (l *scopedPodNamespaceLister) List(sel labels.Selector) (ret []*corev1.Pod, err error) { + all, _ := l.parent.List(sel) + for _, obj := range all { + if obj.Namespace == l.namespace { + ret = append(ret, obj) + } + } + return ret, nil +} +func (l *scopedPodNamespaceLister) Get(name string) (*corev1.Pod, error) { + all, _ := l.parent.List(labels.Everything()) + for _, obj := range all { + if obj.Namespace == l.namespace && obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("pod %s/%s not found", l.namespace, name) +} + +type scopedNodeLister struct { + indexer cache.Indexer + clusterID string + clusterLabelKey string +} + +func (l *scopedNodeLister) List(sel labels.Selector) (ret []*corev1.Node, err error) { + if sel == nil { + sel = labels.Everything() + } + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Node) + if !ok { + continue + } + if sel.Matches(labels.Set(obj.Labels)) { + ret = append(ret, 
obj) + } + } + return ret, nil +} +func (l *scopedNodeLister) Get(name string) (*corev1.Node, error) { + for _, it := range filteredByCluster(l.indexer, l.clusterID) { + obj, ok := it.(*corev1.Node) + if !ok { + continue + } + if obj.Name == name { + return obj, nil + } + } + return nil, fmt.Errorf("node %q not found", name) +} diff --git a/pkg/multicluster/bootstrap/controllers.go b/pkg/multicluster/bootstrap/controllers.go index 5bd86a2..4035189 100644 --- a/pkg/multicluster/bootstrap/controllers.go +++ b/pkg/multicluster/bootstrap/controllers.go @@ -2,15 +2,206 @@ package bootstrap import ( "context" + "encoding/json" + "fmt" "sync" + "time" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" clusterauthenticationtrust "k8s.io/kubernetes/pkg/controlplane/controller/clusterauthenticationtrust" legacytokentracking "k8s.io/kubernetes/pkg/controlplane/controller/legacytokentracking" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/util/workqueue" "k8s.io/klog/v2" ) +const ( + internalControllerWorkerCount = 4 + legacyTokenDateFormat = "2006-01-02" +) + +type caQueueListener struct { + onChange func() +} + +func (l *caQueueListener) Enqueue() { + if l != nil && l.onChange != nil { + l.onChange() + } +} + +func nonEmptyJSONList(v []string) (string, bool) { + if len(v) == 0 { + return "", false + } + b, err := json.Marshal(v) + if err != nil { + return "", false + } + return string(b), true +} + +func providerValues(p interface{ Value() []string }) []string { + if p == nil { + return nil + } + return p.Value() +} + +func desiredClusterAuthConfigData(info clusterauthenticationtrust.ClusterAuthenticationInfo) map[string]string { + data := map[string]string{} + if info.ClientCA != nil { + if b := info.ClientCA.CurrentCABundleContent(); len(b) > 0 { + data["client-ca-file"] = string(b) + } + } + if info.RequestHeaderCA == nil { + return data + } + 
requestHeaderCA := info.RequestHeaderCA.CurrentCABundleContent() + if len(requestHeaderCA) == 0 { + return data + } + data["requestheader-client-ca-file"] = string(requestHeaderCA) + if s, ok := nonEmptyJSONList(providerValues(info.RequestHeaderUsernameHeaders)); ok { + data["requestheader-username-headers"] = s + } + if s, ok := nonEmptyJSONList(providerValues(info.RequestHeaderUIDHeaders)); ok { + data["requestheader-uid-headers"] = s + } + if s, ok := nonEmptyJSONList(providerValues(info.RequestHeaderGroupHeaders)); ok { + data["requestheader-group-headers"] = s + } + if s, ok := nonEmptyJSONList(providerValues(info.RequestHeaderExtraHeaderPrefixes)); ok { + data["requestheader-extra-headers-prefix"] = s + } + if s, ok := nonEmptyJSONList(providerValues(info.RequestHeaderAllowedNames)); ok { + data["requestheader-allowed-names"] = s + } + return data +} + +func sameConfigData(lhs, rhs map[string]string) bool { + if len(lhs) != len(rhs) { + return false + } + for k, v := range lhs { + if rhs[k] != v { + return false + } + } + return true +} + +func ensureNamespace(ctx context.Context, cs kubernetes.Interface, ns string) error { + if cs == nil || ns == "" { + return nil + } + _, err := cs.CoreV1().Namespaces().Get(ctx, ns, metav1.GetOptions{}) + if err == nil { + return nil + } + if !apierrors.IsNotFound(err) { + return err + } + _, err = cs.CoreV1().Namespaces().Create(ctx, &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: ns}}, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + return nil +} + +func reconcileLegacyTokenTracking(ctx context.Context, cs kubernetes.Interface) error { + if cs == nil { + return fmt.Errorf("nil clientset") + } + client := cs.CoreV1().ConfigMaps(metav1.NamespaceSystem) + cm, err := client.Get(ctx, legacytokentracking.ConfigMapName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = client.Create(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: 
metav1.NamespaceSystem, + Name: legacytokentracking.ConfigMapName, + }, + Data: map[string]string{ + legacytokentracking.ConfigMapDataKey: time.Now().UTC().Format(legacyTokenDateFormat), + }, + }, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + return nil + } + if err != nil { + return err + } + if cm.Data == nil { + cm.Data = map[string]string{} + } + if _, parseErr := time.Parse(legacyTokenDateFormat, cm.Data[legacytokentracking.ConfigMapDataKey]); parseErr == nil { + return nil + } + copy := cm.DeepCopy() + if copy.Data == nil { + copy.Data = map[string]string{} + } + copy.Data[legacytokentracking.ConfigMapDataKey] = time.Now().UTC().Format(legacyTokenDateFormat) + _, err = client.Update(ctx, copy, metav1.UpdateOptions{}) + if apierrors.IsNotFound(err) || apierrors.IsConflict(err) { + return nil + } + return err +} + +func reconcileClusterAuthenticationTrust(ctx context.Context, cs kubernetes.Interface, info clusterauthenticationtrust.ClusterAuthenticationInfo) error { + if cs == nil { + return fmt.Errorf("nil clientset") + } + if err := ensureNamespace(ctx, cs, "kube-system"); err != nil { + return err + } + desiredData := desiredClusterAuthConfigData(info) + client := cs.CoreV1().ConfigMaps("kube-system") + cm, err := client.Get(ctx, "extension-apiserver-authentication", metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = client.Create(ctx, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "kube-system", + Name: "extension-apiserver-authentication", + }, + Data: desiredData, + }, metav1.CreateOptions{}) + if err != nil && !apierrors.IsAlreadyExists(err) { + return err + } + return nil + } + if err != nil { + return err + } + if sameConfigData(cm.Data, desiredData) { + return nil + } + copy := cm.DeepCopy() + copy.Data = desiredData + _, err = client.Update(ctx, copy, metav1.UpdateOptions{}) + if apierrors.IsNotFound(err) || apierrors.IsConflict(err) { + return nil + } + return err +} + 
+func reconcileInternalControllersForCluster(ctx context.Context, cs kubernetes.Interface, info clusterauthenticationtrust.ClusterAuthenticationInfo) error { + if err := reconcileLegacyTokenTracking(ctx, cs); err != nil { + return err + } + return reconcileClusterAuthenticationTrust(ctx, cs, info) +} + type InternalControllerOptions struct { ClientForCluster func(clusterID string) (kubernetes.Interface, error) StopChForCluster func(clusterID string) (<-chan struct{}, error) @@ -21,10 +212,22 @@ type InternalControllerManager struct { opts InternalControllerOptions mu sync.Mutex run map[string]struct{} + + stopCh <-chan struct{} + started bool + queue workqueue.TypedRateLimitingInterface[string] + listener *caQueueListener } func NewInternalControllerManager(opts InternalControllerOptions) *InternalControllerManager { - return &InternalControllerManager{opts: opts, run: map[string]struct{}{}} + return &InternalControllerManager{ + opts: opts, + run: map[string]struct{}{}, + queue: workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.DefaultTypedControllerRateLimiter[string](), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "mc_internal_controllers"}, + ), + } } func (m *InternalControllerManager) Ensure(clusterID string) { @@ -37,33 +240,74 @@ func (m *InternalControllerManager) Ensure(clusterID string) { return } m.run[clusterID] = struct{}{} + if !m.started { + stopCh, err := m.opts.StopChForCluster(clusterID) + if err != nil { + m.mu.Unlock() + klog.Errorf("mc.bootstrap internal controllers stop channel failed cluster=%s: %v", clusterID, err) + return + } + m.stopCh = stopCh + m.started = true + m.startWorkersLocked() + } m.mu.Unlock() + m.queue.Add(clusterID) +} - cs, err := m.opts.ClientForCluster(clusterID) - if err != nil { - klog.Errorf("mc.bootstrap internal controllers client failed cluster=%s: %v", clusterID, err) +func (m *InternalControllerManager) startWorkersLocked() { + stopCh := m.stopCh + if stopCh == nil { return } - stopCh, err := 
m.opts.StopChForCluster(clusterID) - if err != nil { - klog.Errorf("mc.bootstrap internal controllers stop channel failed cluster=%s: %v", clusterID, err) - return + for i := 0; i < internalControllerWorkerCount; i++ { + go wait.Until(func() { m.runWorker() }, time.Second, stopCh) } - ctx, cancel := context.WithCancel(context.Background()) go func() { <-stopCh - cancel() + m.queue.ShutDown() }() + if m.listener == nil { + m.listener = &caQueueListener{onChange: m.enqueueAllClusters} + if m.opts.ClusterAuthInfo.ClientCA != nil { + m.opts.ClusterAuthInfo.ClientCA.AddListener(m.listener) + } + if m.opts.ClusterAuthInfo.RequestHeaderCA != nil { + m.opts.ClusterAuthInfo.RequestHeaderCA.AddListener(m.listener) + } + } +} - legacy := legacytokentracking.NewController(cs) - go legacy.Run(stopCh) - - auth := clusterauthenticationtrust.NewClusterAuthenticationTrustController(m.opts.ClusterAuthInfo, cs) - if m.opts.ClusterAuthInfo.ClientCA != nil { - m.opts.ClusterAuthInfo.ClientCA.AddListener(auth) +func (m *InternalControllerManager) enqueueAllClusters() { + m.mu.Lock() + defer m.mu.Unlock() + for cid := range m.run { + m.queue.Add(cid) } - if m.opts.ClusterAuthInfo.RequestHeaderCA != nil { - m.opts.ClusterAuthInfo.RequestHeaderCA.AddListener(auth) +} + +func (m *InternalControllerManager) runWorker() { + for { + clusterID, quit := m.queue.Get() + if quit { + return + } + func() { + defer m.queue.Done(clusterID) + cs, err := m.opts.ClientForCluster(clusterID) + if err != nil { + klog.Errorf("mc.bootstrap internal controllers client failed cluster=%s: %v", clusterID, err) + m.queue.AddRateLimited(clusterID) + return + } + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + if err := reconcileInternalControllersForCluster(ctx, cs, m.opts.ClusterAuthInfo); err != nil { + klog.Errorf("mc.bootstrap internal controllers reconcile failed cluster=%s: %v", clusterID, err) + m.queue.AddRateLimited(clusterID) + return + } + 
m.queue.Forget(clusterID) + }() } - go auth.Run(ctx, 1) } diff --git a/pkg/multicluster/bootstrap/crd_controller.go b/pkg/multicluster/bootstrap/crd_controller.go new file mode 100644 index 0000000..2247812 --- /dev/null +++ b/pkg/multicluster/bootstrap/crd_controller.go @@ -0,0 +1,88 @@ +package bootstrap + +import ( + "sync" + + "k8s.io/klog/v2" +) + +type MulticlusterCRDController struct { + runtimeManager *CRDRuntimeManager + defaultCluster string + + mu sync.Mutex + started bool + stopCh <-chan struct{} + queue chan string + enqueued map[string]struct{} +} + +func NewMulticlusterCRDController(runtimeManager *CRDRuntimeManager, defaultCluster string) *MulticlusterCRDController { + return &MulticlusterCRDController{ + runtimeManager: runtimeManager, + defaultCluster: defaultCluster, + queue: make(chan string, 2048), + enqueued: map[string]struct{}{}, + } +} + +func (c *MulticlusterCRDController) Start(stopCh <-chan struct{}) { + if c == nil { + return + } + + c.mu.Lock() + if c.started { + c.mu.Unlock() + return + } + c.started = true + c.stopCh = stopCh + c.mu.Unlock() + + go c.run() +} + +func (c *MulticlusterCRDController) EnsureCluster(clusterID string) { + if c == nil || clusterID == "" || clusterID == c.defaultCluster { + return + } + + c.mu.Lock() + if !c.started { + c.mu.Unlock() + return + } + if _, ok := c.enqueued[clusterID]; ok { + c.mu.Unlock() + return + } + c.enqueued[clusterID] = struct{}{} + queue := c.queue + stopCh := c.stopCh + c.mu.Unlock() + + select { + case queue <- clusterID: + case <-stopCh: + } +} + +func (c *MulticlusterCRDController) run() { + for { + select { + case <-c.stopCh: + return + case clusterID := <-c.queue: + if c.runtimeManager != nil { + if err := c.runtimeManager.EnsureCluster(clusterID, c.stopCh); err != nil { + klog.Errorf("mc.crdController ensure cluster failed cluster=%s err=%v", clusterID, err) + } + } + + c.mu.Lock() + delete(c.enqueued, clusterID) + c.mu.Unlock() + } + } +} diff --git 
a/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go b/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go index 3ad5841..3ccb215 100644 --- a/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go +++ b/pkg/multicluster/bootstrap/crd_runtime_manager_wrapped.go @@ -4,30 +4,36 @@ import ( "context" "fmt" "net/http" + "strings" "sync" "time" "golang.org/x/sync/singleflight" + apiextensionshelpers "k8s.io/apiextensions-apiserver/pkg/apihelpers" apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" apiextensionsapiserver "k8s.io/apiextensions-apiserver/pkg/apiserver" apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + apiextensionsinformers "k8s.io/apiextensions-apiserver/pkg/client/informers/externalversions" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apiserver/pkg/server" + "k8s.io/apiserver/pkg/util/webhook" "k8s.io/client-go/rest" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" + "k8s.io/klog/v2" + "k8s.io/apimachinery/pkg/util/validation/field" mc "github.com/kplane-dev/apiserver/pkg/multicluster" ) -const ( - servesLookupTimeout = 2 * time.Second -) - var ( crdRuntimeMetricsOnce = sync.Once{} - crdRuntimeCreateTotal = metrics.NewCounterVec(&metrics.CounterOpts{Name: "mc_crd_runtime_create_total", Help: "Per-cluster CRD runtime creations."}, []string{"status"}) + crdRuntimeCreateTotal = metrics.NewCounterVec(&metrics.CounterOpts{Name: "mc_crd_runtime_create_total", Help: "Shared CRD runtime creations."}, []string{"status"}) crdServesLookupTotal = metrics.NewCounterVec(&metrics.CounterOpts{Name: "mc_crd_serves_lookup_total", Help: "CRD serves-group-version lookups."}, []string{"result"}) crdServesCacheHit = metrics.NewCounterVec(&metrics.CounterOpts{Name: 
"mc_crd_serves_cache_hit_total", Help: "CRD serves cache hits."}, []string{"result"}) crdServesCacheMiss = metrics.NewCounterVec(&metrics.CounterOpts{Name: "mc_crd_serves_cache_miss_total", Help: "CRD serves cache misses."}, []string{"result"}) @@ -49,27 +55,27 @@ type runtimeEntry struct { cancel context.CancelFunc } -type clusterState struct { - r runtimeEntry - c apiextensionsclient.Interface -} - type CRDRuntimeManager struct { opts CRDRuntimeManagerOptions mu sync.Mutex - runtimes map[string]runtimeEntry - clients map[string]apiextensionsclient.Interface - createSF singleflight.Group + sharedRuntime *runtimeEntry + clients map[string]apiextensionsclient.Interface + apiClientPool *mc.APIExtensionsClientPool + runtimeSF singleflight.Group + clientSF singleflight.Group + sharedCRDSF singleflight.Group // Informer-backed serves index state. - informerStarted map[string]bool - clusterSynced map[string]bool - serves map[string]bool - clusterKeys map[string]map[string]struct{} - crdKeys map[string]map[string][]string - informerSF singleflight.Group + servesIndex *CRDServesIndex + sharedProjection *crdProjectionStore + sharedStarted bool + sharedFactory apiextensionsinformers.SharedInformerFactory + sharedStopCh <-chan struct{} + sharedOwnedStop chan struct{} + crdQueue workqueue.TypedRateLimitingInterface[string] + crdWorkersStarted bool } func NewCRDRuntimeManager(opts CRDRuntimeManagerOptions) *CRDRuntimeManager { @@ -77,14 +83,14 @@ func NewCRDRuntimeManager(opts CRDRuntimeManagerOptions) *CRDRuntimeManager { legacyregistry.MustRegister(crdRuntimeCreateTotal, crdServesLookupTotal, crdServesCacheHit, crdServesCacheMiss, crdServesLookupLat) }) return &CRDRuntimeManager{ - opts: opts, - runtimes: map[string]runtimeEntry{}, - clients: map[string]apiextensionsclient.Interface{}, - informerStarted: map[string]bool{}, - clusterSynced: map[string]bool{}, - serves: map[string]bool{}, - clusterKeys: map[string]map[string]struct{}{}, - crdKeys: 
map[string]map[string][]string{}, + opts: opts, + clients: map[string]apiextensionsclient.Interface{}, + servesIndex: NewCRDServesIndex(), + sharedProjection: newCRDProjectionStore(), + crdQueue: workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.DefaultTypedControllerRateLimiter[string](), + workqueue.TypedRateLimitingQueueConfig[string]{Name: "mc_shared_crd_status"}, + ), } } @@ -92,11 +98,11 @@ func (m *CRDRuntimeManager) Runtime(clusterID string, stopCh <-chan struct{}) (h if m == nil || clusterID == "" || clusterID == m.opts.DefaultCluster || m.opts.BaseAPIExtensionsConfig == nil { return nil, nil } - state, err := m.ensureClusterState(clusterID, stopCh) + entry, err := m.ensureSharedRuntime(stopCh) if err != nil { return nil, err } - return state.r.handler, nil + return entry.handler, nil } func (m *CRDRuntimeManager) ServesGroupVersion(clusterID, group, version string, stopCh <-chan struct{}) (bool, error) { @@ -104,8 +110,7 @@ func (m *CRDRuntimeManager) ServesGroupVersion(clusterID, group, version string, return false, nil } start := time.Now() - key := clusterID + "\x00" + group + "\x00" + version - if served, ok := m.lookupFromInformerIndex(clusterID, key); ok { + if served, ok := m.lookupFromInformerIndex(clusterID, group, version); ok { r := result(served) crdServesCacheHit.WithLabelValues(r).Inc() crdServesLookupTotal.WithLabelValues(r).Inc() @@ -114,71 +119,40 @@ func (m *CRDRuntimeManager) ServesGroupVersion(clusterID, group, version string, } crdServesCacheMiss.WithLabelValues("miss").Inc() - // Prefer shared informer-backed state for served checks. 
- if err := m.ensureInformerState(clusterID, stopCh); err == nil { - if served, ok := m.lookupFromInformerIndex(clusterID, key); ok { - r := result(served) - crdServesCacheHit.WithLabelValues(r).Inc() - crdServesLookupTotal.WithLabelValues(r).Inc() - crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) - return served, nil - } - } - - // Fallback to direct API list if informer state is unavailable. - state, err := m.ensureClusterState(clusterID, stopCh) - if err != nil { + if err := m.ensureSharedCRDState(stopCh); err != nil { crdServesLookupTotal.WithLabelValues("error").Inc() crdServesLookupLat.WithLabelValues("error").Observe(time.Since(start).Seconds()) return false, err } - - ctx, cancel := context.WithTimeout(context.Background(), servesLookupTimeout) - defer cancel() - list, err := state.c.ApiextensionsV1().CustomResourceDefinitions().List(ctx, metav1.ListOptions{}) - if err != nil { - crdServesLookupTotal.WithLabelValues("error").Inc() - crdServesLookupLat.WithLabelValues("error").Observe(time.Since(start).Seconds()) - return false, err - } - - served := false - for i := range list.Items { - crd := &list.Items[i] - if crd.Spec.Group != group || !isCRDEstablished(crd) { - continue - } - for _, v := range crd.Spec.Versions { - if v.Name == version && v.Served { - served = true - break - } - } - if served { - break - } + if served, ok := m.lookupFromInformerIndex(clusterID, group, version); ok { + r := result(served) + crdServesCacheHit.WithLabelValues(r).Inc() + crdServesLookupTotal.WithLabelValues(r).Inc() + crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) + return served, nil } - r := result(served) + // No fallback direct API lookup: shared projection is the source of truth. 
+ r := result(false) crdServesLookupTotal.WithLabelValues(r).Inc() crdServesLookupLat.WithLabelValues(r).Observe(time.Since(start).Seconds()) - return served, nil + return false, nil } -func (m *CRDRuntimeManager) ensureClusterState(clusterID string, stopCh <-chan struct{}) (clusterState, error) { +func (m *CRDRuntimeManager) ensureSharedRuntime(stopCh <-chan struct{}) (runtimeEntry, error) { m.mu.Lock() - if r, ok := m.runtimes[clusterID]; ok { - c := m.clients[clusterID] + if m.sharedRuntime != nil { + entry := *m.sharedRuntime m.mu.Unlock() - return clusterState{r: r, c: c}, nil + return entry, nil } m.mu.Unlock() - v, err, _ := m.createSF.Do(clusterID, func() (any, error) { + v, err, _ := m.runtimeSF.Do("shared", func() (any, error) { m.mu.Lock() - if r, ok := m.runtimes[clusterID]; ok { - c := m.clients[clusterID] + if m.sharedRuntime != nil { + entry := *m.sharedRuntime m.mu.Unlock() - return clusterState{r: r, c: c}, nil + return entry, nil } m.mu.Unlock() @@ -187,18 +161,6 @@ func (m *CRDRuntimeManager) ensureClusterState(clusterID string, stopCh <-chan s return nil, fmt.Errorf("base apiextensions config is required") } baseGeneric := *m.opts.BaseAPIExtensionsConfig.GenericConfig - loopback := rest.CopyConfig(baseGeneric.LoopbackClientConfig) - host, err := mc.ClusterHost(loopback.Host, mc.Options{ - PathPrefix: m.opts.PathPrefix, - ControlPlaneSegment: m.opts.ControlPlaneSegment, - }, clusterID) - if err != nil { - crdRuntimeCreateTotal.WithLabelValues("error").Inc() - return nil, fmt.Errorf("build cluster host: %w", err) - } - loopback.Host = host - baseGeneric.LoopbackClientConfig = loopback - baseCfg := *m.opts.BaseAPIExtensionsConfig baseCfg.GenericConfig = &baseGeneric completed := baseCfg.Complete() @@ -214,13 +176,6 @@ func (m *CRDRuntimeManager) ensureClusterState(clusterID string, stopCh <-chan s runCtx, cancel := context.WithCancel(context.Background()) go crdServer.GenericAPIServer.RunPostStartHooks(runCtx) - cs, err := 
apiextensionsclient.NewForConfig(rest.CopyConfig(loopback)) - if err != nil { - cancel() - crdServer.GenericAPIServer.Destroy() - crdRuntimeCreateTotal.WithLabelValues("error").Inc() - return nil, err - } entry := runtimeEntry{ handler: crdServer.GenericAPIServer.Handler.NonGoRestfulMux, server: crdServer.GenericAPIServer, @@ -235,92 +190,131 @@ func (m *CRDRuntimeManager) ensureClusterState(clusterID string, stopCh <-chan s } m.mu.Lock() - m.runtimes[clusterID] = entry - m.clients[clusterID] = cs + m.sharedRuntime = &entry m.mu.Unlock() crdRuntimeCreateTotal.WithLabelValues("success").Inc() - return clusterState{r: entry, c: cs}, nil + return entry, nil }) if err != nil { - return clusterState{}, err + return runtimeEntry{}, err } - state, ok := v.(clusterState) + entry, ok := v.(runtimeEntry) if !ok { - return clusterState{}, fmt.Errorf("unexpected cluster state type %T", v) + return runtimeEntry{}, fmt.Errorf("unexpected runtime entry type %T", v) } - return state, nil + return entry, nil } -func (m *CRDRuntimeManager) lookupFromInformerIndex(clusterID, key string) (bool, bool) { +func (m *CRDRuntimeManager) ensureClusterClient(clusterID string) (apiextensionsclient.Interface, error) { m.mu.Lock() - defer m.mu.Unlock() - if !m.clusterSynced[clusterID] { - return false, false + if c, ok := m.clients[clusterID]; ok { + m.mu.Unlock() + return c, nil } - _, served := m.serves[key] - return served, true -} + if m.apiClientPool == nil { + base := m.baseLoopbackConfig() + if base == nil { + m.mu.Unlock() + return nil, fmt.Errorf("base apiextensions loopback config is required") + } + m.apiClientPool = mc.NewAPIExtensionsClientPool(base, m.opts.PathPrefix, m.opts.ControlPlaneSegment) + } + pool := m.apiClientPool + m.mu.Unlock() -func (m *CRDRuntimeManager) ensureInformerState(clusterID string, stopCh <-chan struct{}) error { - if m.opts.APIExtensionsInformerPool == nil { - return fmt.Errorf("apiextensions informer pool not configured") + v, err, _ := 
m.clientSF.Do(clusterID, func() (any, error) { + m.mu.Lock() + if c, ok := m.clients[clusterID]; ok { + m.mu.Unlock() + return c, nil + } + p := m.apiClientPool + m.mu.Unlock() + + if p == nil { + p = pool + } + cs, err := p.APIExtensionsClientForCluster(clusterID) + if err != nil { + return nil, err + } + m.mu.Lock() + m.clients[clusterID] = cs + m.mu.Unlock() + return cs, nil + }) + if err != nil { + return nil, err + } + cs, ok := v.(apiextensionsclient.Interface) + if !ok { + return nil, fmt.Errorf("unexpected apiextensions client type %T", v) } + return cs, nil +} + +func (m *CRDRuntimeManager) lookupFromInformerIndex(clusterID, group, version string) (bool, bool) { + return m.servesIndex.Lookup(clusterID, group, version) +} + +func (m *CRDRuntimeManager) ensureSharedCRDState(stopCh <-chan struct{}) error { m.mu.Lock() - if m.informerStarted[clusterID] && m.clusterSynced[clusterID] { + if m.sharedStarted { m.mu.Unlock() return nil } m.mu.Unlock() - _, err, _ := m.informerSF.Do(clusterID, func() (any, error) { + _, err, _ := m.sharedCRDSF.Do("shared", func() (any, error) { m.mu.Lock() - if m.informerStarted[clusterID] && m.clusterSynced[clusterID] { + if m.sharedStarted { m.mu.Unlock() return nil, nil } m.mu.Unlock() - cs, factory, poolStopCh, err := m.opts.APIExtensionsInformerPool.Get(clusterID) + base := m.baseLoopbackConfig() + if base == nil { + return nil, fmt.Errorf("base apiextensions loopback config is required for shared CRD informer") + } + cs, err := allClustersAPIExtensionsClient(base) if err != nil { return nil, err } + + factory := apiextensionsinformers.NewSharedInformerFactory(cs, 0) crdInformer := factory.Apiextensions().V1().CustomResourceDefinitions().Informer() + if err := crdInformer.SetTransform(transformCRDForShared(mc.DefaultClusterAnnotation)); err != nil { + return nil, err + } _, err = crdInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj interface{}) { - m.onCRDUpsert(clusterID, obj) + 
m.onSharedCRDUpsert(obj) }, UpdateFunc: func(_, newObj interface{}) { - m.onCRDUpsert(clusterID, newObj) + m.onSharedCRDUpsert(newObj) }, DeleteFunc: func(obj interface{}) { - m.onCRDDelete(clusterID, obj) + m.onSharedCRDDelete(obj) }, }) if err != nil { return nil, err } - stop := poolStopCh - if stop == nil { - stop = stopCh - } - if stop == nil { - return nil, fmt.Errorf("missing stop channel for apiextensions informer") - } - factory.Start(stop) - if !cache.WaitForCacheSync(stop, crdInformer.HasSynced) { - return nil, fmt.Errorf("failed waiting for CRD informer sync for cluster=%s", clusterID) + startStop := m.sharedStartStopCh(stopCh) + factory.Start(startStop) + if !cache.WaitForCacheSync(startStop, crdInformer.HasSynced) { + return nil, fmt.Errorf("failed waiting for shared CRD informer sync") } + m.startSharedCRDWorkers(startStop) - m.rebuildClusterIndex(clusterID, crdInformer.GetStore().List()) + m.rebuildSharedProjection(crdInformer.GetStore().List()) m.mu.Lock() - m.informerStarted[clusterID] = true - m.clusterSynced[clusterID] = true - if _, ok := m.clients[clusterID]; !ok { - m.clients[clusterID] = cs - } + m.sharedFactory = factory + m.sharedStarted = true m.mu.Unlock() return nil, nil }) @@ -328,69 +322,105 @@ func (m *CRDRuntimeManager) ensureInformerState(clusterID string, stopCh <-chan } func (m *CRDRuntimeManager) rebuildClusterIndex(clusterID string, objs []interface{}) { - m.mu.Lock() - defer m.mu.Unlock() - - for k := range m.clusterKeys[clusterID] { - delete(m.serves, k) - } - m.clusterKeys[clusterID] = map[string]struct{}{} - m.crdKeys[clusterID] = map[string][]string{} + m.servesIndex.RebuildCluster(clusterID, objs) +} +func (m *CRDRuntimeManager) rebuildSharedProjection(objs []interface{}) { + decodedObjs := make([]interface{}, 0, len(objs)) + byCluster := map[string][]interface{}{} for _, obj := range objs { crd, ok := crdFromObj(obj) if !ok { continue } - keys := servedKeysForCRD(clusterID, crd) - m.setCRDKeysLocked(clusterID, crd.Name, 
keys) + clusterID := objectClusterID(crd, mc.DefaultClusterAnnotation) + if clusterID == "" { + continue + } + decoded := decodeSharedCRD(clusterID, crd) + decodedObjs = append(decodedObjs, decoded) + byCluster[clusterID] = append(byCluster[clusterID], decoded) + } + m.sharedProjection.ReplaceAll(decodedObjs, mc.DefaultClusterAnnotation) + for clusterID, clusterObjs := range byCluster { + m.servesIndex.RebuildCluster(clusterID, clusterObjs) } } -func (m *CRDRuntimeManager) onCRDUpsert(clusterID string, obj interface{}) { +func (m *CRDRuntimeManager) onSharedCRDUpsert(obj interface{}) { crd, ok := crdFromObj(obj) if !ok { return } - keys := servedKeysForCRD(clusterID, crd) - m.mu.Lock() - defer m.mu.Unlock() - m.setCRDKeysLocked(clusterID, crd.Name, keys) + clusterID := objectClusterID(crd, mc.DefaultClusterAnnotation) + if clusterID == "" { + return + } + crd = decodeSharedCRD(clusterID, crd) + m.sharedProjection.Upsert(clusterID, crd) + m.servesIndex.UpsertCRD(clusterID, crd) + m.enqueueCRDStatus(clusterID, crd.Name) } -func (m *CRDRuntimeManager) onCRDDelete(clusterID string, obj interface{}) { +func (m *CRDRuntimeManager) onSharedCRDDelete(obj interface{}) { crd, ok := crdFromObj(obj) if !ok { return } - m.mu.Lock() - defer m.mu.Unlock() - m.setCRDKeysLocked(clusterID, crd.Name, nil) + clusterID := objectClusterID(crd, mc.DefaultClusterAnnotation) + if clusterID == "" { + return + } + crd = decodeSharedCRD(clusterID, crd) + m.sharedProjection.Delete(clusterID, crd.Name) + m.servesIndex.DeleteCRD(clusterID, crd) } -func (m *CRDRuntimeManager) setCRDKeysLocked(clusterID, crdName string, keys []string) { - if m.clusterKeys[clusterID] == nil { - m.clusterKeys[clusterID] = map[string]struct{}{} +func (m *CRDRuntimeManager) EnsureCluster(clusterID string, stopCh <-chan struct{}) error { + if m == nil || clusterID == "" || clusterID == m.opts.DefaultCluster { + return nil } - if m.crdKeys[clusterID] == nil { - m.crdKeys[clusterID] = map[string][]string{} + if _, err := 
m.ensureSharedRuntime(stopCh); err != nil { + return err } - for _, old := range m.crdKeys[clusterID][crdName] { - delete(m.serves, old) - delete(m.clusterKeys[clusterID], old) + if err := m.ensureSharedCRDState(stopCh); err != nil { + return err } - if len(keys) == 0 { - delete(m.crdKeys[clusterID], crdName) - return + _, err := m.ensureClusterClient(clusterID) + return err +} + +func (m *CRDRuntimeManager) CRDGetterForRequest(ctx context.Context, name string) (*apiextensionsv1.CustomResourceDefinition, error) { + clusterID := m.clusterFromContext(ctx) + if err := m.ensureSharedCRDState(nil); err != nil { + return nil, err } - m.crdKeys[clusterID][crdName] = keys - for _, k := range keys { - m.serves[k] = true - m.clusterKeys[clusterID][k] = struct{}{} + if crd, ok := m.sharedProjection.Get(clusterID, name); ok { + return crd, nil + } + return nil, apierrors.NewNotFound(apiextensionsv1.Resource("customresourcedefinitions"), name) +} + +func (m *CRDRuntimeManager) CRDListerForRequest(ctx context.Context) ([]*apiextensionsv1.CustomResourceDefinition, error) { + clusterID := m.clusterFromContext(ctx) + if err := m.ensureSharedCRDState(nil); err != nil { + return nil, err } + return m.sharedProjection.List(clusterID), nil } -func servedKeysForCRD(clusterID string, crd *apiextensionsv1.CustomResourceDefinition) []string { +func (m *CRDRuntimeManager) clusterFromContext(ctx context.Context) string { + cid, _, _ := mc.FromContext(ctx) + if cid == "" { + cid = m.opts.DefaultCluster + } + if cid == "" { + cid = mc.DefaultClusterName + } + return cid +} + +func ServedKeysForCRD(clusterID string, crd *apiextensionsv1.CustomResourceDefinition) []string { if crd == nil || !isCRDEstablished(crd) { return nil } @@ -419,17 +449,6 @@ func crdFromObj(obj interface{}) (*apiextensionsv1.CustomResourceDefinition, boo return nil, false } -func (m *CRDRuntimeManager) invalidateCluster(clusterID string) { - m.mu.Lock() - defer m.mu.Unlock() - for k := range m.clusterKeys[clusterID] { - 
delete(m.serves, k) - } - delete(m.clusterKeys, clusterID) - delete(m.crdKeys, clusterID) - delete(m.clusterSynced, clusterID) -} - func isCRDEstablished(crd *apiextensionsv1.CustomResourceDefinition) bool { for _, c := range crd.Status.Conditions { if c.Type == apiextensionsv1.Established && c.Status == apiextensionsv1.ConditionTrue { @@ -445,3 +464,309 @@ func result(served bool) string { } return "not_served" } + +type crdProjectionStore struct { + mu sync.RWMutex + byCluster map[string]map[string]*apiextensionsv1.CustomResourceDefinition +} + +func newCRDProjectionStore() *crdProjectionStore { + return &crdProjectionStore{ + byCluster: map[string]map[string]*apiextensionsv1.CustomResourceDefinition{}, + } +} + +func (s *crdProjectionStore) Upsert(clusterID string, crd *apiextensionsv1.CustomResourceDefinition) { + if s == nil || clusterID == "" || crd == nil { + return + } + s.mu.Lock() + defer s.mu.Unlock() + clusterMap, ok := s.byCluster[clusterID] + if !ok { + clusterMap = map[string]*apiextensionsv1.CustomResourceDefinition{} + s.byCluster[clusterID] = clusterMap + } + clusterMap[crd.Name] = crd.DeepCopy() +} + +func (s *crdProjectionStore) Delete(clusterID, name string) { + if s == nil || clusterID == "" || name == "" { + return + } + s.mu.Lock() + defer s.mu.Unlock() + clusterMap, ok := s.byCluster[clusterID] + if !ok { + return + } + delete(clusterMap, name) + if len(clusterMap) == 0 { + delete(s.byCluster, clusterID) + } +} + +func (s *crdProjectionStore) Get(clusterID, name string) (*apiextensionsv1.CustomResourceDefinition, bool) { + if s == nil || clusterID == "" || name == "" { + return nil, false + } + s.mu.RLock() + defer s.mu.RUnlock() + clusterMap, ok := s.byCluster[clusterID] + if !ok { + return nil, false + } + crd, ok := clusterMap[name] + if !ok || crd == nil { + return nil, false + } + return crd.DeepCopy(), true +} + +func (s *crdProjectionStore) List(clusterID string) []*apiextensionsv1.CustomResourceDefinition { + if s == nil || 
clusterID == "" { + return nil + } + s.mu.RLock() + defer s.mu.RUnlock() + clusterMap, ok := s.byCluster[clusterID] + if !ok { + return nil + } + out := make([]*apiextensionsv1.CustomResourceDefinition, 0, len(clusterMap)) + for _, crd := range clusterMap { + if crd == nil { + continue + } + out = append(out, crd.DeepCopy()) + } + return out +} + +func (s *crdProjectionStore) ReplaceAll(objs []interface{}, clusterLabelKey string) { + if s == nil { + return + } + next := map[string]map[string]*apiextensionsv1.CustomResourceDefinition{} + for _, obj := range objs { + crd, ok := crdFromObj(obj) + if !ok { + continue + } + clusterID := objectClusterID(crd, clusterLabelKey) + if clusterID == "" { + continue + } + clusterMap, ok := next[clusterID] + if !ok { + clusterMap = map[string]*apiextensionsv1.CustomResourceDefinition{} + next[clusterID] = clusterMap + } + clusterMap[crd.Name] = crd.DeepCopy() + } + s.mu.Lock() + s.byCluster = next + s.mu.Unlock() +} + +func (m *CRDRuntimeManager) baseLoopbackConfig() *rest.Config { + if m == nil || m.opts.BaseAPIExtensionsConfig == nil || m.opts.BaseAPIExtensionsConfig.GenericConfig == nil { + return nil + } + return m.opts.BaseAPIExtensionsConfig.GenericConfig.LoopbackClientConfig +} + +func allClustersAPIExtensionsClient(base *rest.Config) (apiextensionsclient.Interface, error) { + if base == nil { + return nil, fmt.Errorf("base loopback config is required") + } + cfg := rest.CopyConfig(base) + cfg.Impersonate.UserName = mc.DefaultInternalCrossClusterUser + cfg.Impersonate.Groups = []string{"system:authenticated", "system:masters"} + if cfg.Impersonate.Extra == nil { + cfg.Impersonate.Extra = map[string][]string{} + } + cfg.Impersonate.Extra[mc.DefaultInternalCrossClusterCapability] = []string{"true"} + if cfg.UserAgent == "" { + cfg.UserAgent = mc.DefaultInternalCrossClusterUserAgent + } else { + cfg.UserAgent = mc.DefaultInternalCrossClusterUserAgent + " " + cfg.UserAgent + } + return apiextensionsclient.NewForConfig(cfg) +} 
+ +func (m *CRDRuntimeManager) sharedStartStopCh(stopCh <-chan struct{}) <-chan struct{} { + m.mu.Lock() + defer m.mu.Unlock() + if m.sharedStopCh != nil { + return m.sharedStopCh + } + if stopCh != nil { + m.sharedStopCh = stopCh + return m.sharedStopCh + } + m.sharedOwnedStop = make(chan struct{}) + m.sharedStopCh = m.sharedOwnedStop + return m.sharedStopCh +} + +func objectClusterID(obj interface{}, clusterLabelKey string) string { + if clusterLabelKey == "" { + clusterLabelKey = mc.DefaultClusterAnnotation + } + accessor, err := meta.Accessor(obj) + if err != nil { + return "" + } + return accessor.GetLabels()[clusterLabelKey] +} + +const sharedCRDNamePrefix = "__mc_shared_crd__" + +func transformCRDForShared(clusterLabelKey string) cache.TransformFunc { + return func(obj interface{}) (interface{}, error) { + crd, ok := obj.(*apiextensionsv1.CustomResourceDefinition) + if !ok || crd == nil { + return obj, nil + } + clusterID := objectClusterID(crd, clusterLabelKey) + if clusterID == "" { + return obj, nil + } + cp := crd.DeepCopy() + cp.Name = encodeSharedCRDName(clusterID, cp.Name) + return cp, nil + } +} + +func encodeSharedCRDName(clusterID, name string) string { + if clusterID == "" || name == "" { + return name + } + prefix := sharedCRDNamePrefix + clusterID + "__" + if strings.HasPrefix(name, prefix) { + return name + } + return prefix + name +} + +func decodeSharedCRDName(clusterID, name string) string { + if clusterID == "" || name == "" { + return name + } + prefix := sharedCRDNamePrefix + clusterID + "__" + return strings.TrimPrefix(name, prefix) +} + +func decodeSharedCRD(clusterID string, crd *apiextensionsv1.CustomResourceDefinition) *apiextensionsv1.CustomResourceDefinition { + if crd == nil { + return nil + } + cp := crd.DeepCopy() + cp.Name = decodeSharedCRDName(clusterID, cp.Name) + return cp +} + +const sharedCRDWorkerCount = 6 + +func (m *CRDRuntimeManager) startSharedCRDWorkers(stopCh <-chan struct{}) { + m.mu.Lock() + if m.crdWorkersStarted 
{ + m.mu.Unlock() + return + } + m.crdWorkersStarted = true + m.mu.Unlock() + for i := 0; i < sharedCRDWorkerCount; i++ { + go func() { + for { + key, quit := m.crdQueue.Get() + if quit { + return + } + func() { + defer m.crdQueue.Done(key) + if err := m.reconcileCRDStatusKey(key); err != nil { + klog.Errorf("mc.crd shared status reconcile failed key=%s err=%v", key, err) + m.crdQueue.AddRateLimited(key) + return + } + m.crdQueue.Forget(key) + }() + } + }() + } + go func() { + <-stopCh + m.crdQueue.ShutDown() + }() +} + +func (m *CRDRuntimeManager) enqueueCRDStatus(clusterID, name string) { + if clusterID == "" || name == "" { + return + } + m.crdQueue.Add(clusterID + "\x00" + name) +} + +func splitCRDStatusKey(key string) (string, string, bool) { + clusterID, name, ok := strings.Cut(key, "\x00") + if !ok || clusterID == "" || name == "" { + return "", "", false + } + return clusterID, name, true +} + +func (m *CRDRuntimeManager) reconcileCRDStatusKey(key string) error { + clusterID, name, ok := splitCRDStatusKey(key) + if !ok { + return nil + } + crd, found := m.sharedProjection.Get(clusterID, name) + if !found || crd == nil { + return nil + } + desired := crd.DeepCopy() + desired.Status.AcceptedNames = desired.Spec.Names + + namesAccepted := apiextensionsv1.CustomResourceDefinitionCondition{ + Type: apiextensionsv1.NamesAccepted, + Status: apiextensionsv1.ConditionTrue, + Reason: "NoConflicts", + Message: "cluster-scoped naming accepted", + } + apiextensionshelpers.SetCRDCondition(desired, namesAccepted) + + established := apiextensionsv1.CustomResourceDefinitionCondition{ + Type: apiextensionsv1.Established, + Status: apiextensionsv1.ConditionTrue, + Reason: "InitialNamesAccepted", + Message: "the initial names have been accepted", + } + if desired.Spec.Conversion != nil && + desired.Spec.Conversion.Webhook != nil && + desired.Spec.Conversion.Webhook.ClientConfig != nil && + len(webhook.ValidateCABundle(field.NewPath(""), 
desired.Spec.Conversion.Webhook.ClientConfig.CABundle)) > 0 { + established.Status = apiextensionsv1.ConditionFalse + established.Reason = "InvalidCABundle" + established.Message = "The conversion webhook CABundle is invalid" + } + apiextensionshelpers.SetCRDCondition(desired, established) + + if equality.Semantic.DeepEqual(crd.Status, desired.Status) { + return nil + } + + cs, err := m.ensureClusterClient(clusterID) + if err != nil { + return err + } + _, err = cs.ApiextensionsV1().CustomResourceDefinitions().UpdateStatus(context.TODO(), desired, metav1.UpdateOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + if apierrors.IsConflict(err) { + return err + } + return err +} diff --git a/pkg/multicluster/bootstrap/crd_serves_index.go b/pkg/multicluster/bootstrap/crd_serves_index.go new file mode 100644 index 0000000..c4fd36a --- /dev/null +++ b/pkg/multicluster/bootstrap/crd_serves_index.go @@ -0,0 +1,112 @@ +package bootstrap + +import ( + "sync" + + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" +) + +type CRDServesIndex struct { + mu sync.Mutex + + clusterSynced map[string]bool + serves map[string]bool + clusterKeys map[string]map[string]struct{} + crdKeys map[string]map[string][]string +} + +func NewCRDServesIndex() *CRDServesIndex { + return &CRDServesIndex{ + clusterSynced: map[string]bool{}, + serves: map[string]bool{}, + clusterKeys: map[string]map[string]struct{}{}, + crdKeys: map[string]map[string][]string{}, + } +} + +func MakeServesKey(clusterID, group, version string) string { + return clusterID + "\x00" + group + "\x00" + version +} + +func (i *CRDServesIndex) Lookup(clusterID, group, version string) (bool, bool) { + i.mu.Lock() + defer i.mu.Unlock() + if !i.clusterSynced[clusterID] { + return false, false + } + _, served := i.serves[MakeServesKey(clusterID, group, version)] + return served, true +} + +func (i *CRDServesIndex) RebuildCluster(clusterID string, objs []interface{}) { + i.mu.Lock() + defer 
i.mu.Unlock() + + for k := range i.clusterKeys[clusterID] { + delete(i.serves, k) + } + i.clusterKeys[clusterID] = map[string]struct{}{} + i.crdKeys[clusterID] = map[string][]string{} + + for _, obj := range objs { + crd, ok := crdFromObj(obj) + if !ok { + continue + } + keys := ServedKeysForCRD(clusterID, crd) + i.setCRDKeysLocked(clusterID, crd.Name, keys) + } + i.clusterSynced[clusterID] = true +} + +func (i *CRDServesIndex) UpsertCRD(clusterID string, crd *apiextensionsv1.CustomResourceDefinition) { + if crd == nil { + return + } + keys := ServedKeysForCRD(clusterID, crd) + i.mu.Lock() + defer i.mu.Unlock() + i.setCRDKeysLocked(clusterID, crd.Name, keys) +} + +func (i *CRDServesIndex) DeleteCRD(clusterID string, crd *apiextensionsv1.CustomResourceDefinition) { + if crd == nil { + return + } + i.mu.Lock() + defer i.mu.Unlock() + i.setCRDKeysLocked(clusterID, crd.Name, nil) +} + +func (i *CRDServesIndex) InvalidateCluster(clusterID string) { + i.mu.Lock() + defer i.mu.Unlock() + for k := range i.clusterKeys[clusterID] { + delete(i.serves, k) + } + delete(i.clusterKeys, clusterID) + delete(i.crdKeys, clusterID) + delete(i.clusterSynced, clusterID) +} + +func (i *CRDServesIndex) setCRDKeysLocked(clusterID, crdName string, keys []string) { + if i.clusterKeys[clusterID] == nil { + i.clusterKeys[clusterID] = map[string]struct{}{} + } + if i.crdKeys[clusterID] == nil { + i.crdKeys[clusterID] = map[string][]string{} + } + for _, old := range i.crdKeys[clusterID][crdName] { + delete(i.serves, old) + delete(i.clusterKeys[clusterID], old) + } + if len(keys) == 0 { + delete(i.crdKeys[clusterID], crdName) + return + } + i.crdKeys[clusterID][crdName] = keys + for _, k := range keys { + i.serves[k] = true + i.clusterKeys[clusterID][k] = struct{}{} + } +} diff --git a/pkg/multicluster/handler.go b/pkg/multicluster/handler.go index 564dc23..225473e 100644 --- a/pkg/multicluster/handler.go +++ b/pkg/multicluster/handler.go @@ -2,6 +2,8 @@ package multicluster import ( "net/http" 
+ + "github.com/kplane-dev/apiserver/pkg/multicluster/internalcap" ) // WithClusterRouting wraps an http.Handler to extract cluster from the request using the provided Extractor, @@ -15,7 +17,11 @@ func WithClusterRouting(next http.Handler, ex Extractor, o Options) http.Handler o.DefaultCluster = DefaultClusterName } return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - cid, all, _ := ex.Extract(r.Context(), r) + cid, all, err := ex.Extract(r.Context(), r) + if err != nil { + http.Error(w, "forbidden", http.StatusForbidden) + return + } if cid == "" { if existingCID, existingAll, ok := FromContext(r.Context()); ok && existingCID != "" { cid = existingCID @@ -27,6 +33,10 @@ func WithClusterRouting(next http.Handler, ex Extractor, o Options) http.Handler if o.OnClusterSelected != nil && cid != "" { o.OnClusterSelected(cid) } - next.ServeHTTP(w, r.WithContext(WithCluster(r.Context(), cid, all))) + ctx := WithCluster(r.Context(), cid, all) + if all { + ctx = internalcap.WithAllClustersCapability(ctx) + } + next.ServeHTTP(w, r.WithContext(ctx)) }) } diff --git a/pkg/multicluster/handler_test.go b/pkg/multicluster/handler_test.go new file mode 100644 index 0000000..f4c7f24 --- /dev/null +++ b/pkg/multicluster/handler_test.go @@ -0,0 +1,121 @@ +package multicluster + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "testing" + + "github.com/kplane-dev/apiserver/pkg/multicluster/internalcap" + "k8s.io/apiserver/pkg/authentication/user" + apirequest "k8s.io/apiserver/pkg/endpoints/request" +) + +func TestWithClusterRoutingExtractorErrorForbidden(t *testing.T) { + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + h := WithClusterRouting(next, errorExtractor{}, DefaultOptions) + rec := httptest.NewRecorder() + req := httptest.NewRequest(http.MethodGet, "http://example.test/apis", nil) + + h.ServeHTTP(rec, req) + if rec.Code != http.StatusForbidden { + t.Fatalf("expected status 
403, got %d", rec.Code) + } +} + +func TestWithClusterRoutingDoesNotTrustUserForScope(t *testing.T) { + var ( + gotScope ResourceScope + gotCap bool + ) + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, scope, ok := FromContextScope(r.Context()) + if !ok { + t.Fatalf("expected cluster selection in context") + } + gotScope = scope + gotCap = internalcap.HasAllClustersCapability(r.Context()) + w.WriteHeader(http.StatusOK) + }) + h := WithClusterRouting(next, PathExtractor{}, DefaultOptions) + + req := httptest.NewRequest(http.MethodGet, "http://example.test/apis", nil) + req = req.WithContext(apirequest.WithUser(req.Context(), &user.DefaultInfo{ + Name: DefaultInternalCrossClusterUser, + Extra: map[string][]string{ + DefaultInternalCrossClusterCapability: {"true"}, + }, + })) + rec := httptest.NewRecorder() + + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", rec.Code) + } + if gotScope != ResourceScopeCluster { + t.Fatalf("expected cluster scope, got %q", gotScope) + } + if gotCap { + t.Fatalf("expected no all-clusters capability on context") + } +} + +func TestWithClusterRoutingNonInternalRequestClusterScope(t *testing.T) { + var gotScope ResourceScope + next := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, scope, ok := FromContextScope(r.Context()) + if !ok { + t.Fatalf("expected cluster selection in context") + } + gotScope = scope + w.WriteHeader(http.StatusOK) + }) + h := WithClusterRouting(next, PathExtractor{}, DefaultOptions) + + req := httptest.NewRequest(http.MethodGet, "http://example.test/apis", nil) + req = req.WithContext(apirequest.WithUser(req.Context(), &user.DefaultInfo{ + Name: DefaultInternalCrossClusterUser, + })) + rec := httptest.NewRecorder() + + h.ServeHTTP(rec, req) + if rec.Code != http.StatusOK { + t.Fatalf("expected status 200, got %d", rec.Code) + } + if gotScope != ResourceScopeCluster { + t.Fatalf("expected cluster scope, got %q", 
gotScope) + } +} + +func TestHasInternalCrossClusterCapability(t *testing.T) { + ctx := apirequest.WithUser(context.Background(), &user.DefaultInfo{ + Name: DefaultInternalCrossClusterUser, + Extra: map[string][]string{ + DefaultInternalCrossClusterCapability: {"true"}, + }, + }) + if !HasInternalCrossClusterCapability(ctx) { + t.Fatalf("expected internal cross-cluster capability to be detected") + } +} + +func TestHasInternalCrossClusterCapabilityFalseForRegularUser(t *testing.T) { + ctx := apirequest.WithUser(context.Background(), &user.DefaultInfo{ + Name: "alice", + Extra: map[string][]string{ + DefaultInternalCrossClusterCapability: {"true"}, + }, + }) + if HasInternalCrossClusterCapability(ctx) { + t.Fatalf("expected capability check to reject non-internal user") + } +} + +type errorExtractor struct{} + +func (errorExtractor) Extract(context.Context, *http.Request) (string, bool, error) { + return "", false, errors.New("boom") +} diff --git a/pkg/multicluster/internalcap/allclusters.go b/pkg/multicluster/internalcap/allclusters.go new file mode 100644 index 0000000..ae6c19b --- /dev/null +++ b/pkg/multicluster/internalcap/allclusters.go @@ -0,0 +1,16 @@ +package internalcap + +import "context" + +type allClustersCapabilityKey struct{} + +// WithAllClustersCapability marks context as trusted to access all-clusters reads. +func WithAllClustersCapability(ctx context.Context) context.Context { + return context.WithValue(ctx, allClustersCapabilityKey{}, true) +} + +// HasAllClustersCapability reports whether the context has trusted all-clusters capability. 
+func HasAllClustersCapability(ctx context.Context) bool { + v, _ := ctx.Value(allClustersCapabilityKey{}).(bool) + return v +} diff --git a/pkg/multicluster/options.go b/pkg/multicluster/options.go index 45cad35..d0f017b 100644 --- a/pkg/multicluster/options.go +++ b/pkg/multicluster/options.go @@ -3,6 +3,9 @@ package multicluster import ( "context" "net/http" + + "github.com/kplane-dev/apiserver/pkg/multicluster/internalcap" + apirequest "k8s.io/apiserver/pkg/endpoints/request" ) // Options holds multicluster configuration. @@ -32,13 +35,27 @@ type Options struct { ServerName string } +// ResourceScope defines which keyspace view a request should use. +type ResourceScope string + +const ( + // ResourceScopeCluster scopes reads/writes to one cluster keyspace. + ResourceScopeCluster ResourceScope = "cluster" + // ResourceScopeCrossClusterRead scopes reads to the shared all-clusters keyspace. + // This scope is intended for internal readers only. + ResourceScopeCrossClusterRead ResourceScope = "cross-cluster-read" +) + const ( - DefaultEtcdPrefix = "/registry" - DefaultClusterName = "root" - DefaultPathPrefix = "/clusters/" - DefaultControlPlaneSegment = "control-plane" - DefaultClusterAnnotation = "multicluster.k8s.io/cluster" - DefaultClusterField = "metadata.cluster" + DefaultEtcdPrefix = "/registry" + DefaultClusterName = "root" + DefaultPathPrefix = "/clusters/" + DefaultControlPlaneSegment = "control-plane" + DefaultInternalCrossClusterUser = "system:apiserver" + DefaultInternalCrossClusterCapability = "kplane.internal/cross-cluster-read" + DefaultInternalCrossClusterUserAgent = "kplane-internal-cross-cluster" + DefaultClusterAnnotation = "multicluster.k8s.io/cluster" + DefaultClusterField = "metadata.cluster" ) var DefaultOptions = Options{ @@ -52,8 +69,8 @@ var DefaultOptions = Options{ } // Extractor extracts the cluster selection from an HTTP request/context. -// all=false indicates the request is scoped to a single cluster (required default). 
-// all=true could be reserved for special admin endpoints; not used by default. +// all=false indicates per-cluster scope. +// all=true maps to all-clusters scope and should only be set by trusted internal code. type Extractor interface { Extract(ctx context.Context, r *http.Request) (clusterID string, all bool, err error) @@ -150,21 +167,65 @@ func (c ComposeExtractor) Extract(ctx context.Context, r *http.Request) (string, type clusterContextKey struct{} type clusterSelection struct { - ID string - All bool + ID string + Scope ResourceScope } func WithCluster(ctx context.Context, id string, all bool) context.Context { - return context.WithValue(ctx, clusterContextKey{}, clusterSelection{ID: id, All: all}) + scope := ResourceScopeCluster + if all { + scope = ResourceScopeCrossClusterRead + } + return WithClusterScope(ctx, id, scope) +} + +func WithClusterScope(ctx context.Context, id string, scope ResourceScope) context.Context { + if scope == "" { + scope = ResourceScopeCluster + } + return context.WithValue(ctx, clusterContextKey{}, clusterSelection{ID: id, Scope: scope}) +} + +// WithInternalCrossClusterRead marks context for trusted internal cross-cluster reads. 
+func WithInternalCrossClusterRead(ctx context.Context, id string) context.Context { + ctx = WithClusterScope(ctx, id, ResourceScopeCrossClusterRead) + return internalcap.WithAllClustersCapability(ctx) } func FromContext(ctx context.Context) (id string, all bool, ok bool) { + id, scope, ok := FromContextScope(ctx) + return id, scope == ResourceScopeCrossClusterRead, ok +} + +func FromContextScope(ctx context.Context) (id string, scope ResourceScope, ok bool) { v := ctx.Value(clusterContextKey{}) if v == nil { - return "", false, false + return "", ResourceScopeCluster, false } cs := v.(clusterSelection) - return cs.ID, cs.All, true + if cs.Scope == "" { + cs.Scope = ResourceScopeCluster + } + return cs.ID, cs.Scope, true +} + +// HasInternalCrossClusterCapability reports whether the authenticated request user +// is trusted for internal cross-cluster reads. +func HasInternalCrossClusterCapability(ctx context.Context) bool { + u, ok := apirequest.UserFrom(ctx) + if !ok || u == nil || u.GetName() != DefaultInternalCrossClusterUser { + return false + } + extras := u.GetExtra() + if len(extras) == 0 { + return false + } + for _, v := range extras[DefaultInternalCrossClusterCapability] { + if v == "true" { + return true + } + } + return false } // Watch strategy plumbing diff --git a/pkg/multicluster/scopedinformer/shared.go b/pkg/multicluster/scopedinformer/shared.go new file mode 100644 index 0000000..d23fb84 --- /dev/null +++ b/pkg/multicluster/scopedinformer/shared.go @@ -0,0 +1,149 @@ +package scopedinformer + +import ( + "context" + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" + + mc "github.com/kplane-dev/apiserver/pkg/multicluster" +) + +const ClusterIndexName = "mc.cluster" + +func NewAllClustersKubeClient(base *rest.Config) (kubernetes.Interface, error) { + if base == nil { + return nil, fmt.Errorf("base loopback config is required") + } + cfg := 
rest.CopyConfig(base) + cfg.Impersonate.UserName = mc.DefaultInternalCrossClusterUser + cfg.Impersonate.Groups = []string{"system:authenticated", "system:masters"} + if cfg.Impersonate.Extra == nil { + cfg.Impersonate.Extra = map[string][]string{} + } + cfg.Impersonate.Extra[mc.DefaultInternalCrossClusterCapability] = []string{"true"} + if cfg.UserAgent == "" { + cfg.UserAgent = mc.DefaultInternalCrossClusterUserAgent + } else { + cfg.UserAgent = mc.DefaultInternalCrossClusterUserAgent + " " + cfg.UserAgent + } + return kubernetes.NewForConfig(cfg) +} + +func EnsureClusterIndex(inf cache.SharedIndexInformer, clusterLabelKey string) error { + return inf.AddIndexers(cache.Indexers{ + ClusterIndexName: func(obj interface{}) ([]string, error) { + cid := ObjectCluster(obj, clusterLabelKey) + if cid == "" { + return nil, nil + } + return []string{cid}, nil + }, + }) +} + +func ObjectCluster(obj interface{}, clusterLabelKey string) string { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + obj = tombstone.Obj + } + acc, err := meta.Accessor(obj) + if err != nil { + return "" + } + return acc.GetLabels()[clusterLabelKey] +} + +func FilteredByCluster(indexer cache.Indexer, clusterID string) []interface{} { + items, err := indexer.ByIndex(ClusterIndexName, clusterID) + if err != nil { + return nil + } + return items +} + +func NewFilteredSharedIndexInformer(shared cache.SharedIndexInformer, clusterID, clusterLabelKey string) cache.SharedIndexInformer { + return &filteredSharedIndexInformer{shared: shared, clusterID: clusterID, clusterLabelKey: clusterLabelKey} +} + +type filteredSharedIndexInformer struct { + shared cache.SharedIndexInformer + clusterID string + clusterLabelKey string +} + +func (f *filteredSharedIndexInformer) AddEventHandler(handler cache.ResourceEventHandler) (cache.ResourceEventHandlerRegistration, error) { + return f.shared.AddEventHandler(f.wrap(handler)) +} + +func (f *filteredSharedIndexInformer) 
AddEventHandlerWithResyncPeriod(handler cache.ResourceEventHandler, resyncPeriod time.Duration) (cache.ResourceEventHandlerRegistration, error) { + return f.shared.AddEventHandlerWithResyncPeriod(f.wrap(handler), resyncPeriod) +} + +func (f *filteredSharedIndexInformer) AddEventHandlerWithOptions(handler cache.ResourceEventHandler, options cache.HandlerOptions) (cache.ResourceEventHandlerRegistration, error) { + return f.shared.AddEventHandlerWithOptions(f.wrap(handler), options) +} + +func (f *filteredSharedIndexInformer) RemoveEventHandler(handle cache.ResourceEventHandlerRegistration) error { + return f.shared.RemoveEventHandler(handle) +} + +func (f *filteredSharedIndexInformer) GetStore() cache.Store { return f.shared.GetStore() } +func (f *filteredSharedIndexInformer) GetController() cache.Controller { + return f.shared.GetController() +} +func (f *filteredSharedIndexInformer) Run(stopCh <-chan struct{}) {} +func (f *filteredSharedIndexInformer) RunWithContext(ctx context.Context) {} +func (f *filteredSharedIndexInformer) HasSynced() bool { return f.shared.HasSynced() } +func (f *filteredSharedIndexInformer) LastSyncResourceVersion() string { + return f.shared.LastSyncResourceVersion() +} +func (f *filteredSharedIndexInformer) SetWatchErrorHandler(handler cache.WatchErrorHandler) error { + return f.shared.SetWatchErrorHandler(handler) +} +func (f *filteredSharedIndexInformer) SetWatchErrorHandlerWithContext(handler cache.WatchErrorHandlerWithContext) error { + return f.shared.SetWatchErrorHandlerWithContext(handler) +} +func (f *filteredSharedIndexInformer) SetTransform(handler cache.TransformFunc) error { + return f.shared.SetTransform(handler) +} +func (f *filteredSharedIndexInformer) IsStopped() bool { return f.shared.IsStopped() } +func (f *filteredSharedIndexInformer) AddIndexers(indexers cache.Indexers) error { + return f.shared.AddIndexers(indexers) +} +func (f *filteredSharedIndexInformer) GetIndexer() cache.Indexer { return f.shared.GetIndexer() } + 
+func (f *filteredSharedIndexInformer) wrap(handler cache.ResourceEventHandler) cache.ResourceEventHandler { + if handler == nil { + return cache.ResourceEventHandlerFuncs{} + } + return cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + if ObjectCluster(obj, f.clusterLabelKey) == f.clusterID { + handler.OnAdd(obj, false) + } + }, + UpdateFunc: func(oldObj, newObj interface{}) { + oldMatch := ObjectCluster(oldObj, f.clusterLabelKey) == f.clusterID + newMatch := ObjectCluster(newObj, f.clusterLabelKey) == f.clusterID + switch { + case oldMatch && newMatch: + handler.OnUpdate(oldObj, newObj) + case !oldMatch && newMatch: + handler.OnAdd(newObj, false) + case oldMatch && !newMatch: + handler.OnDelete(oldObj) + } + }, + DeleteFunc: func(obj interface{}) { + if ObjectCluster(obj, f.clusterLabelKey) == f.clusterID { + handler.OnDelete(obj) + } + }, + } +} + +var _ cache.SharedIndexInformer = (*filteredSharedIndexInformer)(nil) diff --git a/pkg/multicluster/storage.go b/pkg/multicluster/storage.go index 37917cf..a14a887 100644 --- a/pkg/multicluster/storage.go +++ b/pkg/multicluster/storage.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "encoding/hex" + "errors" "fmt" "os" "runtime/debug" @@ -24,6 +25,8 @@ import ( "k8s.io/component-base/metrics" "k8s.io/component-base/metrics/legacyregistry" + + "github.com/kplane-dev/apiserver/pkg/multicluster/internalcap" ) // RESTOptionsDecorator wraps the underlying getter to inject a decorator that @@ -45,6 +48,12 @@ var ( Help: "Number of base storage decorator invocations by server and resource prefix.", }, []string{"server", "resourcePrefix"}) debugStoreAndKey = os.Getenv("MC_STOREANDKEY_DEBUG") == "1" + + // ErrAllClustersScopeForbidden is returned when all-clusters scope is requested + // without trusted internal capability. 
+ ErrAllClustersScopeForbidden = errors.New("all-clusters scope is internal-only") + // ErrAllClustersMutationForbidden is returned when write operations attempt all-clusters scope. + ErrAllClustersMutationForbidden = errors.New("mutating operations are not allowed for all-clusters scope") ) func init() { @@ -161,6 +170,10 @@ func (c *clusteredStorage) Versioner() storage.Versioner { } func (c *clusteredStorage) Create(ctx context.Context, key string, obj, out runtime.Object, ttl uint64) error { + if err := c.rejectAllClustersMutation(ctx); err != nil { + return err + } + c.enforceObjectClusterLabel(obj, c.clusterFromContext(ctx)) store, key, err := c.storeAndKey(ctx, key) if err != nil { return err @@ -169,6 +182,9 @@ func (c *clusteredStorage) Create(ctx context.Context, key string, obj, out runt } func (c *clusteredStorage) Delete(ctx context.Context, key string, out runtime.Object, preconditions *storage.Preconditions, validateDeletion storage.ValidateObjectFunc, cachedExistingObject runtime.Object, opts storage.DeleteOptions) error { + if err := c.rejectAllClustersMutation(ctx); err != nil { + return err + } store, key, err := c.storeAndKey(ctx, key) if err != nil { return err @@ -201,11 +217,23 @@ func (c *clusteredStorage) GetList(ctx context.Context, key string, opts storage } func (c *clusteredStorage) GuaranteedUpdate(ctx context.Context, key string, destination runtime.Object, ignoreNotFound bool, precond *storage.Preconditions, tryUpdate storage.UpdateFunc, cachedExistingObject runtime.Object) error { + if err := c.rejectAllClustersMutation(ctx); err != nil { + return err + } store, key, err := c.storeAndKey(ctx, key) if err != nil { return err } - return store.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, precond, tryUpdate, cachedExistingObject) + cid := c.clusterFromContext(ctx) + wrappedUpdate := func(input runtime.Object, res storage.ResponseMeta) (runtime.Object, *uint64, error) { + outObj, ttl, err := tryUpdate(input, res) + if err != 
nil || outObj == nil { + return outObj, ttl, err + } + c.enforceObjectClusterLabel(outObj, cid) + return outObj, ttl, nil + } + return store.GuaranteedUpdate(ctx, key, destination, ignoreNotFound, precond, wrappedUpdate, cachedExistingObject) } func (c *clusteredStorage) Stats(ctx context.Context) (storage.Stats, error) { @@ -265,7 +293,11 @@ func (c *clusteredStorage) destroy() { } func (c *clusteredStorage) storeAndKey(ctx context.Context, key string) (storage.Interface, string, error) { - cid, all, _ := FromContext(ctx) + cid, scope, _ := FromContextScope(ctx) + if scope != ResourceScopeCrossClusterRead && HasInternalCrossClusterCapability(ctx) { + scope = ResourceScopeCrossClusterRead + ctx = internalcap.WithAllClustersCapability(ctx) + } if cid == "" { cid = c.defaultCluster() } @@ -273,12 +305,15 @@ func (c *clusteredStorage) storeAndKey(ctx context.Context, key string) (storage if err != nil { return nil, key, err } - if all { + if scope == ResourceScopeCrossClusterRead { + if !internalcap.HasAllClustersCapability(ctx) { + return nil, key, ErrAllClustersScopeForbidden + } rewritten := c.kindRootPrefix() fullKey := strings.TrimSuffix(c.config.Prefix, "/") + "/" + strings.TrimPrefix(rewritten, "/") if debugStoreAndKey { - fmt.Fprintf(os.Stderr, "mc.storeAndKey all=true store=%T/%p resourcePrefix=%s key=%s cid=%s rewritten=%s etcdPrefix=%s fullKey=%s\n", - store, store, c.resourcePrefix, key, cid, rewritten, c.config.Prefix, fullKey, + fmt.Fprintf(os.Stderr, "mc.storeAndKey scope=%s store=%T/%p resourcePrefix=%s key=%s cid=%s rewritten=%s etcdPrefix=%s fullKey=%s\n", + scope, store, store, c.resourcePrefix, key, cid, rewritten, c.config.Prefix, fullKey, ) } return store, rewritten, nil @@ -286,13 +321,21 @@ func (c *clusteredStorage) storeAndKey(ctx context.Context, key string) (storage rewritten := c.rewriteKey(cid, key) fullKey := strings.TrimSuffix(c.config.Prefix, "/") + "/" + strings.TrimPrefix(rewritten, "/") if debugStoreAndKey { - fmt.Fprintf(os.Stderr, 
"mc.storeAndKey all=false store=%T/%p resourcePrefix=%s key=%s cid=%s rewritten=%s etcdPrefix=%s fullKey=%s\n", - store, store, c.resourcePrefix, key, cid, rewritten, c.config.Prefix, fullKey, + fmt.Fprintf(os.Stderr, "mc.storeAndKey scope=%s store=%T/%p resourcePrefix=%s key=%s cid=%s rewritten=%s etcdPrefix=%s fullKey=%s\n", + scope, store, store, c.resourcePrefix, key, cid, rewritten, c.config.Prefix, fullKey, ) } return store, rewritten, nil } +func (c *clusteredStorage) rejectAllClustersMutation(ctx context.Context) error { + _, scope, _ := FromContextScope(ctx) + if scope == ResourceScopeCrossClusterRead { + return ErrAllClustersMutationForbidden + } + return nil +} + func (c *clusteredStorage) defaultCluster() string { if c.options.DefaultCluster != "" { return c.options.DefaultCluster @@ -300,6 +343,34 @@ func (c *clusteredStorage) defaultCluster() string { return DefaultClusterName } +func (c *clusteredStorage) clusterFromContext(ctx context.Context) string { + cid, _, _ := FromContextScope(ctx) + if cid == "" { + cid = c.defaultCluster() + } + return cid +} + +func (c *clusteredStorage) enforceObjectClusterLabel(obj runtime.Object, cid string) { + if obj == nil { + return + } + acc, err := meta.Accessor(obj) + if err != nil { + return + } + key := c.options.ClusterAnnotationKey + if key == "" { + key = DefaultClusterAnnotation + } + lbls := acc.GetLabels() + if lbls == nil { + lbls = map[string]string{} + } + lbls[key] = cid + acc.SetLabels(lbls) +} + func (c *clusteredStorage) clusterFromObject(obj runtime.Object) string { if obj == nil { return c.defaultCluster() diff --git a/pkg/multicluster/storage_test.go b/pkg/multicluster/storage_test.go index dfc12dd..fc5398e 100644 --- a/pkg/multicluster/storage_test.go +++ b/pkg/multicluster/storage_test.go @@ -6,12 +6,15 @@ import ( "testing" corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/watch" 
"k8s.io/apiserver/pkg/storage" "k8s.io/apiserver/pkg/storage/storagebackend" "k8s.io/apiserver/pkg/storage/storagebackend/factory" "k8s.io/client-go/tools/cache" + + "github.com/kplane-dev/apiserver/pkg/multicluster/internalcap" ) type fakeStorage struct{} @@ -44,6 +47,27 @@ func (f *fakeStorage) GetCurrentResourceVersion(context.Context) (uint64, error) func (f *fakeStorage) SetKeysFunc(storage.KeysFunc) {} func (f *fakeStorage) CompactRevision() int64 { return 0 } +type recordingStorage struct { + fakeStorage + created runtime.Object + updated runtime.Object +} + +func (r *recordingStorage) Create(_ context.Context, _ string, obj, _ runtime.Object, _ uint64) error { + r.created = obj + return nil +} + +func (r *recordingStorage) GuaranteedUpdate(_ context.Context, _ string, _ runtime.Object, _ bool, _ *storage.Preconditions, tryUpdate storage.UpdateFunc, _ runtime.Object) error { + existing := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "existing"}} + next, _, err := tryUpdate(existing, storage.ResponseMeta{}) + if err != nil { + return err + } + r.updated = next + return nil +} + func TestRewriteKey_ClusterInsertion(t *testing.T) { cs := &clusteredStorage{ resourcePrefix: "/registry/pods", @@ -160,8 +184,17 @@ func TestStoreAndKey_AllClustersRoot(t *testing.T) { ctxAll := WithCluster(context.Background(), "", true) _, key, err := cs.(*clusteredStorage).storeAndKey(ctxAll, "/registry/pods") + if err == nil { + t.Fatalf("expected internal scope rejection") + } + if err != ErrAllClustersScopeForbidden { + t.Fatalf("expected ErrAllClustersScopeForbidden, got %v", err) + } + + ctxAll = internalcap.WithAllClustersCapability(ctxAll) + _, key, err = cs.(*clusteredStorage).storeAndKey(ctxAll, "/registry/pods") if err != nil { - t.Fatalf("storeAndKey all: %v", err) + t.Fatalf("storeAndKey all with capability: %v", err) } if key != "/registry/pods/clusters" { t.Fatalf("unexpected all-clusters key: %s", key) @@ -170,3 +203,132 @@ func 
TestStoreAndKey_AllClustersRoot(t *testing.T) { t.Fatalf("expected base to be called once, got %d", got) } } + +func TestStoreAndKey_AllClustersMutationRejected(t *testing.T) { + cs := &clusteredStorage{} + ctxAll := WithClusterScope(context.Background(), "", ResourceScopeCrossClusterRead) + + if err := cs.rejectAllClustersMutation(ctxAll); err == nil { + t.Fatalf("expected mutation rejection") + } + if err := cs.rejectAllClustersMutation(WithClusterScope(context.Background(), "c1", ResourceScopeCluster)); err != nil { + t.Fatalf("expected per-cluster mutation allowance, got %v", err) + } +} + +func TestWithInternalCrossClusterRead(t *testing.T) { + ctx := WithInternalCrossClusterRead(context.Background(), "root") + id, scope, ok := FromContextScope(ctx) + if !ok { + t.Fatalf("expected context selection") + } + if id != "root" { + t.Fatalf("expected root id, got %s", id) + } + if scope != ResourceScopeCrossClusterRead { + t.Fatalf("expected ResourceScopeCrossClusterRead, got %s", scope) + } + if !internalcap.HasAllClustersCapability(ctx) { + t.Fatalf("expected all-clusters capability in context") + } +} + +func TestCreateEnforcesClusterLabel(t *testing.T) { + record := &recordingStorage{} + base := func( + _ *storagebackend.ConfigForResource, + _ string, + _ func(obj runtime.Object) (string, error), + _ func() runtime.Object, + _ func() runtime.Object, + _ storage.AttrFunc, + _ storage.IndexerFuncs, + _ *cache.Indexers, + ) (storage.Interface, factory.DestroyFunc, error) { + return record, func() {}, nil + } + cs, destroy, err := newClusteredStorage( + base, + &storagebackend.ConfigForResource{}, + "/registry/configmaps", + func(obj runtime.Object) (string, error) { return "/registry/configmaps/default/cm", nil }, + func() runtime.Object { return &corev1.ConfigMap{} }, + func() runtime.Object { return &corev1.ConfigMapList{} }, + nil, + nil, + nil, + Options{DefaultCluster: "root"}, + ) + if err != nil { + t.Fatalf("newClusteredStorage: %v", err) + } + defer 
destroy() + + obj := &corev1.ConfigMap{ObjectMeta: metav1.ObjectMeta{Name: "cm"}} + if err := cs.Create(WithCluster(context.Background(), "c1", false), "/registry/configmaps/default/cm", obj, &corev1.ConfigMap{}, 0); err != nil { + t.Fatalf("create: %v", err) + } + got, ok := record.created.(*corev1.ConfigMap) + if !ok || got == nil { + t.Fatalf("expected recorded configmap create object") + } + if got.Labels[DefaultClusterAnnotation] != "c1" { + t.Fatalf("expected cluster label c1, got %q", got.Labels[DefaultClusterAnnotation]) + } +} + +func TestGuaranteedUpdateEnforcesClusterLabel(t *testing.T) { + record := &recordingStorage{} + base := func( + _ *storagebackend.ConfigForResource, + _ string, + _ func(obj runtime.Object) (string, error), + _ func() runtime.Object, + _ func() runtime.Object, + _ storage.AttrFunc, + _ storage.IndexerFuncs, + _ *cache.Indexers, + ) (storage.Interface, factory.DestroyFunc, error) { + return record, func() {}, nil + } + cs, destroy, err := newClusteredStorage( + base, + &storagebackend.ConfigForResource{}, + "/registry/configmaps", + func(obj runtime.Object) (string, error) { return "/registry/configmaps/default/cm", nil }, + func() runtime.Object { return &corev1.ConfigMap{} }, + func() runtime.Object { return &corev1.ConfigMapList{} }, + nil, + nil, + nil, + Options{DefaultCluster: "root"}, + ) + if err != nil { + t.Fatalf("newClusteredStorage: %v", err) + } + defer destroy() + + err = cs.GuaranteedUpdate( + WithCluster(context.Background(), "c2", false), + "/registry/configmaps/default/cm", + &corev1.ConfigMap{}, + false, + nil, + func(input runtime.Object, _ storage.ResponseMeta) (runtime.Object, *uint64, error) { + cm := input.(*corev1.ConfigMap).DeepCopy() + cm.Name = "cm" + return cm, nil, nil + }, + nil, + ) + if err != nil { + t.Fatalf("guaranteed update: %v", err) + } + got, ok := record.updated.(*corev1.ConfigMap) + if !ok || got == nil { + t.Fatalf("expected recorded updated configmap") + } + if 
got.Labels[DefaultClusterAnnotation] != "c2" { + t.Fatalf("expected cluster label c2, got %q", got.Labels[DefaultClusterAnnotation]) + } +} diff --git a/test/smoke/auth_test.go b/test/smoke/auth_test.go index 0378b3d..f832abb 100644 --- a/test/smoke/auth_test.go +++ b/test/smoke/auth_test.go @@ -7,6 +7,7 @@ import ( "testing" "time" + mc "github.com/kplane-dev/apiserver/pkg/multicluster" authenticationv1 "k8s.io/api/authentication/v1" authorizationv1 "k8s.io/api/authorization/v1" corev1 "k8s.io/api/core/v1" @@ -46,6 +47,11 @@ func TestRBACIsolationAcrossClusters(t *testing.T) { if err != nil { t.Fatalf("cluster=%s create clusterrole: %v", clusterA, err) } + if got, getErr := csA.RbacV1().ClusterRoles().Get(ctx, roleName, metav1.GetOptions{}); getErr != nil { + t.Fatalf("cluster=%s get clusterrole: %v", clusterA, getErr) + } else if got.Labels[mc.DefaultClusterAnnotation] != clusterA { + t.Fatalf("cluster=%s clusterrole missing label %q=%q labels=%v", clusterA, mc.DefaultClusterAnnotation, clusterA, got.Labels) + } t.Cleanup(func() { _ = csA.RbacV1().ClusterRoles().Delete(context.Background(), roleName, metav1.DeleteOptions{}) }) @@ -64,6 +70,11 @@ func TestRBACIsolationAcrossClusters(t *testing.T) { if err != nil { t.Fatalf("cluster=%s create clusterrolebinding: %v", clusterA, err) } + if got, getErr := csA.RbacV1().ClusterRoleBindings().Get(ctx, bindingName, metav1.GetOptions{}); getErr != nil { + t.Fatalf("cluster=%s get clusterrolebinding: %v", clusterA, getErr) + } else if got.Labels[mc.DefaultClusterAnnotation] != clusterA { + t.Fatalf("cluster=%s clusterrolebinding missing label %q=%q labels=%v", clusterA, mc.DefaultClusterAnnotation, clusterA, got.Labels) + } t.Cleanup(func() { _ = csA.RbacV1().ClusterRoleBindings().Delete(context.Background(), bindingName, metav1.DeleteOptions{}) }) @@ -80,8 +91,8 @@ func TestRBACIsolationAcrossClusters(t *testing.T) { }, } - waitForSubjectAccessReview(ctx, t, csA, sar, true) - waitForSubjectAccessReview(ctx, t, csRoot, 
sar, false) + waitForSubjectAccessReviewWithLogs(ctx, t, csA, sar, true, s.logs) + waitForSubjectAccessReviewWithLogs(ctx, t, csRoot, sar, false, s.logs) } func TestServiceAccountTokenIsolationAcrossClusters(t *testing.T) { @@ -130,6 +141,35 @@ func TestServiceAccountTokenIsolationAcrossClusters(t *testing.T) { waitForTokenReview(ctx, t, csA, tokenRoot, false) } +func TestRBACCreateSetsClusterLabel(t *testing.T) { + etcd := os.Getenv("ETCD_ENDPOINTS") + s := startAPIServer(t, etcd) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + clusterA := "c-" + randSuffix(3) + csA := kubeClientForCluster(t, s, clusterA) + roleName := "role-" + randSuffix(4) + + obj, err := csA.RbacV1().ClusterRoles().Create(ctx, &rbacv1.ClusterRole{ + ObjectMeta: metav1.ObjectMeta{Name: roleName}, + Rules: []rbacv1.PolicyRule{ + { + APIGroups: []string{""}, + Resources: []string{"configmaps"}, + Verbs: []string{"get"}, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + t.Fatalf("cluster=%s create clusterrole: %v", clusterA, err) + } + if obj.Labels[mc.DefaultClusterAnnotation] != clusterA { + t.Fatalf("expected cluster label %q=%q, got labels=%v", mc.DefaultClusterAnnotation, clusterA, obj.Labels) + } +} + func waitForSubjectAccessReview(ctx context.Context, t *testing.T, cs kubernetes.Interface, sar *authorizationv1.SubjectAccessReview, wantAllowed bool) { t.Helper() @@ -154,6 +194,29 @@ func waitForSubjectAccessReview(ctx context.Context, t *testing.T, cs kubernetes t.Fatalf("expected SAR allowed=%v, last error: %v", wantAllowed, lastErr) } +func waitForSubjectAccessReviewWithLogs(ctx context.Context, t *testing.T, cs kubernetes.Interface, sar *authorizationv1.SubjectAccessReview, wantAllowed bool, logsFn func() string) { + t.Helper() + + deadline := time.Now().Add(30 * time.Second) + var lastErr error + for time.Now().Before(deadline) { + resp, err := cs.AuthorizationV1().SubjectAccessReviews().Create(ctx, sar, metav1.CreateOptions{}) + 
if err == nil { + if resp.Status.Allowed == wantAllowed { + return + } + lastErr = fmt.Errorf("allowed=%v reason=%s", resp.Status.Allowed, resp.Status.Reason) + } else { + lastErr = err + } + time.Sleep(500 * time.Millisecond) + } + if logsFn != nil { + t.Fatalf("expected SAR allowed=%v, last error: %v\nlogs:\n%s", wantAllowed, lastErr, logsFn()) + } + t.Fatalf("expected SAR allowed=%v, last error: %v", wantAllowed, lastErr) +} + func requestServiceAccountToken(ctx context.Context, t *testing.T, cs kubernetes.Interface, namespace, name string) string { t.Helper() resp, err := cs.CoreV1().ServiceAccounts(namespace).CreateToken(ctx, name, &authenticationv1.TokenRequest{ diff --git a/test/smoke/crd_per_cluster_test.go b/test/smoke/crd_per_cluster_test.go index 8f1edfc..d0e8b4a 100644 --- a/test/smoke/crd_per_cluster_test.go +++ b/test/smoke/crd_per_cluster_test.go @@ -313,8 +313,18 @@ func createTestCRDWithStatusSubresource(ctx context.Context, t *testing.T, cs *a OpenAPIV3Schema: &apiextensionsv1.JSONSchemaProps{ Type: "object", Properties: map[string]apiextensionsv1.JSONSchemaProps{ - "spec": {Type: "object"}, - "status": {Type: "object"}, + "spec": { + Type: "object", + Properties: map[string]apiextensionsv1.JSONSchemaProps{ + "foo": {Type: "string"}, + }, + }, + "status": { + Type: "object", + Properties: map[string]apiextensionsv1.JSONSchemaProps{ + "phase": {Type: "string"}, + }, + }, }, }, }, diff --git a/test/smoke/internal_controllers_test.go b/test/smoke/internal_controllers_test.go index fa3286b..dd50460 100644 --- a/test/smoke/internal_controllers_test.go +++ b/test/smoke/internal_controllers_test.go @@ -2,7 +2,6 @@ package smoke import ( "os" - "strings" "testing" "time" @@ -21,12 +20,13 @@ func TestClusterAuthInfoControllerPerCluster(t *testing.T) { deadline := time.Now().Add(12 * time.Second) for { - // Root apiserver startup logs one instance; non-root bootstrap should start another. 
- if strings.Count(s.logs(), "Starting cluster_authentication_trust_controller controller") >= 2 { + // Ensure per-cluster auth trust config map exists and is writable. + cm, err := cs.CoreV1().ConfigMaps("kube-system").Get(t.Context(), "extension-apiserver-authentication", metav1.GetOptions{}) + if err == nil && cm != nil { return } if time.Now().After(deadline) { - t.Fatalf("timed out waiting for per-cluster cluster_authentication_trust_controller start in cluster=%s\nlogs:\n%s", clusterID, s.logs()) + t.Fatalf("timed out waiting for extension-apiserver-authentication configmap in cluster=%s: %v\nlogs:\n%s", clusterID, err, s.logs()) } time.Sleep(250 * time.Millisecond) } diff --git a/test/smoke/memory_200vcp_test.go b/test/smoke/memory_200vcp_test.go new file mode 100644 index 0000000..02f3ba7 --- /dev/null +++ b/test/smoke/memory_200vcp_test.go @@ -0,0 +1,224 @@ +package smoke + +import ( + "context" + "crypto/tls" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "strconv" + "strings" + "sync" + "testing" + "time" +) + +type memorySnapshot struct { + heapInuse float64 + rss float64 + goRoutines float64 +} + +const ( + postBootstrapSettle = 15 * time.Second + steadyStateWait = 90 * time.Second +) + +const currentCRDServingMode = "current-default" + +func TestMemoryAfter200VCPCurrentSystem(t *testing.T) { + runMemoryAfter200VCP(t) +} + +func runMemoryAfter200VCP(t *testing.T) { + etcd := os.Getenv("ETCD_ENDPOINTS") + if strings.TrimSpace(etcd) == "" { + t.Skip("ETCD_ENDPOINTS is not set; skipping memory smoke") + } + + s := startAPIServerWithOptions(t, etcd, apiserverOptions{ + extraArgs: nil, + }) + // Capture process baseline before creating additional VCP activity. 
+ time.Sleep(5 * time.Second) + baseline, err := sampleMemorySnapshot(s) + if err != nil { + t.Fatalf("sample baseline metrics: %v", err) + } + + const totalClusters = 200 + clusterIDs := make([]string, 0, totalClusters) + for i := 0; i < totalClusters; i++ { + clusterIDs = append(clusterIDs, fmt.Sprintf("c-%03d", i)) + } + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Minute) + defer cancel() + + workerN := 12 + workCh := make(chan string) + errCh := make(chan error, totalClusters) + var wg sync.WaitGroup + for i := 0; i < workerN; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for cid := range workCh { + if err := exerciseClusterForMemory(ctx, t, s, cid); err != nil { + errCh <- fmt.Errorf("cluster=%s: %w", cid, err) + continue + } + } + }() + } + + for _, cid := range clusterIDs { + workCh <- cid + } + close(workCh) + wg.Wait() + close(errCh) + for err := range errCh { + t.Fatal(err) + } + + // Phase B: bootstrap burst stabilized. + time.Sleep(postBootstrapSettle) + postBootstrap, err := sampleMemorySnapshot(s) + if err != nil { + t.Fatalf("sample post-bootstrap metrics: %v", err) + } + // Phase C: steady state after no new cluster creation. 
+ time.Sleep(steadyStateWait) + steady, err := sampleMemorySnapshot(s) + if err != nil { + t.Fatalf("sample steady-state metrics: %v", err) + } + + bootstrapDeltaHeap := postBootstrap.heapInuse - baseline.heapInuse + bootstrapDeltaRSS := postBootstrap.rss - baseline.rss + bootstrapDeltaGo := postBootstrap.goRoutines - baseline.goRoutines + steadyTailHeap := steady.heapInuse - postBootstrap.heapInuse + steadyTailRSS := steady.rss - postBootstrap.rss + steadyTailGo := steady.goRoutines - postBootstrap.goRoutines + + t.Logf( + "phase metrics mode=%s clusters=%d baseline(heap=%.2fMiB rss=%.2fMiB goroutines=%.0f) post_bootstrap(heap=%.2fMiB rss=%.2fMiB goroutines=%.0f) steady(heap=%.2fMiB rss=%.2fMiB goroutines=%.0f) bootstrap_delta(heap=%.2fMiB rss=%.2fMiB goroutines=%.0f) bootstrap_per_vcp(heap=%.2fMiB rss=%.2fMiB goroutines=%.2f) steady_tail_delta(heap=%.2fMiB rss=%.2fMiB goroutines=%.0f)", + currentCRDServingMode, + totalClusters, + baseline.heapInuse/(1024*1024), baseline.rss/(1024*1024), baseline.goRoutines, + postBootstrap.heapInuse/(1024*1024), postBootstrap.rss/(1024*1024), postBootstrap.goRoutines, + steady.heapInuse/(1024*1024), steady.rss/(1024*1024), steady.goRoutines, + bootstrapDeltaHeap/(1024*1024), bootstrapDeltaRSS/(1024*1024), bootstrapDeltaGo, + bootstrapDeltaHeap/(1024*1024)/totalClusters, bootstrapDeltaRSS/(1024*1024)/totalClusters, bootstrapDeltaGo/totalClusters, + steadyTailHeap/(1024*1024), steadyTailRSS/(1024*1024), steadyTailGo, + ) + + outDir := filepath.Join(repoRoot(t), "dev", "profiles") + if err := os.MkdirAll(outDir, 0o755); err != nil { + t.Fatalf("mkdir profiles dir: %v", err) + } + saveProfilesForPhase(t, s, outDir, currentCRDServingMode, "post-bootstrap") + saveProfilesForPhase(t, s, outDir, currentCRDServingMode, "steady") +} + +func exerciseClusterForMemory(ctx context.Context, t *testing.T, s *testAPIServer, clusterID string) error { + s.waitReady(t, clusterID) + + cs := kubeClientForCluster(t, s, clusterID) + if err := 
waitForNamespace(ctx, cs, "default"); err != nil { + return err + } + + crdClient := apixClientForCluster(t, s, clusterID) + group := fmt.Sprintf("mem-%s.kplane.dev", strings.ReplaceAll(clusterID, "_", "-")) + plural := "memwidgets" + crdName := plural + "." + group + createTestCRD(ctx, t, crdClient, crdName, group, plural) + waitForCRDEstablished(ctx, t, crdClient, clusterID, crdName) + waitForResourcePresence(t, cs, clusterID, group+"/v1", plural, true) + return nil +} + +func fetchFromRoot(s *testAPIServer, path string) (string, error) { + client := &http.Client{ + Timeout: 20 * time.Second, + Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, //nolint:gosec + }, + } + req, _ := http.NewRequestWithContext(context.Background(), http.MethodGet, s.baseURL()+path, nil) + req.Header.Set("Authorization", "Bearer smoketoken") + resp, err := client.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + b, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + if resp.StatusCode < 200 || resp.StatusCode > 299 { + return "", fmt.Errorf("status=%d body=%s", resp.StatusCode, string(b)) + } + return string(b), nil +} + +func sampleMemorySnapshot(s *testAPIServer) (memorySnapshot, error) { + metricsText, err := fetchFromRoot(s, "/metrics") + if err != nil { + return memorySnapshot{}, err + } + heapInuse, err := parsePromGauge(metricsText, "go_memstats_heap_inuse_bytes") + if err != nil { + return memorySnapshot{}, err + } + rss, err := parsePromGauge(metricsText, "process_resident_memory_bytes") + if err != nil { + return memorySnapshot{}, err + } + goRoutines, err := parsePromGauge(metricsText, "go_goroutines") + if err != nil { + return memorySnapshot{}, err + } + return memorySnapshot{ + heapInuse: heapInuse, + rss: rss, + goRoutines: goRoutines, + }, nil +} + +func saveProfilesForPhase(t *testing.T, s *testAPIServer, outDir, mode, phase string) { + t.Helper() + heapProfile, err := fetchFromRoot(s, 
"/debug/pprof/heap") + if err != nil { + t.Fatalf("fetch heap profile (%s): %v", phase, err) + } + heapPath := filepath.Join(outDir, "heap-200vcp-"+mode+"-"+phase+".pb.gz") + if err := os.WriteFile(heapPath, []byte(heapProfile), 0o644); err != nil { + t.Fatalf("write heap profile (%s): %v", phase, err) + } + t.Logf("saved heap profile (%s): %s", phase, heapPath) + + goroutineProfile, err := fetchFromRoot(s, "/debug/pprof/goroutine?debug=1") + if err != nil { + t.Fatalf("fetch goroutine profile (%s): %v", phase, err) + } + gPath := filepath.Join(outDir, "goroutine-200vcp-"+mode+"-"+phase+".txt") + if err := os.WriteFile(gPath, []byte(goroutineProfile), 0o644); err != nil { + t.Fatalf("write goroutine profile (%s): %v", phase, err) + } + t.Logf("saved goroutine profile (%s): %s", phase, gPath) +} + +func parsePromGauge(metricsText, metric string) (float64, error) { + for _, line := range strings.Split(metricsText, "\n") { + if strings.HasPrefix(line, metric+" ") { + v := strings.TrimSpace(strings.TrimPrefix(line, metric)) + return strconv.ParseFloat(v, 64) + } + } + return 0, fmt.Errorf("metric %s not found", metric) +}