From 9a4deb2e6abcd48e3e5330763a86ec10bb3bd8db Mon Sep 17 00:00:00 2001 From: Evan Hearne Date: Thu, 8 Jan 2026 16:48:09 +0000 Subject: [PATCH] add service account to guard pod This change add a bespoke service account to a guard pod, and introduces checks to remove zombie service accounts in the case of manual guard pod deletion. Tests are included to test behaviour of service account in different scenarios. --- .../controller/guard/guard_controller.go | 65 +++++++++++++++++++ .../controller/guard/guard_controller_test.go | 59 ++++++++++++++++- .../controller/guard/manifests/guard-pod.yaml | 1 + pkg/operator/staticpod/controllers.go | 2 + 4 files changed, 126 insertions(+), 1 deletion(-) diff --git a/pkg/operator/staticpod/controller/guard/guard_controller.go b/pkg/operator/staticpod/controller/guard/guard_controller.go index 5b34749e20..3c846c2e92 100644 --- a/pkg/operator/staticpod/controller/guard/guard_controller.go +++ b/pkg/operator/staticpod/controller/guard/guard_controller.go @@ -48,6 +48,8 @@ type GuardController struct { podGetter corev1client.PodsGetter pdbGetter policyclientv1.PodDisruptionBudgetsGetter pdbLister policylisterv1.PodDisruptionBudgetLister + saLister corelisterv1.ServiceAccountLister + saGetter corev1client.ServiceAccountsGetter // installerPodImageFn returns the image name for the installer pod installerPodImageFn func() string @@ -70,6 +72,7 @@ func NewGuardController( operatorClient operatorv1helpers.StaticPodOperatorClient, podGetter corev1client.PodsGetter, pdbGetter policyclientv1.PodDisruptionBudgetsGetter, + saGetter corev1client.ServiceAccountsGetter, eventRecorder events.Recorder, createConditionalFunc func() (bool, bool, error), extraNodeSelector labels.Selector, @@ -102,6 +105,8 @@ func NewGuardController( podGetter: podGetter, pdbGetter: pdbGetter, pdbLister: kubeInformersForTargetNamespace.Policy().V1().PodDisruptionBudgets().Lister(), + saGetter: saGetter, + saLister: kubeInformersForTargetNamespace.Core().V1().ServiceAccounts().Lister(), installerPodImageFn: getInstallerPodImageFromEnv, createConditionalFunc: createConditionalFunc, extraNodeSelector: extraNodeSelector, @@ -195,6 +200,29 @@ func (c *GuardController) sync(ctx context.Context, syncCtx factory.SyncContext) return nil } + // Do a check for zombie service accounts if a guard pod gets manually + // deleted by the user. + pods, _ := c.podLister.Pods(c.targetNamespace).List(labels.SelectorFromSet(labels.Set{"app": "guard"})) + + podMap := map[string]bool{} + + for _, pod := range pods { + podMap[pod.Spec.ServiceAccountName] = true + } + + serviceAccounts, _ := c.saLister.ServiceAccounts(c.targetNamespace).List(labels.SelectorFromSet(labels.Set{"app": "guard"})) + + for _, sa := range serviceAccounts { + // If service account exists but pod map does not have a key for it, + // it is a zombie service account, and must be deleted. + if !podMap[sa.Name] { + err = c.saGetter.ServiceAccounts(c.targetNamespace).Delete(ctx, sa.Name, metav1.DeleteOptions{}) + if err != nil { + klog.Errorf("Error while deleting service account %s : %v", sa.Name, err) + } + } + } + errs := []error{} if !shouldCreate { pdb := resourceread.ReadPodDisruptionBudgetV1OrDie(pdbTemplate) @@ -232,6 +260,20 @@ func (c *GuardController) sync(ctx context.Context, syncCtx factory.SyncContext) } } } + + // delete service accounts associated with guard pod(s) + serviceAccounts, err := c.saLister.ServiceAccounts(c.targetNamespace).List(labels.SelectorFromSet(labels.Set{"app": "guard"})) + if err != nil { + errs = append(errs, err) + } else { + for _, sa := range serviceAccounts { + _, _, err = resourceapply.DeleteServiceAccount(ctx, c.saGetter, syncCtx.Recorder(), sa) + if err != nil { + klog.Errorf("Unable to delete Service Account: %v", err) + errs = append(errs, err) + } + } + } } else { nodes, err := c.nodeLister.List(c.masterNodesSelector) if err != nil { @@ -320,6 +362,24 @@ func (c *GuardController) sync(ctx context.Context, syncCtx factory.SyncContext) klog.V(5).Infof("Rendering guard pod for operand %v on node %v", operands[node.Name].Name, node.Name) + // Create Service Account + serviceAccount := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: getGuardPodName(c.podResourcePrefix, node.Name), + Namespace: c.targetNamespace, + }, + } + // attach label so we can recognise for deletion later + serviceAccount.Labels = map[string]string{ + "app": "guard", + } + _, _, err = resourceapply.ApplyServiceAccount(ctx, c.saGetter, syncCtx.Recorder(), serviceAccount) + + if err != nil { + klog.Errorf("Unable to create service account %v for Guard Pod: %v", serviceAccount.Name, err) + errs = append(errs, fmt.Errorf("Unable to create service account %v for Guard Pod: %v", serviceAccount.Name, err)) + } + pod := resourceread.ReadPodV1OrDie(podTemplate) pod.ObjectMeta.Name = getGuardPodName(c.podResourcePrefix, node.Name) @@ -328,6 +388,7 @@ func (c *GuardController) sync(ctx context.Context, syncCtx factory.SyncContext) pod.Spec.NodeName = node.Name pod.Spec.Containers[0].Image = c.installerPodImageFn() pod.Spec.Containers[0].ReadinessProbe.HTTPGet.Host = operands[node.Name].Status.PodIP + pod.Spec.ServiceAccountName = serviceAccount.Name // The readyz port as string type is expected to be convertible into int!!! readyzPort, err := strconv.Atoi(c.readyzPort) if err != nil { @@ -364,6 +425,10 @@ func (c *GuardController) sync(ctx context.Context, syncCtx factory.SyncContext) klog.V(5).Infof("Pod phase is neither pending nor running, deleting %v so the guard can be re-created", pod.Name) delete = true } + if actual.Spec.ServiceAccountName != pod.Spec.ServiceAccountName { + klog.V(5).Infof("Service Account changed, deleting %v so the guard can be re-created", pod.Name) + delete = true + } if delete { _, _, err = resourceapply.DeletePod(ctx, c.podGetter, syncCtx.Recorder(), pod) if err != nil { diff --git a/pkg/operator/staticpod/controller/guard/guard_controller_test.go b/pkg/operator/staticpod/controller/guard/guard_controller_test.go index 273d2a1f40..d476039a8b 100644 --- a/pkg/operator/staticpod/controller/guard/guard_controller_test.go +++ b/pkg/operator/staticpod/controller/guard/guard_controller_test.go @@ -270,6 +270,7 @@ func TestRenderGuardPod(t *testing.T) { node *corev1.Node guardExists bool guardPod *corev1.Pod + guardServiceAccount *corev1.ServiceAccount createConditionalFunc func() (bool, bool, error) withArbiter bool }{ @@ -427,6 +428,13 @@ func TestRenderGuardPod(t *testing.T) { PodIP: "1.1.1.1", }, }, + guardServiceAccount: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: getGuardPodName("operand", "master1"), + Namespace: "test", + Labels: map[string]string{"app": "guard"}, + }, + }, node: fakeMasterNode("master1"), }, { @@ -466,6 +474,13 @@ func TestRenderGuardPod(t *testing.T) { PodIP: "1.1.1.1", }, }, + guardServiceAccount: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: getGuardPodName("operand", "arbiter"), + Namespace: "test", + Labels: map[string]string{"app": "guard"}, + }, + }, //createConditionalFunc: func() (bool, bool, error) { return true, true, nil }, node: fakeArbiterNode("arbiter"), withArbiter: true, @@ -522,6 +537,13 @@ func TestRenderGuardPod(t *testing.T) { Phase: corev1.PodSucceeded, }, }, + guardServiceAccount: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: getGuardPodName("operand", "master1"), + Namespace: "test", + Labels: map[string]string{"app": "guard"}, + }, + }, node: fakeMasterNode("master1"), guardExists: true, }, @@ -574,6 +596,13 @@ func TestRenderGuardPod(t *testing.T) { Phase: corev1.PodSucceeded, }, }, + guardServiceAccount: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: getGuardPodName("operand", "arbiter"), + Namespace: "test", + Labels: map[string]string{"app": "guard"}, + }, + }, createConditionalFunc: func() (bool, bool, error) { return true, true, nil }, node: fakeArbiterNode("arbiter"), guardExists: true, @@ -622,6 +651,9 @@ func TestRenderGuardPod(t *testing.T) { if test.guardPod != nil { kubeClient.Tracker().Add(test.guardPod) } + if test.guardServiceAccount != nil { + kubeClient.Tracker().Add(test.guardServiceAccount) + } kubeInformers := informers.NewSharedInformerFactoryWithOptions(kubeClient, 1*time.Minute) eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events("test"), "test-operator", &corev1.ObjectReference{}, clocktesting.NewFakePassiveClock(time.Now())) @@ -651,6 +683,8 @@ func TestRenderGuardPod(t *testing.T) { podGetter: kubeClient.CoreV1(), pdbGetter: kubeClient.PolicyV1(), pdbLister: kubeInformers.Policy().V1().PodDisruptionBudgets().Lister(), + saGetter: kubeClient.CoreV1(), + saLister: kubeInformers.Core().V1().ServiceAccounts().Lister(), installerPodImageFn: getInstallerPodImageFromEnv, createConditionalFunc: createConditionalFunc, } @@ -696,12 +730,26 @@ func TestRenderGuardPod(t *testing.T) { if p.Status.Phase != "" { t.Errorf("%s: unexpected pod status: %v, expected no status set", test.name, p.Status.Phase) } + // check to ensure service account is created for pod. + expectedSAName := getGuardPodName("operand", test.node.Name) + if p.Spec.ServiceAccountName != expectedSAName { + t.Errorf("%s: expected ServiceAccountName %q, got %q", test.name, expectedSAName, p.Spec.ServiceAccountName) + } + _, saErr := kubeClient.CoreV1().ServiceAccounts("test").Get(ctx, p.Spec.ServiceAccountName, metav1.GetOptions{}) + if saErr != nil { + t.Errorf("%s: expected ServiceAccount %q to exist, got error instead: %q", test.name, p.Spec.ServiceAccountName, saErr) + } } } else { _, err := kubeClient.CoreV1().Pods("test").Get(ctx, getGuardPodName("operand", "master1"), metav1.GetOptions{}) if !apierrors.IsNotFound(err) { t.Errorf("%s: expected 'pods \"%v\" not found' error, got %q instead", test.name, getGuardPodName("operand", "master1"), err) } + // verify service account is not found (i.e. deleted) + _, err = kubeClient.CoreV1().ServiceAccounts("test").Get(ctx, getGuardPodName("operand", "master1"), metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + t.Errorf("%s: expected 'service account \"%v\" not found' error, but got %q instead", test.name, getGuardPodName("operand", "master1"), err) + } } } }) @@ -759,13 +807,20 @@ func TestRenderGuardPodPortChanged(t *testing.T) { PodIP: "1.1.1.1", }, } + guardServiceAccount := &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: getGuardPodName("operand", "master1"), + Namespace: "test", + Labels: map[string]string{"app": "guard"}, + }, + } indexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{}) if err := indexer.Add(infraObject); err != nil { t.Fatal(err.Error()) } - kubeClient := fake.NewSimpleClientset(fakeMasterNode("master1"), operandPod, guardPod) + kubeClient := fake.NewSimpleClientset(fakeMasterNode("master1"), operandPod, guardPod, guardServiceAccount) kubeInformers := informers.NewSharedInformerFactoryWithOptions(kubeClient, 1*time.Minute) eventRecorder := events.NewRecorder(kubeClient.CoreV1().Events("test"), "test-operator", &corev1.ObjectReference{}, clocktesting.NewFakePassiveClock(time.Now())) @@ -791,6 +846,8 @@ func TestRenderGuardPodPortChanged(t *testing.T) { podGetter: kubeClient.CoreV1(), pdbGetter: kubeClient.PolicyV1(), pdbLister: kubeInformers.Policy().V1().PodDisruptionBudgets().Lister(), + saGetter: kubeClient.CoreV1(), + saLister: kubeInformers.Core().V1().ServiceAccounts().Lister(), installerPodImageFn: getInstallerPodImageFromEnv, createConditionalFunc: staticcontrollercommon.NewIsSingleNodePlatformFn(informer), } diff --git a/pkg/operator/staticpod/controller/guard/manifests/guard-pod.yaml b/pkg/operator/staticpod/controller/guard/manifests/guard-pod.yaml index 29d4a345b3..230d48a5c3 100644 --- a/pkg/operator/staticpod/controller/guard/manifests/guard-pod.yaml +++ b/pkg/operator/staticpod/controller/guard/manifests/guard-pod.yaml @@ -9,6 +9,7 @@ metadata: app: guard ownerReferences: # Value set by operator spec: + serviceAccountName: # Value set by operator affinity: # Value set by operator priorityClassName: "system-cluster-critical" terminationGracePeriodSeconds: 3 diff --git a/pkg/operator/staticpod/controllers.go b/pkg/operator/staticpod/controllers.go index 0cbd3a9002..dc824715f1 100644 --- a/pkg/operator/staticpod/controllers.go +++ b/pkg/operator/staticpod/controllers.go @@ -242,6 +242,7 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller podClient := b.kubeClient.CoreV1() eventsClient := b.kubeClient.CoreV1() pdbClient := b.kubeClient.PolicyV1() + saClient := b.kubeClient.CoreV1() operandInformers := b.kubeNamespaceInformers.InformersFor(b.operandNamespace) clusterInformers := b.kubeClusterInformers infraInformers := b.configInformers.Config().V1().Infrastructures() @@ -398,6 +399,7 @@ func (b *staticPodOperatorControllerBuilder) ToControllers() (manager.Controller b.staticPodOperatorClient, podClient, pdbClient, + saClient, eventRecorder, b.guardCreateConditionalFunc, b.extraNodeSelector,