From 863ffafd3519b6215983c20ff62f8cf61898ea1e Mon Sep 17 00:00:00 2001 From: Seer Date: Fri, 12 Sep 2025 16:31:43 +0800 Subject: [PATCH 01/13] fix: correct IsPodAnnotationDiff function name in RDS controllers - Update function call from IsPodAnnonationDiff to IsPodAnnotationDiff - Maintain all other resource cleanup and finalizer functionality --- api/v1alpha1/cluster_types.go | 4 + internal/builder/const.go | 4 + internal/builder/helper.go | 29 ++++++ internal/controller/cluster_controller.go | 59 +++++++++++- internal/controller/failover_controller.go | 59 +++++++++++- internal/controller/rds/valkey/cluster.go | 15 ++- internal/controller/rds/valkey/failover.go | 19 +++- internal/controller/rds/valkey_controller.go | 14 +-- internal/controller/user_controller.go | 93 +++++++++++++++---- .../cluster/actor/actor_ensure_resource.go | 4 +- .../failover/actor/actor_ensure_resource.go | 4 +- .../sentinel/actor/actor_ensure_resource.go | 4 +- internal/util/kubernetes.go | 67 ++++++++++--- internal/valkey/failover/failover.go | 9 +- 14 files changed, 326 insertions(+), 58 deletions(-) diff --git a/api/v1alpha1/cluster_types.go b/api/v1alpha1/cluster_types.go index 15f1b25..0a99f68 100644 --- a/api/v1alpha1/cluster_types.go +++ b/api/v1alpha1/cluster_types.go @@ -22,6 +22,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + ClusterResourceCleanFinalizer = "buf.red/cluster-resource-clean" +) + type ShardConfig struct { // Slots is the slot range for the shard, eg: 0-1000,1002,1005-1100 //+kubebuilder:validation:Pattern:=`^(\d{1,5}|(\d{1,5}-\d{1,5}))(,(\d{1,5}|(\d{1,5}-\d{1,5})))*$` diff --git a/internal/builder/const.go b/internal/builder/const.go index 83e865e..9fac09f 100644 --- a/internal/builder/const.go +++ b/internal/builder/const.go @@ -68,3 +68,7 @@ const ( OperatorVersionAnnotation = "operatorVersion" ) + +const ( + ResourceCleanFinalizer = "buf.red/resource-clean" +) diff --git a/internal/builder/helper.go b/internal/builder/helper.go index c3302bd..8512a3a 100644 --- a/internal/builder/helper.go +++ b/internal/builder/helper.go @@ -193,3 +193,32 @@ func MergeAnnotations(t, s map[string]string) map[string]string { } return t } + +func IsPodAnnotationDiff(d map[string]string, s map[string]string) bool { + if len(d) != len(s) { + return true + } + + for k, v := range d { + if k == RestartAnnotationKey { + if v == "" { + continue + } + targetV := s[RestartAnnotationKey] + if targetV == "" { + return true + } + newTime, err1 := time.Parse(time.RFC3339Nano, v) + targetTime, err2 := time.Parse(time.RFC3339Nano, targetV) + if err1 != nil || err2 != nil { + return true + } + if newTime.After(targetTime) { + return true + } + } else if s[k] != v { + return true + } + } + return false +} diff --git a/internal/controller/cluster_controller.go b/internal/controller/cluster_controller.go index 1bd52b2..3e3b36a 100644 --- a/internal/controller/cluster_controller.go +++ b/internal/controller/cluster_controller.go @@ -28,10 +28,12 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "github.com/chideat/valkey-operator/api/v1alpha1" "github.com/chideat/valkey-operator/internal/builder" + "github.com/chideat/valkey-operator/internal/builder/clusterbuilder" "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/ops" ) @@ -54,11 +56,60 @@ func (r 
*ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct logger := log.FromContext(ctx).WithValues("target", req.String()) var instance v1alpha1.Cluster - if err := r.Get(ctx, req.NamespacedName, &instance); errors.IsNotFound(err) { - return ctrl.Result{}, nil - } else if err != nil { + if err := r.Get(ctx, req.NamespacedName, &instance); err != nil { logger.Error(err, "get resource failed") - return ctrl.Result{}, err + return ctrl.Result{}, client.IgnoreNotFound(err) + } else if instance.GetDeletionTimestamp() != nil { + if controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + // check if all pods is shutdown + labels := clusterbuilder.GenerateClusterLabels(instance.GetName(), nil) + stsList := appsv1.StatefulSetList{} + if err := r.List(ctx, &stsList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "get cluster statefulsets failed", "labels", labels) + return ctrl.Result{}, err + } + + needRequeue := false + for _, sts := range stsList.Items { + if sts.GetDeletionTimestamp() == nil { + if err := r.Delete(ctx, &sts); err != nil { + logger.Error(err, "delete cluster statefulset failed", "name", sts.GetName()) + } + needRequeue = true + } + } + if needRequeue { + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + podList := corev1.PodList{} + if err := r.List(ctx, &podList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "list pods failed", "namespace", instance.Namespace, "labels", labels) + return ctrl.Result{}, err + } else if len(podList.Items) > 0 { + // still has pods running, wait for them to shutdown + logger.Info("instance is deleting, but still has pods running, wait for them to shutdown") + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + // all pods is shutdown, remove finalizer + controllerutil.RemoveFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "remove finalizer failed", "instance", instance.GetName()) + return ctrl.Result{}, err + } + } + return ctrl.Result{}, nil + } + + if !controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + // add finalizer + controllerutil.AddFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "add finalizer failed", "instance", instance.GetName()) + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: time.Second}, nil } // update default status diff --git a/internal/controller/failover_controller.go b/internal/controller/failover_controller.go index 611d458..b125f34 100644 --- a/internal/controller/failover_controller.go +++ b/internal/controller/failover_controller.go @@ -24,17 +24,18 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "github.com/chideat/valkey-operator/api/v1alpha1" "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/certbuilder" + "github.com/chideat/valkey-operator/internal/builder/failoverbuilder" 
"github.com/chideat/valkey-operator/internal/builder/sentinelbuilder" "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/ops" @@ -58,11 +59,59 @@ func (r *FailoverReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c logger := log.FromContext(ctx).WithValues("target", req.String()) var instance v1alpha1.Failover - if err := r.Get(ctx, req.NamespacedName, &instance); errors.IsNotFound(err) { - return ctrl.Result{}, nil - } else if err != nil { + if err := r.Get(ctx, req.NamespacedName, &instance); err != nil { logger.Error(err, "get resource failed") - return ctrl.Result{}, err + return ctrl.Result{}, client.IgnoreNotFound(err) + } else if instance.GetDeletionTimestamp() != nil { + if controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + // check if sts is marked for deletion + labels := failoverbuilder.GenerateCommonLabels(instance.GetName()) + stsList := appsv1.StatefulSetList{} + if err := r.List(ctx, &stsList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "get failover statefulsets failed", "labels", labels) + return ctrl.Result{}, err + } + + needRequeue := false + for _, sts := range stsList.Items { + if sts.GetDeletionTimestamp() == nil { + if err := r.Delete(ctx, &sts); err != nil { + logger.Error(err, "delete failover statefulset failed", "name", sts.GetName()) + } + needRequeue = true + } + } + if needRequeue { + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + // check if all pods is shutdown + podList := corev1.PodList{} + if err := r.List(ctx, &podList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "list pods failed", "namespace", instance.Namespace, "labels", labels) + return ctrl.Result{}, err + } + if len(podList.Items) > 0 { + logger.Info("failover pods is not shutdown, waiting for next reconcile", "pods", len(podList.Items)) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + logger.Info("instance is deleting, remove finalizer", "name", instance.GetName()) + controllerutil.RemoveFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "update instance finalizer failed") + return ctrl.Result{}, err + } + } + } + + if !controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + controllerutil.AddFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "add finalizer failed", "instance", instance.GetName()) + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: time.Second}, nil } if crVersion := instance.Annotations[builder.CRVersionKey]; crVersion == "" { diff --git a/internal/controller/rds/valkey/cluster.go b/internal/controller/rds/valkey/cluster.go index 674f87f..fc11642 100644 --- a/internal/controller/rds/valkey/cluster.go +++ b/internal/controller/rds/valkey/cluster.go @@ -27,6 +27,8 @@ import ( "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" v12 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -104,6 +106,7 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err Labels: labels, Annotations: annotations, OwnerReferences: 
util.BuildOwnerReferences(instance), + Finalizers: []string{builder.ResourceCleanFinalizer}, }, Spec: v1alpha1.ClusterSpec{ Image: image, @@ -139,8 +142,18 @@ func ShouldUpdateCluster(cluster, newCluster *v1alpha1.Cluster, logger logr.Logg !reflect.DeepEqual(cluster.Annotations, newCluster.Annotations) { return true } + if !cmp.Equal(cluster.Spec, newCluster.Spec, + cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(v1alpha1.ClusterSpec{}, "PodAnnotations"), + ) { + return true + } - return !reflect.DeepEqual(cluster.Spec, newCluster.Spec) + if builder.IsPodAnnotationDiff(newCluster.Spec.PodAnnotations, cluster.Spec.PodAnnotations) { + logger.V(3).Info("pod annotations diff") + return true + } + return false } func ClusterIsUp(cluster *v1alpha1.Cluster) bool { diff --git a/internal/controller/rds/valkey/failover.go b/internal/controller/rds/valkey/failover.go index a53cc05..908071b 100644 --- a/internal/controller/rds/valkey/failover.go +++ b/internal/controller/rds/valkey/failover.go @@ -26,6 +26,8 @@ import ( "github.com/chideat/valkey-operator/internal/builder/failoverbuilder" "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/go-logr/logr" "github.com/samber/lo" @@ -102,6 +104,7 @@ func GenerateFailover(instance *rdsv1alpha1.Valkey) (*v1alpha1.Failover, error) Labels: failoverbuilder.GenerateSelectorLabels(instance.Name), Annotations: annotations, OwnerReferences: util.BuildOwnerReferences(instance), + Finalizers: []string{builder.ResourceCleanFinalizer}, }, Spec: v1alpha1.FailoverSpec{ Image: image, @@ -135,5 +138,19 @@ func ShouldUpdateFailover(failover, newFailover *v1alpha1.Failover, logger logr. !reflect.DeepEqual(newFailover.Labels, failover.Labels) { return true } - return !reflect.DeepEqual(failover.Spec, newFailover.Spec) + + if !cmp.Equal(newFailover.Spec, failover.Spec, + cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(v1alpha1.FailoverSpec{}, "PodAnnotations"), + ) { + return true + } + + if builder.IsPodAnnotationDiff(newFailover.Spec.PodAnnotations, failover.Spec.PodAnnotations) || + (newFailover.Spec.Sentinel != nil && + failover.Spec.Sentinel != nil && + builder.IsPodAnnotationDiff(newFailover.Spec.Sentinel.PodAnnotations, failover.Spec.Sentinel.PodAnnotations)) { + return true + } + return false } diff --git a/internal/controller/rds/valkey_controller.go b/internal/controller/rds/valkey_controller.go index 13e69ac..f3d77f0 100644 --- a/internal/controller/rds/valkey_controller.go +++ b/internal/controller/rds/valkey_controller.go @@ -68,13 +68,9 @@ func (r *ValkeyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr inst := &rdsv1alpha1.Valkey{} if err := r.Get(ctx, req.NamespacedName, inst); err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } logger.Error(err, "Fail to get valkey instance") - return reconcile.Result{}, err - } - if inst.GetDeletionTimestamp() != nil { + return reconcile.Result{}, client.IgnoreNotFound(err) + } else if inst.GetDeletionTimestamp() != nil { if err := r.processFinalizer(inst); err != nil { logger.Error(err, "fail to process finalizer") return r.updateInstanceStatus(ctx, inst, err, logger) @@ -139,6 +135,8 @@ func (r *ValkeyReconciler) reconcileFailover(ctx context.Context, inst *rdsv1alp return nil } else if err != nil { return err + } else if failover.GetDeletionTimestamp() != nil { + return fmt.Errorf("redis failover %s is deleting, waiting for it to be deleted", 
failover.Name) } if len(inst.Status.MatchLabels) == 0 { @@ -220,6 +218,10 @@ func (r *ValkeyReconciler) reconcileCluster(ctx context.Context, inst *rdsv1alph return nil } else if err != nil { return err + } else if cluster.GetDeletionTimestamp() != nil { + // wait old resource deleted + logger.V(3).Info("redis cluster is deleting, waiting for it to be deleted") + return fmt.Errorf("redis cluster %s is deleting, waiting for it to be deleted", cluster.Name) } if len(inst.Status.MatchLabels) == 0 { diff --git a/internal/controller/user_controller.go b/internal/controller/user_controller.go index c82c857..e11bf41 100644 --- a/internal/controller/user_controller.go +++ b/internal/controller/user_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "reflect" + "slices" "strings" "time" @@ -30,6 +31,7 @@ import ( "github.com/chideat/valkey-operator/internal/controller/user" "github.com/chideat/valkey-operator/internal/util" security "github.com/chideat/valkey-operator/pkg/security/password" + tuser "github.com/chideat/valkey-operator/pkg/types/user" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -70,17 +72,45 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. logger := log.FromContext(ctx).WithName("User").WithValues("target", req.String()) instance := v1alpha1.User{} - err := r.Client.Get(ctx, req.NamespacedName, &instance) - if err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } + if err := r.Client.Get(ctx, req.NamespacedName, &instance); err != nil { logger.Error(err, "get valkey user failed") - return reconcile.Result{}, err - } + return reconcile.Result{}, client.IgnoreNotFound(err) + } else if instance.GetDeletionTimestamp() != nil { + if slices.Contains([]string{tuser.DefaultOperatorUserName}, instance.Spec.Username) { + switch instance.Spec.Arch { + case core.ValkeyReplica, core.ValkeyFailover: + rf := &v1alpha1.Failover{} + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: instance.Spec.InstanceName}, rf); err != nil { + if !errors.IsNotFound(err) { + logger.Error(err, "get instance failed", "name", instance.Name) + return ctrl.Result{}, err + } + } else { + if rf.GetDeletionTimestamp() != nil { + logger.Info("failover is deleting, skip remove finalizer", "name", instance.Spec.InstanceName) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + // this should not happen, but we still return a requeue result + return ctrl.Result{RequeueAfter: time.Minute}, nil + } + case core.ValkeyCluster: + cluster := &v1alpha1.Cluster{} + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: instance.Spec.InstanceName}, cluster); err != nil { + if !errors.IsNotFound(err) { + logger.Error(err, "get instance failed", "name", instance.Name) + return ctrl.Result{}, err + } + } else { + if cluster.GetDeletionTimestamp() != nil { + logger.Info("instance is deleting, skip remove finalizer", "name", instance.Spec.InstanceName) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + // this should not happen, but we still return a requeue result + return ctrl.Result{RequeueAfter: time.Minute}, nil + } + } + } - if instance.GetDeletionTimestamp() != nil { - logger.Info("user is being deleted", "instance", req.NamespacedName) if err := r.Handler.Delete(ctx, instance, logger); err != nil { if instance.Status.Message != err.Error() { instance.Status.Phase = v1alpha1.UserFail @@ -91,12 +121,36 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req 
ctrl.Request) (ctrl. } } return ctrl.Result{RequeueAfter: time.Second * 10}, err - } else { - controllerutil.RemoveFinalizer(&instance, UserFinalizer) - if err := r.Update(ctx, &instance); err != nil { - logger.Error(err, "remove finalizer failed", "instance", req.NamespacedName) - return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + for _, name := range instance.Spec.PasswordSecrets { + if name == "" { + continue } + secret := &corev1.Secret{} + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: name}, secret); err != nil { + if errors.IsNotFound(err) { + logger.Info("secret not found, skip remove finalizer", "name", name) + continue + } + logger.Error(err, "get secret failed", "secret name", name) + return ctrl.Result{}, err + } + + if slices.Contains(secret.GetFinalizers(), UserFinalizer) { + controllerutil.RemoveFinalizer(secret, UserFinalizer) + if err := r.Update(ctx, secret); err != nil { + logger.Error(err, "remove finalizer from secret failed", "secret name", name) + return ctrl.Result{}, err + } + } + } + + logger.Info("RemoveFinalizer", "instance", req.NamespacedName) + controllerutil.RemoveFinalizer(&instance, UserFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "remove finalizer failed", "instance", req.NamespacedName) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil } return ctrl.Result{}, nil } @@ -131,10 +185,7 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. continue } secret := &corev1.Secret{} - if err := r.Get(ctx, types.NamespacedName{ - Namespace: instance.Namespace, - Name: name, - }, secret); err != nil { + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: name}, secret); err != nil { logger.Error(err, "get secret failed", "secret name", name) instance.Status.Message = err.Error() instance.Status.Phase = v1alpha1.UserFail @@ -157,12 +208,14 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
secret.SetLabels(map[string]string{}) } if secret.Labels[builder.InstanceNameLabelKey] != vkName || - len(secret.GetOwnerReferences()) == 0 || secret.OwnerReferences[0].UID != instance.GetUID() { + len(secret.GetOwnerReferences()) == 0 || secret.OwnerReferences[0].UID != instance.GetUID() || + controllerutil.ContainsFinalizer(secret, UserFinalizer) { secret.Labels[builder.ManagedByLabelKey] = config.AppName secret.Labels[builder.InstanceNameLabelKey] = vkName secret.OwnerReferences = util.BuildOwnerReferences(&instance) - if err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + controllerutil.AddFinalizer(secret, UserFinalizer) + if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { return r.Update(ctx, secret) }); err != nil { logger.Error(err, "update secret owner failed", "secret", secret.Name) diff --git a/internal/ops/cluster/actor/actor_ensure_resource.go b/internal/ops/cluster/actor/actor_ensure_resource.go index 46c65ac..cd2483d 100644 --- a/internal/ops/cluster/actor/actor_ensure_resource.go +++ b/internal/ops/cluster/actor/actor_ensure_resource.go @@ -38,6 +38,8 @@ import ( "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -283,7 +285,7 @@ func (a *actorEnsureResource) ensureStatefulset(ctx context.Context, cluster typ } else if err != nil { logger.Error(err, "get poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) return actor.RequeueWithError(err) - } else if !reflect.DeepEqual(oldPdb.Spec, pdb.Spec) { + } else if !cmp.Equal(oldPdb.Spec, pdb.Spec, cmpopts.EquateEmpty()) { pdb.ResourceVersion = oldPdb.ResourceVersion if err = a.client.UpdatePodDisruptionBudget(ctx, cr.GetNamespace(), pdb); err != nil { logger.Error(err, "update poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) diff --git a/internal/ops/failover/actor/actor_ensure_resource.go b/internal/ops/failover/actor/actor_ensure_resource.go index 2be5baa..6f81dc5 100644 --- a/internal/ops/failover/actor/actor_ensure_resource.go +++ b/internal/ops/failover/actor/actor_ensure_resource.go @@ -39,6 +39,8 @@ import ( "github.com/chideat/valkey-operator/internal/util" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/samber/lo" "github.com/go-logr/logr" @@ -345,7 +347,7 @@ func (a *actorEnsureResource) ensureSentinel(ctx context.Context, inst types.Fai logger.Error(err, "get sentinel failed", "target", client.ObjectKeyFromObject(newSen)) return actor.RequeueWithError(err) } - if !reflect.DeepEqual(newSen.Spec, oldSen.Spec) || + if !cmp.Equal(newSen.Spec, oldSen.Spec, cmpopts.EquateEmpty()) || !reflect.DeepEqual(newSen.Labels, oldSen.Labels) || !reflect.DeepEqual(newSen.Annotations, oldSen.Annotations) { oldSen.Spec = newSen.Spec diff --git a/internal/ops/sentinel/actor/actor_ensure_resource.go b/internal/ops/sentinel/actor/actor_ensure_resource.go index 131e981..26c0256 100644 --- a/internal/ops/sentinel/actor/actor_ensure_resource.go +++ b/internal/ops/sentinel/actor/actor_ensure_resource.go @@ -36,6 +36,8 @@ import ( "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + 
"github.com/google/go-cmp/cmp/cmpopts" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -204,7 +206,7 @@ func (a *actorEnsureResource) ensurePodDisruptionBudget(ctx context.Context, ins } else if err != nil { logger.Error(err, "get poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) return actor.NewResultWithError(ops.CommandRequeue, err) - } else if !reflect.DeepEqual(oldPdb.Spec, pdb.Spec) { + } else if !cmp.Equal(oldPdb.Spec, pdb.Spec, cmpopts.EquateEmpty()) { pdb.ResourceVersion = oldPdb.ResourceVersion if err := a.client.UpdatePodDisruptionBudget(ctx, sen.Namespace, pdb); err != nil { logger.Error(err, "update poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) diff --git a/internal/util/kubernetes.go b/internal/util/kubernetes.go index c19da50..906f306 100644 --- a/internal/util/kubernetes.go +++ b/internal/util/kubernetes.go @@ -22,6 +22,8 @@ import ( "time" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -129,11 +131,29 @@ func GetVolumeByName(vols []corev1.Volume, name string) *corev1.Volume { return nil } +// isSubmap checks if map 'a' is a submap of map 'b'. +// It returns true if every key-value pair in 'a' is also present in 'b'. +func isSubmap[K, V comparable](a, b map[K]V) bool { + if len(a) == 0 { + return true + } + if len(b) == 0 { + return false + } + for keyA, valA := range a { + valB, ok := b[keyA] + if !ok || valA != valB { + return false + } + } + return true +} + // IsStatefulsetChanged func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) bool { // statefulset check - if !reflect.DeepEqual(newSts.GetLabels(), sts.GetLabels()) || - !reflect.DeepEqual(newSts.GetAnnotations(), sts.GetAnnotations()) { + if !cmp.Equal(newSts.GetLabels(), sts.GetLabels(), cmpopts.EquateEmpty()) || + !cmp.Equal(newSts.GetAnnotations(), sts.GetAnnotations(), cmpopts.EquateEmpty()) { logger.V(2).Info("labels or annotations diff") return true } @@ -166,7 +186,7 @@ func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) b return true } - if !reflect.DeepEqual(oldPvc.Spec, newPvc.Spec) { + if !cmp.Equal(oldPvc.Spec, newPvc.Spec, cmpopts.EquateEmpty()) { logger.V(2).Info("pvc diff", "name", name, "old", oldPvc.Spec, "new", newPvc.Spec) return true } @@ -177,9 +197,13 @@ func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) b func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logger logr.Logger) bool { if (newTplSpec == nil && oldTplSpec != nil) || (newTplSpec != nil && oldTplSpec == nil) || - !reflect.DeepEqual(newTplSpec.Labels, oldTplSpec.Labels) || - !reflect.DeepEqual(newTplSpec.Annotations, oldTplSpec.Annotations) { - logger.V(2).Info("pod labels diff") + !cmp.Equal(newTplSpec.Labels, oldTplSpec.Labels, cmpopts.EquateEmpty()) || + !isSubmap(newTplSpec.Annotations, oldTplSpec.Annotations) { + + logger.V(2).Info("pod labels diff", + "newLabels", newTplSpec.Labels, "oldLabels", oldTplSpec.Labels, + "newAnnotations", newTplSpec.Annotations, "oldAnnotations", oldTplSpec.Annotations, + ) return true } @@ -191,14 +215,14 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge } // nodeselector - if !reflect.DeepEqual(newSpec.NodeSelector, oldSpec.NodeSelector) || - !reflect.DeepEqual(newSpec.Affinity, oldSpec.Affinity) || - 
!reflect.DeepEqual(newSpec.Tolerations, oldSpec.Tolerations) { + if !cmp.Equal(newSpec.NodeSelector, oldSpec.NodeSelector, cmpopts.EquateEmpty()) || + !cmp.Equal(newSpec.Affinity, oldSpec.Affinity, cmpopts.EquateEmpty()) || + !cmp.Equal(newSpec.Tolerations, oldSpec.Tolerations, cmpopts.EquateEmpty()) { logger.V(2).Info("pod nodeselector|affinity|tolerations diff") return true } - if !reflect.DeepEqual(newSpec.SecurityContext, oldSpec.SecurityContext) || + if !cmp.Equal(newSpec.SecurityContext, oldSpec.SecurityContext, cmpopts.EquateEmpty()) || newSpec.HostNetwork != oldSpec.HostNetwork || newSpec.ServiceAccountName != oldSpec.ServiceAccountName { logger.V(2).Info("pod securityContext or hostnetwork or serviceaccount diff", @@ -248,16 +272,32 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge return true } + nLimits, nReqs := newCon.Resources.Limits, newCon.Resources.Requests + oLimits, oReqs := oldCon.Resources.Limits, oldCon.Resources.Requests + if oLimits.Cpu().Cmp(*nLimits.Cpu()) != 0 || oLimits.Memory().Cmp(*nLimits.Memory()) != 0 || + oReqs.Cpu().Cmp(*nReqs.Cpu()) != 0 || oReqs.Memory().Cmp(*nReqs.Memory()) != 0 || + (!nLimits.StorageEphemeral().IsZero() && oLimits.StorageEphemeral().Cmp(*nLimits.StorageEphemeral()) != 0) || + (!nReqs.StorageEphemeral().IsZero() && oReqs.StorageEphemeral().Cmp(*nReqs.StorageEphemeral()) != 0) { + logger.V(2).Info("pod containers resources diff", + "CpuLimit", oLimits.Cpu().Cmp(*nLimits.Cpu()), + "MemLimit", oLimits.Memory().Cmp(*nLimits.Memory()), + "CpuRequest", oReqs.Cpu().Cmp(*nReqs.Cpu()), + "MemRequest", oReqs.Memory().Cmp(*nReqs.Memory()), + "StorageEphemeralLimit", oLimits.StorageEphemeral().Cmp(*nLimits.StorageEphemeral()), + "StorageEphemeralRequest", oReqs.StorageEphemeral().Cmp(*nReqs.StorageEphemeral()), + ) + return true + } + // check almost all fields of container // should make sure that apiserver not return noset default value if oldCon.Image != newCon.Image || oldCon.ImagePullPolicy != newCon.ImagePullPolicy || - !reflect.DeepEqual(oldCon.Resources, newCon.Resources) || !reflect.DeepEqual(loadEnvs(oldCon.Env), loadEnvs(newCon.Env)) || !reflect.DeepEqual(oldCon.Command, newCon.Command) || !reflect.DeepEqual(oldCon.Args, newCon.Args) || !reflect.DeepEqual(oldCon.Ports, newCon.Ports) || - !reflect.DeepEqual(oldCon.Lifecycle, newCon.Lifecycle) || - !reflect.DeepEqual(oldCon.VolumeMounts, newCon.VolumeMounts) { + !cmp.Equal(oldCon.Lifecycle, newCon.Lifecycle, cmpopts.EquateEmpty()) || + !cmp.Equal(oldCon.VolumeMounts, newCon.VolumeMounts, cmpopts.EquateEmpty()) { logger.V(2).Info("pod containers config diff", "image", oldCon.Image != newCon.Image, @@ -273,7 +313,6 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge return true } } - return false } diff --git a/internal/valkey/failover/failover.go b/internal/valkey/failover/failover.go index a41f21d..7efa608 100644 --- a/internal/valkey/failover/failover.go +++ b/internal/valkey/failover/failover.go @@ -20,7 +20,6 @@ import ( "context" "crypto/tls" "fmt" - "reflect" "strconv" "strings" @@ -39,6 +38,8 @@ import ( "github.com/chideat/valkey-operator/pkg/types" "github.com/chideat/valkey-operator/pkg/types/user" "github.com/chideat/valkey-operator/pkg/version" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/go-logr/logr" "github.com/samber/lo" @@ -659,9 +660,9 @@ func (s *Failover) IsResourceFullfilled(ctx context.Context) (bool, error) { s.logger.Error(err, "get sentinel failed", "target", 
client.ObjectKeyFromObject(newSen)) return false, err } - if !reflect.DeepEqual(newSen.Spec, oldSen.Spec) || - !reflect.DeepEqual(newSen.Labels, oldSen.Labels) || - !reflect.DeepEqual(newSen.Annotations, oldSen.Annotations) { + if !cmp.Equal(newSen.Spec, oldSen.Spec, cmpopts.EquateEmpty()) || + !cmp.Equal(newSen.Labels, oldSen.Labels, cmpopts.EquateEmpty()) || + !cmp.Equal(newSen.Annotations, oldSen.Annotations, cmpopts.EquateEmpty()) { oldSen.Spec = newSen.Spec oldSen.Labels = newSen.Labels oldSen.Annotations = newSen.Annotations From 50a0a2fb3e5a965e2985508b41d4eab81c691f79 Mon Sep 17 00:00:00 2001 From: Seer Date: Fri, 12 Sep 2025 18:23:02 +0800 Subject: [PATCH 02/13] refactor: major service comparison and actor improvements - Add comprehensive IsServiceChanged function for detailed service comparison - Refactor actor ensure resource ordering and method names - Improve service change detection with proper label/annotation comparison - Enhance statefulset handling with better error checking - Add utility functions for service port and spec comparison --- cmd/main.go | 2 +- config/rbac/role.yaml | 6 +- .../builder/clusterbuilder/configmap_test.go | 4 + .../builder/failoverbuilder/statefulset.go | 8 - .../builder/sentinelbuilder/statefulset.go | 8 - internal/config/env.go | 11 + internal/ops/cluster/actor/actor_heal_pod.go | 9 +- internal/ops/cluster/actor/actor_rebalance.go | 4 +- internal/ops/cluster/engine.go | 7 +- .../failover/actor/actor_ensure_resource.go | 227 ++++++++++++------ internal/ops/failover/actor/actor_heal_pod.go | 4 +- internal/ops/failover/engine.go | 12 +- .../sentinel/actor/actor_ensure_resource.go | 179 +++++++++----- internal/ops/sentinel/actor/actor_heal_pod.go | 9 +- internal/ops/sentinel/engine.go | 10 +- internal/util/kubernetes.go | 130 ++++++++++ internal/valkey/cluster/cluster.go | 28 +++ internal/valkey/failover/failover.go | 20 ++ .../failover/monitor/sentinel_monitor.go | 9 +- internal/valkey/node.go | 2 +- internal/valkey/sentinel/sentinel.go | 23 +- pkg/kubernetes/clientset/service.go | 6 +- pkg/types/cluster_instance.go | 1 + pkg/types/failover_instance.go | 1 + 24 files changed, 526 insertions(+), 194 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 7c9aaa1..bc1ca9b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -70,7 +70,7 @@ func init() { //+kubebuilder:rbac:groups=apps,resources=statefulsets;statefulsets/finalizers;deployments;deployments/finalizers;daemonsets;replicasets,verbs=get;list;watch;create;update;delete //+kubebuilder:rbac:groups=batch,resources=jobs;jobs/finalizers;cronjobs;cronjobs/finalizers,verbs=get;list;watch;create;update;delete;deletecollection //+kubebuilder:rbac:groups=*,resources=pods;pods/exec;configmaps;configmaps/finalizers;secrets;secrets/finalizers;services;services/finalizers;persistentvolumeclaims;persistentvolumeclaims/finalizers;endpoints,verbs=get;list;watch;create;update;patch;delete;deletecollection -//+kubebuilder:rbac:groups=*,resources=events,verbs=get;list;watch;create;update;delete;deletecollection +//+kubebuilder:rbac:groups=*,resources=events,verbs=create;update;patch //+kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets;poddisruptionbudgets/finalizers,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=*,resources=pods;pods/exec;configmaps;endpoints;services;services/finalizers,verbs=* diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 
211d8d5..2002cc8 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -46,12 +46,8 @@ rules: - events verbs: - create - - delete - - deletecollection - - get - - list + - patch - update - - watch - apiGroups: - '*' resources: diff --git a/internal/builder/clusterbuilder/configmap_test.go b/internal/builder/clusterbuilder/configmap_test.go index 0f1112c..6bed2d1 100644 --- a/internal/builder/clusterbuilder/configmap_test.go +++ b/internal/builder/clusterbuilder/configmap_test.go @@ -182,6 +182,10 @@ func (m *MockClusterInstance) Shards() []types.ClusterShard { return nil } +func (m *MockClusterInstance) Shard(i int) types.ClusterShard { + return nil +} + func (m *MockClusterInstance) RewriteShards(ctx context.Context, shards []*v1alpha1.ClusterShards) error { return nil } diff --git a/internal/builder/failoverbuilder/statefulset.go b/internal/builder/failoverbuilder/statefulset.go index 2be3acc..09b8132 100644 --- a/internal/builder/failoverbuilder/statefulset.go +++ b/internal/builder/failoverbuilder/statefulset.go @@ -214,10 +214,6 @@ func buildEnvs(inst types.FailoverInstance, opUser *user.User, aclConfigMapName Name: builder.OperatorSecretName, Value: opUser.GetPassword().GetSecretName(), }, - { - Name: "SERVICE_TYPE", - Value: string(rf.Spec.Access.ServiceType), - }, { Name: "IP_FAMILY_PREFER", Value: string(rf.Spec.Access.IPFamilyPrefer), @@ -390,10 +386,6 @@ func buildValkeyDataInitContainer(rf *v1alpha1.Failover) (*corev1.Container, err Name: "IP_FAMILY_PREFER", Value: string(rf.Spec.Access.IPFamilyPrefer), }, - { - Name: "SERVICE_TYPE", - Value: string(rf.Spec.Access.ServiceType), - }, }, Command: []string{"sh", "/opt/init_failover.sh"}, SecurityContext: builder.GetSecurityContext(rf.Spec.SecurityContext), diff --git a/internal/builder/sentinelbuilder/statefulset.go b/internal/builder/sentinelbuilder/statefulset.go index 96d42c7..adfe60f 100644 --- a/internal/builder/sentinelbuilder/statefulset.go +++ b/internal/builder/sentinelbuilder/statefulset.go @@ -173,10 +173,6 @@ func buildInitContainer(sen *v1alpha1.Sentinel, _ []corev1.EnvVar) (*corev1.Cont Name: "IP_FAMILY_PREFER", Value: string(sen.Spec.Access.IPFamilyPrefer), }, - { - Name: "SERVICE_TYPE", - Value: string(sen.Spec.Access.ServiceType), - }, }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -347,10 +343,6 @@ func buildEnvs(sen *v1alpha1.Sentinel) []corev1.EnvVar { Name: "TLS_ENABLED", Value: fmt.Sprintf("%t", sen.Spec.Access.EnableTLS), }, - { - Name: "SERVICE_TYPE", - Value: string(sen.Spec.Access.ServiceType), - }, { Name: "IP_FAMILY_PREFER", Value: string(sen.Spec.Access.IPFamilyPrefer), diff --git a/internal/config/env.go b/internal/config/env.go index c8957f8..25be204 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "strings" + "time" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -129,3 +130,13 @@ func GetValkeyExporterImage(obj v1.Object) string { imgVersion := Getenv("DEFAULT_EXPORTER_VERSION", "v1.67.0-alpine") return GetFullImageURL(imgName, imgVersion) } + +func LoadbalancerReadyTimeout() time.Duration { + timeout := os.Getenv("LOADBALANCER_WAIT_TIMEOUT") + if timeout != "" { + if d, err := time.ParseDuration(timeout); err == nil { + return d + } + } + return 2 * time.Minute +} diff --git a/internal/ops/cluster/actor/actor_heal_pod.go b/internal/ops/cluster/actor/actor_heal_pod.go index e783d76..705171c 100644 --- a/internal/ops/cluster/actor/actor_heal_pod.go +++ b/internal/ops/cluster/actor/actor_heal_pod.go @@ -122,7 
+122,8 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR logger.Error(err, "get service failed", "name", node.GetName()) return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "client") if port != nil { if int(port.NodePort) != announcePort { @@ -132,14 +133,14 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { cluster.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } } else { logger.Error(fmt.Errorf("service port not found"), "service port not found", "name", node.GetName(), "port", "client") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if index := slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(ing corev1.LoadBalancerIngress) bool { return ing.IP == announceIP || ing.Hostname == announceIP }); index < 0 { @@ -149,7 +150,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { cluster.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } diff --git a/internal/ops/cluster/actor/actor_rebalance.go b/internal/ops/cluster/actor/actor_rebalance.go index 1a12608..306ec77 100644 --- a/internal/ops/cluster/actor/actor_rebalance.go +++ b/internal/ops/cluster/actor/actor_rebalance.go @@ -131,8 +131,8 @@ type SlotMigrateStatus struct { // 槽迁移实现 // 由于槽迁移是一个标记然后后台任务一直执行的过程,为了槽迁移的健壮性,将槽迁移的任务进行拆分 // 1. operator 部分:operator 只负责标记哪些槽要迁移 -// 2. sidecar: sidercar 用于按照标记信息迁移槽,并在数据迁移完成之后,清理标记 -// 3. 即使在槽迁移过程中 node 重启或者关机(可能会数据丢失),operator 会重新标记,sidecar 会重新进行迁移 +// 2. agent: agent 用于按照标记信息迁移槽,并在数据迁移完成之后,清理标记 +// 3. 
即使在槽迁移过程中 node 重启或者关机(可能会数据丢失),operator 会重新标记,agent 会重新进行迁移 func (a *actorRebalance) Do(ctx context.Context, val types.Instance) *actor.ActorResult { cluster := val.(types.ClusterInstance) logger := val.Logger().WithValues("actor", cops.CommandRebalance.String()) diff --git a/internal/ops/cluster/engine.go b/internal/ops/cluster/engine.go index 270d524..32a6318 100644 --- a/internal/ops/cluster/engine.go +++ b/internal/ops/cluster/engine.go @@ -316,7 +316,8 @@ func (g *RuleEngine) Inspect(ctx context.Context, val types.Instance) *actor.Act announceIP := node.DefaultIP().String() announcePort := node.Port() - if typ == corev1.ServiceTypeNodePort { + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "client") if port != nil { if int(port.NodePort) != announcePort { @@ -325,7 +326,7 @@ func (g *RuleEngine) Inspect(ctx context.Context, val types.Instance) *actor.Act } else { logger.Error(fmt.Errorf("service %s not found", node.GetName()), "failed to get service, which should not happen") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(ing corev1.LoadBalancerIngress) bool { return ing.IP == announceIP }) < 0 { @@ -536,7 +537,7 @@ func (g *RuleEngine) isConfigMapChanged(ctx context.Context, cluster types.Clust logger := g.logger.WithName("isConfigMapChanged") newCm, _ := clusterbuilder.NewConfigMapForCR(cluster) oldCm, err := g.client.GetConfigMap(ctx, newCm.Namespace, newCm.Name) - if errors.IsNotFound(err) || oldCm.Data[builder.ValkeyConfigKey] == "" { + if errors.IsNotFound(err) || (oldCm != nil && oldCm.Data[builder.ValkeyConfigKey] == "") { return true, nil } else if err != nil { logger.Error(err, "get old configmap failed") diff --git a/internal/ops/failover/actor/actor_ensure_resource.go b/internal/ops/failover/actor/actor_ensure_resource.go index 6f81dc5..eb070d8 100644 --- a/internal/ops/failover/actor/actor_ensure_resource.go +++ b/internal/ops/failover/actor/actor_ensure_resource.go @@ -101,19 +101,19 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.ensureSentinel(ctx, inst, logger); ret != nil { return ret } - if ret := a.ensureService(ctx, inst, logger); ret != nil { + if ret := a.ensureConfigMap(ctx, inst, logger); ret != nil { return ret } - if ret := a.ensureConfigMap(ctx, inst, logger); ret != nil { + if ret := a.ensureService(ctx, inst, logger); ret != nil { return ret } - if ret := a.ensureValkeyStatefulSet(ctx, inst, logger); ret != nil { + if ret := a.ensureStatefulSet(ctx, inst, logger); ret != nil { return ret } return nil } -func (a *actorEnsureResource) ensureValkeyStatefulSet(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { +func (a *actorEnsureResource) ensureStatefulSet(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { var ( err error cr = inst.Definition() @@ -172,7 +172,7 @@ func (a *actorEnsureResource) ensureValkeyStatefulSet(ctx context.Context, inst logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) return actor.RequeueWithError(err) } - if err = a.client.CreateStatefulSet(ctx, cr.Namespace, sts); err != nil { + if err = a.client.CreateStatefulSet(ctx, cr.Namespace, sts); err != nil && !errors.IsAlreadyExists(err) { logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(sts)) return actor.RequeueWithError(err) } @@ -362,47 
+362,55 @@ func (a *actorEnsureResource) ensureSentinel(ctx context.Context, inst types.Fai } func (a *actorEnsureResource) ensureService(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { - cr := inst.Definition() - // read write svc - rwSvc := failoverbuilder.GenerateReadWriteService(cr) - roSvc := failoverbuilder.GenerateReadonlyService(cr) - if err := a.client.CreateOrUpdateIfServiceChanged(ctx, inst.GetNamespace(), rwSvc); err != nil { - return actor.RequeueWithError(err) - } - if err := a.client.CreateOrUpdateIfServiceChanged(ctx, inst.GetNamespace(), roSvc); err != nil { - return actor.RequeueWithError(err) - } - - selector := inst.Selector() - exporterService := failoverbuilder.GenerateExporterService(cr) - if err := a.client.CreateOrUpdateIfServiceChanged(ctx, inst.GetNamespace(), exporterService); err != nil { - return actor.RequeueWithError(err) - } + var ( + cr = inst.Definition() + selector = inst.Selector() + ) if ret := a.cleanUselessService(ctx, cr, logger, selector); ret != nil { return ret } - switch cr.Spec.Access.ServiceType { - case corev1.ServiceTypeNodePort: - if ret := a.ensureValkeySpecifiedNodePortService(ctx, inst, logger, selector); ret != nil { + + if cr.Spec.Access.ServiceType == corev1.ServiceTypeNodePort && cr.Spec.Access.Ports != "" { + if ret := a.ensureValkeySpecifiedNodePortService(ctx, inst, logger); ret != nil { return ret } - case corev1.ServiceTypeLoadBalancer: - if ret := a.ensureValkeyPodService(ctx, cr, logger, selector); ret != nil { - return ret + } else if ret := a.ensureValkeyPodService(ctx, inst, logger); ret != nil { + return ret + } + + for _, newSvc := range []*corev1.Service{ + failoverbuilder.GenerateReadWriteService(cr), + failoverbuilder.GenerateReadonlyService(cr), + failoverbuilder.GenerateExporterService(cr), + } { + if oldSvc, err := a.client.GetService(ctx, inst.GetNamespace(), newSvc.Name); errors.IsNotFound(err) { + if err := a.client.CreateService(ctx, inst.GetNamespace(), newSvc); err != nil { + logger.Error(err, "create service failed", "target", client.ObjectKeyFromObject(newSvc)) + return actor.RequeueWithError(err) + } + } else if err != nil { + logger.Error(err, "get service failed", "target", client.ObjectKeyFromObject(newSvc)) + return actor.RequeueWithError(err) + } else if util.IsServiceChanged(newSvc, oldSvc, logger) { + if err := a.client.UpdateService(ctx, inst.GetNamespace(), newSvc); err != nil { + logger.Error(err, "update service failed", "target", client.ObjectKeyFromObject(newSvc)) + return actor.RequeueWithError(err) + } + } else if oldSvc.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(oldSvc.Status.LoadBalancer.Ingress) == 0 && + time.Since(oldSvc.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb block ed pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available, please check the cloud provider")) } } return nil } func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.Context, - inst types.FailoverInstance, logger logr.Logger, selectors map[string]string) *actor.ActorResult { + inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { cr := inst.Definition() - if cr.Spec.Access.Ports == "" { - return a.ensureValkeyPodService(ctx, cr, logger, selectors) - } - logger.V(3).Info("ensure cluster nodeports", "namepspace", cr.Namespace, "name", cr.Name) configedPorts, err := helper.ParsePorts(cr.Spec.Access.Ports) if err != nil { @@ -456,6 +464,22 
@@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C } } } + + for _, name := range []string{ + failoverbuilder.RWServiceName(cr.GetName()), + failoverbuilder.ROServiceName(cr.GetName()), + } { + if svc, err := a.client.GetService(ctx, cr.GetNamespace(), name); err != nil && !errors.IsNotFound(err) { + a.logger.Error(err, "get cluster nodeport service failed", "target", name) + return actor.RequeueWithError(err) + } else if svc != nil && slices.Contains(configedPorts, getClientPort(svc, "server")) { + if err := a.client.DeleteService(ctx, cr.GetNamespace(), svc.GetName()); err != nil { + a.logger.Error(err, "delete service failed", "target", client.ObjectKeyFromObject(svc)) + return actor.RequeueWithError(err) + } + } + } + if services, ret = a.fetchAllPodBindedServices(ctx, cr.Namespace, labels); ret != nil { return ret } @@ -467,8 +491,9 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C needUpdateServices []*corev1.Service ) for _, svc := range services { - svc := svc.DeepCopy() - bindedNodeports = append(bindedNodeports, getClientPort(svc)) + if svc.Spec.Type == corev1.ServiceTypeNodePort { + bindedNodeports = append(bindedNodeports, getClientPort(svc.DeepCopy())) + } } // filter used ports for _, port := range configedPorts { @@ -497,52 +522,81 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C svc := failoverbuilder.GeneratePodNodePortService(cr, i, getClientPort(oldService)) // check old service for compatibility - if len(oldService.OwnerReferences) == 0 || - oldService.OwnerReferences[0].Kind == "Pod" || - !reflect.DeepEqual(oldService.Spec, svc.Spec) || - !reflect.DeepEqual(oldService.Labels, svc.Labels) || - !reflect.DeepEqual(oldService.Annotations, svc.Annotations) { - - oldService.OwnerReferences = util.BuildOwnerReferences(cr) - oldService.Spec = svc.Spec - oldService.Labels = svc.Labels - oldService.Annotations = svc.Annotations - if err := a.client.UpdateService(ctx, oldService.Namespace, oldService); err != nil { + if util.IsServiceChanged(svc, oldService, logger) { + if err := a.client.UpdateService(ctx, oldService.Namespace, svc); err != nil { a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(oldService)) return actor.NewResultWithValue(ops.CommandRequeue, err) } } - if port := getClientPort(oldService); port != 0 && !slices.Contains(configedPorts, port) { - needUpdateServices = append(needUpdateServices, oldService) + svc.Spec.Type = corev1.ServiceTypeNodePort + if port := getClientPort(oldService); (port != 0 && !slices.Contains(configedPorts, port)) || + oldService.Spec.Type != corev1.ServiceTypeNodePort { + needUpdateServices = append(needUpdateServices, svc) } } // 3. 
update existing service and restart pod (only one pod is restarted at a same time for each shard) if len(needUpdateServices) > 0 && len(newPorts) > 0 { - port, svc := newPorts[0], needUpdateServices[0] - if sp := util.GetServicePortByName(svc, "client"); sp != nil { - sp.NodePort = port + // node must be ready, and the latest pod must ready for about 60s for cluster to sync info + if inst.Replication() != nil && (!inst.Replication().IsReady() || !func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }()) { + logger.Info("wait statefulset ready to update next NodePort") + return actor.Requeue() } - // NOTE: here not make sure the failover success, because the nodeport updated, the communication will be failed - // in k8s, the nodeport can still access for sometime after the nodeport updated - // - // update service - if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { - a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) - return actor.NewResultWithValue(ops.CommandRequeue, err) - } - if pod, _ := a.client.GetPod(ctx, cr.Namespace, svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { - if err := a.client.DeletePod(ctx, cr.Namespace, pod.Name); err != nil { - return actor.RequeueWithError(err) + for i := len(needUpdateServices) - 1; i >= 0; i-- { + if len(newPorts) <= i { + logger.Error(fmt.Errorf("update nodeport failed"), "not enough nodeport for service", "ports", newPorts) + return actor.NewResultWithValue(ops.CommandRequeue, fmt.Errorf("not enough nodeport for service, please check the config")) + } + port, svc := newPorts[i], needUpdateServices[i] + if oldPort := getClientPort(svc); slices.Contains(newPorts, oldPort) { + port = oldPort + } + if sp := util.GetServicePortByName(svc, "client"); sp != nil { + sp.NodePort = port + } + tmpNewPorts := newPorts + newPorts = newPorts[0:0] + for _, p := range tmpNewPorts { + if p != port { + newPorts = append(newPorts, p) + } + } + // NOTE: here not make sure the failover success, because the nodeport updated, the communication will be failed + // in k8s, the nodeport can still access for sometime after the nodeport updated + // + // update service + if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { + a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, cr.Namespace, svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, cr.Namespace, pod.Name); err != nil { + return actor.RequeueWithError(err) + } + return actor.RequeueAfter(time.Second * 5) } - return actor.NewResult(ops.CommandRequeue) } } return nil } -func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, rf *v1alpha1.Failover, logger logr.Logger, selectors map[string]string) *actor.ActorResult { +func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { + var ( + rf = inst.Definition() + 
needUpdateServices []*corev1.Service + ) + for i := 0; i < int(rf.Spec.Replicas); i++ { newSvc := failoverbuilder.GeneratePodService(rf, i) if svc, err := a.client.GetService(ctx, rf.Namespace, newSvc.Name); errors.IsNotFound(err) { @@ -553,16 +607,43 @@ func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, rf *v1 } else if err != nil { logger.Error(err, "get service failed", "target", client.ObjectKeyFromObject(newSvc)) return actor.NewResult(ops.CommandRequeue) - } else if newSvc.Spec.Type != svc.Spec.Type || - !reflect.DeepEqual(newSvc.Spec.Selector, svc.Spec.Selector) || - !reflect.DeepEqual(newSvc.Labels, svc.Labels) || - !reflect.DeepEqual(newSvc.Annotations, svc.Annotations) { - svc.Spec = newSvc.Spec - svc.Labels = newSvc.Labels - svc.Annotations = newSvc.Annotations - if err = a.client.UpdateService(ctx, rf.Namespace, svc); err != nil { - logger.Error(err, "update service failed", "target", client.ObjectKeyFromObject(svc)) - return actor.RequeueWithError(err) + } else if util.IsServiceChanged(newSvc, svc, logger) { + needUpdateServices = append(needUpdateServices, newSvc) + } else if svc.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(svc.Status.LoadBalancer.Ingress) == 0 && + time.Since(svc.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb block ed pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available, please check the cloud provider")) + } + } + + if len(needUpdateServices) > 0 { + if inst.Replication() != nil && !(inst.Replication().Definition().Status.ReadyReplicas == 0 || (inst.Replication().IsReady() && func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }())) { + logger.V(1).Info("wait statefulset ready to update next service") + return actor.Requeue() + } + + for i := len(needUpdateServices) - 1; i >= 0; i-- { + svc := needUpdateServices[i] + if err := a.client.UpdateService(ctx, inst.GetNamespace(), svc); err != nil { + logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc)) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, inst.GetNamespace(), svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, inst.GetNamespace(), pod.Name); err != nil { + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return actor.RequeueAfter(time.Second * 5) } } } diff --git a/internal/ops/failover/actor/actor_heal_pod.go b/internal/ops/failover/actor/actor_heal_pod.go index d5b0d09..b3cb0ab 100644 --- a/internal/ops/failover/actor/actor_heal_pod.go +++ b/internal/ops/failover/actor/actor_heal_pod.go @@ -129,7 +129,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } @@ -145,7 +145,7 @@ func (a *actorHealPod) Do(ctx context.Context, val 
types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } diff --git a/internal/ops/failover/engine.go b/internal/ops/failover/engine.go index d5d87a0..a81fd6a 100644 --- a/internal/ops/failover/engine.go +++ b/internal/ops/failover/engine.go @@ -194,7 +194,7 @@ func (g *RuleEngine) isConfigChanged(ctx context.Context, inst types.FailoverIns return actor.RequeueWithError(err) } oldCm, err := g.client.GetConfigMap(ctx, newCm.GetNamespace(), newCm.GetName()) - if errors.IsNotFound(err) || oldCm.Data[builder.ValkeyConfigKey] == "" { + if errors.IsNotFound(err) || (oldCm != nil && oldCm.Data[builder.ValkeyConfigKey] == "") { err := fmt.Errorf("configmap %s not found", newCm.GetName()) return actor.NewResultWithError(CommandEnsureResource, err) } else if err != nil { @@ -232,7 +232,13 @@ func (g *RuleEngine) isNodesHealthy(ctx context.Context, inst types.FailoverInst } else if err != nil { return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + + if svc.Spec.Type != typ { + return actor.NewResult(CommandEnsureResource) + } + + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "client") if port != nil { if int(port.NodePort) != announcePort { @@ -241,7 +247,7 @@ func (g *RuleEngine) isNodesHealthy(ctx context.Context, inst types.FailoverInst } else { logger.Error(fmt.Errorf("service %s not found", node.GetName()), "failed to get service, which should not happen") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(i corev1.LoadBalancerIngress) bool { return i.IP == announceIP }) < 0 { diff --git a/internal/ops/sentinel/actor/actor_ensure_resource.go b/internal/ops/sentinel/actor/actor_ensure_resource.go index 26c0256..bddd114 100644 --- a/internal/ops/sentinel/actor/actor_ensure_resource.go +++ b/internal/ops/sentinel/actor/actor_ensure_resource.go @@ -31,6 +31,7 @@ import ( "github.com/chideat/valkey-operator/internal/builder/certbuilder" "github.com/chideat/valkey-operator/internal/builder/sabuilder" "github.com/chideat/valkey-operator/internal/builder/sentinelbuilder" + "github.com/chideat/valkey-operator/internal/config" ops "github.com/chideat/valkey-operator/internal/ops/sentinel" "github.com/chideat/valkey-operator/internal/util" "github.com/chideat/valkey-operator/pkg/kubernetes" @@ -90,16 +91,15 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.ensureServiceAccount(ctx, sentinel, logger); ret != nil { return ret } - if ret := a.ensureService(ctx, sentinel, logger); ret != nil { - return ret - } - // ensure configMap if ret := a.ensureConfigMap(ctx, sentinel, logger); ret != nil { return ret } if ret := a.ensureValkeySSL(ctx, sentinel, logger); ret != nil { return ret } + if ret := a.ensureService(ctx, sentinel, logger); ret != nil { + return ret + } if ret := a.ensureStatefulSet(ctx, sentinel, logger); ret != nil { return ret } @@ -187,7 +187,7 @@ func (a *actorEnsureResource) ensureStatefulSet(ctx context.Context, inst types. 
return actor.RequeueWithError(err) } time.Sleep(time.Second * 3) - if err = a.client.CreateStatefulSet(ctx, sen.Namespace, sts); err != nil { + if err = a.client.CreateStatefulSet(ctx, sen.Namespace, sts); err != nil && !errors.IsAlreadyExists(err) { logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(sts)) return actor.RequeueWithError(err) } @@ -382,39 +382,36 @@ func (a *actorEnsureResource) ensureService(ctx context.Context, inst types.Sent return ret } - createService := func(senService *corev1.Service) *actor.ActorResult { + ensureService := func(senService *corev1.Service) *actor.ActorResult { if oldService, err := a.client.GetService(ctx, sen.GetNamespace(), senService.Name); errors.IsNotFound(err) { if err := a.client.CreateService(ctx, sen.GetNamespace(), senService); err != nil { return actor.NewResultWithError(ops.CommandRequeue, err) } } else if err != nil { return actor.NewResultWithError(ops.CommandRequeue, err) - } else if senService.Spec.Type != oldService.Spec.Type || - (senService.Spec.Type == corev1.ServiceTypeNodePort && senService.Spec.Ports[0].NodePort != oldService.Spec.Ports[0].NodePort) || - !reflect.DeepEqual(senService.Spec.Selector, oldService.Spec.Selector) || - !reflect.DeepEqual(senService.Labels, oldService.Labels) || - !reflect.DeepEqual(senService.Annotations, oldService.Annotations) { - + } else if util.IsServiceChanged(senService, oldService, logger) { if err := a.client.UpdateService(ctx, sen.GetNamespace(), senService); err != nil { return actor.NewResultWithError(ops.CommandRequeue, err) } + } else if oldService.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(oldService.Status.LoadBalancer.Ingress) == 0 && + time.Since(oldService.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb block ed pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available for service %s, please check the cloud provider", oldService.Name)) } return nil } - if ret := createService(sentinelbuilder.GenerateSentinelHeadlessService(sen)); ret != nil { - return ret - } - - switch sen.Spec.Access.ServiceType { - case corev1.ServiceTypeNodePort: + if sen.Spec.Access.ServiceType == corev1.ServiceTypeNodePort && sen.Spec.Access.Ports != "" { if ret := a.ensureValkeySpecifiedNodePortService(ctx, inst, logger); ret != nil { return ret } - case corev1.ServiceTypeLoadBalancer: - if ret := a.ensureValkeyPodService(ctx, inst, logger); ret != nil { - return ret - } + } else if ret := a.ensureValkeyPodService(ctx, inst, logger); ret != nil { + return ret + } + + if ret := ensureService(sentinelbuilder.GenerateSentinelHeadlessService(sen)); ret != nil { + return ret } return nil } @@ -422,10 +419,6 @@ func (a *actorEnsureResource) ensureService(ctx context.Context, inst types.Sent func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.Context, inst types.SentinelInstance, logger logr.Logger) *actor.ActorResult { sen := inst.Definition() - if sen.Spec.Access.Ports == "" { - return a.ensureValkeyPodService(ctx, inst, logger) - } - logger.V(3).Info("ensure sentinel nodeports", "namepspace", sen.Namespace, "name", sen.Name) configedPorts, err := helper.ParsePorts(sen.Spec.Access.Ports) if err != nil { @@ -491,7 +484,9 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C needUpdateServices []*corev1.Service ) for _, svc := range services { - bindedNodeports = append(bindedNodeports, getClientPort(svc.DeepCopy())) + if 
svc.Spec.Type == corev1.ServiceTypeNodePort { + bindedNodeports = append(bindedNodeports, getClientPort(svc.DeepCopy())) + } } // filter used ports @@ -521,52 +516,81 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C svc := sentinelbuilder.GeneratePodNodePortService(sen, i, getClientPort(oldService)) // check old service for compatibility - if !reflect.DeepEqual(oldService.Spec.Selector, svc.Spec.Selector) || - len(oldService.Spec.Ports) != len(svc.Spec.Ports) || - !reflect.DeepEqual(oldService.Labels, svc.Labels) || - !reflect.DeepEqual(oldService.Annotations, svc.Annotations) { - - oldService.OwnerReferences = util.BuildOwnerReferences(sen) - oldService.Spec = svc.Spec - oldService.Labels = svc.Labels - oldService.Annotations = svc.Annotations - if err := a.client.UpdateService(ctx, oldService.Namespace, oldService); err != nil { + svc.Spec.Type = oldService.Spec.Type + if util.IsServiceChanged(oldService, svc, logger) { + if err := a.client.UpdateService(ctx, oldService.Namespace, svc); err != nil { a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(oldService)) return actor.NewResultWithValue(ops.CommandRequeue, err) } } - if port := getClientPort(oldService); port != 0 && !slices.Contains(configedPorts, port) { - needUpdateServices = append(needUpdateServices, oldService) + svc.Spec.Type = corev1.ServiceTypeNodePort + if port := getClientPort(oldService); (port != 0 && !slices.Contains(configedPorts, port)) || + oldService.Spec.Type != corev1.ServiceTypeNodePort { + needUpdateServices = append(needUpdateServices, svc) } } // 3. update existing service and restart pod (only one pod is restarted at a same time for each shard) if len(needUpdateServices) > 0 && len(newPorts) > 0 { - port, svc := newPorts[0], needUpdateServices[0] - if sp := util.GetServicePortByName(svc, "sentinel"); sp != nil { - sp.NodePort = port + if inst.Replication() != nil && !(inst.Replication().Definition().Status.ReadyReplicas == 0 || (inst.Replication().IsReady() && func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }())) { + logger.Info("wait statefulset ready to update next NodePort") + return actor.Requeue() } - // NOTE: here not make sure the failover success, because the nodeport updated, the communication will be failed - // in k8s, the nodeport can still access for sometime after the nodeport updated - // - // update service - if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { - a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) - return actor.NewResultWithValue(ops.CommandRequeue, err) - } - if pod, _ := a.client.GetPod(ctx, sen.Namespace, svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { - if err := a.client.DeletePod(ctx, sen.Namespace, pod.Name); err != nil { - return actor.NewResultWithError(ops.CommandRequeue, err) + for i := len(needUpdateServices) - 1; i >= 0; i-- { + if len(newPorts) <= i { + logger.Error(fmt.Errorf("update nodeport failed"), "not enough nodeport for service", "ports", newPorts) + return 
actor.NewResultWithValue(ops.CommandRequeue, fmt.Errorf("not enough nodeport for service, please check the config")) + } + port, svc := newPorts[i], needUpdateServices[i] + if oldPort := getClientPort(svc); slices.Contains(newPorts, oldPort) { + port = oldPort + } + if sp := util.GetServicePortByName(svc, "sentinel"); sp != nil { + sp.NodePort = port + } + tmpNewPorts := newPorts + newPorts = newPorts[0:0] + for _, p := range tmpNewPorts { + if p != port { + newPorts = append(newPorts, p) + } + } + // NOTE: here not make sure the failover success, because the nodeport updated, the communication will be failed + // in k8s, the nodeport can still access for sometime after the nodeport updated + // + // update service + if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { + logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, inst.GetNamespace(), svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, inst.GetNamespace(), pod.Name); err != nil { + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return actor.RequeueAfter(time.Second * 5) } - return actor.NewResult(ops.CommandRequeue) } } return nil } func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, inst types.SentinelInstance, logger logr.Logger) *actor.ActorResult { - sen := inst.Definition() + var ( + sen = inst.Definition() + needUpdateServices []*corev1.Service + ) + for i := 0; i < int(sen.Spec.Replicas); i++ { newSvc := sentinelbuilder.GeneratePodService(sen, i) if svc, err := a.client.GetService(ctx, sen.Namespace, newSvc.Name); errors.IsNotFound(err) { @@ -577,14 +601,43 @@ func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, inst t } else if err != nil { logger.Error(err, "get service failed", "target", client.ObjectKeyFromObject(newSvc)) return actor.NewResult(ops.CommandRequeue) - } else if newSvc.Spec.Type != svc.Spec.Type || - !reflect.DeepEqual(newSvc.Spec.Selector, svc.Spec.Selector) || - !reflect.DeepEqual(newSvc.Labels, svc.Labels) || - !reflect.DeepEqual(newSvc.Annotations, svc.Annotations) { - svc.Spec = newSvc.Spec - if err = a.client.UpdateService(ctx, sen.Namespace, svc); err != nil { - logger.Error(err, "update service failed", "target", client.ObjectKeyFromObject(svc)) - return actor.NewResultWithError(ops.CommandRequeue, err) + } else if util.IsServiceChanged(newSvc, svc, logger) { + needUpdateServices = append(needUpdateServices, newSvc) + } else if svc.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(svc.Status.LoadBalancer.Ingress) == 0 && + time.Since(svc.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb block ed pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available for service %s, please check the cloud provider", svc.Name)) + } + } + + if len(needUpdateServices) > 0 { + if inst.Replication() != nil && !(inst.Replication().Definition().Status.ReadyReplicas == 0 || (inst.Replication().IsReady() && func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + 
} + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }())) { + logger.Info("wait statefulset ready to update next service") + return actor.Requeue() + } + + for i := len(needUpdateServices) - 1; i >= 0; i-- { + svc := needUpdateServices[i] + if err := a.client.UpdateService(ctx, inst.GetNamespace(), svc); err != nil { + logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc)) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, inst.GetNamespace(), svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, inst.GetNamespace(), pod.Name); err != nil { + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return actor.RequeueAfter(time.Second * 5) } } } diff --git a/internal/ops/sentinel/actor/actor_heal_pod.go b/internal/ops/sentinel/actor/actor_heal_pod.go index 91bec54..49b183c 100644 --- a/internal/ops/sentinel/actor/actor_heal_pod.go +++ b/internal/ops/sentinel/actor/actor_heal_pod.go @@ -132,7 +132,8 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR logger.Error(err, "get service failed", "name", node.GetName()) return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "sentinel") if port != nil { if int(port.NodePort) != announcePort { @@ -141,14 +142,14 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } } else { logger.Error(fmt.Errorf("service port not found"), "service port not found", "name", node.GetName(), "port", "sentinel") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if index := slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(ing corev1.LoadBalancerIngress) bool { return ing.IP == announceIP || ing.Hostname == announceIP }); index < 0 { @@ -157,7 +158,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } diff --git a/internal/ops/sentinel/engine.go b/internal/ops/sentinel/engine.go index 70ece8e..b5f079d 100644 --- a/internal/ops/sentinel/engine.go +++ b/internal/ops/sentinel/engine.go @@ -114,7 +114,13 @@ func (g *RuleEngine) isPodHealNeeded(ctx context.Context, inst types.SentinelIns } else if err != nil { return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + + if svc.Spec.Type != typ { + return actor.NewResult(CommandEnsureResource) + } + + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "sentinel") if port != nil { if int(port.NodePort) != announcePort { @@ -123,7 +129,7 @@ func (g *RuleEngine) isPodHealNeeded(ctx context.Context, inst types.SentinelIns } else { logger.Error(fmt.Errorf("service %s not found", node.GetName()), "failed to get service, which should not happen") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case 
corev1.ServiceTypeLoadBalancer: if slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(i corev1.LoadBalancerIngress) bool { return i.IP == announceIP }) < 0 { diff --git a/internal/util/kubernetes.go b/internal/util/kubernetes.go index 906f306..636a3e7 100644 --- a/internal/util/kubernetes.go +++ b/internal/util/kubernetes.go @@ -316,6 +316,136 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge return false } +func IsServiceChanged(ns, os *corev1.Service, logger logr.Logger) bool { + if (ns == nil && os != nil) || (ns != nil && os == nil) { + return true + } + newSvc, oldSvc := ns.DeepCopy(), os.DeepCopy() + + isSubset := func(n, o map[string]string) bool { + if len(n) > len(o) { + return false + } + for k, v := range n { + if val, ok := o[k]; !ok || val != v { + return false + } + } + return true + } + + if !isSubset(newSvc.Labels, oldSvc.Labels) || + !isSubset(newSvc.Annotations, oldSvc.Annotations) { + logger.V(1).Info("Service labels or annotations changed", + "newLabels", newSvc.Labels, + "oldLabels", oldSvc.Labels, + "newAnnotations", newSvc.Annotations, + "oldAnnotations", oldSvc.Annotations, + ) + return true + } + + if newSvc.Spec.Type == "" { + newSvc.Spec.Type = corev1.ServiceTypeClusterIP + } + if oldSvc.Spec.Type == "" { + oldSvc.Spec.Type = corev1.ServiceTypeClusterIP + } + + if newSvc.Spec.Type != oldSvc.Spec.Type { + logger.V(1).Info("Service type changed") + return true + } + if newSvc.Spec.Type == corev1.ServiceTypeLoadBalancer { + if newSvc.Spec.AllocateLoadBalancerNodePorts == nil { + newSvc.Spec.AllocateLoadBalancerNodePorts = ptr.To(true) + } + if oldSvc.Spec.AllocateLoadBalancerNodePorts == nil { + oldSvc.Spec.AllocateLoadBalancerNodePorts = ptr.To(true) + } + } + if newSvc.Spec.SessionAffinity == "" { + newSvc.Spec.SessionAffinity = corev1.ServiceAffinityNone + } + if oldSvc.Spec.SessionAffinity == "" { + oldSvc.Spec.SessionAffinity = corev1.ServiceAffinityNone + } + if newSvc.Spec.InternalTrafficPolicy == nil { + newSvc.Spec.InternalTrafficPolicy = ptr.To(corev1.ServiceInternalTrafficPolicyCluster) + } + if oldSvc.Spec.InternalTrafficPolicy == nil { + oldSvc.Spec.InternalTrafficPolicy = ptr.To(corev1.ServiceInternalTrafficPolicyCluster) + } + if newSvc.Spec.ExternalTrafficPolicy == "" { + newSvc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeCluster + } + if oldSvc.Spec.ExternalTrafficPolicy == "" { + oldSvc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeCluster + } + + if !cmp.Equal(newSvc.Spec.Selector, oldSvc.Spec.Selector, cmpopts.EquateEmpty()) || + !cmp.Equal(newSvc.Spec.IPFamilyPolicy, oldSvc.Spec.IPFamilyPolicy, cmpopts.EquateEmpty()) || + !cmp.Equal(newSvc.Spec.IPFamilies, oldSvc.Spec.IPFamilies, cmpopts.EquateEmpty()) || + newSvc.Spec.HealthCheckNodePort != oldSvc.Spec.HealthCheckNodePort || + newSvc.Spec.PublishNotReadyAddresses != oldSvc.Spec.PublishNotReadyAddresses || + newSvc.Spec.SessionAffinity != oldSvc.Spec.SessionAffinity || + !cmp.Equal(newSvc.Spec.InternalTrafficPolicy, oldSvc.Spec.InternalTrafficPolicy, cmpopts.EquateEmpty()) || + newSvc.Spec.ExternalTrafficPolicy != oldSvc.Spec.ExternalTrafficPolicy || + !cmp.Equal(newSvc.Spec.TrafficDistribution, oldSvc.Spec.TrafficDistribution, cmpopts.EquateEmpty()) || + + (newSvc.Spec.Type == corev1.ServiceTypeLoadBalancer && + (newSvc.Spec.LoadBalancerIP != oldSvc.Spec.LoadBalancerIP || + !cmp.Equal(newSvc.Spec.LoadBalancerSourceRanges, oldSvc.Spec.LoadBalancerSourceRanges, cmpopts.EquateEmpty()) || + 
!cmp.Equal(newSvc.Spec.AllocateLoadBalancerNodePorts, oldSvc.Spec.AllocateLoadBalancerNodePorts, cmpopts.EquateEmpty()))) { + + logger.V(1).Info("Service spec changed", + "selector", !cmp.Equal(newSvc.Spec.Selector, oldSvc.Spec.Selector, cmpopts.EquateEmpty()), + "familypolicy", !cmp.Equal(newSvc.Spec.IPFamilyPolicy, oldSvc.Spec.IPFamilyPolicy, cmpopts.EquateEmpty()), + "IPFamilies", !cmp.Equal(newSvc.Spec.IPFamilies, oldSvc.Spec.IPFamilies, cmpopts.EquateEmpty()), + "allocatelbport", !cmp.Equal(newSvc.Spec.AllocateLoadBalancerNodePorts, oldSvc.Spec.AllocateLoadBalancerNodePorts, cmpopts.EquateEmpty()), + "HealthCheckNodePort", newSvc.Spec.HealthCheckNodePort != oldSvc.Spec.HealthCheckNodePort, + "LoadBalancerIP", newSvc.Spec.LoadBalancerIP != oldSvc.Spec.LoadBalancerIP, + "LoadBalancerSourceRanges", !cmp.Equal(newSvc.Spec.LoadBalancerSourceRanges, oldSvc.Spec.LoadBalancerSourceRanges, cmpopts.EquateEmpty()), + "PublishNotReadyAddresses", newSvc.Spec.PublishNotReadyAddresses != oldSvc.Spec.PublishNotReadyAddresses, + "TrafficDistribution", !cmp.Equal(newSvc.Spec.TrafficDistribution, oldSvc.Spec.TrafficDistribution, cmpopts.EquateEmpty()), + ) + return true + } + + if len(newSvc.Spec.Ports) != len(oldSvc.Spec.Ports) { + logger.V(1).Info("Service ports length changed") + return true + } + for i, port := range newSvc.Spec.Ports { + oldPort := oldSvc.Spec.Ports[i] + if port.Protocol == "" { + port.Protocol = corev1.ProtocolTCP + } + if oldPort.Protocol == "" { + oldPort.Protocol = corev1.ProtocolTCP + } + + if port.Name != oldPort.Name || + port.Protocol != oldPort.Protocol || + port.Port != oldPort.Port || + (newSvc.Spec.Type == corev1.ServiceTypeNodePort && port.NodePort != 0 && port.NodePort != oldPort.NodePort) { + + logger.V(1).Info("Service port changed", + "portName", port.Name, + "portProtocol", port.Protocol, + "portNumber", port.Port, + "nodePort", port.NodePort, + "oldPortName", oldPort.Name, + "oldPortProtocol", oldPort.Protocol, + "oldPortNumber", oldPort.Port, + "oldNodePort", oldPort.NodePort, + ) + return true + } + } + return false +} + func loadEnvs(envs []corev1.EnvVar) map[string]string { kvs := map[string]string{} for _, item := range envs { diff --git a/internal/valkey/cluster/cluster.go b/internal/valkey/cluster/cluster.go index 7f1d118..2d7a0ff 100644 --- a/internal/valkey/cluster/cluster.go +++ b/internal/valkey/cluster/cluster.go @@ -23,6 +23,7 @@ import ( "fmt" "strconv" "strings" + "time" certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" "github.com/chideat/valkey-operator/api/core" @@ -31,6 +32,7 @@ import ( "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/aclbuilder" "github.com/chideat/valkey-operator/internal/builder/clusterbuilder" + "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" clientset "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/security/acl" @@ -422,6 +424,21 @@ func (c *ValkeyCluster) Shards() []types.ClusterShard { return c.shards } +func (c *ValkeyCluster) Shard(index int) types.ClusterShard { + if c == nil { + return nil + } + if index < 0 || index >= int(c.Definition().Spec.Replicas.Shards) { + return nil + } + for _, shard := range c.shards { + if shard.Index() == index { + return shard + } + } + return nil +} + func (c *ValkeyCluster) Nodes() []types.ValkeyNode { var ret []types.ValkeyNode for _, shard := range c.shards { @@ -674,6 +691,17 @@ func (c *ValkeyCluster) 
IsResourceFullfilled(ctx context.Context) (bool, error) c.logger.Error(err, "get resource failed", "target", util.ObjectKey(c.GetNamespace(), name)) return false, err } + + if obj.GroupVersionKind() == serviceKey { + ts := obj.GetCreationTimestamp() + typ, _, _ := unstructured.NestedString(obj.Object, "spec", "type") + lbs, _, _ := unstructured.NestedSlice(obj.Object, "status", "loadBalancer", "ingress") + if typ == string(corev1.ServiceTypeLoadBalancer) && len(lbs) == 0 && + time.Since(ts.Time) >= config.LoadbalancerReadyTimeout() { + c.logger.V(3).Info("load balancer service not ready", "target", util.ObjectKey(c.GetNamespace(), name), "createdAt", ts.Time) + return false, nil + } + } } } diff --git a/internal/valkey/failover/failover.go b/internal/valkey/failover/failover.go index 7efa608..700333a 100644 --- a/internal/valkey/failover/failover.go +++ b/internal/valkey/failover/failover.go @@ -22,6 +22,7 @@ import ( "fmt" "strconv" "strings" + "time" certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" "github.com/chideat/valkey-operator/api/core" @@ -31,6 +32,7 @@ import ( "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/aclbuilder" "github.com/chideat/valkey-operator/internal/builder/failoverbuilder" + "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" "github.com/chideat/valkey-operator/internal/valkey/failover/monitor" clientset "github.com/chideat/valkey-operator/pkg/kubernetes" @@ -302,6 +304,13 @@ func (s *Failover) Masters() []types.ValkeyNode { return ret } +func (s *Failover) Replication() types.Replication { + if s == nil { + return nil + } + return s.replication +} + func (s *Failover) Nodes() []types.ValkeyNode { if s == nil || s.replication == nil { return nil @@ -647,6 +656,17 @@ func (s *Failover) IsResourceFullfilled(ctx context.Context) (bool, error) { s.logger.Error(err, "get resource failed", "kind", gvk.Kind, "target", util.ObjectKey(s.GetNamespace(), name)) return false, err } + + if obj.GroupVersionKind() == serviceKey { + ts := obj.GetCreationTimestamp() + typ, _, _ := unstructured.NestedString(obj.Object, "spec", "type") + lbs, _, _ := unstructured.NestedSlice(obj.Object, "status", "loadBalancer", "ingress") + if typ == string(corev1.ServiceTypeLoadBalancer) && len(lbs) == 0 && + time.Since(ts.Time) >= config.LoadbalancerReadyTimeout() { + s.logger.V(3).Info("load balancer service not ready", "target", util.ObjectKey(s.GetNamespace(), name), "createdAt", ts.Time) + return false, nil + } + } } } diff --git a/internal/valkey/failover/monitor/sentinel_monitor.go b/internal/valkey/failover/monitor/sentinel_monitor.go index 7d763a9..f48b066 100644 --- a/internal/valkey/failover/monitor/sentinel_monitor.go +++ b/internal/valkey/failover/monitor/sentinel_monitor.go @@ -39,6 +39,7 @@ import ( ) var ( + ErrNoUseableNode = fmt.Errorf("no usable sentinel node") ErrNoMaster = fmt.Errorf("no master") ErrDoFailover = fmt.Errorf("sentinel doing failover") ErrMultipleMaster = fmt.Errorf("multiple master without majority agreement") @@ -315,7 +316,7 @@ func (s *SentinelMonitor) Replicas(ctx context.Context) ([]*vkcli.SentinelMonito func (s *SentinelMonitor) Inited(ctx context.Context) (bool, error) { if s == nil || len(s.nodes) == 0 { - return false, fmt.Errorf("no sentinel nodes") + return false, ErrNoUseableNode } for _, node := range s.nodes { @@ -333,7 +334,7 @@ func (s *SentinelMonitor) Inited(ctx context.Context) (bool, error) { // AllNodeMonitored 
checks if all sentinel nodes are monitoring all the master and replicas func (s *SentinelMonitor) AllNodeMonitored(ctx context.Context) (bool, error) { if s == nil || len(s.nodes) == 0 { - return false, fmt.Errorf("no sentinel nodes") + return false, ErrNoUseableNode } var ( @@ -409,7 +410,7 @@ func (s *SentinelMonitor) AllNodeMonitored(ctx context.Context) (bool, error) { func (s *SentinelMonitor) UpdateConfig(ctx context.Context, params map[string]string) error { if s == nil || len(s.nodes) == 0 { - return fmt.Errorf("no sentinel nodes") + return ErrNoUseableNode } logger := s.logger.WithName("UpdateConfig") @@ -455,7 +456,7 @@ func (s *SentinelMonitor) UpdateConfig(ctx context.Context, params map[string]st func (s *SentinelMonitor) Failover(ctx context.Context) error { if s == nil || len(s.nodes) == 0 { - return fmt.Errorf("no sentinel nodes") + return ErrNoUseableNode } logger := s.logger.WithName("failover") diff --git a/internal/valkey/node.go b/internal/valkey/node.go index 1f1a066..1c1e700 100644 --- a/internal/valkey/node.go +++ b/internal/valkey/node.go @@ -742,7 +742,7 @@ func (n *ValkeyNode) InternalPort() int { func (n *ValkeyNode) DefaultIP() net.IP { if value := n.Pod.Labels[builder.AnnounceIPLabelKey]; value != "" { - address := strings.Replace(value, "-", ":", -1) + address := strings.ReplaceAll(value, "-", ":") return net.ParseIP(address) } return n.DefaultInternalIP() diff --git a/internal/valkey/sentinel/sentinel.go b/internal/valkey/sentinel/sentinel.go index 81510b4..269e419 100644 --- a/internal/valkey/sentinel/sentinel.go +++ b/internal/valkey/sentinel/sentinel.go @@ -24,6 +24,7 @@ import ( "slices" "strconv" "strings" + "time" certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" "github.com/chideat/valkey-operator/api/core" @@ -32,6 +33,7 @@ import ( databasesv1 "github.com/chideat/valkey-operator/api/v1alpha1" "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/sentinelbuilder" + "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" clientset "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" @@ -331,15 +333,18 @@ func (s *ValkeySentinel) IsResourceFullfilled(ctx context.Context) (bool, error) s.logger.Error(err, "get resource failed", "target", util.ObjectKey(s.GetNamespace(), name)) return false, err } - // if gvk == stsKey { - // if replicas, found, err := unstructured.NestedInt64(obj.Object, "spec", "replicas"); err != nil { - // s.logger.Error(err, "get service replicas failed", "target", util.ObjectKey(s.GetNamespace(), name)) - // return false, err - // } else if found && replicas != int64(s.Spec.Replicas) { - // s.logger.Info("@@@@@@@ found", "replicas", replicas, "s.Spec.Replicas", s.Spec.Replicas) - // return false, nil - // } - // } + + if obj.GroupVersionKind() == serviceKey { + ts := obj.GetCreationTimestamp() + typ, _, _ := unstructured.NestedString(obj.Object, "spec", "type") + lbs, _, _ := unstructured.NestedSlice(obj.Object, "status", "loadBalancer", "ingress") + if typ == string(corev1.ServiceTypeLoadBalancer) && len(lbs) == 0 && + time.Since(ts.Time) >= config.LoadbalancerReadyTimeout() { + s.logger.V(3).Info("load balancer service not ready", "target", util.ObjectKey(s.GetNamespace(), name), "createdAt", ts.Time) + return false, nil + } + } + } } return true, nil diff --git a/pkg/kubernetes/clientset/service.go b/pkg/kubernetes/clientset/service.go index a7820db..7c96c6a 100644 
--- a/pkg/kubernetes/clientset/service.go
+++ b/pkg/kubernetes/clientset/service.go
@@ -27,6 +27,8 @@ import (
 	"k8s.io/client-go/util/retry"
 
 	"github.com/go-logr/logr"
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
@@ -129,8 +131,8 @@ func (s *ServiceOption) CreateOrUpdateIfServiceChanged(ctx context.Context, name
 		return err
 	}
 	if !reflect.DeepEqual(oldSvc.Labels, service.Labels) ||
-		!reflect.DeepEqual(oldSvc.Spec.Selector, service.Spec.Selector) ||
-		len(oldSvc.Spec.Ports) != len(service.Spec.Ports) {
+		!reflect.DeepEqual(oldSvc.Annotations, service.Annotations) ||
+		!cmp.Equal(oldSvc.Spec, service.Spec, cmpopts.EquateEmpty()) {
 		return s.UpdateService(ctx, namespace, service)
 	}
 
diff --git a/pkg/types/cluster_instance.go b/pkg/types/cluster_instance.go
index f5084ac..702fd5c 100644
--- a/pkg/types/cluster_instance.go
+++ b/pkg/types/cluster_instance.go
@@ -57,5 +57,6 @@ type ClusterInstance interface {
 	Nodes() []ValkeyNode
 	RawNodes(ctx context.Context) ([]corev1.Pod, error)
 	Shards() []ClusterShard
+	Shard(index int) ClusterShard
 	RewriteShards(ctx context.Context, shards []*v1alpha1.ClusterShards) error
 }
diff --git a/pkg/types/failover_instance.go b/pkg/types/failover_instance.go
index cfb650f..f3086b2 100644
--- a/pkg/types/failover_instance.go
+++ b/pkg/types/failover_instance.go
@@ -40,6 +40,7 @@ type FailoverInstance interface {
 	Instance
 
 	Definition() *v1alpha1.Failover
+	Replication() Replication
 	Masters() []ValkeyNode
 	Nodes() []ValkeyNode
 	RawNodes(ctx context.Context) ([]corev1.Pod, error)

From 6ec0593033e026af2d3497060ed7885e84dacfa7 Mon Sep 17 00:00:00 2001
From: Seer
Date: Mon, 15 Sep 2025 10:21:41 +0800
Subject: [PATCH 03/13] fix: try to meet failed cluster nodes

---
 .../ops/cluster/actor/actor_ensure_slots.go   | 50 +++++++++++++++++--
 .../ops/sentinel/actor/actor_heal_monitor.go  |  5 ++
 internal/valkey/sentinel/sentinel.go          |  5 +-
 3 files changed, 54 insertions(+), 6 deletions(-)

diff --git a/internal/ops/cluster/actor/actor_ensure_slots.go b/internal/ops/cluster/actor/actor_ensure_slots.go
index d6a178b..d9d06ed 100644
--- a/internal/ops/cluster/actor/actor_ensure_slots.go
+++ b/internal/ops/cluster/actor/actor_ensure_slots.go
@@ -29,6 +29,7 @@ import (
 	"github.com/chideat/valkey-operator/pkg/kubernetes"
 	"github.com/chideat/valkey-operator/pkg/slot"
 	"github.com/chideat/valkey-operator/pkg/types"
+	"github.com/chideat/valkey-operator/pkg/valkey"
 	"github.com/go-logr/logr"
 	corev1 "k8s.io/api/core/v1"
 )
@@ -144,6 +145,20 @@ func (a *actorEnsureSlots) Do(ctx context.Context, val types.Instance) *actor.Ac
 
 	if len(failedShards) > 0 {
 		for _, shard := range failedShards {
+			for _, node := range shard.Replicas() {
+				if !node.IsReady() {
+					continue
+				}
+
+				if node.ClusterInfo().ClusterState != valkey.ClusterStateOk {
+					logger.Info("node is not in cluster state", "node", node.GetName(), "state", node.ClusterInfo().ClusterState)
+					if err := a.meetNode(ctx, cluster, node, logger); err != nil {
+						time.Sleep(time.Second * 2)
+					}
+				}
+
+			}
+
 			if err := shard.Refresh(ctx); err != nil {
 				logger.Error(err, "refresh shard info failed", "shard", shard.GetName())
 				continue
@@ -153,10 +168,15 @@ func (a *actorEnsureSlots) Do(ctx context.Context, val types.Instance) *actor.Ac
 			}
 
 			for _, node := range shard.Replicas() {
-				if !node.IsReady() && node.Role() != core.NodeRoleReplica {
+				if !node.IsReady() {
 					continue
 				}
 
+				if node.ClusterInfo().ClusterState != valkey.ClusterStateOk {
+					logger.Info("node is not in cluster state", "node", node.GetName(), 
"state", node.ClusterInfo().ClusterState) + _ = a.meetNode(ctx, cluster, node, logger) + } + // disable takeover when shard in importing or migrating if a.doFailover(ctx, node, 10, !shard.IsImporting() && !shard.IsMigrating(), logger) == nil { cluster.SendEventf(corev1.EventTypeWarning, config.EventFailover, "healed shard %s with new master %s", shard.GetName(), node.GetName()) @@ -256,19 +276,39 @@ func (a *actorEnsureSlots) Do(ctx context.Context, val types.Instance) *actor.Ac return nil } +func (a *actorEnsureSlots) meetNode(ctx context.Context, cluster types.ClusterInstance, node types.ValkeyNode, logger logr.Logger) error { + if cluster == nil || node == nil { + return fmt.Errorf("cluster or node is nil") + } + + arg := []any{"CLUSTER", "MEET", node.DefaultInternalIP().String(), node.InternalPort(), node.InternalIPort()} + for _, shard := range cluster.Shards() { + for _, snode := range shard.Nodes() { + if snode.ID() == node.ID() { + continue + } + if err := snode.Setup(ctx, arg); err != nil { + logger.Error(err, "meet node failed", "node", snode.GetName()) + return err + } + } + } + return nil +} + func (a *actorEnsureSlots) doFailover(ctx context.Context, node types.ValkeyNode, retry int, ensure bool, logger logr.Logger) error { ctx, cancel := context.WithTimeout(ctx, time.Minute) defer cancel() args := []any{"CLUSTER", "FAILOVER", "FORCE"} for i := 0; i < retry+1; i++ { - logger.Info("do shard failover", "node", node.GetName(), "action", args[2]) + logger.Info("do shard force failover", "node", node.GetName(), "action", args[2]) if err := node.Setup(ctx, args); err != nil { logger.Error(err, "do failover failed", "node", node.GetName()) return err } - for j := 0; j < 3; j++ { + for range 3 { time.Sleep(time.Second * 2) if err := node.Refresh(ctx); err != nil { logger.Error(err, "refresh node info failed") @@ -287,13 +327,13 @@ func (a *actorEnsureSlots) doFailover(ctx context.Context, node types.ValkeyNode } args[2] = "TAKEOVER" - logger.Info("do shard failover", "node", node.GetName(), "action", args[2]) + logger.Info("do shard takeover failover", "node", node.GetName(), "action", args[2]) if err := node.Setup(ctx, args); err != nil { logger.Error(err, "do failover failed", "node", node.GetName()) return err } - for j := 0; j < 3; j++ { + for range 3 { time.Sleep(time.Second * 2) if err := node.Refresh(ctx); err != nil { logger.Error(err, "refresh node info failed") diff --git a/internal/ops/sentinel/actor/actor_heal_monitor.go b/internal/ops/sentinel/actor/actor_heal_monitor.go index 6d4c71a..8880fb5 100644 --- a/internal/ops/sentinel/actor/actor_heal_monitor.go +++ b/internal/ops/sentinel/actor/actor_heal_monitor.go @@ -26,6 +26,7 @@ import ( ops "github.com/chideat/valkey-operator/internal/ops/sentinel" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" + ptypes "github.com/chideat/valkey-operator/pkg/types/user" "github.com/chideat/valkey-operator/pkg/valkey" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" @@ -79,6 +80,10 @@ func (a *actorHealMonitor) Do(ctx context.Context, val types.Instance) *actor.Ac user = inst.Users().GetOpUser() tls = inst.TLSConfig() ) + if user == nil { + user = &ptypes.User{} + } + for name, nodes := range unknownSentinels { // set sentinels for _, node := range inst.Nodes() { diff --git a/internal/valkey/sentinel/sentinel.go b/internal/valkey/sentinel/sentinel.go index 269e419..1bab67c 100644 --- a/internal/valkey/sentinel/sentinel.go +++ b/internal/valkey/sentinel/sentinel.go @@ -189,7 +189,10 
@@ func (s *ValkeySentinel) Definition() *v1alpha1.Sentinel { } func (s *ValkeySentinel) Users() types.Users { - return nil + if s == nil { + return nil + } + return s.users } func (s *ValkeySentinel) Replication() types.SentinelReplication { From 0979ddb4e6a67a6c02310c88d0f7ebfe83ee6595 Mon Sep 17 00:00:00 2001 From: Seer Date: Mon, 15 Sep 2025 10:49:13 +0800 Subject: [PATCH 04/13] feat: Refactor annotation merging for restart annotation Refactored the annotation merging logic to specifically handle the `RestartAnnotationKey`. - Introduced `MergeRestartAnnotation` to compare and merge restart annotations based on timestamps. - Replaced the generic `MergeAnnotations` function with the new specialized function. - Ensured that the restart annotation is correctly propagated during updates. --- internal/builder/helper.go | 47 ++++++++++--------- internal/builder/helper_test.go | 14 +++--- internal/controller/rds/valkey/cluster.go | 3 +- internal/controller/rds/valkey_controller.go | 13 ++--- .../cluster/actor/actor_ensure_resource.go | 31 +----------- 5 files changed, 42 insertions(+), 66 deletions(-) diff --git a/internal/builder/helper.go b/internal/builder/helper.go index 8512a3a..e239f23 100644 --- a/internal/builder/helper.go +++ b/internal/builder/helper.go @@ -165,33 +165,36 @@ func ParsePodShardAndIndex(name string) (shard int, index int, err error) { return shard, index, nil } -func MergeAnnotations(t, s map[string]string) map[string]string { - if t == nil { - return s +func MergeRestartAnnotation(n, o map[string]string) map[string]string { + if n == nil { + n = make(map[string]string) } - if s == nil { - return t + + oldTimeStr, exists := o[RestartAnnotationKey] + if !exists || oldTimeStr == "" { + return n + } + oldTime, err := time.Parse(time.RFC3339Nano, oldTimeStr) + if err != nil { + return n } - for k, v := range s { - if k == RestartAnnotationKey { - tRestartAnn := t[k] - if tRestartAnn == "" && v != "" { - t[k] = v - } + newTimeStr, exists := n[RestartAnnotationKey] + if !exists || newTimeStr == "" { + n[RestartAnnotationKey] = oldTimeStr + return n + } + newTime, err := time.Parse(time.RFC3339Nano, newTimeStr) + if err != nil { + n[RestartAnnotationKey] = oldTimeStr + return n + } - tTime, err1 := time.Parse(time.RFC3339Nano, tRestartAnn) - sTime, err2 := time.Parse(time.RFC3339Nano, v) - if err1 != nil || err2 != nil || sTime.After(tTime) { - t[k] = v - } else { - t[k] = tRestartAnn - } - } else { - t[k] = v - } + if oldTime.After(newTime) { + n[RestartAnnotationKey] = oldTimeStr + return n } - return t + return n } func IsPodAnnotationDiff(d map[string]string, s map[string]string) bool { diff --git a/internal/builder/helper_test.go b/internal/builder/helper_test.go index 84ae3c9..1e2eae0 100644 --- a/internal/builder/helper_test.go +++ b/internal/builder/helper_test.go @@ -419,7 +419,7 @@ func TestParsePodShardAndIndex(t *testing.T) { } } -func TestMergeAnnotations(t *testing.T) { +func TestMergeRestartAnnotation(t *testing.T) { now := time.Now() older := now.Add(-1 * time.Hour) newer := now.Add(1 * time.Hour) @@ -434,7 +434,7 @@ func TestMergeAnnotations(t *testing.T) { name: "nil target", t: nil, s: map[string]string{"key": "value"}, - want: map[string]string{"key": "value"}, + want: map[string]string{}, }, { name: "nil source", @@ -446,12 +446,12 @@ func TestMergeAnnotations(t *testing.T) { name: "merge regular keys", t: map[string]string{"key1": "value1"}, s: map[string]string{"key2": "value2"}, - want: map[string]string{"key1": "value1", "key2": "value2"}, + want: 
map[string]string{"key1": "value1"}, }, { name: "source overwrites target", - t: map[string]string{"key": "old"}, - s: map[string]string{"key": "new"}, + t: map[string]string{"key": "new"}, + s: map[string]string{"key": "old"}, want: map[string]string{"key": "new"}, }, { @@ -482,9 +482,9 @@ func TestMergeAnnotations(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := MergeAnnotations(tt.t, tt.s) + got := MergeRestartAnnotation(tt.t, tt.s) if !reflect.DeepEqual(got, tt.want) { - t.Errorf("MergeAnnotations() = %v, want %v", got, tt.want) + t.Errorf("MergeRestartAnnotation() = %v, want %v", got, tt.want) } }) } diff --git a/internal/controller/rds/valkey/cluster.go b/internal/controller/rds/valkey/cluster.go index fc11642..63bd9c8 100644 --- a/internal/controller/rds/valkey/cluster.go +++ b/internal/controller/rds/valkey/cluster.go @@ -29,6 +29,7 @@ import ( "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" + "github.com/samber/lo" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" v12 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -122,7 +123,7 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err NodeSelector: instance.Spec.NodeSelector, Tolerations: instance.Spec.Tolerations, SecurityContext: instance.Spec.SecurityContext, - PodAnnotations: instance.Spec.PodAnnotations, + PodAnnotations: lo.Assign(instance.Spec.PodAnnotations), Access: instance.Spec.Access, Exporter: exporter, Storage: instance.Spec.Storage, diff --git a/internal/controller/rds/valkey_controller.go b/internal/controller/rds/valkey_controller.go index f3d77f0..6a03959 100644 --- a/internal/controller/rds/valkey_controller.go +++ b/internal/controller/rds/valkey_controller.go @@ -142,15 +142,16 @@ func (r *ValkeyReconciler) reconcileFailover(ctx context.Context, inst *rdsv1alp if len(inst.Status.MatchLabels) == 0 { inst.Status.MatchLabels = failoverbuilder.GenerateSelectorLabels(inst.Name) } + if inst.Spec.PodAnnotations == nil { + inst.Spec.PodAnnotations = make(map[string]string) + } for key := range vkHandler.GetValkeyConfigsApplyPolicyByVersion(inst.Spec.Version) { if inst.Spec.CustomConfigs[key] != failover.Spec.CustomConfigs[key] { - if inst.Spec.PodAnnotations == nil { - inst.Spec.PodAnnotations = map[string]string{} - } inst.Spec.PodAnnotations[builder.RestartAnnotationKey] = time.Now().Format(time.RFC3339Nano) break } } + inst.Spec.PodAnnotations = builder.MergeRestartAnnotation(inst.Spec.PodAnnotations, failover.Spec.PodAnnotations) newFailover, err := vkHandler.GenerateFailover(inst) if err != nil { @@ -227,15 +228,15 @@ func (r *ValkeyReconciler) reconcileCluster(ctx context.Context, inst *rdsv1alph if len(inst.Status.MatchLabels) == 0 { inst.Status.MatchLabels = clusterbuilder.GenerateClusterLabels(inst.Name, nil) } + for key := range vkHandler.GetValkeyConfigsApplyPolicyByVersion(inst.Spec.Version) { if inst.Spec.CustomConfigs[key] != cluster.Spec.CustomConfigs[key] { - if inst.Spec.PodAnnotations == nil { - inst.Spec.PodAnnotations = map[string]string{} - } inst.Spec.PodAnnotations[builder.RestartAnnotationKey] = time.Now().Format(time.RFC3339Nano) break } } + inst.Spec.PodAnnotations = builder.MergeRestartAnnotation(inst.Spec.PodAnnotations, cluster.Spec.PodAnnotations) + newCluster, err := vkHandler.GenerateValkeyCluster(inst) if err != nil { return err diff --git a/internal/ops/cluster/actor/actor_ensure_resource.go b/internal/ops/cluster/actor/actor_ensure_resource.go index 
cd2483d..634d190 100644 --- a/internal/ops/cluster/actor/actor_ensure_resource.go +++ b/internal/ops/cluster/actor/actor_ensure_resource.go @@ -322,7 +322,7 @@ func (a *actorEnsureResource) ensureStatefulset(ctx context.Context, cluster typ newSts.Spec.VolumeClaimTemplates = oldSts.Spec.VolumeClaimTemplates // merge restart annotations, if statefulset is more new, not restart statefulset - newSts.Spec.Template.Annotations = MergeAnnotations(newSts.Spec.Template.Annotations, oldSts.Spec.Template.Annotations) + newSts.Spec.Template.Annotations = builder.MergeRestartAnnotation(newSts.Spec.Template.Annotations, oldSts.Spec.Template.Annotations) if util.IsStatefulsetChanged(newSts, oldSts, logger) { if err := a.client.UpdateStatefulSet(ctx, cr.GetNamespace(), newSts); err != nil { @@ -659,32 +659,3 @@ func (a *actorEnsureResource) fetchAllPodBindedServices(ctx context.Context, nam } return services, nil } - -func MergeAnnotations(t, s map[string]string) map[string]string { - if t == nil { - return s - } - if s == nil { - return t - } - - for k, v := range s { - if k == builder.RestartAnnotationKey { - tRestartAnn := t[k] - if tRestartAnn == "" && v != "" { - t[k] = v - } - - tTime, err1 := time.Parse(time.RFC3339Nano, tRestartAnn) - sTime, err2 := time.Parse(time.RFC3339Nano, v) - if err1 != nil || err2 != nil || sTime.After(tTime) { - t[k] = v - } else { - t[k] = tRestartAnn - } - } else { - t[k] = v - } - } - return t -} From 69aabd08c5256dbb230a262f309283cd9b35aed7 Mon Sep 17 00:00:00 2001 From: Seer Date: Mon, 15 Sep 2025 11:09:25 +0800 Subject: [PATCH 05/13] feat: set pause status after all pods deleted --- internal/builder/sentinelbuilder/statefulset.go | 2 +- internal/ops/cluster/actor/actor_ensure_resource.go | 5 ++++- internal/ops/failover/actor/actor_ensure_resource.go | 5 ++++- internal/ops/sentinel/actor/actor_ensure_resource.go | 5 ++++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/internal/builder/sentinelbuilder/statefulset.go b/internal/builder/sentinelbuilder/statefulset.go index adfe60f..eb35481 100644 --- a/internal/builder/sentinelbuilder/statefulset.go +++ b/internal/builder/sentinelbuilder/statefulset.go @@ -214,7 +214,7 @@ func buildServerContainer(sen *v1alpha1.Sentinel, envs []corev1.EnvVar) (*corev1 }, }, StartupProbe: &corev1.Probe{ - InitialDelaySeconds: 3, + InitialDelaySeconds: 30, TimeoutSeconds: 5, FailureThreshold: 3, ProbeHandler: corev1.ProbeHandler{ diff --git a/internal/ops/cluster/actor/actor_ensure_resource.go b/internal/ops/cluster/actor/actor_ensure_resource.go index 634d190..d6a30f4 100644 --- a/internal/ops/cluster/actor/actor_ensure_resource.go +++ b/internal/ops/cluster/actor/actor_ensure_resource.go @@ -81,7 +81,10 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.pauseStatefulSet(ctx, cluster, logger); ret != nil { return ret } - return actor.NewResult(cops.CommandPaused) + if len(cluster.Nodes()) == 0 { + return actor.Pause() + } + return actor.Requeue() } if ret := a.ensureServiceAccount(ctx, cluster, logger); ret != nil { diff --git a/internal/ops/failover/actor/actor_ensure_resource.go b/internal/ops/failover/actor/actor_ensure_resource.go index eb070d8..4ac36fd 100644 --- a/internal/ops/failover/actor/actor_ensure_resource.go +++ b/internal/ops/failover/actor/actor_ensure_resource.go @@ -89,7 +89,10 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.pauseSentinel(ctx, inst, logger); ret != nil { return ret } - return actor.Pause() + if 
len(inst.Nodes()) == 0 { + return actor.Pause() + } + return actor.Requeue() } if ret := a.ensureValkeySSL(ctx, inst, logger); ret != nil { diff --git a/internal/ops/sentinel/actor/actor_ensure_resource.go b/internal/ops/sentinel/actor/actor_ensure_resource.go index bddd114..96aa8d7 100644 --- a/internal/ops/sentinel/actor/actor_ensure_resource.go +++ b/internal/ops/sentinel/actor/actor_ensure_resource.go @@ -85,7 +85,10 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.ensurePauseStatefulSet(ctx, sentinel, logger); ret != nil { return ret } - return actor.NewResult(ops.CommandPaused) + if len(sentinel.Nodes()) == 0 { + return actor.Pause() + } + return actor.Requeue() } if ret := a.ensureServiceAccount(ctx, sentinel, logger); ret != nil { From e35181dc4205e13a5d07ac4088d9654b4fdf1123 Mon Sep 17 00:00:00 2001 From: Seer Date: Mon, 15 Sep 2025 18:23:31 +0800 Subject: [PATCH 06/13] feat: Improve pause logic and increase sentinel startup probe delay - Modified the pause logic in the cluster, failover, and sentinel controllers to requeue the resource if nodes still exist, allowing them to scale down gracefully. The operator will now pause reconciliation only after all pods have been terminated. - Increased the initial delay for the sentinel startup probe to 30 seconds to prevent premature failures on slower systems. --- api/rds/v1alpha1/valkey_types.go | 5 +- .../crd/bases/rds.valkey.buf.red_valkeys.yaml | 5 +- internal/builder/clusterbuilder/configmap.go | 6 + internal/builder/config.go | 4 +- internal/builder/const.go | 13 +- internal/builder/failoverbuilder/configmap.go | 6 + internal/controller/rds/valkey/cluster.go | 5 +- internal/controller/rds/valkey/failover.go | 1 + .../ops/cluster/actor/actor_update_config.go | 126 ++++++++++-------- internal/ops/cluster/engine.go | 3 + .../failover/actor/actor_ensure_resource.go | 74 +++++----- .../ops/failover/actor/actor_update_config.go | 106 +++++++++------ internal/ops/failover/engine.go | 3 + .../sentinel/actor/actor_ensure_resource.go | 66 +++++---- internal/util/kubernetes.go | 14 ++ pkg/valkey/valkey.go | 5 +- 16 files changed, 276 insertions(+), 166 deletions(-) diff --git a/api/rds/v1alpha1/valkey_types.go b/api/rds/v1alpha1/valkey_types.go index ee64bed..4dc9206 100644 --- a/api/rds/v1alpha1/valkey_types.go +++ b/api/rds/v1alpha1/valkey_types.go @@ -72,7 +72,10 @@ type ValkeySpec struct { // for detailed settings, please refer to https://github.com/valkey-io/valkey/blob/unstable/valkey.conf CustomConfigs map[string]string `json:"customConfigs,omitempty"` - // Modules defines the module settings for Valkey + // Modules defines a list of modules to be loaded into the valkey instance. + // Each module is specified by its name and version. + // Modules are loaded at startup and can extend Redis functionality. + // +optional Modules []core.ValkeyModule `json:"modules,omitempty"` // Storage defines the storage settings for Valkey diff --git a/config/crd/bases/rds.valkey.buf.red_valkeys.yaml b/config/crd/bases/rds.valkey.buf.red_valkeys.yaml index 2f76660..dd8a74c 100644 --- a/config/crd/bases/rds.valkey.buf.red_valkeys.yaml +++ b/config/crd/bases/rds.valkey.buf.red_valkeys.yaml @@ -1317,7 +1317,10 @@ spec: type: object type: object modules: - description: Modules defines the module settings for Valkey + description: |- + Modules defines a list of modules to be loaded into the valkey instance. + Each module is specified by its name and version. 
+ Modules are loaded at startup and can extend Redis functionality. items: description: ValkeyModule defines the module for Valkey properties: diff --git a/internal/builder/clusterbuilder/configmap.go b/internal/builder/clusterbuilder/configmap.go index 09e05e4..2fe5669 100644 --- a/internal/builder/clusterbuilder/configmap.go +++ b/internal/builder/clusterbuilder/configmap.go @@ -45,6 +45,7 @@ func NewConfigMapForCR(cluster types.ClusterInstance) (*corev1.ConfigMap, error) Name: ValkeyConfigMapName(cluster.GetName()), Namespace: cluster.GetNamespace(), Labels: GenerateClusterLabels(cluster.GetName(), nil), + Annotations: map[string]string{}, OwnerReferences: util.BuildOwnerReferences(cluster.Definition()), }, Data: map[string]string{ @@ -160,6 +161,11 @@ func buildValkeyConfigs(cluster types.ClusterInstance) (string, error) { buffer.WriteString(fmt.Sprintf("%s %s\n", k, v)) } } + + for _, mod := range cr.Spec.Modules { + args := append([]string{"loadmodule", mod.Path}, mod.Args...) + buffer.WriteString(strings.Join(args, " ") + "\n") + } return buffer.String(), nil } diff --git a/internal/builder/config.go b/internal/builder/config.go index 2f3f02e..7fb9451 100644 --- a/internal/builder/config.go +++ b/internal/builder/config.go @@ -62,7 +62,6 @@ const ( var ValkeyConfigRestartPolicy = map[string]ValkeyConfigSettingRule{ // forbid "include": Forbid, - "loadmodule": Forbid, "bind": Forbid, "protected-mode": Forbid, "port": Forbid, @@ -106,6 +105,7 @@ var ValkeyConfigRestartPolicy = map[string]ValkeyConfigSettingRule{ "rdbchecksum": RequireRestart, "io-threads": RequireRestart, "io-threads-do-reads": RequireRestart, + "loadmodule": RequireRestart, } type ValkeyConfigValues []string @@ -124,7 +124,7 @@ type ValkeyConfig map[string]ValkeyConfigValues // LoadValkeyConfig func LoadValkeyConfig(data string) (ValkeyConfig, error) { conf := ValkeyConfig{} - for _, line := range strings.Split(data, "\n") { + for line := range strings.SplitSeq(data, "\n") { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue diff --git a/internal/builder/const.go b/internal/builder/const.go index 9fac09f..f9fb191 100644 --- a/internal/builder/const.go +++ b/internal/builder/const.go @@ -25,12 +25,13 @@ const ( AppComponentLabelKey = "app.kubernetes.io/component" AppNameLabelKey = "app.kubernetes.io/name" - ArchLabelKey = "valkeyarch" - RoleLabelKey = "valkey.buf.red/role" - AnnounceIPLabelKey = "valkey.buf.red/announce_ip" - AnnouncePortLabelKey = "valkey.buf.red/announce_port" - AnnounceIPortLabelKey = "valkey.buf.red/announce_iport" - ChecksumLabelKey = "valkey.buf.red/checksum" + ArchLabelKey = "valkeyarch" + RoleLabelKey = "valkey.buf.red/role" + AnnounceIPLabelKey = "valkey.buf.red/announce_ip" + AnnouncePortLabelKey = "valkey.buf.red/announce_port" + AnnounceIPortLabelKey = "valkey.buf.red/announce_iport" + ChecksumLabelKey = "valkey.buf.red/checksum" + LastAppliedConfigAnnotationKey = "valkey.buf.red/last-applied-config" InstanceTypeLabelKey = "buf.red/type" InstanceNameLabelKey = "buf.red/name" diff --git a/internal/builder/failoverbuilder/configmap.go b/internal/builder/failoverbuilder/configmap.go index f5d4e05..08815fa 100644 --- a/internal/builder/failoverbuilder/configmap.go +++ b/internal/builder/failoverbuilder/configmap.go @@ -140,12 +140,18 @@ func GenerateConfigMap(inst types.FailoverInstance) (*corev1.ConfigMap, error) { } } + for _, mod := range rf.Spec.Modules { + args := append([]string{"loadmodule", mod.Path}, mod.Args...) 
+ buffer.WriteString(strings.Join(args, " ") + "\n") + } + return &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: ConfigMapName(rf.Name), Namespace: rf.Namespace, Labels: GenerateCommonLabels(rf.Name), OwnerReferences: util.BuildOwnerReferences(rf), + Annotations: map[string]string{}, }, Data: map[string]string{ builder.ValkeyConfigKey: buffer.String(), diff --git a/internal/controller/rds/valkey/cluster.go b/internal/controller/rds/valkey/cluster.go index 63bd9c8..61e9f70 100644 --- a/internal/controller/rds/valkey/cluster.go +++ b/internal/controller/rds/valkey/cluster.go @@ -127,6 +127,7 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err Access: instance.Spec.Access, Exporter: exporter, Storage: instance.Spec.Storage, + Modules: instance.Spec.Modules, }, } @@ -143,8 +144,8 @@ func ShouldUpdateCluster(cluster, newCluster *v1alpha1.Cluster, logger logr.Logg !reflect.DeepEqual(cluster.Annotations, newCluster.Annotations) { return true } - if !cmp.Equal(cluster.Spec, newCluster.Spec, - cmpopts.EquateEmpty(), + + if !cmp.Equal(cluster.Spec, newCluster.Spec, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(v1alpha1.ClusterSpec{}, "PodAnnotations"), ) { return true diff --git a/internal/controller/rds/valkey/failover.go b/internal/controller/rds/valkey/failover.go index 908071b..fb35d10 100644 --- a/internal/controller/rds/valkey/failover.go +++ b/internal/controller/rds/valkey/failover.go @@ -115,6 +115,7 @@ func GenerateFailover(instance *rdsv1alpha1.Valkey) (*v1alpha1.Failover, error) Exporter: exporter, Access: *access, Storage: instance.Spec.Storage.DeepCopy(), + Modules: instance.Spec.Modules, Affinity: instance.Spec.CustomAffinity, NodeSelector: instance.Spec.NodeSelector, diff --git a/internal/ops/cluster/actor/actor_update_config.go b/internal/ops/cluster/actor/actor_update_config.go index 4f0ef47..688a9cc 100644 --- a/internal/ops/cluster/actor/actor_update_config.go +++ b/internal/ops/cluster/actor/actor_update_config.go @@ -18,13 +18,14 @@ package actor import ( "context" + "maps" "github.com/Masterminds/semver/v3" "github.com/chideat/valkey-operator/api/core" "github.com/chideat/valkey-operator/internal/actor" "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/clusterbuilder" - cops "github.com/chideat/valkey-operator/internal/ops/cluster" + ops "github.com/chideat/valkey-operator/internal/ops/cluster" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" "github.com/go-logr/logr" @@ -53,7 +54,7 @@ type actorUpdateConfig struct { // SupportedCommands func (a *actorUpdateConfig) SupportedCommands() []actor.Command { - return []actor.Command{cops.CommandUpdateConfig} + return []actor.Command{ops.CommandUpdateConfig} } func (a *actorUpdateConfig) Version() *semver.Version { @@ -65,83 +66,100 @@ func (a *actorUpdateConfig) Version() *semver.Version { // two type config: hotconfig and restartconfig // use cm to check the difference of the config func (a *actorUpdateConfig) Do(ctx context.Context, val types.Instance) *actor.ActorResult { - logger := val.Logger().WithValues("actor", cops.CommandUpdateConfig.String()) + logger := val.Logger().WithValues("actor", ops.CommandUpdateConfig.String()) cluster := val.(types.ClusterInstance) newCm, _ := clusterbuilder.NewConfigMapForCR(cluster) oldCm, err := a.client.GetConfigMap(ctx, newCm.Namespace, newCm.Name) if err != nil && !errors.IsNotFound(err) { logger.Error(err, "get configmap failed", "target", 
client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(cops.CommandRequeue, err) + return actor.NewResultWithError(ops.CommandRequeue, err) } else if oldCm == nil || oldCm.Data[builder.ValkeyConfigKey] == "" { if err = a.client.CreateConfigMap(ctx, cluster.GetNamespace(), newCm); err != nil { logger.Error(err, "create configmap failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(cops.CommandRequeue, err) + return actor.NewResultWithError(ops.CommandRequeue, err) } return nil } - // check if config changed - newConf, _ := builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) - oldConf, _ := builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) - added, changed, deleted := oldConf.Diff(newConf) - - if len(deleted) > 0 || len(added) > 0 || len(changed) > 0 { - // NOTE: update configmap first may cause the hot config fail for it will not retry again - if err := a.client.UpdateConfigMap(ctx, cluster.GetNamespace(), newCm); err != nil { - logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(cops.CommandRequeue, err) - } + var ( + newConf builder.ValkeyConfig + oldConf builder.ValkeyConfig + ) + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(lastAppliedConf) + } else { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) } - for k, v := range added { - changed[k] = v - } - if len(changed) == 0 { - return nil - } + added, changed, deleted := oldConf.Diff(newConf) + maps.Copy(changed, added) + + if len(deleted)+len(changed) > 0 { + conf := newCm.DeepCopy() + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = lastAppliedConf + } else { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = oldCm.Data[builder.ValkeyConfigKey] + } - foundRestartApplyConfig := false - for key := range changed { - if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { - foundRestartApplyConfig = true - break + // update configmap with last applied config + if err := a.client.UpdateConfigMap(ctx, conf.GetNamespace(), conf); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(conf)) + return actor.RequeueWithError(err) } } - if foundRestartApplyConfig { - logger.Info("rolling restart all shard") - // NOTE: the restart is done by RDS - // rolling update all statefulset - // if err := cluster.Restart(ctx); err != nil { - // logger.Error(err, "restart instance failed") - // } - return actor.NewResult(cops.CommandEnsureResource) - } else { - var margs [][]any - for key, vals := range changed { - logger.V(2).Info("hot config ", "key", key, "value", vals.String()) - margs = append(margs, []any{"config", "set", key, vals.String()}) + if len(changed) > 0 { + foundRestartApplyConfig := false + for key := range changed { + if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { + foundRestartApplyConfig = true + break + } } + if foundRestartApplyConfig { + logger.Info("rolling restart all shard") + // NOTE: the restart is done by RDS + // rolling update all statefulset + if err := cluster.Restart(ctx); err 
!= nil { + logger.Error(err, "restart instance failed") + return actor.NewResultWithError(ops.CommandRequeue, err) + } + } else { + var margs [][]any + for key, vals := range changed { + logger.V(2).Info("hot config ", "key", key, "value", vals.String()) + margs = append(margs, []any{"config", "set", key, vals.String()}) + } - var ( - isUpdateFailed = false - err error - ) - for _, node := range cluster.Nodes() { - if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || - node.IsTerminating() { - continue + var ( + isUpdateFailed = false + err error + ) + for _, node := range cluster.Nodes() { + if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || + node.IsTerminating() { + continue + } + if err = node.Setup(ctx, margs...); err != nil { + isUpdateFailed = true + break + } } - if err = node.Setup(ctx, margs...); err != nil { - isUpdateFailed = true - break + if isUpdateFailed { + return actor.NewResultWithError(ops.CommandRequeue, err) } } + } - if !isUpdateFailed { - return actor.NewResultWithError(cops.CommandRequeue, err) - } + // update configmap without last applied config + if err := a.client.UpdateConfigMap(ctx, cluster.GetNamespace(), newCm); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) + return actor.NewResultWithError(ops.CommandRequeue, err) } + return nil } diff --git a/internal/ops/cluster/engine.go b/internal/ops/cluster/engine.go index 32a6318..065bbe9 100644 --- a/internal/ops/cluster/engine.go +++ b/internal/ops/cluster/engine.go @@ -551,6 +551,9 @@ func (g *RuleEngine) isConfigMapChanged(ctx context.Context, cluster types.Clust if len(added)+len(changed)+len(deleted) != 0 { return true, nil } + if oldCm.Annotations[builder.LastAppliedConfigAnnotationKey] != "" { + return true, nil + } return false, nil } diff --git a/internal/ops/failover/actor/actor_ensure_resource.go b/internal/ops/failover/actor/actor_ensure_resource.go index 4ac36fd..360ae2f 100644 --- a/internal/ops/failover/actor/actor_ensure_resource.go +++ b/internal/ops/failover/actor/actor_ensure_resource.go @@ -21,6 +21,7 @@ import ( "fmt" "reflect" "slices" + "strings" "time" "github.com/Masterminds/semver/v3" @@ -142,42 +143,55 @@ func (a *actorEnsureResource) ensureStatefulSet(ctx context.Context, inst types. 
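// Illustrative sketch, not part of the patch series: the actor_update_config
// hunks above replace the old single UpdateConfigMap call with a two-phase
// apply. The ConfigMap is first written together with a last-applied-config
// annotation recording the previously applied config, the changed keys are
// applied at runtime (CONFIG SET, or a restart for keys whose policy is
// RequireRestart), and only then is the ConfigMap written again with the
// annotation cleared, so an interrupted apply is re-scheduled by the rule
// engine instead of being lost. The helper below condenses that ordering with
// simplified types; diffKeys, ApplyConfig and the boolean "pending" flag are
// names invented for this sketch, not the operator's real API.
package sketch

import "fmt"

// diffKeys returns every key whose value differs between base and desired;
// keys removed from desired are recorded too so the caller can reset them.
func diffKeys(base, desired map[string]string) map[string]string {
	changed := map[string]string{}
	for k, v := range desired {
		if base[k] != v {
			changed[k] = v
		}
	}
	for k := range base {
		if _, ok := desired[k]; !ok {
			changed[k] = ""
		}
	}
	return changed
}

// ApplyConfig persists the pending marker before touching the runtime config
// and clears it only after the runtime apply succeeded. update(data, true)
// stands for "write the new data plus an annotation recording base"; if the
// operator restarts between the two writes, the annotation survives and the
// rule engine schedules the config update again.
func ApplyConfig(
	lastApplied, stored, desired map[string]string,
	update func(data map[string]string, pending bool) error,
	applyRuntime func(changed map[string]string) error,
) error {
	base := stored
	if len(lastApplied) > 0 {
		base = lastApplied // diff against what was actually applied last time
	}
	changed := diffKeys(base, desired)
	if len(changed) == 0 {
		return nil
	}
	if err := update(desired, true); err != nil { // phase 1: new data + marker
		return err
	}
	if err := applyRuntime(changed); err != nil {
		return fmt.Errorf("runtime apply failed, marker kept for retry: %w", err)
	}
	return update(desired, false) // phase 2: clear the marker
}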
return actor.RequeueWithError(err) } - if util.IsStatefulsetChanged(sts, oldSts, logger) { - if *oldSts.Spec.Replicas > *sts.Spec.Replicas { - // scale down - oldSts.Spec.Replicas = sts.Spec.Replicas - if err := a.client.UpdateStatefulSet(ctx, cr.Namespace, oldSts); err != nil { - logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) - return actor.RequeueWithError(err) + sts.Spec.Template.Annotations = builder.MergeRestartAnnotation(sts.Spec.Template.Annotations, oldSts.Spec.Template.Annotations) + + if changed, ichanged := util.IsStatefulsetChanged2(sts, oldSts, logger); changed { + // check if only mutable fields changed + if !ichanged { + if err := a.client.UpdateStatefulSet(ctx, cr.Namespace, sts); err != nil { + if strings.Contains(err.Error(), "updates to statefulset spec for fields other than") { + ichanged = true + } else { + logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) + return actor.RequeueWithError(err) + } } - time.Sleep(time.Second * 3) } - // patch pods with new labels in selector - pods, err := inst.RawNodes(ctx) - if err != nil { - logger.Error(err, "get pods failed") - return actor.RequeueWithError(err) - } - for _, item := range pods { - pod := item.DeepCopy() - pod.Labels = lo.Assign(pod.Labels, inst.Selector()) - if !reflect.DeepEqual(pod.Labels, item.Labels) { - if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { - logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + if ichanged { + if *oldSts.Spec.Replicas > *sts.Spec.Replicas { + oldSts.Spec.Replicas = sts.Spec.Replicas + if err := a.client.UpdateStatefulSet(ctx, cr.Namespace, oldSts); err != nil { + logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) return actor.RequeueWithError(err) } + time.Sleep(time.Second * 3) } - } - time.Sleep(time.Second * 3) - if err := a.client.DeleteStatefulSet(ctx, cr.Namespace, sts.Name, - client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { - logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) - } - if err = a.client.CreateStatefulSet(ctx, cr.Namespace, sts); err != nil && !errors.IsAlreadyExists(err) { - logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) + + // patch pods with new labels in selector + pods, err := inst.RawNodes(ctx) + if err != nil { + logger.Error(err, "get pods failed") + return actor.RequeueWithError(err) + } + for _, item := range pods { + pod := item.DeepCopy() + pod.Labels = lo.Assign(pod.Labels, sts.Spec.Selector.MatchLabels) + logger.V(4).Info("check patch pod labels", "pod", item.Name, "labels", pod.Labels) + if !reflect.DeepEqual(pod.Labels, item.Labels) { + if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { + logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + return actor.RequeueWithError(err) + } + } + } + + if err := a.client.DeleteStatefulSet(ctx, cr.Namespace, sts.Name, + client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) + return actor.RequeueWithError(err) + } + return actor.Requeue() } } return nil diff --git a/internal/ops/failover/actor/actor_update_config.go 
b/internal/ops/failover/actor/actor_update_config.go index 3acc3a7..d91a163 100644 --- a/internal/ops/failover/actor/actor_update_config.go +++ b/internal/ops/failover/actor/actor_update_config.go @@ -72,57 +72,85 @@ func (a *actorUpdateConfigMap) Do(ctx context.Context, val types.Instance) *acto if errors.IsNotFound(err) || oldCm.Data[builder.ValkeyConfigKey] == "" { return actor.NewResultWithError(ops.CommandEnsureResource, fmt.Errorf("configmap %s not found", newCm.GetName())) } else if err != nil { - return actor.NewResultWithError(ops.CommandRequeue, err) + return actor.RequeueWithError(err) } - newConf, _ := builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) - oldConf, _ := builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) - added, changed, deleted := oldConf.Diff(newConf) - if len(deleted) > 0 || len(added) > 0 || len(changed) > 0 { - // NOTE: update configmap first may cause the hot config fail for it will not retry again - if err := a.client.UpdateConfigMap(ctx, newCm.GetNamespace(), newCm); err != nil { - logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(ops.CommandRequeue, err) - } + + var ( + newConf builder.ValkeyConfig + oldConf builder.ValkeyConfig + ) + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(lastAppliedConf) + } else { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) } + + added, changed, deleted := oldConf.Diff(newConf) maps.Copy(changed, added) - foundRestartApplyConfig := false - for key := range changed { - if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { - foundRestartApplyConfig = true - break + if len(deleted)+len(changed) > 0 { + conf := newCm.DeepCopy() + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = lastAppliedConf + } else { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = oldCm.Data[builder.ValkeyConfigKey] } - } - if foundRestartApplyConfig { - err := st.Restart(ctx) - if err != nil { - logger.Error(err, "restart instance failed") + + // update configmap with last applied config + if err := a.client.UpdateConfigMap(ctx, conf.GetNamespace(), conf); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(conf)) return actor.NewResultWithError(ops.CommandRequeue, err) } - } else { - var margs [][]any - for key, vals := range changed { - logger.V(2).Info("hot config ", "key", key, "value", vals.String()) - margs = append(margs, []any{"config", "set", key, vals.String()}) - } - var ( - isUpdateFailed = false - err error - ) - for _, node := range st.Nodes() { - if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || - node.IsTerminating() { - continue - } - if err = node.Setup(ctx, margs...); err != nil { - isUpdateFailed = true + } + + if len(changed) > 0 { + foundRestartApplyConfig := false + for key := range changed { + if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { + foundRestartApplyConfig = true break } } + if foundRestartApplyConfig { + err := st.Restart(ctx) + if err != nil { + logger.Error(err, "restart redis failed") + 
return actor.NewResultWithError(ops.CommandRequeue, err) + } + } else { + var margs [][]any + for key, vals := range changed { + logger.V(2).Info("hot config ", "key", key, "value", vals.String()) + margs = append(margs, []any{"config", "set", key, vals.String()}) + } + var ( + isUpdateFailed = false + err error + ) + for _, node := range st.Nodes() { + if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || + node.IsTerminating() { + continue + } + if err = node.Setup(ctx, margs...); err != nil { + isUpdateFailed = true + break + } + } - if !isUpdateFailed { - return actor.NewResultWithError(ops.CommandRequeue, err) + if isUpdateFailed { + return actor.NewResultWithError(ops.CommandRequeue, err) + } } } + + // update configmap without last applied config + if err := a.client.UpdateConfigMap(ctx, newCm.GetNamespace(), newCm); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return nil } diff --git a/internal/ops/failover/engine.go b/internal/ops/failover/engine.go index a81fd6a..a2b17c2 100644 --- a/internal/ops/failover/engine.go +++ b/internal/ops/failover/engine.go @@ -206,6 +206,9 @@ func (g *RuleEngine) isConfigChanged(ctx context.Context, inst types.FailoverIns if len(added)+len(changed)+len(deleted) != 0 { return actor.NewResult(CommandUpdateConfig) } + if oldCm.Annotations[builder.LastAppliedConfigAnnotationKey] != "" { + return actor.NewResult(CommandUpdateConfig) + } if inst.Monitor().Policy() == v1.SentinelFailoverPolicy { // HACK: check and update sentinel monitor config diff --git a/internal/ops/sentinel/actor/actor_ensure_resource.go b/internal/ops/sentinel/actor/actor_ensure_resource.go index 96aa8d7..57d2e09 100644 --- a/internal/ops/sentinel/actor/actor_ensure_resource.go +++ b/internal/ops/sentinel/actor/actor_ensure_resource.go @@ -21,6 +21,7 @@ import ( "fmt" "reflect" "slices" + "strings" "time" "github.com/Masterminds/semver/v3" @@ -157,42 +158,51 @@ func (a *actorEnsureResource) ensureStatefulSet(ctx context.Context, inst types. 
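// Illustrative sketch, not part of the patch series: the ensureStatefulSet
// rewrites in this patch (failover above, sentinel below) split a detected
// StatefulSet change into "only mutable fields" and "immutable fields" via
// util.IsStatefulsetChanged2, attempt an in-place update first, and fall back
// to an orphaning delete plus recreate only when immutable fields are
// involved. The function below condenses that control flow; the client calls
// are passed in as callbacks, and reconcileStatefulSet, update and
// orphanDelete are names for this sketch only.
package sketch

import (
	"context"
	"strings"

	appsv1 "k8s.io/api/apps/v1"
)

// reconcileStatefulSet decides between in-place update and recreate.
// changed/immutableChanged come from a helper such as util.IsStatefulsetChanged2.
func reconcileStatefulSet(
	ctx context.Context,
	desired *appsv1.StatefulSet,
	changed, immutableChanged bool,
	update func(context.Context, *appsv1.StatefulSet) error,
	orphanDelete func(context.Context, string) error,
) (requeue bool, err error) {
	if !changed {
		return false, nil
	}
	if !immutableChanged {
		if err := update(ctx, desired); err != nil {
			// The API server names the offending fields in its error message;
			// treat that as a signal that the static classification was too
			// optimistic and an immutable field did change.
			if strings.Contains(err.Error(), "updates to statefulset spec for fields other than") {
				immutableChanged = true
			} else {
				return false, err
			}
		}
	}
	if immutableChanged {
		// Orphan the pods so they keep serving while the StatefulSet object is
		// deleted and recreated with the new immutable fields on a later
		// reconcile.
		if err := orphanDelete(ctx, desired.Name); err != nil {
			return false, err
		}
		return true, nil
	}
	return false, nil
}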
} else if err != nil { logger.Error(err, "get statefulset failed", "target", client.ObjectKeyFromObject(sts)) return actor.NewResultWithError(ops.CommandRequeue, err) - } else if util.IsStatefulsetChanged(sts, oldSts, logger) { - if *oldSts.Spec.Replicas > *sts.Spec.Replicas { - oldSts.Spec.Replicas = sts.Spec.Replicas - if err := a.client.UpdateStatefulSet(ctx, sen.Namespace, oldSts); err != nil { - logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) - return actor.RequeueWithError(err) + } else if changed, ichanged := util.IsStatefulsetChanged2(sts, oldSts, logger); changed { + if !ichanged { + if err := a.client.UpdateStatefulSet(ctx, sen.Namespace, sts); err != nil { + if strings.Contains(err.Error(), "updates to statefulset spec for fields other than") { + ichanged = true + } else { + logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) + return actor.RequeueWithError(err) + } } - time.Sleep(time.Second * 3) } - pods, err := inst.RawNodes(ctx) - if err != nil { - logger.Error(err, "get pods failed") - return actor.RequeueWithError(err) - } - for _, item := range pods { - pod := item.DeepCopy() - pod.Labels = lo.Assign(pod.Labels, inst.Selector()) - if !reflect.DeepEqual(pod.Labels, item.Labels) { - if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { - logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + if ichanged { + if *oldSts.Spec.Replicas > *sts.Spec.Replicas { + oldSts.Spec.Replicas = sts.Spec.Replicas + if err := a.client.UpdateStatefulSet(ctx, sen.Namespace, oldSts); err != nil { + logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) return actor.RequeueWithError(err) } + time.Sleep(time.Second * 3) } - } - if err := a.client.DeleteStatefulSet(ctx, sen.Namespace, sts.GetName(), - client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { + pods, err := inst.RawNodes(ctx) + if err != nil { + logger.Error(err, "get pods failed") + return actor.RequeueWithError(err) + } + for _, item := range pods { + pod := item.DeepCopy() + pod.Labels = lo.Assign(pod.Labels, sts.Spec.Selector.MatchLabels) + if !reflect.DeepEqual(pod.Labels, item.Labels) { + if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { + logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + return actor.RequeueWithError(err) + } + } + } - logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) - } - time.Sleep(time.Second * 3) - if err = a.client.CreateStatefulSet(ctx, sen.Namespace, sts); err != nil && !errors.IsAlreadyExists(err) { - logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) + if err := a.client.DeleteStatefulSet(ctx, sen.Namespace, sts.GetName(), + client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { + + logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) + return actor.RequeueWithError(err) + } + return actor.Requeue() } } return nil diff --git a/internal/util/kubernetes.go b/internal/util/kubernetes.go index 636a3e7..a0ea59a 100644 --- a/internal/util/kubernetes.go +++ b/internal/util/kubernetes.go @@ -149,6 +149,20 @@ func isSubmap[K, V comparable](a, b map[K]V) bool { return true } +func 
IsStatefulsetChanged2(newSts, sts *appsv1.StatefulSet, logger logr.Logger) (bool, bool) { + changed := IsStatefulsetChanged(newSts, sts, logger) + if !changed { + return false, false + } + + immutableChanged := cmp.Equal(newSts.Spec, sts.Spec, cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(appsv1.StatefulSetSpec{}, + "Replicas", "Ordinals", "Template", "UpdateStrategy", + "PersistentVolumeClaimRetentionPolicy", "MinReadySeconds")) + + return changed, immutableChanged +} + // IsStatefulsetChanged func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) bool { // statefulset check diff --git a/pkg/valkey/valkey.go b/pkg/valkey/valkey.go index 9aa1470..6a9c7a9 100644 --- a/pkg/valkey/valkey.go +++ b/pkg/valkey/valkey.go @@ -619,8 +619,7 @@ func (c *valkeyClient) ClusterInfo(ctx context.Context) (*ClusterNodeInfo, error parseInfo := func(data string) *ClusterNodeInfo { info := ClusterNodeInfo{} - lines := strings.Split(data, "\n") - for _, line := range lines { + for line := range strings.SplitSeq(data, "\n") { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue @@ -677,7 +676,7 @@ func (c *valkeyClient) Nodes(ctx context.Context) (ClusterNodes, error) { } var nodes ClusterNodes - for _, line := range strings.Split(strings.TrimSpace(data), "\n") { + for line := range strings.SplitSeq(strings.TrimSpace(data), "\n") { line = strings.TrimSpace(line) if node, err := ParseNodeFromClusterNode(line); err != nil { return nil, err From 5f1fb6d36444cae42b46e9c08cf0cd5113691b88 Mon Sep 17 00:00:00 2001 From: Seer Date: Tue, 16 Sep 2025 18:26:58 +0800 Subject: [PATCH 07/13] fix: clean dumplicate resource settings --- internal/controller/rds/valkey/cluster.go | 13 ------------- internal/controller/rds/valkey/failover.go | 13 ------------- 2 files changed, 26 deletions(-) diff --git a/internal/controller/rds/valkey/cluster.go b/internal/controller/rds/valkey/cluster.go index 61e9f70..e80b8c7 100644 --- a/internal/controller/rds/valkey/cluster.go +++ b/internal/controller/rds/valkey/cluster.go @@ -30,7 +30,6 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/samber/lo" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" v12 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -81,18 +80,6 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err if exporter.Image == "" { exporter.Image = config.GetValkeyExporterImage(nil) } - if exporter.Resources == nil || exporter.Resources.Limits.Cpu().IsZero() || exporter.Resources.Limits.Memory().IsZero() { - exporter.Resources = &corev1.ResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("300Mi"), - }, - Limits: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("300Mi"), - }, - } - } } shardsConf := instance.Spec.Replicas.ShardsConfig diff --git a/internal/controller/rds/valkey/failover.go b/internal/controller/rds/valkey/failover.go index fb35d10..c92f9e6 100644 --- a/internal/controller/rds/valkey/failover.go +++ b/internal/controller/rds/valkey/failover.go @@ -31,7 +31,6 @@ import ( "github.com/go-logr/logr" "github.com/samber/lo" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -66,18 +65,6 @@ func GenerateFailover(instance *rdsv1alpha1.Valkey) 
(*v1alpha1.Failover, error) if exporter.Image == "" { exporter.Image = config.GetValkeyExporterImage(nil) } - if exporter.Resources == nil || exporter.Resources.Limits.Cpu().IsZero() || exporter.Resources.Limits.Memory().IsZero() { - exporter.Resources = &corev1.ResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("50m"), - corev1.ResourceMemory: resource.MustParse("128Mi"), - }, - Limits: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("384Mi"), - }, - } - } } if sentinel != nil { From 71b16cffa09764ac5ce00a363787c58d97cc62e9 Mon Sep 17 00:00:00 2001 From: Seer Date: Tue, 16 Sep 2025 18:38:57 +0800 Subject: [PATCH 08/13] fix: fix bug of config update when do redis version update --- internal/ops/cluster/engine.go | 11 +++++++++++ internal/ops/failover/engine.go | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/internal/ops/cluster/engine.go b/internal/ops/cluster/engine.go index 065bbe9..0b39140 100644 --- a/internal/ops/cluster/engine.go +++ b/internal/ops/cluster/engine.go @@ -535,6 +535,17 @@ func (g *RuleEngine) isCustomServerChanged(ctx context.Context, cluster types.Cl func (g *RuleEngine) isConfigMapChanged(ctx context.Context, cluster types.ClusterInstance) (bool, error) { logger := g.logger.WithName("isConfigMapChanged") + + // check if all pod fullfilled + for _, shard := range cluster.Shards() { + for _, node := range shard.Nodes() { + if node.CurrentVersion() != cluster.Version() { + // postpone the configmap check + return false, nil + } + } + } + newCm, _ := clusterbuilder.NewConfigMapForCR(cluster) oldCm, err := g.client.GetConfigMap(ctx, newCm.Namespace, newCm.Name) if errors.IsNotFound(err) || (oldCm != nil && oldCm.Data[builder.ValkeyConfigKey] == "") { diff --git a/internal/ops/failover/engine.go b/internal/ops/failover/engine.go index a2b17c2..cedc761 100644 --- a/internal/ops/failover/engine.go +++ b/internal/ops/failover/engine.go @@ -189,6 +189,14 @@ func (g *RuleEngine) isPasswordChanged(ctx context.Context, inst types.FailoverI } func (g *RuleEngine) isConfigChanged(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { + // check if all pod fullfilled + for _, node := range inst.Nodes() { + if node.CurrentVersion() != inst.Version() { + logger.V(3).Info("node version not match", "node", node.GetName(), "version", node.CurrentVersion(), "expect", inst.Version()) + return actor.NewResult(CommandEnsureResource) + } + } + newCm, err := failoverbuilder.GenerateConfigMap(inst) if err != nil { return actor.RequeueWithError(err) From 89d9df78accfa1e631db6ebd58c0afb9f94e71ae Mon Sep 17 00:00:00 2001 From: Seer Date: Tue, 16 Sep 2025 18:43:07 +0800 Subject: [PATCH 09/13] fix: added support of force failover to force the sentinel refresh the nodes announce --- .../ops/failover/actor/actor_heal_monitor.go | 10 ++++++++- .../failover/monitor/sentinel_monitor.go | 21 +++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/internal/ops/failover/actor/actor_heal_monitor.go b/internal/ops/failover/actor/actor_heal_monitor.go index de9a5f0..da721a3 100644 --- a/internal/ops/failover/actor/actor_heal_monitor.go +++ b/internal/ops/failover/actor/actor_heal_monitor.go @@ -103,7 +103,15 @@ func (a *actorHealMaster) Do(ctx context.Context, val types.Instance) *actor.Act onlineNodeCount += 1 } } - } else if !errors.Is(err, monitor.ErrNoMaster) && !errors.Is(err, 
monitor.ErrAddressConflict) { + } else if errors.Is(err, monitor.ErrAddressConflict) { + // do failover to force sentinel update node's announce info + if err := instMonitor.Failover(ctx); err != nil { + logger.Error(err, "do manual failover failed") + // continue with master setup + } else { + return actor.RequeueAfter(time.Second * 10) + } + } else if !errors.Is(err, monitor.ErrNoMaster) { logger.Error(err, "failed to get master node") return actor.RequeueWithError(err) } diff --git a/internal/valkey/failover/monitor/sentinel_monitor.go b/internal/valkey/failover/monitor/sentinel_monitor.go index f48b066..c2ed6fe 100644 --- a/internal/valkey/failover/monitor/sentinel_monitor.go +++ b/internal/valkey/failover/monitor/sentinel_monitor.go @@ -152,6 +152,7 @@ func (s *SentinelMonitor) Master(ctx context.Context, flags ...bool) (*vkcli.Sen var ( masterStat []*Stat masterIds = map[string]int{} + idAddrMap = map[string][]string{} registeredNodes int ) for _, node := range s.nodes { @@ -186,6 +187,9 @@ func (s *SentinelMonitor) Master(ctx context.Context, flags ...bool) (*vkcli.Sen } if n.RunId != "" { masterIds[n.RunId] += 1 + if !slices.Contains(idAddrMap[n.RunId], n.Address()) { + idAddrMap[n.RunId] = append(idAddrMap[n.RunId], n.Address()) + } } } if len(masterStat) == 0 { @@ -204,6 +208,23 @@ func (s *SentinelMonitor) Master(ctx context.Context, flags ...bool) (*vkcli.Sen return nil, ErrAddressConflict } } + + for _, node := range s.failover.Nodes() { + if !node.IsReady() { + s.logger.Info("node not ready, ignored", "node", node.GetName()) + continue + } + registeredAddrs := idAddrMap[node.Info().RunId] + addr := net.JoinHostPort(node.DefaultIP().String(), strconv.Itoa(node.Port())) + addr2 := net.JoinHostPort(node.DefaultInternalIP().String(), strconv.Itoa(node.InternalPort())) + // same runid registered with different addr + // TODO: limit service InternalTrafficPolicy to Local + if (len(registeredAddrs) == 1 && registeredAddrs[0] != addr && registeredAddrs[0] != addr2) || + len(registeredAddrs) > 1 { + return nil, ErrAddressConflict + } + } + // masterStat[0].Count == registeredNodes used to check if all nodes are consistent no matter how many sentinel nodes if masterStat[0].Count >= 1+len(s.nodes)/2 || masterStat[0].Count == registeredNodes { return masterStat[0].Node, nil From d98f48f0560108b8fddd538ff8bacb01d0eabac2 Mon Sep 17 00:00:00 2001 From: Seer Date: Fri, 19 Sep 2025 18:42:01 +0800 Subject: [PATCH 10/13] Update helper commands and initialization scripts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Modified cluster, failover, and sentinel command implementations - Updated initialization scripts for different deployment modes - Improved helper functionality across command modules 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- cmd/helper/commands/cluster/access.go | 51 ++++++++++-------------- cmd/helper/commands/cluster/command.go | 16 ++------ cmd/helper/commands/failover/access.go | 48 +++++++++------------- cmd/helper/commands/failover/command.go | 21 ++-------- cmd/helper/commands/failover/shutdown.go | 3 ++ cmd/helper/commands/helper.go | 14 ++----- cmd/helper/commands/sentinel/access.go | 47 +++++++++------------- cmd/helper/commands/sentinel/command.go | 16 ++------ cmd/init_cluster.sh | 8 ++-- cmd/init_failover.sh | 8 ++-- cmd/init_sentinel.sh | 8 ++-- cmd/run_cluster.sh | 3 -- cmd/run_failover.sh | 4 -- cmd/run_sentinel.sh | 3 -- 14 files changed, 90 insertions(+), 160 deletions(-) diff 
--git a/cmd/helper/commands/cluster/access.go b/cmd/helper/commands/cluster/access.go index eb7d288..bf21ba8 100644 --- a/cmd/helper/commands/cluster/access.go +++ b/cmd/helper/commands/cluster/access.go @@ -35,9 +35,21 @@ import ( "k8s.io/client-go/kubernetes" ) -// ExposeNodePort -func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, serviceType corev1.ServiceType, logger logr.Logger) error { - logger.Info("Info", "serviceType", serviceType, "ipfamily", ipfamily) +// Access +func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, logger logr.Logger) error { + podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 20, logger) + if errors.IsNotFound(err) { + if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 20, logger); err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + } else if err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + serviceType := podSvc.Spec.Type + + logger.Info("check pod service type", "ipfamily", ipfamily, "serviceType", serviceType, "podName", podName) pod, err := commands.GetPod(ctx, client, namespace, podName, logger) if err != nil { logger.Error(err, "get pods failed", "namespace", namespace, "name", podName) @@ -54,16 +66,6 @@ func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace announceIPort int32 = 16379 ) if serviceType == corev1.ServiceTypeNodePort { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger); err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - } else if err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } for _, v := range podSvc.Spec.Ports { if v.Name == "client" { announcePort = v.NodePort @@ -121,17 +123,6 @@ func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace return err } } else if serviceType == corev1.ServiceTypeLoadBalancer { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger); err != nil { - logger.Error(err, "retry get lb service failed") - return err - } - } else if err != nil { - logger.Error(err, "get lb service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - for _, v := range podSvc.Status.LoadBalancer.Ingress { if v.IP != "" { ip, err := netip.ParseAddr(v.IP) @@ -165,14 +156,14 @@ func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace } } - format_announceIp := strings.Replace(announceIp, ":", "-", -1) - if strings.HasSuffix(format_announceIp, "-") { - format_announceIp = fmt.Sprintf("%s0", format_announceIp) + fAnnounceIp := strings.ReplaceAll(announceIp, ":", "-") + if strings.HasSuffix(fAnnounceIp, "-") { + fAnnounceIp = fmt.Sprintf("%s0", fAnnounceIp) } labelPatch := 
fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"}]`, - strings.Replace(builder.AnnounceIPLabelKey, "/", "~1", -1), format_announceIp, - strings.Replace(builder.AnnouncePortLabelKey, "/", "~1", -1), strconv.Itoa(int(announcePort)), - strings.Replace(builder.AnnounceIPortLabelKey, "/", "~1", -1), strconv.Itoa(int(announceIPort))) + strings.ReplaceAll(builder.AnnounceIPLabelKey, "/", "~1"), fAnnounceIp, + strings.ReplaceAll(builder.AnnouncePortLabelKey, "/", "~1"), strconv.Itoa(int(announcePort)), + strings.ReplaceAll(builder.AnnounceIPortLabelKey, "/", "~1"), strconv.Itoa(int(announceIPort))) logger.Info(labelPatch) _, err = client.CoreV1().Pods(pod.Namespace).Patch(ctx, podName, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) diff --git a/cmd/helper/commands/cluster/command.go b/cmd/helper/commands/cluster/command.go index 2ea6391..be78d88 100644 --- a/cmd/helper/commands/cluster/command.go +++ b/cmd/helper/commands/cluster/command.go @@ -22,7 +22,6 @@ import ( "github.com/chideat/valkey-operator/cmd/helper/commands" "github.com/urfave/cli/v2" - corev1 "k8s.io/api/core/v1" ) func NewCommand(ctx context.Context) *cli.Command { @@ -99,12 +98,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "IP_FAMILY for expose", EnvVars: []string{"IP_FAMILY_PREFER"}, }, - &cli.StringFlag{ - Name: "service-type", - Usage: "Service type for sentinel service", - EnvVars: []string{"SERVICE_TYPE"}, - Value: "ClusterIP", - }, }, Subcommands: []*cli.Command{ { @@ -113,10 +106,9 @@ func NewCommand(ctx context.Context) *cli.Command { Flags: []cli.Flag{}, Action: func(c *cli.Context) error { var ( - namespace = c.String("namespace") - podName = c.String("pod-name") - ipFamily = c.String("ip-family") - serviceType = corev1.ServiceType(c.String("service-type")) + namespace = c.String("namespace") + podName = c.String("pod-name") + ipFamily = c.String("ip-family") ) if namespace == "" { return cli.Exit("require namespace", 1) @@ -133,7 +125,7 @@ func NewCommand(ctx context.Context) *cli.Command { return cli.Exit(err, 1) } - if err := ExposeNodePort(ctx, client, namespace, podName, ipFamily, serviceType, logger); err != nil { + if err := Access(ctx, client, namespace, podName, ipFamily, logger); err != nil { logger.Error(err, "expose node port failed") return cli.Exit(err, 1) } diff --git a/cmd/helper/commands/failover/access.go b/cmd/helper/commands/failover/access.go index 449071d..0c788b1 100644 --- a/cmd/helper/commands/failover/access.go +++ b/cmd/helper/commands/failover/access.go @@ -34,14 +34,25 @@ import ( "k8s.io/client-go/kubernetes" ) -func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, serviceType corev1.ServiceType, logger logr.Logger) error { - logger.Info("service access", "serviceType", serviceType, "ipfamily", ipfamily, "podName", podName) +func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, logger logr.Logger) error { + podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 20, logger) + if errors.IsNotFound(err) { + if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 20, logger); err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + } else if err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, 
podName)) + return err + } + serviceType := podSvc.Spec.Type + + logger.Info("check pod service type", "ipfamily", ipfamily, "serviceType", serviceType, "podName", podName) pod, err := commands.GetPod(ctx, client, namespace, podName, logger) if err != nil { logger.Error(err, "get pods failed", "namespace", namespace, "name", podName) return err } - if pod.Status.HostIP == "" { return fmt.Errorf("pod not found or pod with invalid hostIP") } @@ -51,16 +62,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam announcePort int32 = 6379 ) if serviceType == corev1.ServiceTypeNodePort { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger); err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - } else if err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } for _, v := range podSvc.Spec.Ports { if v.Name == "client" { announcePort = v.NodePort @@ -115,17 +116,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam return err } } else if serviceType == corev1.ServiceTypeLoadBalancer { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger); err != nil { - logger.Error(err, "retry get lb service failed") - return err - } - } else if err != nil { - logger.Error(err, "get lb service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - for _, v := range podSvc.Status.LoadBalancer.Ingress { if v.IP == "" { continue @@ -161,13 +151,13 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam } } - format_announceIp := strings.Replace(announceIp, ":", "-", -1) - if strings.HasSuffix(format_announceIp, "-") { - format_announceIp = fmt.Sprintf("%s0", format_announceIp) + fAnnounceIp := strings.ReplaceAll(announceIp, ":", "-") + if strings.HasSuffix(fAnnounceIp, "-") { + fAnnounceIp = fmt.Sprintf("%s0", fAnnounceIp) } labelPatch := fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"}]`, - strings.Replace(builder.AnnounceIPLabelKey, "/", "~1", -1), format_announceIp, - strings.Replace(builder.AnnouncePortLabelKey, "/", "~1", -1), strconv.Itoa(int(announcePort))) + strings.ReplaceAll(builder.AnnounceIPLabelKey, "/", "~1"), fAnnounceIp, + strings.ReplaceAll(builder.AnnouncePortLabelKey, "/", "~1"), strconv.Itoa(int(announcePort))) logger.Info(labelPatch) _, err = client.CoreV1().Pods(pod.Namespace).Patch(ctx, podName, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) diff --git a/cmd/helper/commands/failover/command.go b/cmd/helper/commands/failover/command.go index 9c4ceeb..428f6e5 100644 --- a/cmd/helper/commands/failover/command.go +++ b/cmd/helper/commands/failover/command.go @@ -21,7 +21,6 @@ import ( "github.com/chideat/valkey-operator/cmd/helper/commands" "github.com/urfave/cli/v2" - corev1 "k8s.io/api/core/v1" ) func NewCommand(ctx context.Context) *cli.Command { @@ -49,11 +48,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "The id of 
current pod", EnvVars: []string{"POD_UID"}, }, - &cli.StringFlag{ - Name: "service-name", - Usage: "Service name of the statefulset", - EnvVars: []string{"SERVICE_NAME"}, - }, &cli.StringFlag{ Name: "operator-username", Usage: "Operator username", @@ -108,12 +102,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "IP_FAMILY for servie access", EnvVars: []string{"IP_FAMILY_PREFER"}, }, - &cli.StringFlag{ - Name: "service-type", - Usage: "Service type for sentinel service", - EnvVars: []string{"SERVICE_TYPE"}, - Value: "ClusterIP", - }, }, Subcommands: []*cli.Command{ { @@ -122,10 +110,9 @@ func NewCommand(ctx context.Context) *cli.Command { Flags: []cli.Flag{}, Action: func(c *cli.Context) error { var ( - namespace = c.String("namespace") - podName = c.String("pod-name") - ipFamily = c.String("ip-family") - serviceType = corev1.ServiceType(c.String("service-type")) + namespace = c.String("namespace") + podName = c.String("pod-name") + ipFamily = c.String("ip-family") ) if namespace == "" { return cli.Exit("require namespace", 1) @@ -142,7 +129,7 @@ func NewCommand(ctx context.Context) *cli.Command { return cli.Exit(err, 1) } - if err := Access(ctx, client, namespace, podName, ipFamily, serviceType, logger); err != nil { + if err := Access(ctx, client, namespace, podName, ipFamily, logger); err != nil { logger.Error(err, "enable nodeport service access failed") return cli.Exit(err, 1) } diff --git a/cmd/helper/commands/failover/shutdown.go b/cmd/helper/commands/failover/shutdown.go index c4d4564..3229b6a 100644 --- a/cmd/helper/commands/failover/shutdown.go +++ b/cmd/helper/commands/failover/shutdown.go @@ -40,6 +40,9 @@ func loadAnnounceAddress(filepath string, logger logr.Logger) string { } data, err := os.ReadFile(filepath) if err != nil { + if os.IsNotExist(err) { + return "" + } logger.Error(err, "read announce file failed", "path", filepath) return "" } diff --git a/cmd/helper/commands/helper.go b/cmd/helper/commands/helper.go index 3e9e798..6cd9f2c 100644 --- a/cmd/helper/commands/helper.go +++ b/cmd/helper/commands/helper.go @@ -196,10 +196,8 @@ func GetPod(ctx context.Context, client *kubernetes.Clientset, namespace, name s return pod, nil } -func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNamespace, svcName string, typ corev1.ServiceType, - count int, logger logr.Logger) (*corev1.Service, error) { - - serviceChecker := func(svc *corev1.Service, typ corev1.ServiceType) error { +func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNamespace, svcName string, count int, logger logr.Logger) (*corev1.Service, error) { + serviceChecker := func(svc *corev1.Service) error { if svc == nil { return fmt.Errorf("service not found") } @@ -207,10 +205,6 @@ func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNa return fmt.Errorf("service port not found") } - if svc.Spec.Type != typ { - return fmt.Errorf("service type not match") - } - switch svc.Spec.Type { case corev1.ServiceTypeNodePort: for _, port := range svc.Spec.Ports { @@ -233,13 +227,13 @@ func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNa } logger.Info("retry get service", "target", fmt.Sprintf("%s/%s", svcNamespace, svcName), "count", count) - for i := 0; i < count+1; i++ { + for range count + 1 { svc, err := clientset.CoreV1().Services(svcNamespace).Get(ctx, svcName, metav1.GetOptions{}) if err != nil { logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", svcNamespace, svcName)) return nil, err } 
- if serviceChecker(svc, typ) != nil { + if serviceChecker(svc) != nil { logger.Error(err, "service check failed", "target", fmt.Sprintf("%s/%s", svcNamespace, svcName)) } else { return svc, nil diff --git a/cmd/helper/commands/sentinel/access.go b/cmd/helper/commands/sentinel/access.go index 462465c..e61b026 100644 --- a/cmd/helper/commands/sentinel/access.go +++ b/cmd/helper/commands/sentinel/access.go @@ -34,8 +34,20 @@ import ( "k8s.io/client-go/kubernetes" ) -func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, serviceType corev1.ServiceType, logger logr.Logger) error { - logger.Info("service access", "serviceType", serviceType, "ipfamily", ipfamily, "podName", podName) +func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, logger logr.Logger) error { + podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 20, logger) + if errors.IsNotFound(err) { + if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 20, logger); err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + } else if err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + serviceType := podSvc.Spec.Type + + logger.Info("check pod service type", "serviceType", serviceType, "ipfamily", ipfamily, "podName", podName) pod, err := commands.GetPod(ctx, client, namespace, podName, logger) if err != nil { logger.Error(err, "get pods failed", "namespace", namespace, "name", podName) @@ -51,16 +63,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam announcePort int32 = 26379 ) if serviceType == corev1.ServiceTypeNodePort { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger); err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - } else if err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } for _, v := range podSvc.Spec.Ports { if v.Name == "sentinel" { announcePort = v.NodePort @@ -115,17 +117,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam return err } } else if serviceType == corev1.ServiceTypeLoadBalancer { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger); err != nil { - logger.Error(err, "retry get lb service failed") - return err - } - } else if err != nil { - logger.Error(err, "get lb service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - for _, v := range podSvc.Status.LoadBalancer.Ingress { if v.IP == "" { continue @@ -161,13 +152,13 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam } } - format_announceIp := strings.Replace(announceIp, ":", "-", -1) - if strings.HasSuffix(format_announceIp, "-") { - format_announceIp = fmt.Sprintf("%s0", format_announceIp) + fAnnounceIp := strings.ReplaceAll(announceIp, ":", "-") + if strings.HasSuffix(fAnnounceIp, "-") { + 
fAnnounceIp = fmt.Sprintf("%s0", fAnnounceIp) } labelPatch := fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"}]`, - strings.Replace(builder.AnnounceIPLabelKey, "/", "~1", -1), format_announceIp, - strings.Replace(builder.AnnouncePortLabelKey, "/", "~1", -1), strconv.Itoa(int(announcePort))) + strings.ReplaceAll(builder.AnnounceIPLabelKey, "/", "~1"), fAnnounceIp, + strings.ReplaceAll(builder.AnnouncePortLabelKey, "/", "~1"), strconv.Itoa(int(announcePort))) logger.Info(labelPatch) _, err = client.CoreV1().Pods(pod.Namespace).Patch(ctx, podName, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) diff --git a/cmd/helper/commands/sentinel/command.go b/cmd/helper/commands/sentinel/command.go index 9dd436d..cd55e8a 100644 --- a/cmd/helper/commands/sentinel/command.go +++ b/cmd/helper/commands/sentinel/command.go @@ -28,7 +28,6 @@ import ( "github.com/chideat/valkey-operator/cmd/helper/commands/runner" "github.com/chideat/valkey-operator/pkg/valkey" "github.com/urfave/cli/v2" - corev1 "k8s.io/api/core/v1" ) func NewCommand(ctx context.Context) *cli.Command { @@ -94,12 +93,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "IP_FAMILY for servie access", EnvVars: []string{"IP_FAMILY_PREFER"}, }, - &cli.StringFlag{ - Name: "service-type", - Usage: "Service type for sentinel service", - EnvVars: []string{"SERVICE_TYPE"}, - Value: "ClusterIP", - }, }, Subcommands: []*cli.Command{ { @@ -108,10 +101,9 @@ func NewCommand(ctx context.Context) *cli.Command { Flags: []cli.Flag{}, Action: func(c *cli.Context) error { var ( - namespace = c.String("namespace") - podName = c.String("pod-name") - ipFamily = c.String("ip-family") - serviceType = corev1.ServiceType(c.String("service-type")) + namespace = c.String("namespace") + podName = c.String("pod-name") + ipFamily = c.String("ip-family") ) if namespace == "" { return cli.Exit("require namespace", 1) @@ -128,7 +120,7 @@ func NewCommand(ctx context.Context) *cli.Command { return cli.Exit(err, 1) } - if err := Access(ctx, client, namespace, podName, ipFamily, serviceType, logger); err != nil { + if err := Access(ctx, client, namespace, podName, ipFamily, logger); err != nil { logger.Error(err, "enable nodeport service access failed") return cli.Exit(err, 1) } diff --git a/cmd/init_cluster.sh b/cmd/init_cluster.sh index a82f0b3..86c4b14 100755 --- a/cmd/init_cluster.sh +++ b/cmd/init_cluster.sh @@ -3,10 +3,10 @@ chmod -f 644 /data/*.rdb /data/*.aof /data/*.conf 2>/dev/null || true chown -f 999:1000 /data/*.rdb /data/*.aof /data/*.conf 2>/dev/null || true -if [ "$SERVICE_TYPE" = "LoadBalancer" ] || [ "$SERVICE_TYPE" = "NodePort" ] || [ -n "$IP_FAMILY_PREFER" ] ; then - echo "check pod binded service" - /opt/valkey-helper cluster expose || exit 1 -fi +rm -rf /data/announce.conf + +echo "check pod binded service" +/opt/valkey-helper cluster expose || exit 1 # copy binaries cp /opt/*.sh /opt/valkey-helper /mnt/opt/ && chmod 555 /mnt/opt/*.sh /mnt/opt/valkey-helper diff --git a/cmd/init_failover.sh b/cmd/init_failover.sh index 2ef5dc8..a375aed 100755 --- a/cmd/init_failover.sh +++ b/cmd/init_failover.sh @@ -3,10 +3,10 @@ chmod -f 644 /data/*.rdb /data/*.aof 2>/dev/null || true chown -f 999:1000 /data/*.rdb /data/*.aof 2>/dev/null || true -if [ "${SERVICE_TYPE}" = "LoadBalancer" ] || [ "${SERVICE_TYPE}" = "NodePort" ] || [ -n "${IP_FAMILY_PREFER}" ] ; then - echo "check pod binded service" - /opt/valkey-helper failover expose || exit 1 -fi +rm -rf /data/announce.conf + 
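# Illustrative sketch, not part of the patch series: the announce lifecycle the
# init and run scripts follow after this change, assuming the expose helper
# regenerates /data/announce.conf for NodePort/LoadBalancer pods as the run
# scripts expect. The SERVICE_TYPE guards are gone; whether the announce file
# exists is the only remaining signal.
ANNOUNCE_CONFIG_FILE=/data/announce.conf      # path used by the real scripts
rm -rf "${ANNOUNCE_CONFIG_FILE}"              # init: always drop stale announce info
/opt/valkey-helper failover expose || exit 1  # helper rewrites it when the Service needs one
if [ -f "${ANNOUNCE_CONFIG_FILE}" ]; then     # run: append only if the helper produced one
    cat "${ANNOUNCE_CONFIG_FILE}" >> "${VALKEY_CONFIG_FILE}"
fi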
+echo "check pod binded service" +/opt/valkey-helper failover expose || exit 1 # copy binaries cp /opt/*.sh /opt/valkey-helper /mnt/opt/ && chmod 555 /mnt/opt/*.sh /mnt/opt/valkey-helper diff --git a/cmd/init_sentinel.sh b/cmd/init_sentinel.sh index 0b3c010..6e33f79 100755 --- a/cmd/init_sentinel.sh +++ b/cmd/init_sentinel.sh @@ -1,9 +1,9 @@ #!/bin/sh -if [ "$SERVICE_TYPE" = "LoadBalancer" ] || [ "$SERVICE_TYPE" = "NodePort" ] || [ -n "$IP_FAMILY_PREFER" ] ; then - echo "check pod binded service" - /opt/valkey-helper sentinel expose || exit 1 -fi +rm -rf /data/announce.conf + +echo "check pod binded service" +/opt/valkey-helper sentinel expose || exit 1 # copy binaries cp /opt/*.sh /opt/valkey-helper /mnt/opt/ && chmod 555 /mnt/opt/*.sh /mnt/opt/valkey-helper diff --git a/cmd/run_cluster.sh b/cmd/run_cluster.sh index 70407be..e93ca90 100755 --- a/cmd/run_cluster.sh +++ b/cmd/run_cluster.sh @@ -43,9 +43,6 @@ if [ -n "${password}" ]; then fi # Handle announcement configuration -if [ -z "${SERVICE_TYPE}" ] || [ "${SERVICE_TYPE}" = "ClusterIP" ]; then - rm -f "${ANNOUNCE_CONFIG_FILE}" -fi if [ -f "${ANNOUNCE_CONFIG_FILE}" ]; then cat "${ANNOUNCE_CONFIG_FILE}" >> "${VALKEY_CONFIG_FILE}" echo "" >> "${VALKEY_CONFIG_FILE}" diff --git a/cmd/run_failover.sh b/cmd/run_failover.sh index a25c2aa..5222733 100755 --- a/cmd/run_failover.sh +++ b/cmd/run_failover.sh @@ -23,10 +23,6 @@ if [ "$MONITOR_POLICY" = "sentinel" ]; then ANNOUNCE_IP="" ANNOUNCE_PORT="" - if [ -z "${SERVICE_TYPE}" ] || [ "${SERVICE_TYPE}" = "ClusterIP" ]; then - rm -f ${ANNOUNCE_CONFIG_FILE} - fi - if [ -f "$ANNOUNCE_CONFIG_FILE" ]; then echo "" >> "$VALKEY_CONFIG_FILE" cat "$ANNOUNCE_CONFIG_FILE" >> "$VALKEY_CONFIG_FILE" diff --git a/cmd/run_sentinel.sh b/cmd/run_sentinel.sh index 6365058..09bebf1 100755 --- a/cmd/run_sentinel.sh +++ b/cmd/run_sentinel.sh @@ -15,9 +15,6 @@ if [ -n "${password}" ]; then fi # Append announce configuration to sentinel configuration if it exists -if [ -z "${SERVICE_TYPE}" ] || [ "${SERVICE_TYPE}" = "ClusterIP" ]; then - rm -f ${ANNOUNCE_CONFIG_FILE} -fi if [ -f ${ANNOUNCE_CONFIG_FILE} ]; then echo "# append announce conf to sentinel config" cat "${ANNOUNCE_CONFIG_FILE}" | grep "announce" | sed "s/^/sentinel /" >> ${VALKEY_SENTINEL_CONFIG_FILE} From 62df66f5f64642945132fec3baa6577011d0594e Mon Sep 17 00:00:00 2001 From: Seer Date: Fri, 19 Sep 2025 22:08:56 +0800 Subject: [PATCH 11/13] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Signed-off-by: Seer --- internal/util/kubernetes.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/util/kubernetes.go b/internal/util/kubernetes.go index a0ea59a..e6c3c23 100644 --- a/internal/util/kubernetes.go +++ b/internal/util/kubernetes.go @@ -155,7 +155,7 @@ func IsStatefulsetChanged2(newSts, sts *appsv1.StatefulSet, logger logr.Logger) return false, false } - immutableChanged := cmp.Equal(newSts.Spec, sts.Spec, cmpopts.EquateEmpty(), + immutableChanged := !cmp.Equal(newSts.Spec, sts.Spec, cmpopts.EquateEmpty(), cmpopts.IgnoreFields(appsv1.StatefulSetSpec{}, "Replicas", "Ordinals", "Template", "UpdateStrategy", "PersistentVolumeClaimRetentionPolicy", "MinReadySeconds")) From c12bc251f4fd8d3ffa4d5fe7eefc9fa7cb512336 Mon Sep 17 00:00:00 2001 From: Seer Date: Fri, 19 Sep 2025 22:22:27 +0800 Subject: [PATCH 12/13] chore: update release pipeline and dependencies --- .github/workflows/release-pipeline.yaml | 2 +- go.mod | 49 +++++++------ go.sum | 95 ++++++++++++++----------- 3 files changed, 79 
insertions(+), 67 deletions(-)

diff --git a/.github/workflows/release-pipeline.yaml b/.github/workflows/release-pipeline.yaml
index adfb4f2..040ace8 100644
--- a/.github/workflows/release-pipeline.yaml
+++ b/.github/workflows/release-pipeline.yaml
@@ -58,6 +58,6 @@ jobs:
     needs: build
     steps:
       - name: Create Release
-        uses: softprops/action-gh-release@v1
+        uses: softprops/action-gh-release@v2
         with:
           generate_release_notes: true
diff --git a/go.mod b/go.mod
index 876255e..20fd988 100644
--- a/go.mod
+++ b/go.mod
@@ -3,26 +3,27 @@ module github.com/chideat/valkey-operator
 go 1.24.0
 
 require (
-	github.com/Masterminds/semver/v3 v3.3.1
+	github.com/Masterminds/semver/v3 v3.4.0
 	github.com/alicebob/miniredis/v2 v2.35.0
 	github.com/cert-manager/cert-manager v1.18.2
 	github.com/fsnotify/fsnotify v1.9.0
-	github.com/go-logr/logr v1.4.2
+	github.com/go-logr/logr v1.4.3
 	github.com/gomodule/redigo v1.9.2
-	github.com/onsi/ginkgo/v2 v2.23.3
-	github.com/onsi/gomega v1.36.3
-	github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0
+	github.com/google/go-cmp v0.7.0
+	github.com/onsi/ginkgo/v2 v2.25.1
+	github.com/onsi/gomega v1.38.2
+	github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.85.0
 	github.com/rafaeljusto/redigomock/v3 v3.1.2
 	github.com/samber/lo v1.51.0
 	github.com/stretchr/testify v1.10.0
 	github.com/urfave/cli/v2 v2.27.7
-	github.com/valkey-io/valkey-go v1.0.60
+	github.com/valkey-io/valkey-go v1.0.64
 	go.uber.org/zap v1.27.0
 	gotest.tools/v3 v3.5.2
-	k8s.io/api v0.33.1
-	k8s.io/apimachinery v0.33.1
-	k8s.io/client-go v0.33.1
-	k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979
+	k8s.io/api v0.33.3
+	k8s.io/apimachinery v0.33.3
+	k8s.io/client-go v0.33.3
+	k8s.io/utils v0.0.0-20250604170112-4c0f3b243397
 	sigs.k8s.io/controller-runtime v0.21.0
 )
 
@@ -49,8 +50,7 @@ require (
 	github.com/google/btree v1.1.3 // indirect
 	github.com/google/cel-go v0.23.2 // indirect
 	github.com/google/gnostic-models v0.6.9 // indirect
-	github.com/google/go-cmp v0.7.0 // indirect
-	github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
+	github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect
 	github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect
@@ -86,27 +86,30 @@ require (
 	go.opentelemetry.io/otel/sdk v1.33.0 // indirect
 	go.opentelemetry.io/otel/trace v1.33.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.4.0 // indirect
+	go.uber.org/automaxprocs v1.6.0 // indirect
 	go.uber.org/multierr v1.11.0 // indirect
+	go.yaml.in/yaml/v2 v2.4.2 // indirect
+	go.yaml.in/yaml/v3 v3.0.4 // indirect
 	golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect
-	golang.org/x/net v0.42.0 // indirect
+	golang.org/x/net v0.44.0 // indirect
 	golang.org/x/oauth2 v0.28.0 // indirect
-	golang.org/x/sync v0.16.0 // indirect
-	golang.org/x/sys v0.35.0 // indirect
-	golang.org/x/term v0.33.0 // indirect
-	golang.org/x/text v0.28.0 // indirect
+	golang.org/x/sync v0.17.0 // indirect
+	golang.org/x/sys v0.36.0 // indirect
+	golang.org/x/term v0.35.0 // indirect
+	golang.org/x/text v0.29.0 // indirect
 	golang.org/x/time v0.11.0 // indirect
-	golang.org/x/tools v0.35.0 // indirect
+	golang.org/x/tools v0.36.0 // indirect
 	gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20241219192143-6b3ec007d9bb // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb // indirect
 	google.golang.org/grpc v1.69.2 // indirect
-	google.golang.org/protobuf v1.36.6 // indirect
+	google.golang.org/protobuf v1.36.7 // indirect
 	gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
-	k8s.io/apiextensions-apiserver v0.33.1 // indirect
-	k8s.io/apiserver v0.33.1 // indirect
-	k8s.io/component-base v0.33.1 // indirect
+	k8s.io/apiextensions-apiserver v0.33.3 // indirect
+	k8s.io/apiserver v0.33.3 // indirect
+	k8s.io/component-base v0.33.3 // indirect
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
 	sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect
@@ -114,5 +117,5 @@ require (
 	sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
 	sigs.k8s.io/randfill v1.0.0 // indirect
 	sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect
-	sigs.k8s.io/yaml v1.4.0 // indirect
+	sigs.k8s.io/yaml v1.5.0 // indirect
 )
diff --git a/go.sum b/go.sum
index ff8d915..fe19c2c 100644
--- a/go.sum
+++ b/go.sum
@@ -1,7 +1,7 @@
 cel.dev/expr v0.19.1 h1:NciYrtDRIR0lNCnH1LFJegdjspNx9fI59O7TWcua/W4=
 cel.dev/expr v0.19.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw=
-github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4=
-github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
+github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0=
+github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM=
 github.com/alicebob/miniredis/v2 v2.35.0 h1:QwLphYqCEAo1eu1TqPRN2jgVMPBweeQcR21jeqDCONI=
 github.com/alicebob/miniredis/v2 v2.35.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM=
 github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ=
@@ -38,8 +38,8 @@ github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8
 github.com/fxamacker/cbor/v2 v2.8.0 h1:fFtUGXUzXPHTIUdne5+zzMPTfffl3RD5qYnkY40vtxU=
 github.com/fxamacker/cbor/v2 v2.8.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
-github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
-github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
 github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
 github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
 github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
@@ -71,8 +71,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX
 github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
 github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0=
 github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
-github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg=
-github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8=
+github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA=
 github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
 github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
 github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo=
@@ -108,17 +108,19 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq
 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
 github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
-github.com/onsi/ginkgo/v2 v2.23.3 h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0=
-github.com/onsi/ginkgo/v2 v2.23.3/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM=
-github.com/onsi/gomega v1.36.3 h1:hID7cr8t3Wp26+cYnfcjR6HpJ00fdogN6dqZ1t6IylU=
-github.com/onsi/gomega v1.36.3/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0=
+github.com/onsi/ginkgo/v2 v2.25.1 h1:Fwp6crTREKM+oA6Cz4MsO8RhKQzs2/gOIVOUscMAfZY=
+github.com/onsi/ginkgo/v2 v2.25.1/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk=
+github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A=
+github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k=
 github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
 github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
 github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
 github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
-github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0 h1:j9Ce3W6X6Tzi0QnSap+YzGwpqJLJGP/7xV6P9f86jjM=
-github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0/go.mod h1:sSxwdmprUfmRfTknPc4KIjUd2ZIc/kirw4UdXNhOauM=
+github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g=
+github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U=
+github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.85.0 h1:oY+F5FZFmCjCyzkHWPjVQpzvnvEB/0FP+iyzDUUlqFc=
+github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.85.0/go.mod h1:VB7wtBmDT6W2RJHzsvPZlBId+EnmeQA0d33fFTXvraM=
 github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
 github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
 github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
@@ -156,8 +158,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf
 github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
 github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
 github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
-github.com/valkey-io/valkey-go v1.0.60 h1:idh959D20H5n7D/kwEdTKNaMn5+4HpZTn7bLXnAhQIw=
-github.com/valkey-io/valkey-go v1.0.60/go.mod h1:bHmwjIEOrGq/ubOJfh5uMRs7Xj6mV3mQ/ZXUbmqpjqY=
+github.com/valkey-io/valkey-go v1.0.64 h1:3u4+b6D6zs9JQs254TLy4LqitCMHHr9XorP9GGk7XY4=
+github.com/valkey-io/valkey-go v1.0.64/go.mod h1:bHmwjIEOrGq/ubOJfh5uMRs7Xj6mV3mQ/ZXUbmqpjqY=
 github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
 github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
 github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg=
@@ -186,12 +188,18 @@ go.opentelemetry.io/otel/trace v1.33.0 h1:cCJuF7LRjUFso9LPnEAHJDB2pqzp+hbO8eu1qq
 go.opentelemetry.io/otel/trace v1.33.0/go.mod h1:uIcdVUZMpTAmz0tI1z04GoVSezK37CbGV4fr1f2nBck=
 go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg=
 go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY=
+go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs=
+go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8=
 go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
 go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
 go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
 go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8=
 go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
+go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
+go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
+go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc=
+go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
 golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
@@ -203,34 +211,34 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
-golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs=
-golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8=
+golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I=
+golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
 golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc=
 golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8=
 golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
-golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
+golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
+golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
 golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
-golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
-golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
-golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg=
-golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0=
+golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
+golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=
+golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
-golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
-golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
+golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
+golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
 golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0=
 golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
 golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
 golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
 golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
-golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0=
-golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw=
+golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
+golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
@@ -243,8 +251,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb h1:
 google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb/go.mod h1:lcTa1sDdWEIHMWlITnIczmw5w60CF9ffkb8Z+DVmmjA=
 google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU=
 google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4=
-google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
-google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
+google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A=
+google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
 gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
@@ -257,24 +265,24 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
 gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q=
 gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA=
-k8s.io/api v0.33.1 h1:tA6Cf3bHnLIrUK4IqEgb2v++/GYUtqiu9sRVk3iBXyw=
-k8s.io/api v0.33.1/go.mod h1:87esjTn9DRSRTD4fWMXamiXxJhpOIREjWOSjsW1kEHw=
-k8s.io/apiextensions-apiserver v0.33.1 h1:N7ccbSlRN6I2QBcXevB73PixX2dQNIW0ZRuguEE91zI=
-k8s.io/apiextensions-apiserver v0.33.1/go.mod h1:uNQ52z1A1Gu75QSa+pFK5bcXc4hq7lpOXbweZgi4dqA=
-k8s.io/apimachinery v0.33.1 h1:mzqXWV8tW9Rw4VeW9rEkqvnxj59k1ezDUl20tFK/oM4=
-k8s.io/apimachinery v0.33.1/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
-k8s.io/apiserver v0.33.1 h1:yLgLUPDVC6tHbNcw5uE9mo1T6ELhJj7B0geifra3Qdo=
-k8s.io/apiserver v0.33.1/go.mod h1:VMbE4ArWYLO01omz+k8hFjAdYfc3GVAYPrhP2tTKccs=
-k8s.io/client-go v0.33.1 h1:ZZV/Ks2g92cyxWkRRnfUDsnhNn28eFpt26aGc8KbXF4=
-k8s.io/client-go v0.33.1/go.mod h1:JAsUrl1ArO7uRVFWfcj6kOomSlCv+JpvIsp6usAGefA=
-k8s.io/component-base v0.33.1 h1:EoJ0xA+wr77T+G8p6T3l4efT2oNwbqBVKR71E0tBIaI=
-k8s.io/component-base v0.33.1/go.mod h1:guT/w/6piyPfTgq7gfvgetyXMIh10zuXA6cRRm3rDuY=
+k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8=
+k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE=
+k8s.io/apiextensions-apiserver v0.33.3 h1:qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs=
+k8s.io/apiextensions-apiserver v0.33.3/go.mod h1:oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8=
+k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA=
+k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM=
+k8s.io/apiserver v0.33.3 h1:Wv0hGc+QFdMJB4ZSiHrCgN3zL3QRatu56+rpccKC3J4=
+k8s.io/apiserver v0.33.3/go.mod h1:05632ifFEe6TxwjdAIrwINHWE2hLwyADFk5mBsQa15E=
+k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA=
+k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg=
+k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA=
+k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4=
 k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
 k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
 k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4=
 k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8=
-k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg=
-k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y=
+k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
 sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM=
 sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
 sigs.k8s.io/controller-runtime v0.21.0 h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8=
@@ -288,5 +296,6 @@ sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU=
 sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY=
 sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI=
 sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps=
-sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
 sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY=
+sigs.k8s.io/yaml v1.5.0 h1:M10b2U7aEUY6hRtU870n2VTPgR5RZiL/I6Lcc2F4NUQ=
+sigs.k8s.io/yaml v1.5.0/go.mod h1:wZs27Rbxoai4C0f8/9urLZtZtF3avA3gKvGyPdDqTO4=

From 07aa81924767f839e02aeecf77c0d5a41bd8d137 Mon Sep 17 00:00:00 2001
From: Seer
Date: Thu, 11 Dec 2025 18:26:58 +0800
Subject: [PATCH 13/13] fake: add unit tests for sentinel statefulset builder
 and service client

---
 .../sentinelbuilder/statefulset_test.go  | 176 ++++++++++++++++++
 pkg/kubernetes/clientset/service_test.go | 124 ++++++++++++
 2 files changed, 300 insertions(+)
 create mode 100644 internal/builder/sentinelbuilder/statefulset_test.go
 create mode 100644 pkg/kubernetes/clientset/service_test.go

diff --git a/internal/builder/sentinelbuilder/statefulset_test.go b/internal/builder/sentinelbuilder/statefulset_test.go
new file mode 100644
index 0000000..c5d68a9
--- /dev/null
+++ b/internal/builder/sentinelbuilder/statefulset_test.go
@@ -0,0 +1,176 @@
+/*
+Copyright 2024 chideat.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package sentinelbuilder
+
+import (
+	"context"
+	"crypto/tls"
+	"testing"
+
+	certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
+	"github.com/chideat/valkey-operator/api/core"
+	"github.com/chideat/valkey-operator/api/v1alpha1"
+	"github.com/chideat/valkey-operator/internal/builder"
+	"github.com/chideat/valkey-operator/pkg/types"
+	"github.com/chideat/valkey-operator/pkg/version"
+	"github.com/go-logr/logr"
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/runtime/schema"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+type mockSentinelInstance struct {
+	*v1alpha1.Sentinel
+}
+
+func (m *mockSentinelInstance) Definition() *v1alpha1.Sentinel {
+	return m.Sentinel
+}
+
+// Implement types.Object interface
+func (m *mockSentinelInstance) GetObjectKind() schema.ObjectKind { return m.Sentinel.GetObjectKind() }
+func (m *mockSentinelInstance) DeepCopyObject() runtime.Object { return m.Sentinel.DeepCopy() }
+func (m *mockSentinelInstance) NamespacedName() client.ObjectKey {
+	return client.ObjectKeyFromObject(m.Sentinel)
+}
+func (m *mockSentinelInstance) Version() version.ValkeyVersion { return version.ValkeyVersion("7.2") }
+func (m *mockSentinelInstance) IsReady() bool { return true }
+func (m *mockSentinelInstance) Restart(ctx context.Context, annotationKeyVal ...string) error {
+	return nil
+}
+func (m *mockSentinelInstance) Refresh(ctx context.Context) error { return nil }
+
+// Implement types.Instance interface
+func (m *mockSentinelInstance) Arch() core.Arch { return core.ValkeySentinel }
+func (m *mockSentinelInstance) Issuer() *certmetav1.ObjectReference { return nil }
+func (m *mockSentinelInstance) Users() types.Users { return nil }
+func (m *mockSentinelInstance) TLSConfig() *tls.Config { return nil }
+func (m *mockSentinelInstance) IsInService() bool { return true }
+func (m *mockSentinelInstance) IsACLUserExists() bool { return false }
+func (m *mockSentinelInstance) IsACLAppliedToAll() bool { return false }
+func (m *mockSentinelInstance) IsResourceFullfilled(ctx context.Context) (bool, error) {
+	return true, nil
+}
+func (m *mockSentinelInstance) UpdateStatus(ctx context.Context, st types.InstanceStatus, message string) error {
+	return nil
+}
+func (m *mockSentinelInstance) SendEventf(eventtype, reason, messageFmt string, args ...any) {}
+func (m *mockSentinelInstance) Logger() logr.Logger { return logr.Discard() }
+
+// Implement types.SentinelInstance interface
+func (m *mockSentinelInstance) Replication() types.SentinelReplication { return nil }
+func (m *mockSentinelInstance) Nodes() []types.SentinelNode { return nil }
+func (m *mockSentinelInstance) RawNodes(ctx context.Context) ([]corev1.Pod, error) { return nil, nil }
+func (m *mockSentinelInstance) Clusters(ctx context.Context) ([]string, error) { return nil, nil }
+func (m *mockSentinelInstance) GetPassword() (string, error) { return "", nil }
+func (m *mockSentinelInstance) Selector() map[string]string { return nil }
+
+func TestGenerateSentinelStatefulset(t *testing.T) {
+	sentinel := &v1alpha1.Sentinel{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-sentinel",
+			Namespace: "default",
+			UID:       "test-uid",
+		},
+		Spec: v1alpha1.SentinelSpec{
+			Image:    "valkey/valkey:7.2",
+			Replicas: 3,
+			Access: v1alpha1.SentinelInstanceAccess{
+				InstanceAccess: core.InstanceAccess{
+					IPFamilyPrefer: corev1.IPv4Protocol,
+					EnableTLS:      true,
+				},
+				DefaultPasswordSecret: "test-secret",
+			},
+			SecurityContext: &corev1.PodSecurityContext{
+				RunAsUser: func(i int64) *int64 { return &i }(1000),
+			},
+		},
+	}
+
+	inst := &mockSentinelInstance{Sentinel: sentinel}
+	ss, err := GenerateSentinelStatefulset(inst)
+	assert.NoError(t, err)
+	assert.NotNil(t, ss)
+
+	// Verify metadata
+	assert.Equal(t, "rfs-test-sentinel", ss.Name)
+	assert.Equal(t, "default", ss.Namespace)
+
+	// Verify replicas
+	assert.Equal(t, int32(3), *ss.Spec.Replicas)
+
+	// Verify containers
+	assert.Len(t, ss.Spec.Template.Spec.InitContainers, 1)
+	assert.Len(t, ss.Spec.Template.Spec.Containers, 2)
+
+	// Verify init container
+	initCont := ss.Spec.Template.Spec.InitContainers[0]
+	assert.Equal(t, "init", initCont.Name)
+
+	// Verify server container
+	serverCont := ss.Spec.Template.Spec.Containers[0]
+	assert.Equal(t, SentinelContainerName, serverCont.Name)
+	assert.Equal(t, "valkey/valkey:7.2", serverCont.Image)
+
+	// Check env vars exist
+	envMap := make(map[string]string)
+	for _, env := range serverCont.Env {
+		envMap[env.Name] = env.Value
+	}
+	assert.Equal(t, "test-secret", envMap[builder.OperatorSecretName])
+	assert.Equal(t, "true", envMap["TLS_ENABLED"])
+
+	// Verify agent container
+	agentCont := ss.Spec.Template.Spec.Containers[1]
+	assert.Equal(t, "agent", agentCont.Name)
+
+	// Verify volumes
+	// 3 base volumes + 1 TLS + 1 Auth = 5
+	assert.Len(t, ss.Spec.Template.Spec.Volumes, 5)
+}
+
+func TestGenerateSentinelStatefulset_NoTLS(t *testing.T) {
+	sentinel := &v1alpha1.Sentinel{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "test-sentinel-no-tls",
+			Namespace: "default",
+		},
+		Spec: v1alpha1.SentinelSpec{
+			Image:    "valkey/valkey:7.2",
+			Replicas: 1,
+			Access: v1alpha1.SentinelInstanceAccess{
+				InstanceAccess: core.InstanceAccess{
+					IPFamilyPrefer: corev1.IPv4Protocol,
+				},
+				// No password secret
+			},
+		},
+	}
+
+	inst := &mockSentinelInstance{Sentinel: sentinel}
+	ss, err := GenerateSentinelStatefulset(inst)
+	assert.NoError(t, err)
+	assert.NotNil(t, ss)
+
+	// Verify volumes
+	// 3 base volumes (Config, Data, Opt)
+	assert.Len(t, ss.Spec.Template.Spec.Volumes, 3)
+}
diff --git a/pkg/kubernetes/clientset/service_test.go b/pkg/kubernetes/clientset/service_test.go
new file mode 100644
index 0000000..a8ba389
--- /dev/null
+++ b/pkg/kubernetes/clientset/service_test.go
@@ -0,0 +1,124 @@
+/*
+Copyright 2024 chideat.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package clientset
+
+import (
+	"context"
+	"testing"
+
+	"github.com/go-logr/logr"
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+func TestServiceOption_CRUD(t *testing.T) {
+	client := fake.NewClientBuilder().Build()
+	logger := logr.Discard()
+	svcClient := NewService(client, logger)
+	ctx := context.Background()
+	ns := "default"
+	name := "test-service"
+
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: ns,
+			Labels: map[string]string{
+				"app": "test",
+			},
+		},
+		Spec: corev1.ServiceSpec{
+			Ports: []corev1.ServicePort{
+				{Port: 80},
+			},
+		},
+	}
+
+	// Test Create
+	err := svcClient.CreateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// Test Get
+	found, err := svcClient.GetService(ctx, ns, name)
+	assert.NoError(t, err)
+	assert.Equal(t, name, found.Name)
+
+	// Test CreateIfNotExists (exists)
+	err = svcClient.CreateIfNotExistsService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// Test Update
+	svc.Labels["new-label"] = "new-value"
+	err = svcClient.UpdateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	found, err = svcClient.GetService(ctx, ns, name)
+	assert.NoError(t, err)
+	assert.Equal(t, "new-value", found.Labels["new-label"])
+
+	// Test CreateOrUpdateService
+	svc.Annotations = map[string]string{"anno": "val"}
+	err = svcClient.CreateOrUpdateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	found, err = svcClient.GetService(ctx, ns, name)
+	assert.NoError(t, err)
+	assert.Equal(t, "val", found.Annotations["anno"])
+
+	// Test Delete
+	err = svcClient.DeleteService(ctx, ns, name)
+	assert.NoError(t, err)
+
+	_, err = svcClient.GetService(ctx, ns, name)
+	assert.Error(t, err)
+}
+
+func TestServiceOption_UpdateIfSelectorChangedService(t *testing.T) {
+	client := fake.NewClientBuilder().Build()
+	logger := logr.Discard()
+	svcClient := NewService(client, logger)
+	ctx := context.Background()
+	ns := "default"
+
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "selector-service",
+			Namespace: ns,
+		},
+		Spec: corev1.ServiceSpec{
+			Selector: map[string]string{"app": "v1"},
+		},
+	}
+
+	err := svcClient.CreateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// No change
+	err = svcClient.UpdateIfSelectorChangedService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// Change selector
+	svc.Spec.Selector = map[string]string{"app": "v2"}
+	err = svcClient.UpdateIfSelectorChangedService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	found, err := svcClient.GetService(ctx, ns, "selector-service")
+	assert.NoError(t, err)
+	assert.Equal(t, "v2", found.Spec.Selector["app"])
+}