diff --git a/.github/workflows/release-pipeline.yaml b/.github/workflows/release-pipeline.yaml index adfb4f2..040ace8 100644 --- a/.github/workflows/release-pipeline.yaml +++ b/.github/workflows/release-pipeline.yaml @@ -58,6 +58,6 @@ jobs: needs: build steps: - name: Create Release - uses: softprops/action-gh-release@v1 + uses: softprops/action-gh-release@v2 with: generate_release_notes: true diff --git a/api/rds/v1alpha1/valkey_types.go b/api/rds/v1alpha1/valkey_types.go index ee64bed..4dc9206 100644 --- a/api/rds/v1alpha1/valkey_types.go +++ b/api/rds/v1alpha1/valkey_types.go @@ -72,7 +72,10 @@ type ValkeySpec struct { // for detailed settings, please refer to https://github.com/valkey-io/valkey/blob/unstable/valkey.conf CustomConfigs map[string]string `json:"customConfigs,omitempty"` - // Modules defines the module settings for Valkey + // Modules defines a list of modules to be loaded into the valkey instance. + // Each module is specified by its name and version. + // Modules are loaded at startup and can extend Redis functionality. + // +optional Modules []core.ValkeyModule `json:"modules,omitempty"` // Storage defines the storage settings for Valkey diff --git a/api/v1alpha1/cluster_types.go b/api/v1alpha1/cluster_types.go index 15f1b25..0a99f68 100644 --- a/api/v1alpha1/cluster_types.go +++ b/api/v1alpha1/cluster_types.go @@ -22,6 +22,10 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + ClusterResourceCleanFinalizer = "buf.red/cluster-resource-clean" +) + type ShardConfig struct { // Slots is the slot range for the shard, eg: 0-1000,1002,1005-1100 //+kubebuilder:validation:Pattern:=`^(\d{1,5}|(\d{1,5}-\d{1,5}))(,(\d{1,5}|(\d{1,5}-\d{1,5})))*$` diff --git a/cmd/helper/commands/cluster/access.go b/cmd/helper/commands/cluster/access.go index eb7d288..bf21ba8 100644 --- a/cmd/helper/commands/cluster/access.go +++ b/cmd/helper/commands/cluster/access.go @@ -35,9 +35,21 @@ import ( "k8s.io/client-go/kubernetes" ) -// ExposeNodePort -func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, serviceType corev1.ServiceType, logger logr.Logger) error { - logger.Info("Info", "serviceType", serviceType, "ipfamily", ipfamily) +// Access +func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, logger logr.Logger) error { + podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 20, logger) + if errors.IsNotFound(err) { + if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 20, logger); err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + } else if err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + serviceType := podSvc.Spec.Type + + logger.Info("check pod service type", "ipfamily", ipfamily, "serviceType", serviceType, "podName", podName) pod, err := commands.GetPod(ctx, client, namespace, podName, logger) if err != nil { logger.Error(err, "get pods failed", "namespace", namespace, "name", podName) @@ -54,16 +66,6 @@ func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace announceIPort int32 = 16379 ) if serviceType == corev1.ServiceTypeNodePort { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 
corev1.ServiceTypeNodePort, 20, logger); err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - } else if err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } for _, v := range podSvc.Spec.Ports { if v.Name == "client" { announcePort = v.NodePort @@ -121,17 +123,6 @@ func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace return err } } else if serviceType == corev1.ServiceTypeLoadBalancer { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger); err != nil { - logger.Error(err, "retry get lb service failed") - return err - } - } else if err != nil { - logger.Error(err, "get lb service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - for _, v := range podSvc.Status.LoadBalancer.Ingress { if v.IP != "" { ip, err := netip.ParseAddr(v.IP) @@ -165,14 +156,14 @@ func ExposeNodePort(ctx context.Context, client *kubernetes.Clientset, namespace } } - format_announceIp := strings.Replace(announceIp, ":", "-", -1) - if strings.HasSuffix(format_announceIp, "-") { - format_announceIp = fmt.Sprintf("%s0", format_announceIp) + fAnnounceIp := strings.ReplaceAll(announceIp, ":", "-") + if strings.HasSuffix(fAnnounceIp, "-") { + fAnnounceIp = fmt.Sprintf("%s0", fAnnounceIp) } labelPatch := fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"}]`, - strings.Replace(builder.AnnounceIPLabelKey, "/", "~1", -1), format_announceIp, - strings.Replace(builder.AnnouncePortLabelKey, "/", "~1", -1), strconv.Itoa(int(announcePort)), - strings.Replace(builder.AnnounceIPortLabelKey, "/", "~1", -1), strconv.Itoa(int(announceIPort))) + strings.ReplaceAll(builder.AnnounceIPLabelKey, "/", "~1"), fAnnounceIp, + strings.ReplaceAll(builder.AnnouncePortLabelKey, "/", "~1"), strconv.Itoa(int(announcePort)), + strings.ReplaceAll(builder.AnnounceIPortLabelKey, "/", "~1"), strconv.Itoa(int(announceIPort))) logger.Info(labelPatch) _, err = client.CoreV1().Pods(pod.Namespace).Patch(ctx, podName, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) diff --git a/cmd/helper/commands/cluster/command.go b/cmd/helper/commands/cluster/command.go index 2ea6391..be78d88 100644 --- a/cmd/helper/commands/cluster/command.go +++ b/cmd/helper/commands/cluster/command.go @@ -22,7 +22,6 @@ import ( "github.com/chideat/valkey-operator/cmd/helper/commands" "github.com/urfave/cli/v2" - corev1 "k8s.io/api/core/v1" ) func NewCommand(ctx context.Context) *cli.Command { @@ -99,12 +98,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "IP_FAMILY for expose", EnvVars: []string{"IP_FAMILY_PREFER"}, }, - &cli.StringFlag{ - Name: "service-type", - Usage: "Service type for sentinel service", - EnvVars: []string{"SERVICE_TYPE"}, - Value: "ClusterIP", - }, }, Subcommands: []*cli.Command{ { @@ -113,10 +106,9 @@ func NewCommand(ctx context.Context) *cli.Command { Flags: []cli.Flag{}, Action: func(c *cli.Context) error { var ( - namespace = c.String("namespace") - podName = c.String("pod-name") - ipFamily = c.String("ip-family") - serviceType = corev1.ServiceType(c.String("service-type")) + namespace = 
c.String("namespace") + podName = c.String("pod-name") + ipFamily = c.String("ip-family") ) if namespace == "" { return cli.Exit("require namespace", 1) @@ -133,7 +125,7 @@ func NewCommand(ctx context.Context) *cli.Command { return cli.Exit(err, 1) } - if err := ExposeNodePort(ctx, client, namespace, podName, ipFamily, serviceType, logger); err != nil { + if err := Access(ctx, client, namespace, podName, ipFamily, logger); err != nil { logger.Error(err, "expose node port failed") return cli.Exit(err, 1) } diff --git a/cmd/helper/commands/failover/access.go b/cmd/helper/commands/failover/access.go index 449071d..0c788b1 100644 --- a/cmd/helper/commands/failover/access.go +++ b/cmd/helper/commands/failover/access.go @@ -34,14 +34,25 @@ import ( "k8s.io/client-go/kubernetes" ) -func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, serviceType corev1.ServiceType, logger logr.Logger) error { - logger.Info("service access", "serviceType", serviceType, "ipfamily", ipfamily, "podName", podName) +func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, logger logr.Logger) error { + podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 20, logger) + if errors.IsNotFound(err) { + if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 20, logger); err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + } else if err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + serviceType := podSvc.Spec.Type + + logger.Info("check pod service type", "ipfamily", ipfamily, "serviceType", serviceType, "podName", podName) pod, err := commands.GetPod(ctx, client, namespace, podName, logger) if err != nil { logger.Error(err, "get pods failed", "namespace", namespace, "name", podName) return err } - if pod.Status.HostIP == "" { return fmt.Errorf("pod not found or pod with invalid hostIP") } @@ -51,16 +62,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam announcePort int32 = 6379 ) if serviceType == corev1.ServiceTypeNodePort { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger); err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - } else if err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } for _, v := range podSvc.Spec.Ports { if v.Name == "client" { announcePort = v.NodePort @@ -115,17 +116,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam return err } } else if serviceType == corev1.ServiceTypeLoadBalancer { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger); err != nil { - logger.Error(err, "retry get lb service failed") - return err - } - } else if err != nil { - logger.Error(err, "get lb service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - for _, v := range 
podSvc.Status.LoadBalancer.Ingress { if v.IP == "" { continue @@ -161,13 +151,13 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam } } - format_announceIp := strings.Replace(announceIp, ":", "-", -1) - if strings.HasSuffix(format_announceIp, "-") { - format_announceIp = fmt.Sprintf("%s0", format_announceIp) + fAnnounceIp := strings.ReplaceAll(announceIp, ":", "-") + if strings.HasSuffix(fAnnounceIp, "-") { + fAnnounceIp = fmt.Sprintf("%s0", fAnnounceIp) } labelPatch := fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"}]`, - strings.Replace(builder.AnnounceIPLabelKey, "/", "~1", -1), format_announceIp, - strings.Replace(builder.AnnouncePortLabelKey, "/", "~1", -1), strconv.Itoa(int(announcePort))) + strings.ReplaceAll(builder.AnnounceIPLabelKey, "/", "~1"), fAnnounceIp, + strings.ReplaceAll(builder.AnnouncePortLabelKey, "/", "~1"), strconv.Itoa(int(announcePort))) logger.Info(labelPatch) _, err = client.CoreV1().Pods(pod.Namespace).Patch(ctx, podName, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) diff --git a/cmd/helper/commands/failover/command.go b/cmd/helper/commands/failover/command.go index 9c4ceeb..428f6e5 100644 --- a/cmd/helper/commands/failover/command.go +++ b/cmd/helper/commands/failover/command.go @@ -21,7 +21,6 @@ import ( "github.com/chideat/valkey-operator/cmd/helper/commands" "github.com/urfave/cli/v2" - corev1 "k8s.io/api/core/v1" ) func NewCommand(ctx context.Context) *cli.Command { @@ -49,11 +48,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "The id of current pod", EnvVars: []string{"POD_UID"}, }, - &cli.StringFlag{ - Name: "service-name", - Usage: "Service name of the statefulset", - EnvVars: []string{"SERVICE_NAME"}, - }, &cli.StringFlag{ Name: "operator-username", Usage: "Operator username", @@ -108,12 +102,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "IP_FAMILY for servie access", EnvVars: []string{"IP_FAMILY_PREFER"}, }, - &cli.StringFlag{ - Name: "service-type", - Usage: "Service type for sentinel service", - EnvVars: []string{"SERVICE_TYPE"}, - Value: "ClusterIP", - }, }, Subcommands: []*cli.Command{ { @@ -122,10 +110,9 @@ func NewCommand(ctx context.Context) *cli.Command { Flags: []cli.Flag{}, Action: func(c *cli.Context) error { var ( - namespace = c.String("namespace") - podName = c.String("pod-name") - ipFamily = c.String("ip-family") - serviceType = corev1.ServiceType(c.String("service-type")) + namespace = c.String("namespace") + podName = c.String("pod-name") + ipFamily = c.String("ip-family") ) if namespace == "" { return cli.Exit("require namespace", 1) @@ -142,7 +129,7 @@ func NewCommand(ctx context.Context) *cli.Command { return cli.Exit(err, 1) } - if err := Access(ctx, client, namespace, podName, ipFamily, serviceType, logger); err != nil { + if err := Access(ctx, client, namespace, podName, ipFamily, logger); err != nil { logger.Error(err, "enable nodeport service access failed") return cli.Exit(err, 1) } diff --git a/cmd/helper/commands/failover/shutdown.go b/cmd/helper/commands/failover/shutdown.go index c4d4564..3229b6a 100644 --- a/cmd/helper/commands/failover/shutdown.go +++ b/cmd/helper/commands/failover/shutdown.go @@ -40,6 +40,9 @@ func loadAnnounceAddress(filepath string, logger logr.Logger) string { } data, err := os.ReadFile(filepath) if err != nil { + if os.IsNotExist(err) { + return "" + } logger.Error(err, "read announce file failed", "path", filepath) return "" } diff --git 
a/cmd/helper/commands/helper.go b/cmd/helper/commands/helper.go index 3e9e798..6cd9f2c 100644 --- a/cmd/helper/commands/helper.go +++ b/cmd/helper/commands/helper.go @@ -196,10 +196,8 @@ func GetPod(ctx context.Context, client *kubernetes.Clientset, namespace, name s return pod, nil } -func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNamespace, svcName string, typ corev1.ServiceType, - count int, logger logr.Logger) (*corev1.Service, error) { - - serviceChecker := func(svc *corev1.Service, typ corev1.ServiceType) error { +func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNamespace, svcName string, count int, logger logr.Logger) (*corev1.Service, error) { + serviceChecker := func(svc *corev1.Service) error { if svc == nil { return fmt.Errorf("service not found") } @@ -207,10 +205,6 @@ func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNa return fmt.Errorf("service port not found") } - if svc.Spec.Type != typ { - return fmt.Errorf("service type not match") - } - switch svc.Spec.Type { case corev1.ServiceTypeNodePort: for _, port := range svc.Spec.Ports { @@ -233,13 +227,13 @@ func RetryGetService(ctx context.Context, clientset *kubernetes.Clientset, svcNa } logger.Info("retry get service", "target", fmt.Sprintf("%s/%s", svcNamespace, svcName), "count", count) - for i := 0; i < count+1; i++ { + for range count + 1 { svc, err := clientset.CoreV1().Services(svcNamespace).Get(ctx, svcName, metav1.GetOptions{}) if err != nil { logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", svcNamespace, svcName)) return nil, err } - if serviceChecker(svc, typ) != nil { + if serviceChecker(svc) != nil { logger.Error(err, "service check failed", "target", fmt.Sprintf("%s/%s", svcNamespace, svcName)) } else { return svc, nil diff --git a/cmd/helper/commands/sentinel/access.go b/cmd/helper/commands/sentinel/access.go index 462465c..e61b026 100644 --- a/cmd/helper/commands/sentinel/access.go +++ b/cmd/helper/commands/sentinel/access.go @@ -34,8 +34,20 @@ import ( "k8s.io/client-go/kubernetes" ) -func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, serviceType corev1.ServiceType, logger logr.Logger) error { - logger.Info("service access", "serviceType", serviceType, "ipfamily", ipfamily, "podName", podName) +func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podName, ipfamily string, logger logr.Logger) error { + podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 20, logger) + if errors.IsNotFound(err) { + if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, 20, logger); err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + } else if err != nil { + logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) + return err + } + serviceType := podSvc.Spec.Type + + logger.Info("check pod service type", "serviceType", serviceType, "ipfamily", ipfamily, "podName", podName) pod, err := commands.GetPod(ctx, client, namespace, podName, logger) if err != nil { logger.Error(err, "get pods failed", "namespace", namespace, "name", podName) @@ -51,16 +63,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam announcePort int32 = 26379 ) if serviceType == corev1.ServiceTypeNodePort { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, 
corev1.ServiceTypeNodePort, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeNodePort, 20, logger); err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - } else if err != nil { - logger.Error(err, "get service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } for _, v := range podSvc.Spec.Ports { if v.Name == "sentinel" { announcePort = v.NodePort @@ -115,17 +117,6 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam return err } } else if serviceType == corev1.ServiceTypeLoadBalancer { - podSvc, err := commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger) - if errors.IsNotFound(err) { - if podSvc, err = commands.RetryGetService(ctx, client, namespace, podName, corev1.ServiceTypeLoadBalancer, 20, logger); err != nil { - logger.Error(err, "retry get lb service failed") - return err - } - } else if err != nil { - logger.Error(err, "get lb service failed", "target", fmt.Sprintf("%s/%s", namespace, podName)) - return err - } - for _, v := range podSvc.Status.LoadBalancer.Ingress { if v.IP == "" { continue @@ -161,13 +152,13 @@ func Access(ctx context.Context, client *kubernetes.Clientset, namespace, podNam } } - format_announceIp := strings.Replace(announceIp, ":", "-", -1) - if strings.HasSuffix(format_announceIp, "-") { - format_announceIp = fmt.Sprintf("%s0", format_announceIp) + fAnnounceIp := strings.ReplaceAll(announceIp, ":", "-") + if strings.HasSuffix(fAnnounceIp, "-") { + fAnnounceIp = fmt.Sprintf("%s0", fAnnounceIp) } labelPatch := fmt.Sprintf(`[{"op":"add","path":"/metadata/labels/%s","value":"%s"},{"op":"add","path":"/metadata/labels/%s","value":"%s"}]`, - strings.Replace(builder.AnnounceIPLabelKey, "/", "~1", -1), format_announceIp, - strings.Replace(builder.AnnouncePortLabelKey, "/", "~1", -1), strconv.Itoa(int(announcePort))) + strings.ReplaceAll(builder.AnnounceIPLabelKey, "/", "~1"), fAnnounceIp, + strings.ReplaceAll(builder.AnnouncePortLabelKey, "/", "~1"), strconv.Itoa(int(announcePort))) logger.Info(labelPatch) _, err = client.CoreV1().Pods(pod.Namespace).Patch(ctx, podName, types.JSONPatchType, []byte(labelPatch), metav1.PatchOptions{}) diff --git a/cmd/helper/commands/sentinel/command.go b/cmd/helper/commands/sentinel/command.go index 9dd436d..cd55e8a 100644 --- a/cmd/helper/commands/sentinel/command.go +++ b/cmd/helper/commands/sentinel/command.go @@ -28,7 +28,6 @@ import ( "github.com/chideat/valkey-operator/cmd/helper/commands/runner" "github.com/chideat/valkey-operator/pkg/valkey" "github.com/urfave/cli/v2" - corev1 "k8s.io/api/core/v1" ) func NewCommand(ctx context.Context) *cli.Command { @@ -94,12 +93,6 @@ func NewCommand(ctx context.Context) *cli.Command { Usage: "IP_FAMILY for servie access", EnvVars: []string{"IP_FAMILY_PREFER"}, }, - &cli.StringFlag{ - Name: "service-type", - Usage: "Service type for sentinel service", - EnvVars: []string{"SERVICE_TYPE"}, - Value: "ClusterIP", - }, }, Subcommands: []*cli.Command{ { @@ -108,10 +101,9 @@ func NewCommand(ctx context.Context) *cli.Command { Flags: []cli.Flag{}, Action: func(c *cli.Context) error { var ( - namespace = c.String("namespace") - podName = c.String("pod-name") - ipFamily = c.String("ip-family") - serviceType = corev1.ServiceType(c.String("service-type")) + namespace = c.String("namespace") + podName = c.String("pod-name") + ipFamily = 
c.String("ip-family") ) if namespace == "" { return cli.Exit("require namespace", 1) @@ -128,7 +120,7 @@ func NewCommand(ctx context.Context) *cli.Command { return cli.Exit(err, 1) } - if err := Access(ctx, client, namespace, podName, ipFamily, serviceType, logger); err != nil { + if err := Access(ctx, client, namespace, podName, ipFamily, logger); err != nil { logger.Error(err, "enable nodeport service access failed") return cli.Exit(err, 1) } diff --git a/cmd/init_cluster.sh b/cmd/init_cluster.sh index a82f0b3..86c4b14 100755 --- a/cmd/init_cluster.sh +++ b/cmd/init_cluster.sh @@ -3,10 +3,10 @@ chmod -f 644 /data/*.rdb /data/*.aof /data/*.conf 2>/dev/null || true chown -f 999:1000 /data/*.rdb /data/*.aof /data/*.conf 2>/dev/null || true -if [ "$SERVICE_TYPE" = "LoadBalancer" ] || [ "$SERVICE_TYPE" = "NodePort" ] || [ -n "$IP_FAMILY_PREFER" ] ; then - echo "check pod binded service" - /opt/valkey-helper cluster expose || exit 1 -fi +rm -rf /data/announce.conf + +echo "check pod binded service" +/opt/valkey-helper cluster expose || exit 1 # copy binaries cp /opt/*.sh /opt/valkey-helper /mnt/opt/ && chmod 555 /mnt/opt/*.sh /mnt/opt/valkey-helper diff --git a/cmd/init_failover.sh b/cmd/init_failover.sh index 2ef5dc8..a375aed 100755 --- a/cmd/init_failover.sh +++ b/cmd/init_failover.sh @@ -3,10 +3,10 @@ chmod -f 644 /data/*.rdb /data/*.aof 2>/dev/null || true chown -f 999:1000 /data/*.rdb /data/*.aof 2>/dev/null || true -if [ "${SERVICE_TYPE}" = "LoadBalancer" ] || [ "${SERVICE_TYPE}" = "NodePort" ] || [ -n "${IP_FAMILY_PREFER}" ] ; then - echo "check pod binded service" - /opt/valkey-helper failover expose || exit 1 -fi +rm -rf /data/announce.conf + +echo "check pod binded service" +/opt/valkey-helper failover expose || exit 1 # copy binaries cp /opt/*.sh /opt/valkey-helper /mnt/opt/ && chmod 555 /mnt/opt/*.sh /mnt/opt/valkey-helper diff --git a/cmd/init_sentinel.sh b/cmd/init_sentinel.sh index 0b3c010..6e33f79 100755 --- a/cmd/init_sentinel.sh +++ b/cmd/init_sentinel.sh @@ -1,9 +1,9 @@ #!/bin/sh -if [ "$SERVICE_TYPE" = "LoadBalancer" ] || [ "$SERVICE_TYPE" = "NodePort" ] || [ -n "$IP_FAMILY_PREFER" ] ; then - echo "check pod binded service" - /opt/valkey-helper sentinel expose || exit 1 -fi +rm -rf /data/announce.conf + +echo "check pod binded service" +/opt/valkey-helper sentinel expose || exit 1 # copy binaries cp /opt/*.sh /opt/valkey-helper /mnt/opt/ && chmod 555 /mnt/opt/*.sh /mnt/opt/valkey-helper diff --git a/cmd/main.go b/cmd/main.go index 7c9aaa1..bc1ca9b 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -70,7 +70,7 @@ func init() { //+kubebuilder:rbac:groups=apps,resources=statefulsets;statefulsets/finalizers;deployments;deployments/finalizers;daemonsets;replicasets,verbs=get;list;watch;create;update;delete //+kubebuilder:rbac:groups=batch,resources=jobs;jobs/finalizers;cronjobs;cronjobs/finalizers,verbs=get;list;watch;create;update;delete;deletecollection //+kubebuilder:rbac:groups=*,resources=pods;pods/exec;configmaps;configmaps/finalizers;secrets;secrets/finalizers;services;services/finalizers;persistentvolumeclaims;persistentvolumeclaims/finalizers;endpoints,verbs=get;list;watch;create;update;patch;delete;deletecollection -//+kubebuilder:rbac:groups=*,resources=events,verbs=get;list;watch;create;update;delete;deletecollection +//+kubebuilder:rbac:groups=*,resources=events,verbs=create;update;patch //+kubebuilder:rbac:groups=policy,resources=poddisruptionbudgets;poddisruptionbudgets/finalizers,verbs=get;list;watch;create;update;patch;delete 
//+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch;delete //+kubebuilder:rbac:groups=*,resources=pods;pods/exec;configmaps;endpoints;services;services/finalizers,verbs=* diff --git a/cmd/run_cluster.sh b/cmd/run_cluster.sh index 70407be..e93ca90 100755 --- a/cmd/run_cluster.sh +++ b/cmd/run_cluster.sh @@ -43,9 +43,6 @@ if [ -n "${password}" ]; then fi # Handle announcement configuration -if [ -z "${SERVICE_TYPE}" ] || [ "${SERVICE_TYPE}" = "ClusterIP" ]; then - rm -f "${ANNOUNCE_CONFIG_FILE}" -fi if [ -f "${ANNOUNCE_CONFIG_FILE}" ]; then cat "${ANNOUNCE_CONFIG_FILE}" >> "${VALKEY_CONFIG_FILE}" echo "" >> "${VALKEY_CONFIG_FILE}" diff --git a/cmd/run_failover.sh b/cmd/run_failover.sh index a25c2aa..5222733 100755 --- a/cmd/run_failover.sh +++ b/cmd/run_failover.sh @@ -23,10 +23,6 @@ if [ "$MONITOR_POLICY" = "sentinel" ]; then ANNOUNCE_IP="" ANNOUNCE_PORT="" - if [ -z "${SERVICE_TYPE}" ] || [ "${SERVICE_TYPE}" = "ClusterIP" ]; then - rm -f ${ANNOUNCE_CONFIG_FILE} - fi - if [ -f "$ANNOUNCE_CONFIG_FILE" ]; then echo "" >> "$VALKEY_CONFIG_FILE" cat "$ANNOUNCE_CONFIG_FILE" >> "$VALKEY_CONFIG_FILE" diff --git a/cmd/run_sentinel.sh b/cmd/run_sentinel.sh index 6365058..09bebf1 100755 --- a/cmd/run_sentinel.sh +++ b/cmd/run_sentinel.sh @@ -15,9 +15,6 @@ if [ -n "${password}" ]; then fi # Append announce configuration to sentinel configuration if it exists -if [ -z "${SERVICE_TYPE}" ] || [ "${SERVICE_TYPE}" = "ClusterIP" ]; then - rm -f ${ANNOUNCE_CONFIG_FILE} -fi if [ -f ${ANNOUNCE_CONFIG_FILE} ]; then echo "# append announce conf to sentinel config" cat "${ANNOUNCE_CONFIG_FILE}" | grep "announce" | sed "s/^/sentinel /" >> ${VALKEY_SENTINEL_CONFIG_FILE} diff --git a/config/crd/bases/rds.valkey.buf.red_valkeys.yaml b/config/crd/bases/rds.valkey.buf.red_valkeys.yaml index 2f76660..dd8a74c 100644 --- a/config/crd/bases/rds.valkey.buf.red_valkeys.yaml +++ b/config/crd/bases/rds.valkey.buf.red_valkeys.yaml @@ -1317,7 +1317,10 @@ spec: type: object type: object modules: - description: Modules defines the module settings for Valkey + description: |- + Modules defines a list of modules to be loaded into the valkey instance. + Each module is specified by its name and version. + Modules are loaded at startup and can extend Redis functionality. 
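The configmap builders further down in this diff turn each entry of this list into a `loadmodule` directive in the rendered Valkey configuration. Below is a minimal sketch of that rendering, assuming core.ValkeyModule exposes the Path and Args fields the builders reference (the module path and arguments are made up for illustration):

```go
package main

import (
	"fmt"
	"strings"
)

// valkeyModule is a stand-in for core.ValkeyModule, assuming the Path and
// Args fields that the configmap builders in this diff reference.
type valkeyModule struct {
	Path string
	Args []string
}

// renderModules mirrors the loop added to buildValkeyConfigs and
// GenerateConfigMap: each module becomes one "loadmodule <path> [args...]"
// line appended to the generated configuration.
func renderModules(mods []valkeyModule) string {
	var buf strings.Builder
	for _, mod := range mods {
		line := append([]string{"loadmodule", mod.Path}, mod.Args...)
		buf.WriteString(strings.Join(line, " ") + "\n")
	}
	return buf.String()
}

func main() {
	// The module path and arguments here are hypothetical examples.
	fmt.Print(renderModules([]valkeyModule{
		{Path: "/usr/lib/valkey/libexample.so", Args: []string{"ARG1", "yes"}},
	}))
	// Output: loadmodule /usr/lib/valkey/libexample.so ARG1 yes
}
```

Relatedly, `loadmodule` moves from the Forbid list to RequireRestart in internal/builder/config.go, so edits to the module list are applied through a restart rather than rejected.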
items: description: ValkeyModule defines the module for Valkey properties: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 211d8d5..2002cc8 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -46,12 +46,8 @@ rules: - events verbs: - create - - delete - - deletecollection - - get - - list + - patch - update - - watch - apiGroups: - '*' resources: diff --git a/go.mod b/go.mod index 876255e..20fd988 100644 --- a/go.mod +++ b/go.mod @@ -3,26 +3,27 @@ module github.com/chideat/valkey-operator go 1.24.0 require ( - github.com/Masterminds/semver/v3 v3.3.1 + github.com/Masterminds/semver/v3 v3.4.0 github.com/alicebob/miniredis/v2 v2.35.0 github.com/cert-manager/cert-manager v1.18.2 github.com/fsnotify/fsnotify v1.9.0 - github.com/go-logr/logr v1.4.2 + github.com/go-logr/logr v1.4.3 github.com/gomodule/redigo v1.9.2 - github.com/onsi/ginkgo/v2 v2.23.3 - github.com/onsi/gomega v1.36.3 - github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0 + github.com/google/go-cmp v0.7.0 + github.com/onsi/ginkgo/v2 v2.25.1 + github.com/onsi/gomega v1.38.2 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.85.0 github.com/rafaeljusto/redigomock/v3 v3.1.2 github.com/samber/lo v1.51.0 github.com/stretchr/testify v1.10.0 github.com/urfave/cli/v2 v2.27.7 - github.com/valkey-io/valkey-go v1.0.60 + github.com/valkey-io/valkey-go v1.0.64 go.uber.org/zap v1.27.0 gotest.tools/v3 v3.5.2 - k8s.io/api v0.33.1 - k8s.io/apimachinery v0.33.1 - k8s.io/client-go v0.33.1 - k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 + k8s.io/api v0.33.3 + k8s.io/apimachinery v0.33.3 + k8s.io/client-go v0.33.3 + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 sigs.k8s.io/controller-runtime v0.21.0 ) @@ -49,8 +50,7 @@ require ( github.com/google/btree v1.1.3 // indirect github.com/google/cel-go v0.23.2 // indirect github.com/google/gnostic-models v0.6.9 // indirect - github.com/google/go-cmp v0.7.0 // indirect - github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect + github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.25.1 // indirect @@ -86,27 +86,30 @@ require ( go.opentelemetry.io/otel/sdk v1.33.0 // indirect go.opentelemetry.io/otel/trace v1.33.0 // indirect go.opentelemetry.io/proto/otlp v1.4.0 // indirect + go.uber.org/automaxprocs v1.6.0 // indirect go.uber.org/multierr v1.11.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect - golang.org/x/net v0.42.0 // indirect + golang.org/x/net v0.44.0 // indirect golang.org/x/oauth2 v0.28.0 // indirect - golang.org/x/sync v0.16.0 // indirect - golang.org/x/sys v0.35.0 // indirect - golang.org/x/term v0.33.0 // indirect - golang.org/x/text v0.28.0 // indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.36.0 // indirect + golang.org/x/term v0.35.0 // indirect + golang.org/x/text v0.29.0 // indirect golang.org/x/time v0.11.0 // indirect - golang.org/x/tools v0.35.0 // indirect + golang.org/x/tools v0.36.0 // indirect gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20241219192143-6b3ec007d9bb // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb // indirect google.golang.org/grpc v1.69.2 // indirect - 
google.golang.org/protobuf v1.36.6 // indirect + google.golang.org/protobuf v1.36.7 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.33.1 // indirect - k8s.io/apiserver v0.33.1 // indirect - k8s.io/component-base v0.33.1 // indirect + k8s.io/apiextensions-apiserver v0.33.3 // indirect + k8s.io/apiserver v0.33.3 // indirect + k8s.io/component-base v0.33.3 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 // indirect @@ -114,5 +117,5 @@ require ( sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect - sigs.k8s.io/yaml v1.4.0 // indirect + sigs.k8s.io/yaml v1.5.0 // indirect ) diff --git a/go.sum b/go.sum index ff8d915..fe19c2c 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ cel.dev/expr v0.19.1 h1:NciYrtDRIR0lNCnH1LFJegdjspNx9fI59O7TWcua/W4= cel.dev/expr v0.19.1/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= -github.com/Masterminds/semver/v3 v3.3.1 h1:QtNSWtVZ3nBfk8mAOu/B6v7FMJ+NHTIgUPi7rj+4nv4= -github.com/Masterminds/semver/v3 v3.3.1/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= +github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= +github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/alicebob/miniredis/v2 v2.35.0 h1:QwLphYqCEAo1eu1TqPRN2jgVMPBweeQcR21jeqDCONI= github.com/alicebob/miniredis/v2 v2.35.0/go.mod h1:TcL7YfarKPGDAthEtl5NBeHZfeUQj6OXMm/+iu5cLMM= github.com/antlr4-go/antlr/v4 v4.13.1 h1:SqQKkuVZ+zWkMMNkjy5FZe5mr5WURWnlpmOuzYWrPrQ= @@ -38,8 +38,8 @@ github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8 github.com/fxamacker/cbor/v2 v2.8.0 h1:fFtUGXUzXPHTIUdne5+zzMPTfffl3RD5qYnkY40vtxU= github.com/fxamacker/cbor/v2 v2.8.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= -github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= @@ -71,8 +71,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad h1:a6HEuzUHeKH6hwfN/ZoQgRgVIWFJljSWa/zetS2WTvg= -github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 h1:BHT72Gu3keYf3ZEu2J0b1vyeLSOYI8bm5wbJM/8yDe8= +github.com/google/pprof v0.0.0-20250403155104-27863c87afa6/go.mod 
h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5THxAzdVpqr6/geYxZytqFMBCOtn/ujyeo= @@ -108,17 +108,19 @@ github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus= github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw= -github.com/onsi/ginkgo/v2 v2.23.3 h1:edHxnszytJ4lD9D5Jjc4tiDkPBZ3siDeJJkUZJJVkp0= -github.com/onsi/ginkgo/v2 v2.23.3/go.mod h1:zXTP6xIp3U8aVuXN8ENK9IXRaTjFnpVB9mGmaSRvxnM= -github.com/onsi/gomega v1.36.3 h1:hID7cr8t3Wp26+cYnfcjR6HpJ00fdogN6dqZ1t6IylU= -github.com/onsi/gomega v1.36.3/go.mod h1:8D9+Txp43QWKhM24yyOBEdpkzN8FvJyAwecBgsU4KU0= +github.com/onsi/ginkgo/v2 v2.25.1 h1:Fwp6crTREKM+oA6Cz4MsO8RhKQzs2/gOIVOUscMAfZY= +github.com/onsi/ginkgo/v2 v2.25.1/go.mod h1:ppTWQ1dh9KM/F1XgpeRqelR+zHVwV81DGRSDnFxK7Sk= +github.com/onsi/gomega v1.38.2 h1:eZCjf2xjZAqe+LeWvKb5weQ+NcPwX84kqJ0cZNxok2A= +github.com/onsi/gomega v1.38.2/go.mod h1:W2MJcYxRGV63b418Ai34Ud0hEdTVXq9NW9+Sx6uXf3k= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0 h1:j9Ce3W6X6Tzi0QnSap+YzGwpqJLJGP/7xV6P9f86jjM= -github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.83.0/go.mod h1:sSxwdmprUfmRfTknPc4KIjUd2ZIc/kirw4UdXNhOauM= +github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.85.0 h1:oY+F5FZFmCjCyzkHWPjVQpzvnvEB/0FP+iyzDUUlqFc= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.85.0/go.mod h1:VB7wtBmDT6W2RJHzsvPZlBId+EnmeQA0d33fFTXvraM= github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q= github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= @@ -156,8 +158,8 @@ github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOf github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= -github.com/valkey-io/valkey-go v1.0.60 h1:idh959D20H5n7D/kwEdTKNaMn5+4HpZTn7bLXnAhQIw= -github.com/valkey-io/valkey-go v1.0.60/go.mod h1:bHmwjIEOrGq/ubOJfh5uMRs7Xj6mV3mQ/ZXUbmqpjqY= +github.com/valkey-io/valkey-go v1.0.64 
h1:3u4+b6D6zs9JQs254TLy4LqitCMHHr9XorP9GGk7XY4= +github.com/valkey-io/valkey-go v1.0.64/go.mod h1:bHmwjIEOrGq/ubOJfh5uMRs7Xj6mV3mQ/ZXUbmqpjqY= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xrash/smetrics v0.0.0-20250705151800-55b8f293f342 h1:FnBeRrxr7OU4VvAzt5X7s6266i6cSVkkFPS0TuXWbIg= @@ -186,12 +188,18 @@ go.opentelemetry.io/otel/trace v1.33.0 h1:cCJuF7LRjUFso9LPnEAHJDB2pqzp+hbO8eu1qq go.opentelemetry.io/otel/trace v1.33.0/go.mod h1:uIcdVUZMpTAmz0tI1z04GoVSezK37CbGV4fr1f2nBck= go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg= go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY= +go.uber.org/automaxprocs v1.6.0 h1:O3y2/QNTOdbF+e/dpXNNW7Rx2hZ4sTIPyybbxyNqTUs= +go.uber.org/automaxprocs v1.6.0/go.mod h1:ifeIMSnPZuznNm6jmdzmU3/bfk01Fe2fotchwEFJ8r8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0= go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= go.uber.org/zap v1.27.0 h1:aJMhYGrd5QSmlpLMr2MftRKl7t8J8PTZPA732ud/XR8= go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= @@ -203,34 +211,34 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.42.0 h1:jzkYrhi3YQWD6MLBJcsklgQsoAcw89EcZbJw8Z614hs= -golang.org/x/net v0.42.0/go.mod h1:FF1RA5d3u7nAYA4z2TkclSCKh68eSXtiFwcWQpPXdt8= +golang.org/x/net v0.44.0 h1:evd8IRDyfNBMBTTY5XRF1vaZlD+EmWx6x8PkhR04H/I= +golang.org/x/net v0.44.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY= golang.org/x/oauth2 v0.28.0 h1:CrgCKl8PPAVtLnU3c+EDw6x11699EWlsDeWNWKdIOkc= golang.org/x/oauth2 v0.28.0/go.mod h1:onh5ek6nERTohokkhCD/y2cV4Do3fxFHFuAejCkRWT8= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw= -golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync 
v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= -golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/term v0.33.0 h1:NuFncQrRcaRvVmgRkvM3j/F00gWIAlcmlB8ACEKmGIg= -golang.org/x/term v0.33.0/go.mod h1:s18+ql9tYWp1IfpV9DmCtQDDSRBUjKaw9M1eAv5UeF0= +golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k= +golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ= +golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng= -golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.35.0 h1:mBffYraMEf7aa0sB+NuKnuCy8qI/9Bughn8dC2Gu5r0= -golang.org/x/tools v0.35.0/go.mod h1:NKdj5HkL/73byiZSJjqJgKn3ep7KjFkBOkR/Hps3VPw= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -243,8 +251,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20241219192143-6b3ec007d9bb/go.mod h1:lcTa1sDdWEIHMWlITnIczmw5w60CF9ffkb8Z+DVmmjA= google.golang.org/grpc v1.69.2 h1:U3S9QEtbXC0bYNvRtcoklF3xGtLViumSYxWykJS+7AU= google.golang.org/grpc v1.69.2/go.mod h1:vyjdE6jLBI76dgpDojsFGNaHlxdjXN9ghpnd2o7JGZ4= -google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= -google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +google.golang.org/protobuf v1.36.7 h1:IgrO7UwFQGJdRNXH/sQux4R1Dj1WAKcLElzeeRaXV2A= +google.golang.org/protobuf v1.36.7/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -257,24 +265,24 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.5.2 h1:7koQfIKdy+I8UTetycgUqXWSDwpgv193Ka+qRsmBY8Q= gotest.tools/v3 v3.5.2/go.mod h1:LtdLGcnqToBH83WByAAi/wiwSFCArdFIUV/xxN4pcjA= -k8s.io/api v0.33.1 h1:tA6Cf3bHnLIrUK4IqEgb2v++/GYUtqiu9sRVk3iBXyw= -k8s.io/api v0.33.1/go.mod h1:87esjTn9DRSRTD4fWMXamiXxJhpOIREjWOSjsW1kEHw= -k8s.io/apiextensions-apiserver v0.33.1 h1:N7ccbSlRN6I2QBcXevB73PixX2dQNIW0ZRuguEE91zI= -k8s.io/apiextensions-apiserver v0.33.1/go.mod h1:uNQ52z1A1Gu75QSa+pFK5bcXc4hq7lpOXbweZgi4dqA= -k8s.io/apimachinery v0.33.1 h1:mzqXWV8tW9Rw4VeW9rEkqvnxj59k1ezDUl20tFK/oM4= -k8s.io/apimachinery v0.33.1/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/apiserver v0.33.1 h1:yLgLUPDVC6tHbNcw5uE9mo1T6ELhJj7B0geifra3Qdo= -k8s.io/apiserver v0.33.1/go.mod h1:VMbE4ArWYLO01omz+k8hFjAdYfc3GVAYPrhP2tTKccs= -k8s.io/client-go v0.33.1 h1:ZZV/Ks2g92cyxWkRRnfUDsnhNn28eFpt26aGc8KbXF4= -k8s.io/client-go v0.33.1/go.mod h1:JAsUrl1ArO7uRVFWfcj6kOomSlCv+JpvIsp6usAGefA= -k8s.io/component-base v0.33.1 h1:EoJ0xA+wr77T+G8p6T3l4efT2oNwbqBVKR71E0tBIaI= -k8s.io/component-base v0.33.1/go.mod h1:guT/w/6piyPfTgq7gfvgetyXMIh10zuXA6cRRm3rDuY= +k8s.io/api v0.33.3 h1:SRd5t//hhkI1buzxb288fy2xvjubstenEKL9K51KBI8= +k8s.io/api v0.33.3/go.mod h1:01Y/iLUjNBM3TAvypct7DIj0M0NIZc+PzAHCIo0CYGE= +k8s.io/apiextensions-apiserver v0.33.3 h1:qmOcAHN6DjfD0v9kxL5udB27SRP6SG/MTopmge3MwEs= +k8s.io/apiextensions-apiserver v0.33.3/go.mod h1:oROuctgo27mUsyp9+Obahos6CWcMISSAPzQ77CAQGz8= +k8s.io/apimachinery v0.33.3 h1:4ZSrmNa0c/ZpZJhAgRdcsFcZOw1PQU1bALVQ0B3I5LA= +k8s.io/apimachinery v0.33.3/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/apiserver v0.33.3 h1:Wv0hGc+QFdMJB4ZSiHrCgN3zL3QRatu56+rpccKC3J4= +k8s.io/apiserver v0.33.3/go.mod h1:05632ifFEe6TxwjdAIrwINHWE2hLwyADFk5mBsQa15E= +k8s.io/client-go v0.33.3 h1:M5AfDnKfYmVJif92ngN532gFqakcGi6RvaOF16efrpA= +k8s.io/client-go v0.33.3/go.mod h1:luqKBQggEf3shbxHY4uVENAxrDISLOarxpTKMiUuujg= +k8s.io/component-base v0.33.3 h1:mlAuyJqyPlKZM7FyaoM/LcunZaaY353RXiOd2+B5tGA= +k8s.io/component-base v0.33.3/go.mod h1:ktBVsBzkI3imDuxYXmVxZ2zxJnYTZ4HAsVj9iF09qp4= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg= -k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2 h1:jpcvIRr3GLoUoEKRkHKSmGjxb6lWwrBlJsXc+eUYQHM= sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.2/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/controller-runtime v0.21.0 
h1:CYfjpEuicjUecRk+KAeyYh+ouUBn4llGyDYytIGcJS8= @@ -288,5 +296,6 @@ sigs.k8s.io/randfill v1.0.0 h1:JfjMILfT8A6RbawdsK2JXGBR5AQVfd+9TbzrlneTyrU= sigs.k8s.io/randfill v1.0.0/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= sigs.k8s.io/structured-merge-diff/v4 v4.7.0 h1:qPeWmscJcXP0snki5IYF79Z8xrl8ETFxgMd7wez1XkI= sigs.k8s.io/structured-merge-diff/v4 v4.7.0/go.mod h1:dDy58f92j70zLsuZVuUX5Wp9vtxXpaZnkPGWeqDfCps= -sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +sigs.k8s.io/yaml v1.5.0 h1:M10b2U7aEUY6hRtU870n2VTPgR5RZiL/I6Lcc2F4NUQ= +sigs.k8s.io/yaml v1.5.0/go.mod h1:wZs27Rbxoai4C0f8/9urLZtZtF3avA3gKvGyPdDqTO4= diff --git a/internal/builder/clusterbuilder/configmap.go b/internal/builder/clusterbuilder/configmap.go index 09e05e4..2fe5669 100644 --- a/internal/builder/clusterbuilder/configmap.go +++ b/internal/builder/clusterbuilder/configmap.go @@ -45,6 +45,7 @@ func NewConfigMapForCR(cluster types.ClusterInstance) (*corev1.ConfigMap, error) Name: ValkeyConfigMapName(cluster.GetName()), Namespace: cluster.GetNamespace(), Labels: GenerateClusterLabels(cluster.GetName(), nil), + Annotations: map[string]string{}, OwnerReferences: util.BuildOwnerReferences(cluster.Definition()), }, Data: map[string]string{ @@ -160,6 +161,11 @@ func buildValkeyConfigs(cluster types.ClusterInstance) (string, error) { buffer.WriteString(fmt.Sprintf("%s %s\n", k, v)) } } + + for _, mod := range cr.Spec.Modules { + args := append([]string{"loadmodule", mod.Path}, mod.Args...) + buffer.WriteString(strings.Join(args, " ") + "\n") + } return buffer.String(), nil } diff --git a/internal/builder/clusterbuilder/configmap_test.go b/internal/builder/clusterbuilder/configmap_test.go index 0f1112c..6bed2d1 100644 --- a/internal/builder/clusterbuilder/configmap_test.go +++ b/internal/builder/clusterbuilder/configmap_test.go @@ -182,6 +182,10 @@ func (m *MockClusterInstance) Shards() []types.ClusterShard { return nil } +func (m *MockClusterInstance) Shard(i int) types.ClusterShard { + return nil +} + func (m *MockClusterInstance) RewriteShards(ctx context.Context, shards []*v1alpha1.ClusterShards) error { return nil } diff --git a/internal/builder/config.go b/internal/builder/config.go index 2f3f02e..7fb9451 100644 --- a/internal/builder/config.go +++ b/internal/builder/config.go @@ -62,7 +62,6 @@ const ( var ValkeyConfigRestartPolicy = map[string]ValkeyConfigSettingRule{ // forbid "include": Forbid, - "loadmodule": Forbid, "bind": Forbid, "protected-mode": Forbid, "port": Forbid, @@ -106,6 +105,7 @@ var ValkeyConfigRestartPolicy = map[string]ValkeyConfigSettingRule{ "rdbchecksum": RequireRestart, "io-threads": RequireRestart, "io-threads-do-reads": RequireRestart, + "loadmodule": RequireRestart, } type ValkeyConfigValues []string @@ -124,7 +124,7 @@ type ValkeyConfig map[string]ValkeyConfigValues // LoadValkeyConfig func LoadValkeyConfig(data string) (ValkeyConfig, error) { conf := ValkeyConfig{} - for _, line := range strings.Split(data, "\n") { + for line := range strings.SplitSeq(data, "\n") { line = strings.TrimSpace(line) if line == "" || strings.HasPrefix(line, "#") { continue diff --git a/internal/builder/const.go b/internal/builder/const.go index 83e865e..f9fb191 100644 --- a/internal/builder/const.go +++ b/internal/builder/const.go @@ -25,12 +25,13 @@ const ( AppComponentLabelKey = "app.kubernetes.io/component" AppNameLabelKey = "app.kubernetes.io/name" - ArchLabelKey = "valkeyarch" - RoleLabelKey = 
"valkey.buf.red/role" - AnnounceIPLabelKey = "valkey.buf.red/announce_ip" - AnnouncePortLabelKey = "valkey.buf.red/announce_port" - AnnounceIPortLabelKey = "valkey.buf.red/announce_iport" - ChecksumLabelKey = "valkey.buf.red/checksum" + ArchLabelKey = "valkeyarch" + RoleLabelKey = "valkey.buf.red/role" + AnnounceIPLabelKey = "valkey.buf.red/announce_ip" + AnnouncePortLabelKey = "valkey.buf.red/announce_port" + AnnounceIPortLabelKey = "valkey.buf.red/announce_iport" + ChecksumLabelKey = "valkey.buf.red/checksum" + LastAppliedConfigAnnotationKey = "valkey.buf.red/last-applied-config" InstanceTypeLabelKey = "buf.red/type" InstanceNameLabelKey = "buf.red/name" @@ -68,3 +69,7 @@ const ( OperatorVersionAnnotation = "operatorVersion" ) + +const ( + ResourceCleanFinalizer = "buf.red/resource-clean" +) diff --git a/internal/builder/failoverbuilder/configmap.go b/internal/builder/failoverbuilder/configmap.go index f5d4e05..08815fa 100644 --- a/internal/builder/failoverbuilder/configmap.go +++ b/internal/builder/failoverbuilder/configmap.go @@ -140,12 +140,18 @@ func GenerateConfigMap(inst types.FailoverInstance) (*corev1.ConfigMap, error) { } } + for _, mod := range rf.Spec.Modules { + args := append([]string{"loadmodule", mod.Path}, mod.Args...) + buffer.WriteString(strings.Join(args, " ") + "\n") + } + return &corev1.ConfigMap{ ObjectMeta: metav1.ObjectMeta{ Name: ConfigMapName(rf.Name), Namespace: rf.Namespace, Labels: GenerateCommonLabels(rf.Name), OwnerReferences: util.BuildOwnerReferences(rf), + Annotations: map[string]string{}, }, Data: map[string]string{ builder.ValkeyConfigKey: buffer.String(), diff --git a/internal/builder/failoverbuilder/statefulset.go b/internal/builder/failoverbuilder/statefulset.go index 2be3acc..09b8132 100644 --- a/internal/builder/failoverbuilder/statefulset.go +++ b/internal/builder/failoverbuilder/statefulset.go @@ -214,10 +214,6 @@ func buildEnvs(inst types.FailoverInstance, opUser *user.User, aclConfigMapName Name: builder.OperatorSecretName, Value: opUser.GetPassword().GetSecretName(), }, - { - Name: "SERVICE_TYPE", - Value: string(rf.Spec.Access.ServiceType), - }, { Name: "IP_FAMILY_PREFER", Value: string(rf.Spec.Access.IPFamilyPrefer), @@ -390,10 +386,6 @@ func buildValkeyDataInitContainer(rf *v1alpha1.Failover) (*corev1.Container, err Name: "IP_FAMILY_PREFER", Value: string(rf.Spec.Access.IPFamilyPrefer), }, - { - Name: "SERVICE_TYPE", - Value: string(rf.Spec.Access.ServiceType), - }, }, Command: []string{"sh", "/opt/init_failover.sh"}, SecurityContext: builder.GetSecurityContext(rf.Spec.SecurityContext), diff --git a/internal/builder/helper.go b/internal/builder/helper.go index c3302bd..e239f23 100644 --- a/internal/builder/helper.go +++ b/internal/builder/helper.go @@ -165,31 +165,63 @@ func ParsePodShardAndIndex(name string) (shard int, index int, err error) { return shard, index, nil } -func MergeAnnotations(t, s map[string]string) map[string]string { - if t == nil { - return s +func MergeRestartAnnotation(n, o map[string]string) map[string]string { + if n == nil { + n = make(map[string]string) } - if s == nil { - return t + + oldTimeStr, exists := o[RestartAnnotationKey] + if !exists || oldTimeStr == "" { + return n + } + oldTime, err := time.Parse(time.RFC3339Nano, oldTimeStr) + if err != nil { + return n } - for k, v := range s { + newTimeStr, exists := n[RestartAnnotationKey] + if !exists || newTimeStr == "" { + n[RestartAnnotationKey] = oldTimeStr + return n + } + newTime, err := time.Parse(time.RFC3339Nano, newTimeStr) + if err != nil { + 
n[RestartAnnotationKey] = oldTimeStr + return n + } + + if oldTime.After(newTime) { + n[RestartAnnotationKey] = oldTimeStr + return n + } + return n +} + +func IsPodAnnotationDiff(d map[string]string, s map[string]string) bool { + if len(d) != len(s) { + return true + } + + for k, v := range d { if k == RestartAnnotationKey { - tRestartAnn := t[k] - if tRestartAnn == "" && v != "" { - t[k] = v + if v == "" { + continue } - - tTime, err1 := time.Parse(time.RFC3339Nano, tRestartAnn) - sTime, err2 := time.Parse(time.RFC3339Nano, v) - if err1 != nil || err2 != nil || sTime.After(tTime) { - t[k] = v - } else { - t[k] = tRestartAnn + targetV := s[RestartAnnotationKey] + if targetV == "" { + return true + } + newTime, err1 := time.Parse(time.RFC3339Nano, v) + targetTime, err2 := time.Parse(time.RFC3339Nano, targetV) + if err1 != nil || err2 != nil { + return true + } + if newTime.After(targetTime) { + return true } - } else { - t[k] = v + } else if s[k] != v { + return true } } - return t + return false } diff --git a/internal/builder/helper_test.go b/internal/builder/helper_test.go index 84ae3c9..1e2eae0 100644 --- a/internal/builder/helper_test.go +++ b/internal/builder/helper_test.go @@ -419,7 +419,7 @@ func TestParsePodShardAndIndex(t *testing.T) { } } -func TestMergeAnnotations(t *testing.T) { +func TestMergeRestartAnnotation(t *testing.T) { now := time.Now() older := now.Add(-1 * time.Hour) newer := now.Add(1 * time.Hour) @@ -434,7 +434,7 @@ func TestMergeAnnotations(t *testing.T) { name: "nil target", t: nil, s: map[string]string{"key": "value"}, - want: map[string]string{"key": "value"}, + want: map[string]string{}, }, { name: "nil source", @@ -446,12 +446,12 @@ func TestMergeAnnotations(t *testing.T) { name: "merge regular keys", t: map[string]string{"key1": "value1"}, s: map[string]string{"key2": "value2"}, - want: map[string]string{"key1": "value1", "key2": "value2"}, + want: map[string]string{"key1": "value1"}, }, { name: "source overwrites target", - t: map[string]string{"key": "old"}, - s: map[string]string{"key": "new"}, + t: map[string]string{"key": "new"}, + s: map[string]string{"key": "old"}, want: map[string]string{"key": "new"}, }, { @@ -482,9 +482,9 @@ func TestMergeAnnotations(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := MergeAnnotations(tt.t, tt.s) + got := MergeRestartAnnotation(tt.t, tt.s) if !reflect.DeepEqual(got, tt.want) { - t.Errorf("MergeAnnotations() = %v, want %v", got, tt.want) + t.Errorf("MergeRestartAnnotation() = %v, want %v", got, tt.want) } }) } diff --git a/internal/builder/sentinelbuilder/statefulset.go b/internal/builder/sentinelbuilder/statefulset.go index 96d42c7..eb35481 100644 --- a/internal/builder/sentinelbuilder/statefulset.go +++ b/internal/builder/sentinelbuilder/statefulset.go @@ -173,10 +173,6 @@ func buildInitContainer(sen *v1alpha1.Sentinel, _ []corev1.EnvVar) (*corev1.Cont Name: "IP_FAMILY_PREFER", Value: string(sen.Spec.Access.IPFamilyPrefer), }, - { - Name: "SERVICE_TYPE", - Value: string(sen.Spec.Access.ServiceType), - }, }, Resources: corev1.ResourceRequirements{ Limits: corev1.ResourceList{ @@ -218,7 +214,7 @@ func buildServerContainer(sen *v1alpha1.Sentinel, envs []corev1.EnvVar) (*corev1 }, }, StartupProbe: &corev1.Probe{ - InitialDelaySeconds: 3, + InitialDelaySeconds: 30, TimeoutSeconds: 5, FailureThreshold: 3, ProbeHandler: corev1.ProbeHandler{ @@ -347,10 +343,6 @@ func buildEnvs(sen *v1alpha1.Sentinel) []corev1.EnvVar { Name: "TLS_ENABLED", Value: fmt.Sprintf("%t", 
sen.Spec.Access.EnableTLS), }, - { - Name: "SERVICE_TYPE", - Value: string(sen.Spec.Access.ServiceType), - }, { Name: "IP_FAMILY_PREFER", Value: string(sen.Spec.Access.IPFamilyPrefer), diff --git a/internal/builder/sentinelbuilder/statefulset_test.go b/internal/builder/sentinelbuilder/statefulset_test.go new file mode 100644 index 0000000..c5d68a9 --- /dev/null +++ b/internal/builder/sentinelbuilder/statefulset_test.go @@ -0,0 +1,176 @@ +/* +Copyright 2024 chideat. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package sentinelbuilder + +import ( + "context" + "crypto/tls" + "testing" + + certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" + "github.com/chideat/valkey-operator/api/core" + "github.com/chideat/valkey-operator/api/v1alpha1" + "github.com/chideat/valkey-operator/internal/builder" + "github.com/chideat/valkey-operator/pkg/types" + "github.com/chideat/valkey-operator/pkg/version" + "github.com/go-logr/logr" + "github.com/stretchr/testify/assert" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type mockSentinelInstance struct { + *v1alpha1.Sentinel +} + +func (m *mockSentinelInstance) Definition() *v1alpha1.Sentinel { + return m.Sentinel +} + +// Implement types.Object interface +func (m *mockSentinelInstance) GetObjectKind() schema.ObjectKind { return m.Sentinel.GetObjectKind() } +func (m *mockSentinelInstance) DeepCopyObject() runtime.Object { return m.Sentinel.DeepCopy() } +func (m *mockSentinelInstance) NamespacedName() client.ObjectKey { + return client.ObjectKeyFromObject(m.Sentinel) +} +func (m *mockSentinelInstance) Version() version.ValkeyVersion { return version.ValkeyVersion("7.2") } +func (m *mockSentinelInstance) IsReady() bool { return true } +func (m *mockSentinelInstance) Restart(ctx context.Context, annotationKeyVal ...string) error { + return nil +} +func (m *mockSentinelInstance) Refresh(ctx context.Context) error { return nil } + +// Implement types.Instance interface +func (m *mockSentinelInstance) Arch() core.Arch { return core.ValkeySentinel } +func (m *mockSentinelInstance) Issuer() *certmetav1.ObjectReference { return nil } +func (m *mockSentinelInstance) Users() types.Users { return nil } +func (m *mockSentinelInstance) TLSConfig() *tls.Config { return nil } +func (m *mockSentinelInstance) IsInService() bool { return true } +func (m *mockSentinelInstance) IsACLUserExists() bool { return false } +func (m *mockSentinelInstance) IsACLAppliedToAll() bool { return false } +func (m *mockSentinelInstance) IsResourceFullfilled(ctx context.Context) (bool, error) { + return true, nil +} +func (m *mockSentinelInstance) UpdateStatus(ctx context.Context, st types.InstanceStatus, message string) error { + return nil +} +func (m *mockSentinelInstance) SendEventf(eventtype, reason, messageFmt string, args ...any) {} +func (m *mockSentinelInstance) Logger() logr.Logger { return logr.Discard() } + +// Implement 
types.SentinelInstance interface +func (m *mockSentinelInstance) Replication() types.SentinelReplication { return nil } +func (m *mockSentinelInstance) Nodes() []types.SentinelNode { return nil } +func (m *mockSentinelInstance) RawNodes(ctx context.Context) ([]corev1.Pod, error) { return nil, nil } +func (m *mockSentinelInstance) Clusters(ctx context.Context) ([]string, error) { return nil, nil } +func (m *mockSentinelInstance) GetPassword() (string, error) { return "", nil } +func (m *mockSentinelInstance) Selector() map[string]string { return nil } + +func TestGenerateSentinelStatefulset(t *testing.T) { + sentinel := &v1alpha1.Sentinel{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sentinel", + Namespace: "default", + UID: "test-uid", + }, + Spec: v1alpha1.SentinelSpec{ + Image: "valkey/valkey:7.2", + Replicas: 3, + Access: v1alpha1.SentinelInstanceAccess{ + InstanceAccess: core.InstanceAccess{ + IPFamilyPrefer: corev1.IPv4Protocol, + EnableTLS: true, + }, + DefaultPasswordSecret: "test-secret", + }, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: func(i int64) *int64 { return &i }(1000), + }, + }, + } + + inst := &mockSentinelInstance{Sentinel: sentinel} + ss, err := GenerateSentinelStatefulset(inst) + assert.NoError(t, err) + assert.NotNil(t, ss) + + // Verify metadata + assert.Equal(t, "rfs-test-sentinel", ss.Name) + assert.Equal(t, "default", ss.Namespace) + + // Verify replicas + assert.Equal(t, int32(3), *ss.Spec.Replicas) + + // Verify containers + assert.Len(t, ss.Spec.Template.Spec.InitContainers, 1) + assert.Len(t, ss.Spec.Template.Spec.Containers, 2) + + // Verify init container + initCont := ss.Spec.Template.Spec.InitContainers[0] + assert.Equal(t, "init", initCont.Name) + + // Verify server container + serverCont := ss.Spec.Template.Spec.Containers[0] + assert.Equal(t, SentinelContainerName, serverCont.Name) + assert.Equal(t, "valkey/valkey:7.2", serverCont.Image) + + // Check env vars exist + envMap := make(map[string]string) + for _, env := range serverCont.Env { + envMap[env.Name] = env.Value + } + assert.Equal(t, "test-secret", envMap[builder.OperatorSecretName]) + assert.Equal(t, "true", envMap["TLS_ENABLED"]) + + // Verify agent container + agentCont := ss.Spec.Template.Spec.Containers[1] + assert.Equal(t, "agent", agentCont.Name) + + // Verify volumes + // 3 base volumes + 1 TLS + 1 Auth = 5 + assert.Len(t, ss.Spec.Template.Spec.Volumes, 5) +} + +func TestGenerateSentinelStatefulset_NoTLS(t *testing.T) { + sentinel := &v1alpha1.Sentinel{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-sentinel-no-tls", + Namespace: "default", + }, + Spec: v1alpha1.SentinelSpec{ + Image: "valkey/valkey:7.2", + Replicas: 1, + Access: v1alpha1.SentinelInstanceAccess{ + InstanceAccess: core.InstanceAccess{ + IPFamilyPrefer: corev1.IPv4Protocol, + }, + // No password secret + }, + }, + } + + inst := &mockSentinelInstance{Sentinel: sentinel} + ss, err := GenerateSentinelStatefulset(inst) + assert.NoError(t, err) + assert.NotNil(t, ss) + + // Verify volumes + // 3 base volumes (Config, Data, Opt) + assert.Len(t, ss.Spec.Template.Spec.Volumes, 3) +} diff --git a/internal/config/env.go b/internal/config/env.go index c8957f8..25be204 100644 --- a/internal/config/env.go +++ b/internal/config/env.go @@ -21,6 +21,7 @@ import ( "fmt" "os" "strings" + "time" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -129,3 +130,13 @@ func GetValkeyExporterImage(obj v1.Object) string { imgVersion := Getenv("DEFAULT_EXPORTER_VERSION", "v1.67.0-alpine") return GetFullImageURL(imgName, imgVersion) } + 
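The config helper added just below reads the LOADBALANCER_WAIT_TIMEOUT environment variable and falls back to a two-minute default when the value is missing or unparseable. A minimal test sketch of that contract (a hypothetical test file, not part of this diff) could look like:

// Sketch only: exercises the LoadbalancerReadyTimeout helper introduced below.
package config_test

import (
	"testing"
	"time"

	"github.com/chideat/valkey-operator/internal/config"
)

func TestLoadbalancerReadyTimeout(t *testing.T) {
	// An empty value falls back to the 2-minute default.
	t.Setenv("LOADBALANCER_WAIT_TIMEOUT", "")
	if got := config.LoadbalancerReadyTimeout(); got != 2*time.Minute {
		t.Fatalf("expected default 2m, got %s", got)
	}

	// Any value accepted by time.ParseDuration overrides the default.
	t.Setenv("LOADBALANCER_WAIT_TIMEOUT", "30s")
	if got := config.LoadbalancerReadyTimeout(); got != 30*time.Second {
		t.Fatalf("expected 30s, got %s", got)
	}

	// Unparseable values are ignored and the default is kept.
	t.Setenv("LOADBALANCER_WAIT_TIMEOUT", "not-a-duration")
	if got := config.LoadbalancerReadyTimeout(); got != 2*time.Minute {
		t.Fatalf("expected fallback 2m, got %s", got)
	}
}

Because the helper silently ignores values it cannot parse, a typo in the variable degrades to the default instead of blocking reconciliation.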
+func LoadbalancerReadyTimeout() time.Duration { + timeout := os.Getenv("LOADBALANCER_WAIT_TIMEOUT") + if timeout != "" { + if d, err := time.ParseDuration(timeout); err == nil { + return d + } + } + return 2 * time.Minute +} diff --git a/internal/controller/cluster_controller.go b/internal/controller/cluster_controller.go index 1bd52b2..3e3b36a 100644 --- a/internal/controller/cluster_controller.go +++ b/internal/controller/cluster_controller.go @@ -28,10 +28,12 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "github.com/chideat/valkey-operator/api/v1alpha1" "github.com/chideat/valkey-operator/internal/builder" + "github.com/chideat/valkey-operator/internal/builder/clusterbuilder" "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/ops" ) @@ -54,11 +56,60 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct logger := log.FromContext(ctx).WithValues("target", req.String()) var instance v1alpha1.Cluster - if err := r.Get(ctx, req.NamespacedName, &instance); errors.IsNotFound(err) { - return ctrl.Result{}, nil - } else if err != nil { + if err := r.Get(ctx, req.NamespacedName, &instance); err != nil { logger.Error(err, "get resource failed") - return ctrl.Result{}, err + return ctrl.Result{}, client.IgnoreNotFound(err) + } else if instance.GetDeletionTimestamp() != nil { + if controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + // check if all pods are shut down + labels := clusterbuilder.GenerateClusterLabels(instance.GetName(), nil) + stsList := appsv1.StatefulSetList{} + if err := r.List(ctx, &stsList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "get cluster statefulsets failed", "labels", labels) + return ctrl.Result{}, err + } + + needRequeue := false + for _, sts := range stsList.Items { + if sts.GetDeletionTimestamp() == nil { + if err := r.Delete(ctx, &sts); err != nil { + logger.Error(err, "delete cluster statefulset failed", "name", sts.GetName()) + } + needRequeue = true + } + } + if needRequeue { + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + podList := corev1.PodList{} + if err := r.List(ctx, &podList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "list pods failed", "namespace", instance.Namespace, "labels", labels) + return ctrl.Result{}, err + } else if len(podList.Items) > 0 { + // still has pods running, wait for them to shut down + logger.Info("instance is deleting, but still has pods running, waiting for them to shut down") + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + // all pods are shut down, remove the finalizer + controllerutil.RemoveFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "remove finalizer failed", "instance", instance.GetName()) + return ctrl.Result{}, err + } + } + return ctrl.Result{}, nil + } + + if !controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + // add finalizer + controllerutil.AddFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "add finalizer failed", "instance", instance.GetName()) + return ctrl.Result{}, err + } + return 
ctrl.Result{RequeueAfter: time.Second}, nil } // update default status diff --git a/internal/controller/failover_controller.go b/internal/controller/failover_controller.go index 611d458..b125f34 100644 --- a/internal/controller/failover_controller.go +++ b/internal/controller/failover_controller.go @@ -24,17 +24,18 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/log" "github.com/chideat/valkey-operator/api/v1alpha1" "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/certbuilder" + "github.com/chideat/valkey-operator/internal/builder/failoverbuilder" "github.com/chideat/valkey-operator/internal/builder/sentinelbuilder" "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/ops" @@ -58,11 +59,59 @@ func (r *FailoverReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c logger := log.FromContext(ctx).WithValues("target", req.String()) var instance v1alpha1.Failover - if err := r.Get(ctx, req.NamespacedName, &instance); errors.IsNotFound(err) { - return ctrl.Result{}, nil - } else if err != nil { + if err := r.Get(ctx, req.NamespacedName, &instance); err != nil { logger.Error(err, "get resource failed") - return ctrl.Result{}, err + return ctrl.Result{}, client.IgnoreNotFound(err) + } else if instance.GetDeletionTimestamp() != nil { + if controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + // check if sts is marked for deletion + labels := failoverbuilder.GenerateCommonLabels(instance.GetName()) + stsList := appsv1.StatefulSetList{} + if err := r.List(ctx, &stsList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "get failover statefulsets failed", "labels", labels) + return ctrl.Result{}, err + } + + needRequeue := false + for _, sts := range stsList.Items { + if sts.GetDeletionTimestamp() == nil { + if err := r.Delete(ctx, &sts); err != nil { + logger.Error(err, "delete failover statefulset failed", "name", sts.GetName()) + } + needRequeue = true + } + } + if needRequeue { + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + // check if all pods is shutdown + podList := corev1.PodList{} + if err := r.List(ctx, &podList, client.InNamespace(instance.Namespace), client.MatchingLabels(labels)); err != nil { + logger.Error(err, "list pods failed", "namespace", instance.Namespace, "labels", labels) + return ctrl.Result{}, err + } + if len(podList.Items) > 0 { + logger.Info("failover pods is not shutdown, waiting for next reconcile", "pods", len(podList.Items)) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + logger.Info("instance is deleting, remove finalizer", "name", instance.GetName()) + controllerutil.RemoveFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "update instance finalizer failed") + return ctrl.Result{}, err + } + } + } + + if !controllerutil.ContainsFinalizer(&instance, builder.ResourceCleanFinalizer) { + controllerutil.AddFinalizer(&instance, builder.ResourceCleanFinalizer) + if err := r.Update(ctx, &instance); err != nil { + 
logger.Error(err, "add finalizer failed", "instance", instance.GetName()) + return ctrl.Result{}, err + } + return ctrl.Result{RequeueAfter: time.Second}, nil } if crVersion := instance.Annotations[builder.CRVersionKey]; crVersion == "" { diff --git a/internal/controller/rds/valkey/cluster.go b/internal/controller/rds/valkey/cluster.go index 674f87f..e80b8c7 100644 --- a/internal/controller/rds/valkey/cluster.go +++ b/internal/controller/rds/valkey/cluster.go @@ -27,7 +27,9 @@ import ( "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" "github.com/go-logr/logr" - corev1 "k8s.io/api/core/v1" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/samber/lo" "k8s.io/apimachinery/pkg/api/resource" v12 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -78,18 +80,6 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err if exporter.Image == "" { exporter.Image = config.GetValkeyExporterImage(nil) } - if exporter.Resources == nil || exporter.Resources.Limits.Cpu().IsZero() || exporter.Resources.Limits.Memory().IsZero() { - exporter.Resources = &corev1.ResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("300Mi"), - }, - Limits: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("300Mi"), - }, - } - } } shardsConf := instance.Spec.Replicas.ShardsConfig @@ -104,6 +94,7 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err Labels: labels, Annotations: annotations, OwnerReferences: util.BuildOwnerReferences(instance), + Finalizers: []string{builder.ResourceCleanFinalizer}, }, Spec: v1alpha1.ClusterSpec{ Image: image, @@ -119,10 +110,11 @@ func GenerateValkeyCluster(instance *rdsv1alpha1.Valkey) (*v1alpha1.Cluster, err NodeSelector: instance.Spec.NodeSelector, Tolerations: instance.Spec.Tolerations, SecurityContext: instance.Spec.SecurityContext, - PodAnnotations: instance.Spec.PodAnnotations, + PodAnnotations: lo.Assign(instance.Spec.PodAnnotations), Access: instance.Spec.Access, Exporter: exporter, Storage: instance.Spec.Storage, + Modules: instance.Spec.Modules, }, } @@ -140,7 +132,17 @@ func ShouldUpdateCluster(cluster, newCluster *v1alpha1.Cluster, logger logr.Logg return true } - return !reflect.DeepEqual(cluster.Spec, newCluster.Spec) + if !cmp.Equal(cluster.Spec, newCluster.Spec, cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(v1alpha1.ClusterSpec{}, "PodAnnotations"), + ) { + return true + } + + if builder.IsPodAnnotationDiff(newCluster.Spec.PodAnnotations, cluster.Spec.PodAnnotations) { + logger.V(3).Info("pod annotations diff") + return true + } + return false } func ClusterIsUp(cluster *v1alpha1.Cluster) bool { diff --git a/internal/controller/rds/valkey/failover.go b/internal/controller/rds/valkey/failover.go index a53cc05..c92f9e6 100644 --- a/internal/controller/rds/valkey/failover.go +++ b/internal/controller/rds/valkey/failover.go @@ -26,10 +26,11 @@ import ( "github.com/chideat/valkey-operator/internal/builder/failoverbuilder" "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/go-logr/logr" "github.com/samber/lo" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -64,18 +65,6 @@ func GenerateFailover(instance *rdsv1alpha1.Valkey) (*v1alpha1.Failover, error) if exporter.Image == "" { exporter.Image = config.GetValkeyExporterImage(nil) } - if exporter.Resources == nil || exporter.Resources.Limits.Cpu().IsZero() || exporter.Resources.Limits.Memory().IsZero() { - exporter.Resources = &corev1.ResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("50m"), - corev1.ResourceMemory: resource.MustParse("128Mi"), - }, - Limits: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: resource.MustParse("100m"), - corev1.ResourceMemory: resource.MustParse("384Mi"), - }, - } - } } if sentinel != nil { @@ -102,6 +91,7 @@ func GenerateFailover(instance *rdsv1alpha1.Valkey) (*v1alpha1.Failover, error) Labels: failoverbuilder.GenerateSelectorLabels(instance.Name), Annotations: annotations, OwnerReferences: util.BuildOwnerReferences(instance), + Finalizers: []string{builder.ResourceCleanFinalizer}, }, Spec: v1alpha1.FailoverSpec{ Image: image, @@ -112,6 +102,7 @@ func GenerateFailover(instance *rdsv1alpha1.Valkey) (*v1alpha1.Failover, error) Exporter: exporter, Access: *access, Storage: instance.Spec.Storage.DeepCopy(), + Modules: instance.Spec.Modules, Affinity: instance.Spec.CustomAffinity, NodeSelector: instance.Spec.NodeSelector, @@ -135,5 +126,19 @@ func ShouldUpdateFailover(failover, newFailover *v1alpha1.Failover, logger logr. !reflect.DeepEqual(newFailover.Labels, failover.Labels) { return true } - return !reflect.DeepEqual(failover.Spec, newFailover.Spec) + + if !cmp.Equal(newFailover.Spec, failover.Spec, + cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(v1alpha1.FailoverSpec{}, "PodAnnotations"), + ) { + return true + } + + if builder.IsPodAnnotationDiff(newFailover.Spec.PodAnnotations, failover.Spec.PodAnnotations) || + (newFailover.Spec.Sentinel != nil && + failover.Spec.Sentinel != nil && + builder.IsPodAnnotationDiff(newFailover.Spec.Sentinel.PodAnnotations, failover.Spec.Sentinel.PodAnnotations)) { + return true + } + return false } diff --git a/internal/controller/rds/valkey_controller.go b/internal/controller/rds/valkey_controller.go index 13e69ac..6a03959 100644 --- a/internal/controller/rds/valkey_controller.go +++ b/internal/controller/rds/valkey_controller.go @@ -68,13 +68,9 @@ func (r *ValkeyReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctr inst := &rdsv1alpha1.Valkey{} if err := r.Get(ctx, req.NamespacedName, inst); err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } logger.Error(err, "Fail to get valkey instance") - return reconcile.Result{}, err - } - if inst.GetDeletionTimestamp() != nil { + return reconcile.Result{}, client.IgnoreNotFound(err) + } else if inst.GetDeletionTimestamp() != nil { if err := r.processFinalizer(inst); err != nil { logger.Error(err, "fail to process finalizer") return r.updateInstanceStatus(ctx, inst, err, logger) @@ -139,20 +135,23 @@ func (r *ValkeyReconciler) reconcileFailover(ctx context.Context, inst *rdsv1alp return nil } else if err != nil { return err + } else if failover.GetDeletionTimestamp() != nil { + return fmt.Errorf("redis failover %s is deleting, waiting for it to be deleted", failover.Name) } if len(inst.Status.MatchLabels) == 0 { inst.Status.MatchLabels = failoverbuilder.GenerateSelectorLabels(inst.Name) } + if inst.Spec.PodAnnotations == nil { + inst.Spec.PodAnnotations = make(map[string]string) + } for key := range 
vkHandler.GetValkeyConfigsApplyPolicyByVersion(inst.Spec.Version) { if inst.Spec.CustomConfigs[key] != failover.Spec.CustomConfigs[key] { - if inst.Spec.PodAnnotations == nil { - inst.Spec.PodAnnotations = map[string]string{} - } inst.Spec.PodAnnotations[builder.RestartAnnotationKey] = time.Now().Format(time.RFC3339Nano) break } } + inst.Spec.PodAnnotations = builder.MergeRestartAnnotation(inst.Spec.PodAnnotations, failover.Spec.PodAnnotations) newFailover, err := vkHandler.GenerateFailover(inst) if err != nil { @@ -220,20 +219,24 @@ func (r *ValkeyReconciler) reconcileCluster(ctx context.Context, inst *rdsv1alph return nil } else if err != nil { return err + } else if cluster.GetDeletionTimestamp() != nil { + // wait old resource deleted + logger.V(3).Info("redis cluster is deleting, waiting for it to be deleted") + return fmt.Errorf("redis cluster %s is deleting, waiting for it to be deleted", cluster.Name) } if len(inst.Status.MatchLabels) == 0 { inst.Status.MatchLabels = clusterbuilder.GenerateClusterLabels(inst.Name, nil) } + for key := range vkHandler.GetValkeyConfigsApplyPolicyByVersion(inst.Spec.Version) { if inst.Spec.CustomConfigs[key] != cluster.Spec.CustomConfigs[key] { - if inst.Spec.PodAnnotations == nil { - inst.Spec.PodAnnotations = map[string]string{} - } inst.Spec.PodAnnotations[builder.RestartAnnotationKey] = time.Now().Format(time.RFC3339Nano) break } } + inst.Spec.PodAnnotations = builder.MergeRestartAnnotation(inst.Spec.PodAnnotations, cluster.Spec.PodAnnotations) + newCluster, err := vkHandler.GenerateValkeyCluster(inst) if err != nil { return err diff --git a/internal/controller/user_controller.go b/internal/controller/user_controller.go index c82c857..e11bf41 100644 --- a/internal/controller/user_controller.go +++ b/internal/controller/user_controller.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "reflect" + "slices" "strings" "time" @@ -30,6 +31,7 @@ import ( "github.com/chideat/valkey-operator/internal/controller/user" "github.com/chideat/valkey-operator/internal/util" security "github.com/chideat/valkey-operator/pkg/security/password" + tuser "github.com/chideat/valkey-operator/pkg/types/user" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -70,17 +72,45 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
logger := log.FromContext(ctx).WithName("User").WithValues("target", req.String()) instance := v1alpha1.User{} - err := r.Client.Get(ctx, req.NamespacedName, &instance) - if err != nil { - if errors.IsNotFound(err) { - return reconcile.Result{}, nil - } + if err := r.Client.Get(ctx, req.NamespacedName, &instance); err != nil { logger.Error(err, "get valkey user failed") - return reconcile.Result{}, err - } + return reconcile.Result{}, client.IgnoreNotFound(err) + } else if instance.GetDeletionTimestamp() != nil { + if slices.Contains([]string{tuser.DefaultOperatorUserName}, instance.Spec.Username) { + switch instance.Spec.Arch { + case core.ValkeyReplica, core.ValkeyFailover: + rf := &v1alpha1.Failover{} + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: instance.Spec.InstanceName}, rf); err != nil { + if !errors.IsNotFound(err) { + logger.Error(err, "get instance failed", "name", instance.Name) + return ctrl.Result{}, err + } + } else { + if rf.GetDeletionTimestamp() != nil { + logger.Info("failover is deleting, skip remove finalizer", "name", instance.Spec.InstanceName) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + // this should not happen, but we still return a requeue result + return ctrl.Result{RequeueAfter: time.Minute}, nil + } + case core.ValkeyCluster: + cluster := &v1alpha1.Cluster{} + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: instance.Spec.InstanceName}, cluster); err != nil { + if !errors.IsNotFound(err) { + logger.Error(err, "get instance failed", "name", instance.Name) + return ctrl.Result{}, err + } + } else { + if cluster.GetDeletionTimestamp() != nil { + logger.Info("instance is deleting, skip remove finalizer", "name", instance.Spec.InstanceName) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + // this should not happen, but we still return a requeue result + return ctrl.Result{RequeueAfter: time.Minute}, nil + } + } + } - if instance.GetDeletionTimestamp() != nil { - logger.Info("user is being deleted", "instance", req.NamespacedName) if err := r.Handler.Delete(ctx, instance, logger); err != nil { if instance.Status.Message != err.Error() { instance.Status.Phase = v1alpha1.UserFail @@ -91,12 +121,36 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
} } return ctrl.Result{RequeueAfter: time.Second * 10}, err - } else { - controllerutil.RemoveFinalizer(&instance, UserFinalizer) - if err := r.Update(ctx, &instance); err != nil { - logger.Error(err, "remove finalizer failed", "instance", req.NamespacedName) - return ctrl.Result{RequeueAfter: time.Second * 10}, nil + } + + for _, name := range instance.Spec.PasswordSecrets { + if name == "" { + continue } + secret := &corev1.Secret{} + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: name}, secret); err != nil { + if errors.IsNotFound(err) { + logger.Info("secret not found, skip remove finalizer", "name", name) + continue + } + logger.Error(err, "get secret failed", "secret name", name) + return ctrl.Result{}, err + } + + if slices.Contains(secret.GetFinalizers(), UserFinalizer) { + controllerutil.RemoveFinalizer(secret, UserFinalizer) + if err := r.Update(ctx, secret); err != nil { + logger.Error(err, "remove finalizer from secret failed", "secret name", name) + return ctrl.Result{}, err + } + } + } + + logger.Info("RemoveFinalizer", "instance", req.NamespacedName) + controllerutil.RemoveFinalizer(&instance, UserFinalizer) + if err := r.Update(ctx, &instance); err != nil { + logger.Error(err, "remove finalizer failed", "instance", req.NamespacedName) + return ctrl.Result{RequeueAfter: time.Second * 10}, nil } return ctrl.Result{}, nil } @@ -131,10 +185,7 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. continue } secret := &corev1.Secret{} - if err := r.Get(ctx, types.NamespacedName{ - Namespace: instance.Namespace, - Name: name, - }, secret); err != nil { + if err := r.Get(ctx, types.NamespacedName{Namespace: instance.Namespace, Name: name}, secret); err != nil { logger.Error(err, "get secret failed", "secret name", name) instance.Status.Message = err.Error() instance.Status.Phase = v1alpha1.UserFail @@ -157,12 +208,14 @@ func (r *UserReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl. 
secret.SetLabels(map[string]string{}) } if secret.Labels[builder.InstanceNameLabelKey] != vkName || - len(secret.GetOwnerReferences()) == 0 || secret.OwnerReferences[0].UID != instance.GetUID() { + len(secret.GetOwnerReferences()) == 0 || secret.OwnerReferences[0].UID != instance.GetUID() || + controllerutil.ContainsFinalizer(secret, UserFinalizer) { secret.Labels[builder.ManagedByLabelKey] = config.AppName secret.Labels[builder.InstanceNameLabelKey] = vkName secret.OwnerReferences = util.BuildOwnerReferences(&instance) - if err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + controllerutil.AddFinalizer(secret, UserFinalizer) + if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { return r.Update(ctx, secret) }); err != nil { logger.Error(err, "update secret owner failed", "secret", secret.Name) diff --git a/internal/ops/cluster/actor/actor_ensure_resource.go b/internal/ops/cluster/actor/actor_ensure_resource.go index 46c65ac..d6a30f4 100644 --- a/internal/ops/cluster/actor/actor_ensure_resource.go +++ b/internal/ops/cluster/actor/actor_ensure_resource.go @@ -38,6 +38,8 @@ import ( "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -79,7 +81,10 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.pauseStatefulSet(ctx, cluster, logger); ret != nil { return ret } - return actor.NewResult(cops.CommandPaused) + if len(cluster.Nodes()) == 0 { + return actor.Pause() + } + return actor.Requeue() } if ret := a.ensureServiceAccount(ctx, cluster, logger); ret != nil { @@ -283,7 +288,7 @@ func (a *actorEnsureResource) ensureStatefulset(ctx context.Context, cluster typ } else if err != nil { logger.Error(err, "get poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) return actor.RequeueWithError(err) - } else if !reflect.DeepEqual(oldPdb.Spec, pdb.Spec) { + } else if !cmp.Equal(oldPdb.Spec, pdb.Spec, cmpopts.EquateEmpty()) { pdb.ResourceVersion = oldPdb.ResourceVersion if err = a.client.UpdatePodDisruptionBudget(ctx, cr.GetNamespace(), pdb); err != nil { logger.Error(err, "update poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) @@ -320,7 +325,7 @@ func (a *actorEnsureResource) ensureStatefulset(ctx context.Context, cluster typ newSts.Spec.VolumeClaimTemplates = oldSts.Spec.VolumeClaimTemplates // merge restart annotations, if statefulset is more new, not restart statefulset - newSts.Spec.Template.Annotations = MergeAnnotations(newSts.Spec.Template.Annotations, oldSts.Spec.Template.Annotations) + newSts.Spec.Template.Annotations = builder.MergeRestartAnnotation(newSts.Spec.Template.Annotations, oldSts.Spec.Template.Annotations) if util.IsStatefulsetChanged(newSts, oldSts, logger) { if err := a.client.UpdateStatefulSet(ctx, cr.GetNamespace(), newSts); err != nil { @@ -657,32 +662,3 @@ func (a *actorEnsureResource) fetchAllPodBindedServices(ctx context.Context, nam } return services, nil } - -func MergeAnnotations(t, s map[string]string) map[string]string { - if t == nil { - return s - } - if s == nil { - return t - } - - for k, v := range s { - if k == builder.RestartAnnotationKey { - tRestartAnn := t[k] - if tRestartAnn == "" && v != "" { - t[k] = v - } - - tTime, err1 := time.Parse(time.RFC3339Nano, tRestartAnn) - sTime, err2 := 
time.Parse(time.RFC3339Nano, v) - if err1 != nil || err2 != nil || sTime.After(tTime) { - t[k] = v - } else { - t[k] = tRestartAnn - } - } else { - t[k] = v - } - } - return t -} diff --git a/internal/ops/cluster/actor/actor_ensure_slots.go b/internal/ops/cluster/actor/actor_ensure_slots.go index d6a178b..d9d06ed 100644 --- a/internal/ops/cluster/actor/actor_ensure_slots.go +++ b/internal/ops/cluster/actor/actor_ensure_slots.go @@ -29,6 +29,7 @@ import ( "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/slot" "github.com/chideat/valkey-operator/pkg/types" + "github.com/chideat/valkey-operator/pkg/valkey" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" ) @@ -144,6 +145,20 @@ func (a *actorEnsureSlots) Do(ctx context.Context, val types.Instance) *actor.Ac if len(failedShards) > 0 { for _, shard := range failedShards { + for _, node := range shard.Replicas() { + if !node.IsReady() { + continue + } + + if node.ClusterInfo().ClusterState != valkey.ClusterStateOk { + logger.Info("node is not in cluster state", "node", node.GetName(), "state", node.ClusterInfo().ClusterState) + if err := a.meetNode(ctx, cluster, node, logger); err != nil { + time.Sleep(time.Second * 2) + } + } + + } + if err := shard.Refresh(ctx); err != nil { logger.Error(err, "refresh shard info failed", "shard", shard.GetName()) continue @@ -153,10 +168,15 @@ func (a *actorEnsureSlots) Do(ctx context.Context, val types.Instance) *actor.Ac } for _, node := range shard.Replicas() { - if !node.IsReady() && node.Role() != core.NodeRoleReplica { + if !node.IsReady() { continue } + if node.ClusterInfo().ClusterState != valkey.ClusterStateOk { + logger.Info("node is not in cluster state", "node", node.GetName(), "state", node.ClusterInfo().ClusterState) + _ = a.meetNode(ctx, cluster, node, logger) + } + // disable takeover when shard in importing or migrating if a.doFailover(ctx, node, 10, !shard.IsImporting() && !shard.IsMigrating(), logger) == nil { cluster.SendEventf(corev1.EventTypeWarning, config.EventFailover, "healed shard %s with new master %s", shard.GetName(), node.GetName()) @@ -256,19 +276,39 @@ func (a *actorEnsureSlots) Do(ctx context.Context, val types.Instance) *actor.Ac return nil } +func (a *actorEnsureSlots) meetNode(ctx context.Context, cluster types.ClusterInstance, node types.ValkeyNode, logger logr.Logger) error { + if cluster == nil || node == nil { + return fmt.Errorf("cluster or node is nil") + } + + arg := []any{"CLUSTER", "MEET", node.DefaultInternalIP().String(), node.InternalPort(), node.InternalIPort()} + for _, shard := range cluster.Shards() { + for _, snode := range shard.Nodes() { + if snode.ID() == node.ID() { + continue + } + if err := snode.Setup(ctx, arg); err != nil { + logger.Error(err, "meet node failed", "node", snode.GetName()) + return err + } + } + } + return nil +} + func (a *actorEnsureSlots) doFailover(ctx context.Context, node types.ValkeyNode, retry int, ensure bool, logger logr.Logger) error { ctx, cancel := context.WithTimeout(ctx, time.Minute) defer cancel() args := []any{"CLUSTER", "FAILOVER", "FORCE"} for i := 0; i < retry+1; i++ { - logger.Info("do shard failover", "node", node.GetName(), "action", args[2]) + logger.Info("do shard force failover", "node", node.GetName(), "action", args[2]) if err := node.Setup(ctx, args); err != nil { logger.Error(err, "do failover failed", "node", node.GetName()) return err } - for j := 0; j < 3; j++ { + for range 3 { time.Sleep(time.Second * 2) if err := node.Refresh(ctx); err != nil { 
logger.Error(err, "refresh node info failed") @@ -287,13 +327,13 @@ func (a *actorEnsureSlots) doFailover(ctx context.Context, node types.ValkeyNode } args[2] = "TAKEOVER" - logger.Info("do shard failover", "node", node.GetName(), "action", args[2]) + logger.Info("do shard takeover failover", "node", node.GetName(), "action", args[2]) if err := node.Setup(ctx, args); err != nil { logger.Error(err, "do failover failed", "node", node.GetName()) return err } - for j := 0; j < 3; j++ { + for range 3 { time.Sleep(time.Second * 2) if err := node.Refresh(ctx); err != nil { logger.Error(err, "refresh node info failed") diff --git a/internal/ops/cluster/actor/actor_heal_pod.go b/internal/ops/cluster/actor/actor_heal_pod.go index e783d76..705171c 100644 --- a/internal/ops/cluster/actor/actor_heal_pod.go +++ b/internal/ops/cluster/actor/actor_heal_pod.go @@ -122,7 +122,8 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR logger.Error(err, "get service failed", "name", node.GetName()) return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "client") if port != nil { if int(port.NodePort) != announcePort { @@ -132,14 +133,14 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { cluster.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } } else { logger.Error(fmt.Errorf("service port not found"), "service port not found", "name", node.GetName(), "port", "client") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if index := slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(ing corev1.LoadBalancerIngress) bool { return ing.IP == announceIP || ing.Hostname == announceIP }); index < 0 { @@ -149,7 +150,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { cluster.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsist announce %s", node.GetName()) return actor.Requeue() } } diff --git a/internal/ops/cluster/actor/actor_rebalance.go b/internal/ops/cluster/actor/actor_rebalance.go index 1a12608..306ec77 100644 --- a/internal/ops/cluster/actor/actor_rebalance.go +++ b/internal/ops/cluster/actor/actor_rebalance.go @@ -131,8 +131,8 @@ type SlotMigrateStatus struct { // 槽迁移实现 // 由于槽迁移是一个标记然后后台任务一直执行的过程,为了槽迁移的健壮性,将槽迁移的任务进行拆分 // 1. operator 部分:operator 只负责标记哪些槽要迁移 -// 2. sidecar: sidercar 用于按照标记信息迁移槽,并在数据迁移完成之后,清理标记 -// 3. 即使在槽迁移过程中 node 重启或者关机(可能会数据丢失),operator 会重新标记,sidecar 会重新进行迁移 +// 2. agent: agent 用于按照标记信息迁移槽,并在数据迁移完成之后,清理标记 +// 3. 
即使在槽迁移过程中 node 重启或者关机(可能会数据丢失),operator 会重新标记,agent 会重新进行迁移 func (a *actorRebalance) Do(ctx context.Context, val types.Instance) *actor.ActorResult { cluster := val.(types.ClusterInstance) logger := val.Logger().WithValues("actor", cops.CommandRebalance.String()) diff --git a/internal/ops/cluster/actor/actor_update_config.go b/internal/ops/cluster/actor/actor_update_config.go index 4f0ef47..688a9cc 100644 --- a/internal/ops/cluster/actor/actor_update_config.go +++ b/internal/ops/cluster/actor/actor_update_config.go @@ -18,13 +18,14 @@ package actor import ( "context" + "maps" "github.com/Masterminds/semver/v3" "github.com/chideat/valkey-operator/api/core" "github.com/chideat/valkey-operator/internal/actor" "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/clusterbuilder" - cops "github.com/chideat/valkey-operator/internal/ops/cluster" + ops "github.com/chideat/valkey-operator/internal/ops/cluster" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" "github.com/go-logr/logr" @@ -53,7 +54,7 @@ type actorUpdateConfig struct { // SupportedCommands func (a *actorUpdateConfig) SupportedCommands() []actor.Command { - return []actor.Command{cops.CommandUpdateConfig} + return []actor.Command{ops.CommandUpdateConfig} } func (a *actorUpdateConfig) Version() *semver.Version { @@ -65,83 +66,100 @@ func (a *actorUpdateConfig) Version() *semver.Version { // two type config: hotconfig and restartconfig // use cm to check the difference of the config func (a *actorUpdateConfig) Do(ctx context.Context, val types.Instance) *actor.ActorResult { - logger := val.Logger().WithValues("actor", cops.CommandUpdateConfig.String()) + logger := val.Logger().WithValues("actor", ops.CommandUpdateConfig.String()) cluster := val.(types.ClusterInstance) newCm, _ := clusterbuilder.NewConfigMapForCR(cluster) oldCm, err := a.client.GetConfigMap(ctx, newCm.Namespace, newCm.Name) if err != nil && !errors.IsNotFound(err) { logger.Error(err, "get configmap failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(cops.CommandRequeue, err) + return actor.NewResultWithError(ops.CommandRequeue, err) } else if oldCm == nil || oldCm.Data[builder.ValkeyConfigKey] == "" { if err = a.client.CreateConfigMap(ctx, cluster.GetNamespace(), newCm); err != nil { logger.Error(err, "create configmap failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(cops.CommandRequeue, err) + return actor.NewResultWithError(ops.CommandRequeue, err) } return nil } - // check if config changed - newConf, _ := builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) - oldConf, _ := builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) - added, changed, deleted := oldConf.Diff(newConf) - - if len(deleted) > 0 || len(added) > 0 || len(changed) > 0 { - // NOTE: update configmap first may cause the hot config fail for it will not retry again - if err := a.client.UpdateConfigMap(ctx, cluster.GetNamespace(), newCm); err != nil { - logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(cops.CommandRequeue, err) - } + var ( + newConf builder.ValkeyConfig + oldConf builder.ValkeyConfig + ) + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(lastAppliedConf) + } 
else { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) } - for k, v := range added { - changed[k] = v - } - if len(changed) == 0 { - return nil - } + added, changed, deleted := oldConf.Diff(newConf) + maps.Copy(changed, added) + + if len(deleted)+len(changed) > 0 { + conf := newCm.DeepCopy() + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = lastAppliedConf + } else { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = oldCm.Data[builder.ValkeyConfigKey] + } - foundRestartApplyConfig := false - for key := range changed { - if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { - foundRestartApplyConfig = true - break + // update configmap with last applied config + if err := a.client.UpdateConfigMap(ctx, conf.GetNamespace(), conf); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(conf)) + return actor.RequeueWithError(err) } } - if foundRestartApplyConfig { - logger.Info("rolling restart all shard") - // NOTE: the restart is done by RDS - // rolling update all statefulset - // if err := cluster.Restart(ctx); err != nil { - // logger.Error(err, "restart instance failed") - // } - return actor.NewResult(cops.CommandEnsureResource) - } else { - var margs [][]any - for key, vals := range changed { - logger.V(2).Info("hot config ", "key", key, "value", vals.String()) - margs = append(margs, []any{"config", "set", key, vals.String()}) + if len(changed) > 0 { + foundRestartApplyConfig := false + for key := range changed { + if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { + foundRestartApplyConfig = true + break + } } + if foundRestartApplyConfig { + logger.Info("rolling restart all shard") + // NOTE: the restart is done by RDS + // rolling update all statefulset + if err := cluster.Restart(ctx); err != nil { + logger.Error(err, "restart instance failed") + return actor.NewResultWithError(ops.CommandRequeue, err) + } + } else { + var margs [][]any + for key, vals := range changed { + logger.V(2).Info("hot config ", "key", key, "value", vals.String()) + margs = append(margs, []any{"config", "set", key, vals.String()}) + } - var ( - isUpdateFailed = false - err error - ) - for _, node := range cluster.Nodes() { - if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || - node.IsTerminating() { - continue + var ( + isUpdateFailed = false + err error + ) + for _, node := range cluster.Nodes() { + if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || + node.IsTerminating() { + continue + } + if err = node.Setup(ctx, margs...); err != nil { + isUpdateFailed = true + break + } } - if err = node.Setup(ctx, margs...); err != nil { - isUpdateFailed = true - break + if isUpdateFailed { + return actor.NewResultWithError(ops.CommandRequeue, err) } } + } - if !isUpdateFailed { - return actor.NewResultWithError(cops.CommandRequeue, err) - } + // update configmap without last applied config + if err := a.client.UpdateConfigMap(ctx, cluster.GetNamespace(), newCm); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) + return actor.NewResultWithError(ops.CommandRequeue, err) } + return nil } diff --git a/internal/ops/cluster/engine.go b/internal/ops/cluster/engine.go index 270d524..0b39140 100644 
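The rewritten update-config actor above keeps the previous configuration in the valkey.buf.red/last-applied-config annotation while a change is being applied, and clears it again only after the hot config (or restart) has gone through, so an interrupted apply is retried against the real previous state. A simplified, self-contained sketch of that bookkeeping, using plain strings instead of the operator's builder.ValkeyConfig type (the helper names here are illustrative only):

// Simplified sketch of the last-applied-config flow used in actor_update_config.go.
package main

import "fmt"

// Same key as builder.LastAppliedConfigAnnotationKey in this diff.
const lastAppliedKey = "valkey.buf.red/last-applied-config"

type configMap struct {
	Annotations map[string]string
	Data        string
}

// markPending records the config we are migrating away from, unless a previous
// apply is still pending.
func markPending(cm *configMap, oldData string) {
	if cm.Annotations == nil {
		cm.Annotations = map[string]string{}
	}
	if _, ok := cm.Annotations[lastAppliedKey]; !ok {
		cm.Annotations[lastAppliedKey] = oldData
	}
}

// clearPending drops the marker once the new config has actually been applied.
func clearPending(cm *configMap) {
	delete(cm.Annotations, lastAppliedKey)
}

func main() {
	cm := &configMap{Data: "maxmemory 2gb"}
	markPending(cm, "maxmemory 1gb") // step 1: persist the old config before applying
	fmt.Println("pending:", cm.Annotations[lastAppliedKey])
	// ... CONFIG SET the hot keys or roll the statefulset here ...
	clearPending(cm) // step 2: nothing pending anymore
	fmt.Println("pending:", cm.Annotations)
}

isConfigMapChanged in engine.go (the next file in this diff) treats a non-empty annotation as a pending change, which is why the marker has to be cleared in the final UpdateConfigMap call.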
--- a/internal/ops/cluster/engine.go +++ b/internal/ops/cluster/engine.go @@ -316,7 +316,8 @@ func (g *RuleEngine) Inspect(ctx context.Context, val types.Instance) *actor.Act announceIP := node.DefaultIP().String() announcePort := node.Port() - if typ == corev1.ServiceTypeNodePort { + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "client") if port != nil { if int(port.NodePort) != announcePort { @@ -325,7 +326,7 @@ func (g *RuleEngine) Inspect(ctx context.Context, val types.Instance) *actor.Act } else { logger.Error(fmt.Errorf("service %s not found", node.GetName()), "failed to get service, which should not happen") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(ing corev1.LoadBalancerIngress) bool { return ing.IP == announceIP }) < 0 { @@ -534,9 +535,20 @@ func (g *RuleEngine) isCustomServerChanged(ctx context.Context, cluster types.Cl func (g *RuleEngine) isConfigMapChanged(ctx context.Context, cluster types.ClusterInstance) (bool, error) { logger := g.logger.WithName("isConfigMapChanged") + + // check if all pod fullfilled + for _, shard := range cluster.Shards() { + for _, node := range shard.Nodes() { + if node.CurrentVersion() != cluster.Version() { + // postpone the configmap check + return false, nil + } + } + } + newCm, _ := clusterbuilder.NewConfigMapForCR(cluster) oldCm, err := g.client.GetConfigMap(ctx, newCm.Namespace, newCm.Name) - if errors.IsNotFound(err) || oldCm.Data[builder.ValkeyConfigKey] == "" { + if errors.IsNotFound(err) || (oldCm != nil && oldCm.Data[builder.ValkeyConfigKey] == "") { return true, nil } else if err != nil { logger.Error(err, "get old configmap failed") @@ -550,6 +562,9 @@ func (g *RuleEngine) isConfigMapChanged(ctx context.Context, cluster types.Clust if len(added)+len(changed)+len(deleted) != 0 { return true, nil } + if oldCm.Annotations[builder.LastAppliedConfigAnnotationKey] != "" { + return true, nil + } return false, nil } diff --git a/internal/ops/failover/actor/actor_ensure_resource.go b/internal/ops/failover/actor/actor_ensure_resource.go index 2be5baa..360ae2f 100644 --- a/internal/ops/failover/actor/actor_ensure_resource.go +++ b/internal/ops/failover/actor/actor_ensure_resource.go @@ -21,6 +21,7 @@ import ( "fmt" "reflect" "slices" + "strings" "time" "github.com/Masterminds/semver/v3" @@ -39,6 +40,8 @@ import ( "github.com/chideat/valkey-operator/internal/util" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/samber/lo" "github.com/go-logr/logr" @@ -87,7 +90,10 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.pauseSentinel(ctx, inst, logger); ret != nil { return ret } - return actor.Pause() + if len(inst.Nodes()) == 0 { + return actor.Pause() + } + return actor.Requeue() } if ret := a.ensureValkeySSL(ctx, inst, logger); ret != nil { @@ -99,19 +105,19 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.ensureSentinel(ctx, inst, logger); ret != nil { return ret } - if ret := a.ensureService(ctx, inst, logger); ret != nil { + if ret := a.ensureConfigMap(ctx, inst, logger); ret != nil { return ret } - if ret := a.ensureConfigMap(ctx, inst, logger); ret != nil { + if ret := a.ensureService(ctx, inst, logger); ret != nil { return ret } - if ret := a.ensureValkeyStatefulSet(ctx, 
inst, logger); ret != nil { + if ret := a.ensureStatefulSet(ctx, inst, logger); ret != nil { return ret } return nil } -func (a *actorEnsureResource) ensureValkeyStatefulSet(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { +func (a *actorEnsureResource) ensureStatefulSet(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { var ( err error cr = inst.Definition() @@ -137,42 +143,55 @@ func (a *actorEnsureResource) ensureValkeyStatefulSet(ctx context.Context, inst return actor.RequeueWithError(err) } - if util.IsStatefulsetChanged(sts, oldSts, logger) { - if *oldSts.Spec.Replicas > *sts.Spec.Replicas { - // scale down - oldSts.Spec.Replicas = sts.Spec.Replicas - if err := a.client.UpdateStatefulSet(ctx, cr.Namespace, oldSts); err != nil { - logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) - return actor.RequeueWithError(err) + sts.Spec.Template.Annotations = builder.MergeRestartAnnotation(sts.Spec.Template.Annotations, oldSts.Spec.Template.Annotations) + + if changed, ichanged := util.IsStatefulsetChanged2(sts, oldSts, logger); changed { + // check if only mutable fields changed + if !ichanged { + if err := a.client.UpdateStatefulSet(ctx, cr.Namespace, sts); err != nil { + if strings.Contains(err.Error(), "updates to statefulset spec for fields other than") { + ichanged = true + } else { + logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) + return actor.RequeueWithError(err) + } } - time.Sleep(time.Second * 3) } - // patch pods with new labels in selector - pods, err := inst.RawNodes(ctx) - if err != nil { - logger.Error(err, "get pods failed") - return actor.RequeueWithError(err) - } - for _, item := range pods { - pod := item.DeepCopy() - pod.Labels = lo.Assign(pod.Labels, inst.Selector()) - if !reflect.DeepEqual(pod.Labels, item.Labels) { - if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { - logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + if ichanged { + if *oldSts.Spec.Replicas > *sts.Spec.Replicas { + oldSts.Spec.Replicas = sts.Spec.Replicas + if err := a.client.UpdateStatefulSet(ctx, cr.Namespace, oldSts); err != nil { + logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) return actor.RequeueWithError(err) } + time.Sleep(time.Second * 3) } - } - time.Sleep(time.Second * 3) - if err := a.client.DeleteStatefulSet(ctx, cr.Namespace, sts.Name, - client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { - logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) - } - if err = a.client.CreateStatefulSet(ctx, cr.Namespace, sts); err != nil { - logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) + + // patch pods with new labels in selector + pods, err := inst.RawNodes(ctx) + if err != nil { + logger.Error(err, "get pods failed") + return actor.RequeueWithError(err) + } + for _, item := range pods { + pod := item.DeepCopy() + pod.Labels = lo.Assign(pod.Labels, sts.Spec.Selector.MatchLabels) + logger.V(4).Info("check patch pod labels", "pod", item.Name, "labels", pod.Labels) + if !reflect.DeepEqual(pod.Labels, item.Labels) { + if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { + logger.Error(err, "patch pod label 
failed", "target", client.ObjectKeyFromObject(pod)) + return actor.RequeueWithError(err) + } + } + } + + if err := a.client.DeleteStatefulSet(ctx, cr.Namespace, sts.Name, + client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { + logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) + return actor.RequeueWithError(err) + } + return actor.Requeue() } } return nil @@ -345,7 +364,7 @@ func (a *actorEnsureResource) ensureSentinel(ctx context.Context, inst types.Fai logger.Error(err, "get sentinel failed", "target", client.ObjectKeyFromObject(newSen)) return actor.RequeueWithError(err) } - if !reflect.DeepEqual(newSen.Spec, oldSen.Spec) || + if !cmp.Equal(newSen.Spec, oldSen.Spec, cmpopts.EquateEmpty()) || !reflect.DeepEqual(newSen.Labels, oldSen.Labels) || !reflect.DeepEqual(newSen.Annotations, oldSen.Annotations) { oldSen.Spec = newSen.Spec @@ -360,47 +379,55 @@ func (a *actorEnsureResource) ensureSentinel(ctx context.Context, inst types.Fai } func (a *actorEnsureResource) ensureService(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { - cr := inst.Definition() - // read write svc - rwSvc := failoverbuilder.GenerateReadWriteService(cr) - roSvc := failoverbuilder.GenerateReadonlyService(cr) - if err := a.client.CreateOrUpdateIfServiceChanged(ctx, inst.GetNamespace(), rwSvc); err != nil { - return actor.RequeueWithError(err) - } - if err := a.client.CreateOrUpdateIfServiceChanged(ctx, inst.GetNamespace(), roSvc); err != nil { - return actor.RequeueWithError(err) - } - - selector := inst.Selector() - exporterService := failoverbuilder.GenerateExporterService(cr) - if err := a.client.CreateOrUpdateIfServiceChanged(ctx, inst.GetNamespace(), exporterService); err != nil { - return actor.RequeueWithError(err) - } + var ( + cr = inst.Definition() + selector = inst.Selector() + ) if ret := a.cleanUselessService(ctx, cr, logger, selector); ret != nil { return ret } - switch cr.Spec.Access.ServiceType { - case corev1.ServiceTypeNodePort: - if ret := a.ensureValkeySpecifiedNodePortService(ctx, inst, logger, selector); ret != nil { + + if cr.Spec.Access.ServiceType == corev1.ServiceTypeNodePort && cr.Spec.Access.Ports != "" { + if ret := a.ensureValkeySpecifiedNodePortService(ctx, inst, logger); ret != nil { return ret } - case corev1.ServiceTypeLoadBalancer: - if ret := a.ensureValkeyPodService(ctx, cr, logger, selector); ret != nil { - return ret + } else if ret := a.ensureValkeyPodService(ctx, inst, logger); ret != nil { + return ret + } + + for _, newSvc := range []*corev1.Service{ + failoverbuilder.GenerateReadWriteService(cr), + failoverbuilder.GenerateReadonlyService(cr), + failoverbuilder.GenerateExporterService(cr), + } { + if oldSvc, err := a.client.GetService(ctx, inst.GetNamespace(), newSvc.Name); errors.IsNotFound(err) { + if err := a.client.CreateService(ctx, inst.GetNamespace(), newSvc); err != nil { + logger.Error(err, "create service failed", "target", client.ObjectKeyFromObject(newSvc)) + return actor.RequeueWithError(err) + } + } else if err != nil { + logger.Error(err, "get service failed", "target", client.ObjectKeyFromObject(newSvc)) + return actor.RequeueWithError(err) + } else if util.IsServiceChanged(newSvc, oldSvc, logger) { + if err := a.client.UpdateService(ctx, inst.GetNamespace(), newSvc); err != nil { + logger.Error(err, "update service failed", "target", client.ObjectKeyFromObject(newSvc)) + return actor.RequeueWithError(err) + } + } else if 
oldSvc.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(oldSvc.Status.LoadBalancer.Ingress) == 0 && + time.Since(oldSvc.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb block ed pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available, please check the cloud provider")) } } return nil } func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.Context, - inst types.FailoverInstance, logger logr.Logger, selectors map[string]string) *actor.ActorResult { + inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { cr := inst.Definition() - if cr.Spec.Access.Ports == "" { - return a.ensureValkeyPodService(ctx, cr, logger, selectors) - } - logger.V(3).Info("ensure cluster nodeports", "namepspace", cr.Namespace, "name", cr.Name) configedPorts, err := helper.ParsePorts(cr.Spec.Access.Ports) if err != nil { @@ -454,6 +481,22 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C } } } + + for _, name := range []string{ + failoverbuilder.RWServiceName(cr.GetName()), + failoverbuilder.ROServiceName(cr.GetName()), + } { + if svc, err := a.client.GetService(ctx, cr.GetNamespace(), name); err != nil && !errors.IsNotFound(err) { + a.logger.Error(err, "get cluster nodeport service failed", "target", name) + return actor.RequeueWithError(err) + } else if svc != nil && slices.Contains(configedPorts, getClientPort(svc, "server")) { + if err := a.client.DeleteService(ctx, cr.GetNamespace(), svc.GetName()); err != nil { + a.logger.Error(err, "delete service failed", "target", client.ObjectKeyFromObject(svc)) + return actor.RequeueWithError(err) + } + } + } + if services, ret = a.fetchAllPodBindedServices(ctx, cr.Namespace, labels); ret != nil { return ret } @@ -465,8 +508,9 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C needUpdateServices []*corev1.Service ) for _, svc := range services { - svc := svc.DeepCopy() - bindedNodeports = append(bindedNodeports, getClientPort(svc)) + if svc.Spec.Type == corev1.ServiceTypeNodePort { + bindedNodeports = append(bindedNodeports, getClientPort(svc.DeepCopy())) + } } // filter used ports for _, port := range configedPorts { @@ -495,52 +539,81 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C svc := failoverbuilder.GeneratePodNodePortService(cr, i, getClientPort(oldService)) // check old service for compatibility - if len(oldService.OwnerReferences) == 0 || - oldService.OwnerReferences[0].Kind == "Pod" || - !reflect.DeepEqual(oldService.Spec, svc.Spec) || - !reflect.DeepEqual(oldService.Labels, svc.Labels) || - !reflect.DeepEqual(oldService.Annotations, svc.Annotations) { - - oldService.OwnerReferences = util.BuildOwnerReferences(cr) - oldService.Spec = svc.Spec - oldService.Labels = svc.Labels - oldService.Annotations = svc.Annotations - if err := a.client.UpdateService(ctx, oldService.Namespace, oldService); err != nil { + if util.IsServiceChanged(svc, oldService, logger) { + if err := a.client.UpdateService(ctx, oldService.Namespace, svc); err != nil { a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(oldService)) return actor.NewResultWithValue(ops.CommandRequeue, err) } } - if port := getClientPort(oldService); port != 0 && !slices.Contains(configedPorts, port) { - needUpdateServices = append(needUpdateServices, oldService) + svc.Spec.Type = corev1.ServiceTypeNodePort + if port := getClientPort(oldService); 
(port != 0 && !slices.Contains(configedPorts, port)) || + oldService.Spec.Type != corev1.ServiceTypeNodePort { + needUpdateServices = append(needUpdateServices, svc) } } // 3. update existing service and restart pod (only one pod is restarted at a same time for each shard) if len(needUpdateServices) > 0 && len(newPorts) > 0 { - port, svc := newPorts[0], needUpdateServices[0] - if sp := util.GetServicePortByName(svc, "client"); sp != nil { - sp.NodePort = port + // node must be ready, and the latest pod must be ready for about 60s for the cluster to sync info + if inst.Replication() != nil && (!inst.Replication().IsReady() || !func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }()) { + logger.Info("wait statefulset ready to update next NodePort") + return actor.Requeue() } - // NOTE: here not make sure the failover success, because the nodeport updated, the communication will be failed - // in k8s, the nodeport can still access for sometime after the nodeport updated - // - // update service - if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { - a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) - return actor.NewResultWithValue(ops.CommandRequeue, err) - } - if pod, _ := a.client.GetPod(ctx, cr.Namespace, svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { - if err := a.client.DeletePod(ctx, cr.Namespace, pod.Name); err != nil { - return actor.RequeueWithError(err) + for i := len(needUpdateServices) - 1; i >= 0; i-- { + if len(newPorts) <= i { + logger.Error(fmt.Errorf("update nodeport failed"), "not enough nodeport for service", "ports", newPorts) + return actor.NewResultWithValue(ops.CommandRequeue, fmt.Errorf("not enough nodeport for service, please check the config")) + } + port, svc := newPorts[i], needUpdateServices[i] + if oldPort := getClientPort(svc); slices.Contains(newPorts, oldPort) { + port = oldPort + } + if sp := util.GetServicePortByName(svc, "client"); sp != nil { + sp.NodePort = port + } + tmpNewPorts := newPorts + newPorts = newPorts[0:0] + for _, p := range tmpNewPorts { + if p != port { + newPorts = append(newPorts, p) + } + } + // NOTE: we do not ensure the failover succeeds here; once the nodeport is updated, communication may fail, + // but in k8s the old nodeport usually remains accessible for some time after the update + // + // update service + if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { + a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, cr.Namespace, svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, cr.Namespace, pod.Name); err != nil { + return actor.RequeueWithError(err) + } + return actor.RequeueAfter(time.Second * 5) } - return actor.NewResult(ops.CommandRequeue) } } return nil } -func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, rf *v1alpha1.Failover, logger logr.Logger, selectors map[string]string) *actor.ActorResult { +func
(a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { + var ( + rf = inst.Definition() + needUpdateServices []*corev1.Service + ) + for i := 0; i < int(rf.Spec.Replicas); i++ { newSvc := failoverbuilder.GeneratePodService(rf, i) if svc, err := a.client.GetService(ctx, rf.Namespace, newSvc.Name); errors.IsNotFound(err) { @@ -551,16 +624,43 @@ func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, rf *v1 } else if err != nil { logger.Error(err, "get service failed", "target", client.ObjectKeyFromObject(newSvc)) return actor.NewResult(ops.CommandRequeue) - } else if newSvc.Spec.Type != svc.Spec.Type || - !reflect.DeepEqual(newSvc.Spec.Selector, svc.Spec.Selector) || - !reflect.DeepEqual(newSvc.Labels, svc.Labels) || - !reflect.DeepEqual(newSvc.Annotations, svc.Annotations) { - svc.Spec = newSvc.Spec - svc.Labels = newSvc.Labels - svc.Annotations = newSvc.Annotations - if err = a.client.UpdateService(ctx, rf.Namespace, svc); err != nil { - logger.Error(err, "update service failed", "target", client.ObjectKeyFromObject(svc)) - return actor.RequeueWithError(err) + } else if util.IsServiceChanged(newSvc, svc, logger) { + needUpdateServices = append(needUpdateServices, newSvc) + } else if svc.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(svc.Status.LoadBalancer.Ingress) == 0 && + time.Since(svc.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb blocked pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available, please check the cloud provider")) + } + } + + if len(needUpdateServices) > 0 { + if inst.Replication() != nil && !(inst.Replication().Definition().Status.ReadyReplicas == 0 || (inst.Replication().IsReady() && func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }())) { + logger.V(1).Info("wait statefulset ready to update next service") + return actor.Requeue() + } + + for i := len(needUpdateServices) - 1; i >= 0; i-- { + svc := needUpdateServices[i] + if err := a.client.UpdateService(ctx, inst.GetNamespace(), svc); err != nil { + logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc)) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, inst.GetNamespace(), svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, inst.GetNamespace(), pod.Name); err != nil { + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return actor.RequeueAfter(time.Second * 5) } } } diff --git a/internal/ops/failover/actor/actor_heal_monitor.go b/internal/ops/failover/actor/actor_heal_monitor.go index de9a5f0..da721a3 100644 --- a/internal/ops/failover/actor/actor_heal_monitor.go +++ b/internal/ops/failover/actor/actor_heal_monitor.go @@ -103,7 +103,15 @@ func (a *actorHealMaster) Do(ctx context.Context, val types.Instance) *actor.Act onlineNodeCount += 1 } } - } else if !errors.Is(err, monitor.ErrNoMaster) && !errors.Is(err, monitor.ErrAddressConflict) { + } else if errors.Is(err,
monitor.ErrAddressConflict) { + // do failover to force sentinel update node's announce info + if err := instMonitor.Failover(ctx); err != nil { + logger.Error(err, "do manual failover failed") + // continue with master setup + } else { + return actor.RequeueAfter(time.Second * 10) + } + } else if !errors.Is(err, monitor.ErrNoMaster) { logger.Error(err, "failed to get master node") return actor.RequeueWithError(err) } diff --git a/internal/ops/failover/actor/actor_heal_pod.go b/internal/ops/failover/actor/actor_heal_pod.go index d5b0d09..b3cb0ab 100644 --- a/internal/ops/failover/actor/actor_heal_pod.go +++ b/internal/ops/failover/actor/actor_heal_pod.go @@ -129,7 +129,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsistent announce %s", node.GetName()) return actor.Requeue() } } @@ -145,7 +145,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsistent announce %s", node.GetName()) return actor.Requeue() } } diff --git a/internal/ops/failover/actor/actor_update_config.go b/internal/ops/failover/actor/actor_update_config.go index 3acc3a7..d91a163 100644 --- a/internal/ops/failover/actor/actor_update_config.go +++ b/internal/ops/failover/actor/actor_update_config.go @@ -72,57 +72,85 @@ func (a *actorUpdateConfigMap) Do(ctx context.Context, val types.Instance) *acto if errors.IsNotFound(err) || oldCm.Data[builder.ValkeyConfigKey] == "" { return actor.NewResultWithError(ops.CommandEnsureResource, fmt.Errorf("configmap %s not found", newCm.GetName())) } else if err != nil { - return actor.NewResultWithError(ops.CommandRequeue, err) + return actor.RequeueWithError(err) } - newConf, _ := builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) - oldConf, _ := builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) - added, changed, deleted := oldConf.Diff(newConf) - if len(deleted) > 0 || len(added) > 0 || len(changed) > 0 { - // NOTE: update configmap first may cause the hot config fail for it will not retry again - if err := a.client.UpdateConfigMap(ctx, newCm.GetNamespace(), newCm); err != nil { - logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) - return actor.NewResultWithError(ops.CommandRequeue, err) - } + + var ( + newConf builder.ValkeyConfig + oldConf builder.ValkeyConfig + ) + if lastAppliedConf := oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(lastAppliedConf) + } else { + newConf, _ = builder.LoadValkeyConfig(newCm.Data[builder.ValkeyConfigKey]) + oldConf, _ = builder.LoadValkeyConfig(oldCm.Data[builder.ValkeyConfigKey]) } + + added, changed, deleted := oldConf.Diff(newConf) maps.Copy(changed, added) - foundRestartApplyConfig := false - for key := range changed { - if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { - foundRestartApplyConfig = true - break + if len(deleted)+len(changed) > 0 { + conf := newCm.DeepCopy() + if lastAppliedConf :=
oldCm.Annotations[builder.LastAppliedConfigAnnotationKey]; lastAppliedConf != "" { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = lastAppliedConf + } else { + conf.Annotations[builder.LastAppliedConfigAnnotationKey] = oldCm.Data[builder.ValkeyConfigKey] } - } - if foundRestartApplyConfig { - err := st.Restart(ctx) - if err != nil { - logger.Error(err, "restart instance failed") + + // update configmap with last applied config + if err := a.client.UpdateConfigMap(ctx, conf.GetNamespace(), conf); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(conf)) return actor.NewResultWithError(ops.CommandRequeue, err) - } - } else { - var margs [][]any - for key, vals := range changed { - logger.V(2).Info("hot config ", "key", key, "value", vals.String()) - margs = append(margs, []any{"config", "set", key, vals.String()}) - } - var ( - isUpdateFailed = false - err error - ) - for _, node := range st.Nodes() { - if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || - node.IsTerminating() { - continue - } - if err = node.Setup(ctx, margs...); err != nil { - isUpdateFailed = true + } + + if len(changed) > 0 { + foundRestartApplyConfig := false + for key := range changed { + if policy := builder.ValkeyConfigRestartPolicy[key]; policy == builder.RequireRestart { + foundRestartApplyConfig = true break } } + if foundRestartApplyConfig { + err := st.Restart(ctx) + if err != nil { + logger.Error(err, "restart valkey failed") + return actor.NewResultWithError(ops.CommandRequeue, err) + } + } else { + var margs [][]any + for key, vals := range changed { + logger.V(2).Info("hot config ", "key", key, "value", vals.String()) + margs = append(margs, []any{"config", "set", key, vals.String()}) + } + var ( + isUpdateFailed = false + err error + ) + for _, node := range st.Nodes() { + if node.ContainerStatus() == nil || !node.ContainerStatus().Ready || + node.IsTerminating() { + continue + } + if err = node.Setup(ctx, margs...); err != nil { + isUpdateFailed = true + break + } + } - if !isUpdateFailed { - return actor.NewResultWithError(ops.CommandRequeue, err) + if isUpdateFailed { + return actor.NewResultWithError(ops.CommandRequeue, err) + } } } + + // update configmap without last applied config + if err := a.client.UpdateConfigMap(ctx, newCm.GetNamespace(), newCm); err != nil { + logger.Error(err, "update config failed", "target", client.ObjectKeyFromObject(newCm)) + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return nil } diff --git a/internal/ops/failover/engine.go b/internal/ops/failover/engine.go index d5d87a0..cedc761 100644 --- a/internal/ops/failover/engine.go +++ b/internal/ops/failover/engine.go @@ -189,12 +189,20 @@ func (g *RuleEngine) isPasswordChanged(ctx context.Context, inst types.FailoverI } func (g *RuleEngine) isConfigChanged(ctx context.Context, inst types.FailoverInstance, logger logr.Logger) *actor.ActorResult { + // check if all pods are fulfilled + for _, node := range inst.Nodes() { + if node.CurrentVersion() != inst.Version() { + logger.V(3).Info("node version not match", "node", node.GetName(), "version", node.CurrentVersion(), "expect", inst.Version()) + return actor.NewResult(CommandEnsureResource) + } + } + newCm, err := failoverbuilder.GenerateConfigMap(inst) if err != nil { return actor.RequeueWithError(err) } oldCm, err := g.client.GetConfigMap(ctx, newCm.GetNamespace(), newCm.GetName()) - if errors.IsNotFound(err) || oldCm.Data[builder.ValkeyConfigKey] == "" { + if errors.IsNotFound(err) || (oldCm
!= nil && oldCm.Data[builder.ValkeyConfigKey] == "") { err := fmt.Errorf("configmap %s not found", newCm.GetName()) return actor.NewResultWithError(CommandEnsureResource, err) } else if err != nil { @@ -206,6 +214,9 @@ func (g *RuleEngine) isConfigChanged(ctx context.Context, inst types.FailoverIns if len(added)+len(changed)+len(deleted) != 0 { return actor.NewResult(CommandUpdateConfig) } + if oldCm.Annotations[builder.LastAppliedConfigAnnotationKey] != "" { + return actor.NewResult(CommandUpdateConfig) + } if inst.Monitor().Policy() == v1.SentinelFailoverPolicy { // HACK: check and update sentinel monitor config @@ -232,7 +243,13 @@ func (g *RuleEngine) isNodesHealthy(ctx context.Context, inst types.FailoverInst } else if err != nil { return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + + if svc.Spec.Type != typ { + return actor.NewResult(CommandEnsureResource) + } + + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "client") if port != nil { if int(port.NodePort) != announcePort { @@ -241,7 +258,7 @@ func (g *RuleEngine) isNodesHealthy(ctx context.Context, inst types.FailoverInst } else { logger.Error(fmt.Errorf("service %s not found", node.GetName()), "failed to get service, which should not happen") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(i corev1.LoadBalancerIngress) bool { return i.IP == announceIP }) < 0 { diff --git a/internal/ops/sentinel/actor/actor_ensure_resource.go b/internal/ops/sentinel/actor/actor_ensure_resource.go index 131e981..57d2e09 100644 --- a/internal/ops/sentinel/actor/actor_ensure_resource.go +++ b/internal/ops/sentinel/actor/actor_ensure_resource.go @@ -21,6 +21,7 @@ import ( "fmt" "reflect" "slices" + "strings" "time" "github.com/Masterminds/semver/v3" @@ -31,11 +32,14 @@ import ( "github.com/chideat/valkey-operator/internal/builder/certbuilder" "github.com/chideat/valkey-operator/internal/builder/sabuilder" "github.com/chideat/valkey-operator/internal/builder/sentinelbuilder" + "github.com/chideat/valkey-operator/internal/config" ops "github.com/chideat/valkey-operator/internal/ops/sentinel" "github.com/chideat/valkey-operator/internal/util" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/samber/lo" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -82,22 +86,24 @@ func (a *actorEnsureResource) Do(ctx context.Context, val types.Instance) *actor if ret := a.ensurePauseStatefulSet(ctx, sentinel, logger); ret != nil { return ret } - return actor.NewResult(ops.CommandPaused) + if len(sentinel.Nodes()) == 0 { + return actor.Pause() + } + return actor.Requeue() } if ret := a.ensureServiceAccount(ctx, sentinel, logger); ret != nil { return ret } - if ret := a.ensureService(ctx, sentinel, logger); ret != nil { - return ret - } - // ensure configMap if ret := a.ensureConfigMap(ctx, sentinel, logger); ret != nil { return ret } if ret := a.ensureValkeySSL(ctx, sentinel, logger); ret != nil { return ret } + if ret := a.ensureService(ctx, sentinel, logger); ret != nil { + return ret + } if ret := a.ensureStatefulSet(ctx, sentinel, logger); ret != nil { return ret } @@ -152,42 +158,51 @@ func (a *actorEnsureResource) ensureStatefulSet(ctx context.Context, inst types. 
} else if err != nil { logger.Error(err, "get statefulset failed", "target", client.ObjectKeyFromObject(sts)) return actor.NewResultWithError(ops.CommandRequeue, err) - } else if util.IsStatefulsetChanged(sts, oldSts, logger) { - if *oldSts.Spec.Replicas > *sts.Spec.Replicas { - oldSts.Spec.Replicas = sts.Spec.Replicas - if err := a.client.UpdateStatefulSet(ctx, sen.Namespace, oldSts); err != nil { - logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) - return actor.RequeueWithError(err) + } else if changed, ichanged := util.IsStatefulsetChanged2(sts, oldSts, logger); changed { + if !ichanged { + if err := a.client.UpdateStatefulSet(ctx, sen.Namespace, sts); err != nil { + if strings.Contains(err.Error(), "updates to statefulset spec for fields other than") { + ichanged = true + } else { + logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) + return actor.RequeueWithError(err) + } } - time.Sleep(time.Second * 3) } - pods, err := inst.RawNodes(ctx) - if err != nil { - logger.Error(err, "get pods failed") - return actor.RequeueWithError(err) - } - for _, item := range pods { - pod := item.DeepCopy() - pod.Labels = lo.Assign(pod.Labels, inst.Selector()) - if !reflect.DeepEqual(pod.Labels, item.Labels) { - if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { - logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + if ichanged { + if *oldSts.Spec.Replicas > *sts.Spec.Replicas { + oldSts.Spec.Replicas = sts.Spec.Replicas + if err := a.client.UpdateStatefulSet(ctx, sen.Namespace, oldSts); err != nil { + logger.Error(err, "scale down statefulset failed", "target", client.ObjectKeyFromObject(oldSts)) return actor.RequeueWithError(err) } + time.Sleep(time.Second * 3) } - } - if err := a.client.DeleteStatefulSet(ctx, sen.Namespace, sts.GetName(), - client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { + pods, err := inst.RawNodes(ctx) + if err != nil { + logger.Error(err, "get pods failed") + return actor.RequeueWithError(err) + } + for _, item := range pods { + pod := item.DeepCopy() + pod.Labels = lo.Assign(pod.Labels, sts.Spec.Selector.MatchLabels) + if !reflect.DeepEqual(pod.Labels, item.Labels) { + if err := a.client.UpdatePod(ctx, pod.GetNamespace(), pod); err != nil { + logger.Error(err, "patch pod label failed", "target", client.ObjectKeyFromObject(pod)) + return actor.RequeueWithError(err) + } + } + } - logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) - } - time.Sleep(time.Second * 3) - if err = a.client.CreateStatefulSet(ctx, sen.Namespace, sts); err != nil { - logger.Error(err, "update statefulset failed", "target", client.ObjectKeyFromObject(sts)) - return actor.RequeueWithError(err) + if err := a.client.DeleteStatefulSet(ctx, sen.Namespace, sts.GetName(), + client.PropagationPolicy(metav1.DeletePropagationOrphan)); err != nil && !errors.IsNotFound(err) { + + logger.Error(err, "delete old statefulset failed", "target", client.ObjectKeyFromObject(sts)) + return actor.RequeueWithError(err) + } + return actor.Requeue() } } return nil @@ -204,7 +219,7 @@ func (a *actorEnsureResource) ensurePodDisruptionBudget(ctx context.Context, ins } else if err != nil { logger.Error(err, "get poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) return actor.NewResultWithError(ops.CommandRequeue, err) - } else if 
!reflect.DeepEqual(oldPdb.Spec, pdb.Spec) { + } else if !cmp.Equal(oldPdb.Spec, pdb.Spec, cmpopts.EquateEmpty()) { pdb.ResourceVersion = oldPdb.ResourceVersion if err := a.client.UpdatePodDisruptionBudget(ctx, sen.Namespace, pdb); err != nil { logger.Error(err, "update poddisruptionbudget failed", "target", client.ObjectKeyFromObject(pdb)) @@ -380,39 +395,36 @@ func (a *actorEnsureResource) ensureService(ctx context.Context, inst types.Sent return ret } - createService := func(senService *corev1.Service) *actor.ActorResult { + ensureService := func(senService *corev1.Service) *actor.ActorResult { if oldService, err := a.client.GetService(ctx, sen.GetNamespace(), senService.Name); errors.IsNotFound(err) { if err := a.client.CreateService(ctx, sen.GetNamespace(), senService); err != nil { return actor.NewResultWithError(ops.CommandRequeue, err) } } else if err != nil { return actor.NewResultWithError(ops.CommandRequeue, err) - } else if senService.Spec.Type != oldService.Spec.Type || - (senService.Spec.Type == corev1.ServiceTypeNodePort && senService.Spec.Ports[0].NodePort != oldService.Spec.Ports[0].NodePort) || - !reflect.DeepEqual(senService.Spec.Selector, oldService.Spec.Selector) || - !reflect.DeepEqual(senService.Labels, oldService.Labels) || - !reflect.DeepEqual(senService.Annotations, oldService.Annotations) { - + } else if util.IsServiceChanged(senService, oldService, logger) { if err := a.client.UpdateService(ctx, sen.GetNamespace(), senService); err != nil { return actor.NewResultWithError(ops.CommandRequeue, err) } + } else if oldService.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(oldService.Status.LoadBalancer.Ingress) == 0 && + time.Since(oldService.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb blocked pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available for service %s, please check the cloud provider", oldService.Name)) } return nil } - if ret := createService(sentinelbuilder.GenerateSentinelHeadlessService(sen)); ret != nil { - return ret - } - - switch sen.Spec.Access.ServiceType { - case corev1.ServiceTypeNodePort: + if sen.Spec.Access.ServiceType == corev1.ServiceTypeNodePort && sen.Spec.Access.Ports != "" { if ret := a.ensureValkeySpecifiedNodePortService(ctx, inst, logger); ret != nil { return ret } - case corev1.ServiceTypeLoadBalancer: - if ret := a.ensureValkeyPodService(ctx, inst, logger); ret != nil { - return ret - } + } else if ret := a.ensureValkeyPodService(ctx, inst, logger); ret != nil { + return ret + } + + if ret := ensureService(sentinelbuilder.GenerateSentinelHeadlessService(sen)); ret != nil { + return ret } return nil } @@ -420,10 +432,6 @@ func (a *actorEnsureResource) ensureService(ctx context.Context, inst types.Sent func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.Context, inst types.SentinelInstance, logger logr.Logger) *actor.ActorResult { sen := inst.Definition() - if sen.Spec.Access.Ports == "" { - return a.ensureValkeyPodService(ctx, inst, logger) - } - logger.V(3).Info("ensure sentinel nodeports", "namepspace", sen.Namespace, "name", sen.Name) configedPorts, err := helper.ParsePorts(sen.Spec.Access.Ports) if err != nil { @@ -489,7 +497,9 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C needUpdateServices []*corev1.Service ) for _, svc := range services { - bindedNodeports = append(bindedNodeports, getClientPort(svc.DeepCopy())) + if svc.Spec.Type == corev1.ServiceTypeNodePort {
+ bindedNodeports = append(bindedNodeports, getClientPort(svc.DeepCopy())) + } } // filter used ports @@ -519,52 +529,81 @@ func (a *actorEnsureResource) ensureValkeySpecifiedNodePortService(ctx context.C svc := sentinelbuilder.GeneratePodNodePortService(sen, i, getClientPort(oldService)) // check old service for compatibility - if !reflect.DeepEqual(oldService.Spec.Selector, svc.Spec.Selector) || - len(oldService.Spec.Ports) != len(svc.Spec.Ports) || - !reflect.DeepEqual(oldService.Labels, svc.Labels) || - !reflect.DeepEqual(oldService.Annotations, svc.Annotations) { - - oldService.OwnerReferences = util.BuildOwnerReferences(sen) - oldService.Spec = svc.Spec - oldService.Labels = svc.Labels - oldService.Annotations = svc.Annotations - if err := a.client.UpdateService(ctx, oldService.Namespace, oldService); err != nil { + svc.Spec.Type = oldService.Spec.Type + if util.IsServiceChanged(oldService, svc, logger) { + if err := a.client.UpdateService(ctx, oldService.Namespace, svc); err != nil { a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(oldService)) return actor.NewResultWithValue(ops.CommandRequeue, err) } } - if port := getClientPort(oldService); port != 0 && !slices.Contains(configedPorts, port) { - needUpdateServices = append(needUpdateServices, oldService) + svc.Spec.Type = corev1.ServiceTypeNodePort + if port := getClientPort(oldService); (port != 0 && !slices.Contains(configedPorts, port)) || + oldService.Spec.Type != corev1.ServiceTypeNodePort { + needUpdateServices = append(needUpdateServices, svc) } } // 3. update existing service and restart pod (only one pod is restarted at a same time for each shard) if len(needUpdateServices) > 0 && len(newPorts) > 0 { - port, svc := newPorts[0], needUpdateServices[0] - if sp := util.GetServicePortByName(svc, "sentinel"); sp != nil { - sp.NodePort = port + if inst.Replication() != nil && !(inst.Replication().Definition().Status.ReadyReplicas == 0 || (inst.Replication().IsReady() && func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) == int(*inst.Replication().Definition().Spec.Replicas) + }())) { + logger.Info("wait statefulset ready to update next NodePort") + return actor.Requeue() } - // NOTE: here not make sure the failover success, because the nodeport updated, the communication will be failed - // in k8s, the nodeport can still access for sometime after the nodeport updated - // - // update service - if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { - a.logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) - return actor.NewResultWithValue(ops.CommandRequeue, err) - } - if pod, _ := a.client.GetPod(ctx, sen.Namespace, svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { - if err := a.client.DeletePod(ctx, sen.Namespace, pod.Name); err != nil { - return actor.NewResultWithError(ops.CommandRequeue, err) + for i := len(needUpdateServices) - 1; i >= 0; i-- { + if len(newPorts) <= i { + logger.Error(fmt.Errorf("update nodeport failed"), "not enough nodeport for service", "ports", newPorts) + return actor.NewResultWithValue(ops.CommandRequeue, fmt.Errorf("not enough 
nodeport for service, please check the config")) + } + port, svc := newPorts[i], needUpdateServices[i] + if oldPort := getClientPort(svc); slices.Contains(newPorts, oldPort) { + port = oldPort + } + if sp := util.GetServicePortByName(svc, "sentinel"); sp != nil { + sp.NodePort = port + } + tmpNewPorts := newPorts + newPorts = newPorts[0:0] + for _, p := range tmpNewPorts { + if p != port { + newPorts = append(newPorts, p) + } + } + // NOTE: we do not ensure the failover succeeds here; once the nodeport is updated, communication may fail, + // but in k8s the old nodeport usually remains accessible for some time after the update + // + // update service + if err = a.client.UpdateService(ctx, svc.Namespace, svc); err != nil { + logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc), "port", port) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, inst.GetNamespace(), svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, inst.GetNamespace(), pod.Name); err != nil { + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return actor.RequeueAfter(time.Second * 5) } - return actor.NewResult(ops.CommandRequeue) } } return nil } func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, inst types.SentinelInstance, logger logr.Logger) *actor.ActorResult { - sen := inst.Definition() + var ( + sen = inst.Definition() + needUpdateServices []*corev1.Service + ) + for i := 0; i < int(sen.Spec.Replicas); i++ { newSvc := sentinelbuilder.GeneratePodService(sen, i) if svc, err := a.client.GetService(ctx, sen.Namespace, newSvc.Name); errors.IsNotFound(err) { @@ -575,14 +614,43 @@ func (a *actorEnsureResource) ensureValkeyPodService(ctx context.Context, inst t } else if err != nil { logger.Error(err, "get service failed", "target", client.ObjectKeyFromObject(newSvc)) return actor.NewResult(ops.CommandRequeue) - } else if newSvc.Spec.Type != svc.Spec.Type || - !reflect.DeepEqual(newSvc.Spec.Selector, svc.Spec.Selector) || - !reflect.DeepEqual(newSvc.Labels, svc.Labels) || - !reflect.DeepEqual(newSvc.Annotations, svc.Annotations) { - svc.Spec = newSvc.Spec - if err = a.client.UpdateService(ctx, sen.Namespace, svc); err != nil { - logger.Error(err, "update service failed", "target", client.ObjectKeyFromObject(svc)) - return actor.NewResultWithError(ops.CommandRequeue, err) + } else if util.IsServiceChanged(newSvc, svc, logger) { + needUpdateServices = append(needUpdateServices, newSvc) + } else if svc.Spec.Type == corev1.ServiceTypeLoadBalancer && + len(svc.Status.LoadBalancer.Ingress) == 0 && + time.Since(svc.GetCreationTimestamp().Time) >= config.LoadbalancerReadyTimeout() { + // if lb blocked pending for 2mins, return no lb usable error + return actor.RequeueWithError(fmt.Errorf("no loadbalancer available for service %s, please check the cloud provider", svc.Name)) + } + } + + if len(needUpdateServices) > 0 { + if inst.Replication() != nil && !(inst.Replication().Definition().Status.ReadyReplicas == 0 || (inst.Replication().IsReady() && func() bool { + ts := time.Now() + for _, node := range inst.Replication().Nodes() { + if cond, exists := lo.Find(node.Definition().Status.Conditions, func(item corev1.PodCondition) bool { + return item.Type == corev1.PodReady && item.Status == corev1.ConditionTrue + }); !exists || cond.LastTransitionTime.Time.Add(time.Second*30).After(ts) { + return false + } + } + return len(inst.Replication().Nodes()) ==
int(*inst.Replication().Definition().Spec.Replicas) + }())) { + logger.Info("wait statefulset ready to update next service") + return actor.Requeue() + } + + for i := len(needUpdateServices) - 1; i >= 0; i-- { + svc := needUpdateServices[i] + if err := a.client.UpdateService(ctx, inst.GetNamespace(), svc); err != nil { + logger.Error(err, "update nodeport service failed", "target", client.ObjectKeyFromObject(svc)) + return actor.NewResultWithValue(ops.CommandRequeue, err) + } + if pod, _ := a.client.GetPod(ctx, inst.GetNamespace(), svc.Spec.Selector[builder.PodNameLabelKey]); pod != nil { + if err := a.client.DeletePod(ctx, inst.GetNamespace(), pod.Name); err != nil { + return actor.NewResultWithError(ops.CommandRequeue, err) + } + return actor.RequeueAfter(time.Second * 5) } } } diff --git a/internal/ops/sentinel/actor/actor_heal_monitor.go b/internal/ops/sentinel/actor/actor_heal_monitor.go index 6d4c71a..8880fb5 100644 --- a/internal/ops/sentinel/actor/actor_heal_monitor.go +++ b/internal/ops/sentinel/actor/actor_heal_monitor.go @@ -26,6 +26,7 @@ import ( ops "github.com/chideat/valkey-operator/internal/ops/sentinel" "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/types" + ptypes "github.com/chideat/valkey-operator/pkg/types/user" "github.com/chideat/valkey-operator/pkg/valkey" "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" @@ -79,6 +80,10 @@ func (a *actorHealMonitor) Do(ctx context.Context, val types.Instance) *actor.Ac user = inst.Users().GetOpUser() tls = inst.TLSConfig() ) + if user == nil { + user = &ptypes.User{} + } + for name, nodes := range unknownSentinels { // set sentinels for _, node := range inst.Nodes() { diff --git a/internal/ops/sentinel/actor/actor_heal_pod.go b/internal/ops/sentinel/actor/actor_heal_pod.go index 91bec54..49b183c 100644 --- a/internal/ops/sentinel/actor/actor_heal_pod.go +++ b/internal/ops/sentinel/actor/actor_heal_pod.go @@ -132,7 +132,8 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR logger.Error(err, "get service failed", "name", node.GetName()) return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "sentinel") if port != nil { if int(port.NodePort) != announcePort { @@ -141,14 +142,14 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsistent announce %s", node.GetName()) return actor.Requeue() } } } else { logger.Error(fmt.Errorf("service port not found"), "service port not found", "name", node.GetName(), "port", "sentinel") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if index := slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(ing corev1.LoadBalancerIngress) bool { return ing.IP == announceIP || ing.Hostname == announceIP }); index < 0 { @@ -157,7 +158,7 @@ func (a *actorHealPod) Do(ctx context.Context, val types.Instance) *actor.ActorR return actor.RequeueWithError(err) } else { inst.SendEventf(corev1.EventTypeWarning, config.EventCleanResource, - "force delete pod with inconsist annotation %s", node.GetName()) + "force delete pod with inconsistent announce %s", node.GetName()) return actor.Requeue() } } diff --git a/internal/ops/sentinel/engine.go
b/internal/ops/sentinel/engine.go index 70ece8e..b5f079d 100644 --- a/internal/ops/sentinel/engine.go +++ b/internal/ops/sentinel/engine.go @@ -114,7 +114,13 @@ func (g *RuleEngine) isPodHealNeeded(ctx context.Context, inst types.SentinelIns } else if err != nil { return actor.RequeueWithError(err) } - if typ == corev1.ServiceTypeNodePort { + + if svc.Spec.Type != typ { + return actor.NewResult(CommandEnsureResource) + } + + switch typ { + case corev1.ServiceTypeNodePort: port := util.GetServicePortByName(svc, "sentinel") if port != nil { if int(port.NodePort) != announcePort { @@ -123,7 +129,7 @@ func (g *RuleEngine) isPodHealNeeded(ctx context.Context, inst types.SentinelIns } else { logger.Error(fmt.Errorf("service %s not found", node.GetName()), "failed to get service, which should not happen") } - } else if typ == corev1.ServiceTypeLoadBalancer { + case corev1.ServiceTypeLoadBalancer: if slices.IndexFunc(svc.Status.LoadBalancer.Ingress, func(i corev1.LoadBalancerIngress) bool { return i.IP == announceIP }) < 0 { diff --git a/internal/util/kubernetes.go b/internal/util/kubernetes.go index c19da50..e6c3c23 100644 --- a/internal/util/kubernetes.go +++ b/internal/util/kubernetes.go @@ -22,6 +22,8 @@ import ( "time" "github.com/go-logr/logr" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -129,11 +131,43 @@ func GetVolumeByName(vols []corev1.Volume, name string) *corev1.Volume { return nil } +// isSubmap checks if map 'a' is a submap of map 'b'. +// It returns true if every key-value pair in 'a' is also present in 'b'. +func isSubmap[K, V comparable](a, b map[K]V) bool { + if len(a) == 0 { + return true + } + if len(b) == 0 { + return false + } + for keyA, valA := range a { + valB, ok := b[keyA] + if !ok || valA != valB { + return false + } + } + return true +} + +func IsStatefulsetChanged2(newSts, sts *appsv1.StatefulSet, logger logr.Logger) (bool, bool) { + changed := IsStatefulsetChanged(newSts, sts, logger) + if !changed { + return false, false + } + + immutableChanged := !cmp.Equal(newSts.Spec, sts.Spec, cmpopts.EquateEmpty(), + cmpopts.IgnoreFields(appsv1.StatefulSetSpec{}, + "Replicas", "Ordinals", "Template", "UpdateStrategy", + "PersistentVolumeClaimRetentionPolicy", "MinReadySeconds")) + + return changed, immutableChanged +} + // IsStatefulsetChanged func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) bool { // statefulset check - if !reflect.DeepEqual(newSts.GetLabels(), sts.GetLabels()) || - !reflect.DeepEqual(newSts.GetAnnotations(), sts.GetAnnotations()) { + if !cmp.Equal(newSts.GetLabels(), sts.GetLabels(), cmpopts.EquateEmpty()) || + !cmp.Equal(newSts.GetAnnotations(), sts.GetAnnotations(), cmpopts.EquateEmpty()) { logger.V(2).Info("labels or annotations diff") return true } @@ -166,7 +200,7 @@ func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) b return true } - if !reflect.DeepEqual(oldPvc.Spec, newPvc.Spec) { + if !cmp.Equal(oldPvc.Spec, newPvc.Spec, cmpopts.EquateEmpty()) { logger.V(2).Info("pvc diff", "name", name, "old", oldPvc.Spec, "new", newPvc.Spec) return true } @@ -177,9 +211,13 @@ func IsStatefulsetChanged(newSts, sts *appsv1.StatefulSet, logger logr.Logger) b func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logger logr.Logger) bool { if (newTplSpec == nil && oldTplSpec != nil) || (newTplSpec != nil && oldTplSpec == nil) || - !reflect.DeepEqual(newTplSpec.Labels, 
oldTplSpec.Labels) || - !reflect.DeepEqual(newTplSpec.Annotations, oldTplSpec.Annotations) { - logger.V(2).Info("pod labels diff") + !cmp.Equal(newTplSpec.Labels, oldTplSpec.Labels, cmpopts.EquateEmpty()) || + !isSubmap(newTplSpec.Annotations, oldTplSpec.Annotations) { + + logger.V(2).Info("pod labels diff", + "newLabels", newTplSpec.Labels, "oldLabels", oldTplSpec.Labels, + "newAnnotations", newTplSpec.Annotations, "oldAnnotations", oldTplSpec.Annotations, + ) return true } @@ -191,14 +229,14 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge } // nodeselector - if !reflect.DeepEqual(newSpec.NodeSelector, oldSpec.NodeSelector) || - !reflect.DeepEqual(newSpec.Affinity, oldSpec.Affinity) || - !reflect.DeepEqual(newSpec.Tolerations, oldSpec.Tolerations) { + if !cmp.Equal(newSpec.NodeSelector, oldSpec.NodeSelector, cmpopts.EquateEmpty()) || + !cmp.Equal(newSpec.Affinity, oldSpec.Affinity, cmpopts.EquateEmpty()) || + !cmp.Equal(newSpec.Tolerations, oldSpec.Tolerations, cmpopts.EquateEmpty()) { logger.V(2).Info("pod nodeselector|affinity|tolerations diff") return true } - if !reflect.DeepEqual(newSpec.SecurityContext, oldSpec.SecurityContext) || + if !cmp.Equal(newSpec.SecurityContext, oldSpec.SecurityContext, cmpopts.EquateEmpty()) || newSpec.HostNetwork != oldSpec.HostNetwork || newSpec.ServiceAccountName != oldSpec.ServiceAccountName { logger.V(2).Info("pod securityContext or hostnetwork or serviceaccount diff", @@ -248,16 +286,32 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge return true } + nLimits, nReqs := newCon.Resources.Limits, newCon.Resources.Requests + oLimits, oReqs := oldCon.Resources.Limits, oldCon.Resources.Requests + if oLimits.Cpu().Cmp(*nLimits.Cpu()) != 0 || oLimits.Memory().Cmp(*nLimits.Memory()) != 0 || + oReqs.Cpu().Cmp(*nReqs.Cpu()) != 0 || oReqs.Memory().Cmp(*nReqs.Memory()) != 0 || + (!nLimits.StorageEphemeral().IsZero() && oLimits.StorageEphemeral().Cmp(*nLimits.StorageEphemeral()) != 0) || + (!nReqs.StorageEphemeral().IsZero() && oReqs.StorageEphemeral().Cmp(*nReqs.StorageEphemeral()) != 0) { + logger.V(2).Info("pod containers resources diff", + "CpuLimit", oLimits.Cpu().Cmp(*nLimits.Cpu()), + "MemLimit", oLimits.Memory().Cmp(*nLimits.Memory()), + "CpuRequest", oReqs.Cpu().Cmp(*nReqs.Cpu()), + "MemRequest", oReqs.Memory().Cmp(*nReqs.Memory()), + "StorageEphemeralLimit", oLimits.StorageEphemeral().Cmp(*nLimits.StorageEphemeral()), + "StorageEphemeralRequest", oReqs.StorageEphemeral().Cmp(*nReqs.StorageEphemeral()), + ) + return true + } + // check almost all fields of container // should make sure that apiserver not return noset default value if oldCon.Image != newCon.Image || oldCon.ImagePullPolicy != newCon.ImagePullPolicy || - !reflect.DeepEqual(oldCon.Resources, newCon.Resources) || !reflect.DeepEqual(loadEnvs(oldCon.Env), loadEnvs(newCon.Env)) || !reflect.DeepEqual(oldCon.Command, newCon.Command) || !reflect.DeepEqual(oldCon.Args, newCon.Args) || !reflect.DeepEqual(oldCon.Ports, newCon.Ports) || - !reflect.DeepEqual(oldCon.Lifecycle, newCon.Lifecycle) || - !reflect.DeepEqual(oldCon.VolumeMounts, newCon.VolumeMounts) { + !cmp.Equal(oldCon.Lifecycle, newCon.Lifecycle, cmpopts.EquateEmpty()) || + !cmp.Equal(oldCon.VolumeMounts, newCon.VolumeMounts, cmpopts.EquateEmpty()) { logger.V(2).Info("pod containers config diff", "image", oldCon.Image != newCon.Image, @@ -273,7 +327,136 @@ func IsPodTemplasteChanged(newTplSpec, oldTplSpec *corev1.PodTemplateSpec, logge return true } } + return false +} + 
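The container resource comparison above switches from reflect.DeepEqual to quantity-aware Cmp calls. A minimal, self-contained sketch of why (the variable names are illustrative, not from the patch): two resource.Quantity values can describe the same amount while their cached string forms differ, so DeepEqual reports a spurious diff and would trigger needless statefulset updates.

package main

import (
	"fmt"
	"reflect"

	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	limA := resource.MustParse("1Gi")
	limB := resource.MustParse("1024Mi")

	// Semantically the same amount of memory, so Cmp reports equality ...
	fmt.Println("Cmp:", limA.Cmp(limB)) // 0
	// ... while DeepEqual also compares the cached string form and sees a difference.
	fmt.Println("DeepEqual:", reflect.DeepEqual(limA, limB)) // false
}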
+func IsServiceChanged(ns, os *corev1.Service, logger logr.Logger) bool { + if (ns == nil && os != nil) || (ns != nil && os == nil) { + return true + } + newSvc, oldSvc := ns.DeepCopy(), os.DeepCopy() + + isSubset := func(n, o map[string]string) bool { + if len(n) > len(o) { + return false + } + for k, v := range n { + if val, ok := o[k]; !ok || val != v { + return false + } + } + return true + } + + if !isSubset(newSvc.Labels, oldSvc.Labels) || + !isSubset(newSvc.Annotations, oldSvc.Annotations) { + logger.V(1).Info("Service labels or annotations changed", + "newLabels", newSvc.Labels, + "oldLabels", oldSvc.Labels, + "newAnnotations", newSvc.Annotations, + "oldAnnotations", oldSvc.Annotations, + ) + return true + } + if newSvc.Spec.Type == "" { + newSvc.Spec.Type = corev1.ServiceTypeClusterIP + } + if oldSvc.Spec.Type == "" { + oldSvc.Spec.Type = corev1.ServiceTypeClusterIP + } + + if newSvc.Spec.Type != oldSvc.Spec.Type { + logger.V(1).Info("Service type changed") + return true + } + if newSvc.Spec.Type == corev1.ServiceTypeLoadBalancer { + if newSvc.Spec.AllocateLoadBalancerNodePorts == nil { + newSvc.Spec.AllocateLoadBalancerNodePorts = ptr.To(true) + } + if oldSvc.Spec.AllocateLoadBalancerNodePorts == nil { + oldSvc.Spec.AllocateLoadBalancerNodePorts = ptr.To(true) + } + } + if newSvc.Spec.SessionAffinity == "" { + newSvc.Spec.SessionAffinity = corev1.ServiceAffinityNone + } + if oldSvc.Spec.SessionAffinity == "" { + oldSvc.Spec.SessionAffinity = corev1.ServiceAffinityNone + } + if newSvc.Spec.InternalTrafficPolicy == nil { + newSvc.Spec.InternalTrafficPolicy = ptr.To(corev1.ServiceInternalTrafficPolicyCluster) + } + if oldSvc.Spec.InternalTrafficPolicy == nil { + oldSvc.Spec.InternalTrafficPolicy = ptr.To(corev1.ServiceInternalTrafficPolicyCluster) + } + if newSvc.Spec.ExternalTrafficPolicy == "" { + newSvc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeCluster + } + if oldSvc.Spec.ExternalTrafficPolicy == "" { + oldSvc.Spec.ExternalTrafficPolicy = corev1.ServiceExternalTrafficPolicyTypeCluster + } + + if !cmp.Equal(newSvc.Spec.Selector, oldSvc.Spec.Selector, cmpopts.EquateEmpty()) || + !cmp.Equal(newSvc.Spec.IPFamilyPolicy, oldSvc.Spec.IPFamilyPolicy, cmpopts.EquateEmpty()) || + !cmp.Equal(newSvc.Spec.IPFamilies, oldSvc.Spec.IPFamilies, cmpopts.EquateEmpty()) || + newSvc.Spec.HealthCheckNodePort != oldSvc.Spec.HealthCheckNodePort || + newSvc.Spec.PublishNotReadyAddresses != oldSvc.Spec.PublishNotReadyAddresses || + newSvc.Spec.SessionAffinity != oldSvc.Spec.SessionAffinity || + !cmp.Equal(newSvc.Spec.InternalTrafficPolicy, oldSvc.Spec.InternalTrafficPolicy, cmpopts.EquateEmpty()) || + newSvc.Spec.ExternalTrafficPolicy != oldSvc.Spec.ExternalTrafficPolicy || + !cmp.Equal(newSvc.Spec.TrafficDistribution, oldSvc.Spec.TrafficDistribution, cmpopts.EquateEmpty()) || + + (newSvc.Spec.Type == corev1.ServiceTypeLoadBalancer && + (newSvc.Spec.LoadBalancerIP != oldSvc.Spec.LoadBalancerIP || + !cmp.Equal(newSvc.Spec.LoadBalancerSourceRanges, oldSvc.Spec.LoadBalancerSourceRanges, cmpopts.EquateEmpty()) || + !cmp.Equal(newSvc.Spec.AllocateLoadBalancerNodePorts, oldSvc.Spec.AllocateLoadBalancerNodePorts, cmpopts.EquateEmpty()))) { + + logger.V(1).Info("Service spec changed", + "selector", !cmp.Equal(newSvc.Spec.Selector, oldSvc.Spec.Selector, cmpopts.EquateEmpty()), + "familypolicy", !cmp.Equal(newSvc.Spec.IPFamilyPolicy, oldSvc.Spec.IPFamilyPolicy, cmpopts.EquateEmpty()), + "IPFamilies", !cmp.Equal(newSvc.Spec.IPFamilies, oldSvc.Spec.IPFamilies, cmpopts.EquateEmpty()), + 
"allocatelbport", !cmp.Equal(newSvc.Spec.AllocateLoadBalancerNodePorts, oldSvc.Spec.AllocateLoadBalancerNodePorts, cmpopts.EquateEmpty()), + "HealthCheckNodePort", newSvc.Spec.HealthCheckNodePort != oldSvc.Spec.HealthCheckNodePort, + "LoadBalancerIP", newSvc.Spec.LoadBalancerIP != oldSvc.Spec.LoadBalancerIP, + "LoadBalancerSourceRanges", !cmp.Equal(newSvc.Spec.LoadBalancerSourceRanges, oldSvc.Spec.LoadBalancerSourceRanges, cmpopts.EquateEmpty()), + "PublishNotReadyAddresses", newSvc.Spec.PublishNotReadyAddresses != oldSvc.Spec.PublishNotReadyAddresses, + "TrafficDistribution", !cmp.Equal(newSvc.Spec.TrafficDistribution, oldSvc.Spec.TrafficDistribution, cmpopts.EquateEmpty()), + ) + return true + } + + if len(newSvc.Spec.Ports) != len(oldSvc.Spec.Ports) { + logger.V(1).Info("Service ports length changed") + return true + } + for i, port := range newSvc.Spec.Ports { + oldPort := oldSvc.Spec.Ports[i] + if port.Protocol == "" { + port.Protocol = corev1.ProtocolTCP + } + if oldPort.Protocol == "" { + oldPort.Protocol = corev1.ProtocolTCP + } + + if port.Name != oldPort.Name || + port.Protocol != oldPort.Protocol || + port.Port != oldPort.Port || + (newSvc.Spec.Type == corev1.ServiceTypeNodePort && port.NodePort != 0 && port.NodePort != oldPort.NodePort) { + + logger.V(1).Info("Service port changed", + "portName", port.Name, + "portProtocol", port.Protocol, + "portNumber", port.Port, + "nodePort", port.NodePort, + "oldPortName", oldPort.Name, + "oldPortProtocol", oldPort.Protocol, + "oldPortNumber", oldPort.Port, + "oldNodePort", oldPort.NodePort, + ) + return true + } + } return false } diff --git a/internal/valkey/cluster/cluster.go b/internal/valkey/cluster/cluster.go index 7f1d118..2d7a0ff 100644 --- a/internal/valkey/cluster/cluster.go +++ b/internal/valkey/cluster/cluster.go @@ -23,6 +23,7 @@ import ( "fmt" "strconv" "strings" + "time" certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" "github.com/chideat/valkey-operator/api/core" @@ -31,6 +32,7 @@ import ( "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/aclbuilder" "github.com/chideat/valkey-operator/internal/builder/clusterbuilder" + "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" clientset "github.com/chideat/valkey-operator/pkg/kubernetes" "github.com/chideat/valkey-operator/pkg/security/acl" @@ -422,6 +424,21 @@ func (c *ValkeyCluster) Shards() []types.ClusterShard { return c.shards } +func (c *ValkeyCluster) Shard(index int) types.ClusterShard { + if c == nil { + return nil + } + if index < 0 || index >= int(c.Definition().Spec.Replicas.Shards) { + return nil + } + for _, shard := range c.shards { + if shard.Index() == index { + return shard + } + } + return nil +} + func (c *ValkeyCluster) Nodes() []types.ValkeyNode { var ret []types.ValkeyNode for _, shard := range c.shards { @@ -674,6 +691,17 @@ func (c *ValkeyCluster) IsResourceFullfilled(ctx context.Context) (bool, error) c.logger.Error(err, "get resource failed", "target", util.ObjectKey(c.GetNamespace(), name)) return false, err } + + if obj.GroupVersionKind() == serviceKey { + ts := obj.GetCreationTimestamp() + typ, _, _ := unstructured.NestedString(obj.Object, "spec", "type") + lbs, _, _ := unstructured.NestedSlice(obj.Object, "status", "loadBalancer", "ingress") + if typ == string(corev1.ServiceTypeLoadBalancer) && len(lbs) == 0 && + time.Since(ts.Time) >= config.LoadbalancerReadyTimeout() { + c.logger.V(3).Info("load balancer service not 
ready", "target", util.ObjectKey(c.GetNamespace(), name), "createdAt", ts.Time) + return false, nil + } + } } } diff --git a/internal/valkey/failover/failover.go b/internal/valkey/failover/failover.go index a41f21d..700333a 100644 --- a/internal/valkey/failover/failover.go +++ b/internal/valkey/failover/failover.go @@ -20,9 +20,9 @@ import ( "context" "crypto/tls" "fmt" - "reflect" "strconv" "strings" + "time" certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1" "github.com/chideat/valkey-operator/api/core" @@ -32,6 +32,7 @@ import ( "github.com/chideat/valkey-operator/internal/builder" "github.com/chideat/valkey-operator/internal/builder/aclbuilder" "github.com/chideat/valkey-operator/internal/builder/failoverbuilder" + "github.com/chideat/valkey-operator/internal/config" "github.com/chideat/valkey-operator/internal/util" "github.com/chideat/valkey-operator/internal/valkey/failover/monitor" clientset "github.com/chideat/valkey-operator/pkg/kubernetes" @@ -39,6 +40,8 @@ import ( "github.com/chideat/valkey-operator/pkg/types" "github.com/chideat/valkey-operator/pkg/types/user" "github.com/chideat/valkey-operator/pkg/version" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/go-logr/logr" "github.com/samber/lo" @@ -301,6 +304,13 @@ func (s *Failover) Masters() []types.ValkeyNode { return ret } +func (s *Failover) Replication() types.Replication { + if s == nil { + return nil + } + return s.replication +} + func (s *Failover) Nodes() []types.ValkeyNode { if s == nil || s.replication == nil { return nil @@ -646,6 +656,17 @@ func (s *Failover) IsResourceFullfilled(ctx context.Context) (bool, error) { s.logger.Error(err, "get resource failed", "kind", gvk.Kind, "target", util.ObjectKey(s.GetNamespace(), name)) return false, err } + + if obj.GroupVersionKind() == serviceKey { + ts := obj.GetCreationTimestamp() + typ, _, _ := unstructured.NestedString(obj.Object, "spec", "type") + lbs, _, _ := unstructured.NestedSlice(obj.Object, "status", "loadBalancer", "ingress") + if typ == string(corev1.ServiceTypeLoadBalancer) && len(lbs) == 0 && + time.Since(ts.Time) >= config.LoadbalancerReadyTimeout() { + s.logger.V(3).Info("load balancer service not ready", "target", util.ObjectKey(s.GetNamespace(), name), "createdAt", ts.Time) + return false, nil + } + } } } @@ -659,9 +680,9 @@ func (s *Failover) IsResourceFullfilled(ctx context.Context) (bool, error) { s.logger.Error(err, "get sentinel failed", "target", client.ObjectKeyFromObject(newSen)) return false, err } - if !reflect.DeepEqual(newSen.Spec, oldSen.Spec) || - !reflect.DeepEqual(newSen.Labels, oldSen.Labels) || - !reflect.DeepEqual(newSen.Annotations, oldSen.Annotations) { + if !cmp.Equal(newSen.Spec, oldSen.Spec, cmpopts.EquateEmpty()) || + !cmp.Equal(newSen.Labels, oldSen.Labels, cmpopts.EquateEmpty()) || + !cmp.Equal(newSen.Annotations, oldSen.Annotations, cmpopts.EquateEmpty()) { oldSen.Spec = newSen.Spec oldSen.Labels = newSen.Labels oldSen.Annotations = newSen.Annotations diff --git a/internal/valkey/failover/monitor/sentinel_monitor.go b/internal/valkey/failover/monitor/sentinel_monitor.go index 7d763a9..c2ed6fe 100644 --- a/internal/valkey/failover/monitor/sentinel_monitor.go +++ b/internal/valkey/failover/monitor/sentinel_monitor.go @@ -39,6 +39,7 @@ import ( ) var ( + ErrNoUseableNode = fmt.Errorf("no usable sentinel node") ErrNoMaster = fmt.Errorf("no master") ErrDoFailover = fmt.Errorf("sentinel doing failover") ErrMultipleMaster = fmt.Errorf("multiple master without majority agreement") 
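The IsResourceFullfilled changes in cluster.go and failover.go above (and the sentinel variant later in this patch) treat a LoadBalancer service that has stayed pending past config.LoadbalancerReadyTimeout() as not yet fulfilled. A minimal, self-contained sketch of that unstructured lookup; the fixed 2-minute cutoff and the local converter call stand in for the operator's config and API client:

package main

import (
	"fmt"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
	"k8s.io/apimachinery/pkg/runtime"
)

func main() {
	// A LoadBalancer service created 5 minutes ago with no ingress assigned yet.
	svc := &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{CreationTimestamp: metav1.NewTime(time.Now().Add(-5 * time.Minute))},
		Spec:       corev1.ServiceSpec{Type: corev1.ServiceTypeLoadBalancer},
	}
	obj, _ := runtime.DefaultUnstructuredConverter.ToUnstructured(svc)

	typ, _, _ := unstructured.NestedString(obj, "spec", "type")
	ingress, _, _ := unstructured.NestedSlice(obj, "status", "loadBalancer", "ingress")
	pending := time.Since(svc.CreationTimestamp.Time)

	// Pending LoadBalancer older than the timeout => report the resources as not fulfilled.
	notReady := typ == string(corev1.ServiceTypeLoadBalancer) && len(ingress) == 0 && pending >= 2*time.Minute
	fmt.Println(notReady) // true
}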
@@ -151,6 +152,7 @@ func (s *SentinelMonitor) Master(ctx context.Context, flags ...bool) (*vkcli.Sen
 	var (
 		masterStat      []*Stat
 		masterIds       = map[string]int{}
+		idAddrMap       = map[string][]string{}
 		registeredNodes int
 	)
 	for _, node := range s.nodes {
@@ -185,6 +187,9 @@ func (s *SentinelMonitor) Master(ctx context.Context, flags ...bool) (*vkcli.Sen
 		}
 		if n.RunId != "" {
 			masterIds[n.RunId] += 1
+			if !slices.Contains(idAddrMap[n.RunId], n.Address()) {
+				idAddrMap[n.RunId] = append(idAddrMap[n.RunId], n.Address())
+			}
 		}
 	}
 	if len(masterStat) == 0 {
@@ -203,6 +208,23 @@ func (s *SentinelMonitor) Master(ctx context.Context, flags ...bool) (*vkcli.Sen
 			return nil, ErrAddressConflict
 		}
 	}
+
+	for _, node := range s.failover.Nodes() {
+		if !node.IsReady() {
+			s.logger.Info("node not ready, ignored", "node", node.GetName())
+			continue
+		}
+		registeredAddrs := idAddrMap[node.Info().RunId]
+		addr := net.JoinHostPort(node.DefaultIP().String(), strconv.Itoa(node.Port()))
+		addr2 := net.JoinHostPort(node.DefaultInternalIP().String(), strconv.Itoa(node.InternalPort()))
+		// same runid registered with different addr
+		// TODO: limit service InternalTrafficPolicy to Local
+		if (len(registeredAddrs) == 1 && registeredAddrs[0] != addr && registeredAddrs[0] != addr2) ||
+			len(registeredAddrs) > 1 {
+			return nil, ErrAddressConflict
+		}
+	}
+
 	// masterStat[0].Count == registeredNodes used to check if all nodes are consistent no matter how many sentinel nodes
 	if masterStat[0].Count >= 1+len(s.nodes)/2 || masterStat[0].Count == registeredNodes {
 		return masterStat[0].Node, nil
@@ -315,7 +337,7 @@ func (s *SentinelMonitor) Replicas(ctx context.Context) ([]*vkcli.SentinelMonito
 
 func (s *SentinelMonitor) Inited(ctx context.Context) (bool, error) {
 	if s == nil || len(s.nodes) == 0 {
-		return false, fmt.Errorf("no sentinel nodes")
+		return false, ErrNoUseableNode
 	}
 
 	for _, node := range s.nodes {
@@ -333,7 +355,7 @@ func (s *SentinelMonitor) Inited(ctx context.Context) (bool, error) {
 // AllNodeMonitored checks if all sentinel nodes are monitoring all the master and replicas
 func (s *SentinelMonitor) AllNodeMonitored(ctx context.Context) (bool, error) {
 	if s == nil || len(s.nodes) == 0 {
-		return false, fmt.Errorf("no sentinel nodes")
+		return false, ErrNoUseableNode
 	}
 
 	var (
@@ -409,7 +431,7 @@ func (s *SentinelMonitor) AllNodeMonitored(ctx context.Context) (bool, error) {
 
 func (s *SentinelMonitor) UpdateConfig(ctx context.Context, params map[string]string) error {
 	if s == nil || len(s.nodes) == 0 {
-		return fmt.Errorf("no sentinel nodes")
+		return ErrNoUseableNode
 	}
 	logger := s.logger.WithName("UpdateConfig")
 
@@ -455,7 +477,7 @@ func (s *SentinelMonitor) UpdateConfig(ctx context.Context, params map[string]st
 
 func (s *SentinelMonitor) Failover(ctx context.Context) error {
 	if s == nil || len(s.nodes) == 0 {
-		return fmt.Errorf("no sentinel nodes")
+		return ErrNoUseableNode
 	}
 	logger := s.logger.WithName("failover")
 
diff --git a/internal/valkey/node.go b/internal/valkey/node.go
index 1f1a066..1c1e700 100644
--- a/internal/valkey/node.go
+++ b/internal/valkey/node.go
@@ -742,7 +742,7 @@ func (n *ValkeyNode) InternalPort() int {
 
 func (n *ValkeyNode) DefaultIP() net.IP {
 	if value := n.Pod.Labels[builder.AnnounceIPLabelKey]; value != "" {
-		address := strings.Replace(value, "-", ":", -1)
+		address := strings.ReplaceAll(value, "-", ":")
 		return net.ParseIP(address)
 	}
 	return n.DefaultInternalIP()
diff --git a/internal/valkey/sentinel/sentinel.go b/internal/valkey/sentinel/sentinel.go
index 81510b4..1bab67c 100644
--- a/internal/valkey/sentinel/sentinel.go
+++ b/internal/valkey/sentinel/sentinel.go
@@ -24,6 +24,7 @@ import (
 	"slices"
 	"strconv"
 	"strings"
+	"time"
 
 	certmetav1 "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
 	"github.com/chideat/valkey-operator/api/core"
@@ -32,6 +33,7 @@ import (
 	databasesv1 "github.com/chideat/valkey-operator/api/v1alpha1"
 	"github.com/chideat/valkey-operator/internal/builder"
 	"github.com/chideat/valkey-operator/internal/builder/sentinelbuilder"
+	"github.com/chideat/valkey-operator/internal/config"
 	"github.com/chideat/valkey-operator/internal/util"
 	clientset "github.com/chideat/valkey-operator/pkg/kubernetes"
 	"github.com/chideat/valkey-operator/pkg/types"
@@ -187,7 +189,10 @@ func (s *ValkeySentinel) Definition() *v1alpha1.Sentinel {
 }
 
 func (s *ValkeySentinel) Users() types.Users {
-	return nil
+	if s == nil {
+		return nil
+	}
+	return s.users
 }
 
 func (s *ValkeySentinel) Replication() types.SentinelReplication {
@@ -331,15 +336,18 @@ func (s *ValkeySentinel) IsResourceFullfilled(ctx context.Context) (bool, error)
 			s.logger.Error(err, "get resource failed", "target", util.ObjectKey(s.GetNamespace(), name))
 			return false, err
 		}
-		// if gvk == stsKey {
-		// 	if replicas, found, err := unstructured.NestedInt64(obj.Object, "spec", "replicas"); err != nil {
-		// 		s.logger.Error(err, "get service replicas failed", "target", util.ObjectKey(s.GetNamespace(), name))
-		// 		return false, err
-		// 	} else if found && replicas != int64(s.Spec.Replicas) {
-		// 		s.logger.Info("@@@@@@@ found", "replicas", replicas, "s.Spec.Replicas", s.Spec.Replicas)
-		// 		return false, nil
-		// 	}
-		// }
+
+		if obj.GroupVersionKind() == serviceKey {
+			ts := obj.GetCreationTimestamp()
+			typ, _, _ := unstructured.NestedString(obj.Object, "spec", "type")
+			lbs, _, _ := unstructured.NestedSlice(obj.Object, "status", "loadBalancer", "ingress")
+			if typ == string(corev1.ServiceTypeLoadBalancer) && len(lbs) == 0 &&
+				time.Since(ts.Time) >= config.LoadbalancerReadyTimeout() {
+				s.logger.V(3).Info("load balancer service not ready", "target", util.ObjectKey(s.GetNamespace(), name), "createdAt", ts.Time)
+				return false, nil
+			}
+		}
+
 		}
 	}
 	return true, nil
diff --git a/pkg/kubernetes/clientset/service.go b/pkg/kubernetes/clientset/service.go
index a7820db..7c96c6a 100644
--- a/pkg/kubernetes/clientset/service.go
+++ b/pkg/kubernetes/clientset/service.go
@@ -27,6 +27,8 @@ import (
 	"k8s.io/client-go/util/retry"
 
 	"github.com/go-logr/logr"
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
@@ -129,8 +131,8 @@ func (s *ServiceOption) CreateOrUpdateIfServiceChanged(ctx context.Context, name
 		return err
 	}
 	if !reflect.DeepEqual(oldSvc.Labels, service.Labels) ||
-		!reflect.DeepEqual(oldSvc.Spec.Selector, service.Spec.Selector) ||
-		len(oldSvc.Spec.Ports) != len(service.Spec.Ports) {
+		!reflect.DeepEqual(oldSvc.Annotations, service.Annotations) ||
+		!cmp.Equal(oldSvc.Spec, service.Spec, cmpopts.EquateEmpty()) {
 		return s.UpdateService(ctx, namespace, service)
 	}
 
diff --git a/pkg/kubernetes/clientset/service_test.go b/pkg/kubernetes/clientset/service_test.go
new file mode 100644
index 0000000..a8ba389
--- /dev/null
+++ b/pkg/kubernetes/clientset/service_test.go
@@ -0,0 +1,124 @@
+/*
+Copyright 2024 chideat.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package clientset
+
+import (
+	"context"
+	"testing"
+
+	"github.com/go-logr/logr"
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
+)
+
+func TestServiceOption_CRUD(t *testing.T) {
+	client := fake.NewClientBuilder().Build()
+	logger := logr.Discard()
+	svcClient := NewService(client, logger)
+	ctx := context.Background()
+	ns := "default"
+	name := "test-service"
+
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: ns,
+			Labels: map[string]string{
+				"app": "test",
+			},
+		},
+		Spec: corev1.ServiceSpec{
+			Ports: []corev1.ServicePort{
+				{Port: 80},
+			},
+		},
+	}
+
+	// Test Create
+	err := svcClient.CreateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// Test Get
+	found, err := svcClient.GetService(ctx, ns, name)
+	assert.NoError(t, err)
+	assert.Equal(t, name, found.Name)
+
+	// Test CreateIfNotExists (exists)
+	err = svcClient.CreateIfNotExistsService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// Test Update
+	svc.Labels["new-label"] = "new-value"
+	err = svcClient.UpdateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	found, err = svcClient.GetService(ctx, ns, name)
+	assert.NoError(t, err)
+	assert.Equal(t, "new-value", found.Labels["new-label"])
+
+	// Test CreateOrUpdateService
+	svc.Annotations = map[string]string{"anno": "val"}
+	err = svcClient.CreateOrUpdateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	found, err = svcClient.GetService(ctx, ns, name)
+	assert.NoError(t, err)
+	assert.Equal(t, "val", found.Annotations["anno"])
+
+	// Test Delete
+	err = svcClient.DeleteService(ctx, ns, name)
+	assert.NoError(t, err)
+
+	_, err = svcClient.GetService(ctx, ns, name)
+	assert.Error(t, err)
+}
+
+func TestServiceOption_UpdateIfSelectorChangedService(t *testing.T) {
+	client := fake.NewClientBuilder().Build()
+	logger := logr.Discard()
+	svcClient := NewService(client, logger)
+	ctx := context.Background()
+	ns := "default"
+
+	svc := &corev1.Service{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "selector-service",
+			Namespace: ns,
+		},
+		Spec: corev1.ServiceSpec{
+			Selector: map[string]string{"app": "v1"},
+		},
+	}
+
+	err := svcClient.CreateService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// No change
+	err = svcClient.UpdateIfSelectorChangedService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	// Change selector
+	svc.Spec.Selector = map[string]string{"app": "v2"}
+	err = svcClient.UpdateIfSelectorChangedService(ctx, ns, svc)
+	assert.NoError(t, err)
+
+	found, err := svcClient.GetService(ctx, ns, "selector-service")
+	assert.NoError(t, err)
+	assert.Equal(t, "v2", found.Spec.Selector["app"])
+}
diff --git a/pkg/types/cluster_instance.go b/pkg/types/cluster_instance.go
index f5084ac..702fd5c 100644
--- a/pkg/types/cluster_instance.go
+++ b/pkg/types/cluster_instance.go
@@ -57,5 +57,6 @@ type ClusterInstance interface {
 	Nodes() []ValkeyNode
 	RawNodes(ctx context.Context) ([]corev1.Pod, error)
 	Shards() []ClusterShard
+	Shard(index int) ClusterShard
 	RewriteShards(ctx context.Context, shards []*v1alpha1.ClusterShards) error
 }
diff --git a/pkg/types/failover_instance.go b/pkg/types/failover_instance.go
index cfb650f..f3086b2 100644
--- a/pkg/types/failover_instance.go
+++ b/pkg/types/failover_instance.go
@@ -40,6 +40,7 @@ type FailoverInstance interface {
 	Instance
 
 	Definition() *v1alpha1.Failover
+	Replication() Replication
 	Masters() []ValkeyNode
 	Nodes() []ValkeyNode
 	RawNodes(ctx context.Context) ([]corev1.Pod, error)
diff --git a/pkg/valkey/valkey.go b/pkg/valkey/valkey.go
index 9aa1470..6a9c7a9 100644
--- a/pkg/valkey/valkey.go
+++ b/pkg/valkey/valkey.go
@@ -619,8 +619,7 @@ func (c *valkeyClient) ClusterInfo(ctx context.Context) (*ClusterNodeInfo, error
 
 	parseInfo := func(data string) *ClusterNodeInfo {
 		info := ClusterNodeInfo{}
-		lines := strings.Split(data, "\n")
-		for _, line := range lines {
+		for line := range strings.SplitSeq(data, "\n") {
 			line = strings.TrimSpace(line)
 			if line == "" || strings.HasPrefix(line, "#") {
 				continue
@@ -677,7 +676,7 @@ func (c *valkeyClient) Nodes(ctx context.Context) (ClusterNodes, error) {
 	}
 
 	var nodes ClusterNodes
-	for _, line := range strings.Split(strings.TrimSpace(data), "\n") {
+	for line := range strings.SplitSeq(strings.TrimSpace(data), "\n") {
 		line = strings.TrimSpace(line)
 		if node, err := ParseNodeFromClusterNode(line); err != nil {
 			return nil, err