diff --git a/charts/rule-evaluator/templates/deployment.yaml b/charts/rule-evaluator/templates/deployment.yaml
index f3a096edd3..5652b3b308 100644
--- a/charts/rule-evaluator/templates/deployment.yaml
+++ b/charts/rule-evaluator/templates/deployment.yaml
@@ -111,8 +111,21 @@ spec:
       - name: config-out
         emptyDir: {}
       - name: rules
-        configMap:
-          name: rules
+        # Mount exactly one ConfigMap per rule type (3 total)
+        # - rules: namespace-scoped Rules
+        # - clusterrules: cluster-scoped ClusterRules
+        # - globalrules: GlobalRules
+        projected:
+          sources:
+          - configMap:
+              name: rules
+              optional: true
+          - configMap:
+              name: clusterrules
+              optional: true
+          - configMap:
+              name: globalrules
+              optional: true
       - name: rules-out
         emptyDir: {}
       affinity:
diff --git a/cmd/config-reloader/Dockerfile b/cmd/config-reloader/Dockerfile
index c10ecbeb32..f06ae571a8 100644
--- a/cmd/config-reloader/Dockerfile
+++ b/cmd/config-reloader/Dockerfile
@@ -24,8 +24,6 @@ COPY go.sum go.sum
 COPY vendor* vendor
 COPY cmd cmd
 
-RUN GO111MODULE=on go get github.com/mikefarah/yq/v3 # Install yq tool.
-
 ENV GOEXPERIMENT=boringcrypto
 ENV CGO_ENABLED=1
 ENV GOFIPS140=off
@@ -37,10 +35,11 @@ RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \
         gcc-aarch64-linux-gnu libc6-dev-arm64-cross; \
         CC=aarch64-linux-gnu-gcc; \
     fi && \
+    VERSION=$(awk -F': ' '/^version:/ {print $2}' charts/values.global.yaml) && \
+    BUILD_DATE=$(date --iso-8601=seconds) && \
     GOOS=${TARGETOS} GOARCH=${TARGETARCH} CC=${CC} \
     go build \
-    -ldflags="-X github.com/prometheus/common/version.Version=$(cat charts/values.global.yaml | yq '.version' ) \
-    -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \
+    -ldflags="-X github.com/prometheus/common/version.Version=${VERSION} -X github.com/prometheus/common/version.BuildDate=${BUILD_DATE}" \
     -o config-reloader \
     cmd/config-reloader/*.go
diff --git a/cmd/datasource-syncer/Dockerfile b/cmd/datasource-syncer/Dockerfile
index f3a4cd4dd5..3a72f97800 100644
--- a/cmd/datasource-syncer/Dockerfile
+++ b/cmd/datasource-syncer/Dockerfile
@@ -24,8 +24,6 @@ COPY go.sum go.sum
 COPY vendor* vendor
 COPY cmd cmd
 
-RUN GO111MODULE=on go get github.com/mikefarah/yq/v3 # Install yq tool.
-
 ENV GOEXPERIMENT=boringcrypto
 ENV CGO_ENABLED=1
 ENV GOFIPS140=off
@@ -37,10 +35,11 @@ RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \
        gcc-aarch64-linux-gnu libc6-dev-arm64-cross; \
        CC=aarch64-linux-gnu-gcc; \
    fi && \
+    VERSION=$(awk -F': ' '/^version:/ {print $2}' charts/values.global.yaml) && \
+    BUILD_DATE=$(date --iso-8601=seconds) && \
     GOOS=${TARGETOS} GOARCH=${TARGETARCH} CC=${CC} \
     go build \
-    -ldflags="-X github.com/prometheus/common/version.Version=$(cat charts/values.global.yaml | yq '.version' ) \
-    -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \
+    -ldflags="-X github.com/prometheus/common/version.Version=${VERSION} -X github.com/prometheus/common/version.BuildDate=${BUILD_DATE}" \
     -o datasource-syncer \
     cmd/datasource-syncer/*.go
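Note on the yq removal above (the same edit repeats in the remaining Dockerfiles below): modern Go toolchains no longer build and install binaries via plain "go get" (since Go 1.18 this requires "go install pkg@version"), so the yq install step had become a liability. The replacement awk call reads the top-level "version:" value straight out of charts/values.global.yaml, and hoisting VERSION and BUILD_DATE into shell variables keeps the -ldflags string on a single line. For readers less fluent in awk, the extraction is roughly equivalent to this Go sketch (illustrative only, not part of the change):

    // Print the value of the top-level "version:" key of a values file,
    // mirroring: awk -F': ' '/^version:/ {print $2}' charts/values.global.yaml
    package main

    import (
        "bufio"
        "fmt"
        "os"
        "strings"
    )

    func main() {
        f, err := os.Open("charts/values.global.yaml")
        if err != nil {
            panic(err)
        }
        defer f.Close()
        sc := bufio.NewScanner(f)
        for sc.Scan() {
            // Only a line starting with "version: " matches.
            if v, ok := strings.CutPrefix(sc.Text(), "version: "); ok {
                fmt.Println(v) // prints the raw version string
                return
            }
        }
    }

Unlike yq, the awk match is purely textual, so it assumes exactly one top-level "version:" line in the file.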
diff --git a/cmd/frontend/Dockerfile b/cmd/frontend/Dockerfile
index c1e2160977..917cd1a0f4 100644
--- a/cmd/frontend/Dockerfile
+++ b/cmd/frontend/Dockerfile
@@ -37,8 +37,6 @@ WORKDIR /app
 COPY charts/values.global.yaml charts/values.global.yaml
 COPY --from=assets /app ./
 
-RUN GO111MODULE=on go get github.com/mikefarah/yq/v3 # Install yq tool.
-
 ENV GOEXPERIMENT=boringcrypto
 ENV CGO_ENABLED=1
 ENV GOFIPS140=off
@@ -50,10 +48,11 @@ RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \
        gcc-aarch64-linux-gnu libc6-dev-arm64-cross; \
        CC=aarch64-linux-gnu-gcc; \
    fi && \
+    VERSION=$(awk -F': ' '/^version:/ {print $2}' charts/values.global.yaml) && \
+    BUILD_DATE=$(date --iso-8601=seconds) && \
     GOOS=${TARGETOS} GOARCH=${TARGETARCH} CC=${CC} \
     go build \
-    -ldflags="-X github.com/prometheus/common/version.Version=$(cat charts/values.global.yaml | yq ".version" ) \
-    -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \
+    -ldflags="-X github.com/prometheus/common/version.Version=${VERSION} -X github.com/prometheus/common/version.BuildDate=${BUILD_DATE}" \
     -o frontend \
     cmd/frontend/*.go
diff --git a/cmd/operator/Dockerfile b/cmd/operator/Dockerfile
index a54a0397a7..4fd44756b9 100644
--- a/cmd/operator/Dockerfile
+++ b/cmd/operator/Dockerfile
@@ -25,8 +25,6 @@ COPY vendor* vendor
 COPY cmd cmd
 COPY pkg pkg
 
-RUN GO111MODULE=on go get github.com/mikefarah/yq/v3 # Install yq tool.
-
 ENV GOEXPERIMENT=boringcrypto
 ENV CGO_ENABLED=1
 ENV GOFIPS140=off
@@ -38,10 +36,11 @@ RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \
        gcc-aarch64-linux-gnu libc6-dev-arm64-cross; \
        CC=aarch64-linux-gnu-gcc; \
    fi && \
+    VERSION=$(awk -F': ' '/^version:/ {print $2}' charts/values.global.yaml) && \
+    BUILD_DATE=$(date --iso-8601=seconds) && \
     GOOS=${TARGETOS} GOARCH=${TARGETARCH} CC=${CC} \
     go build \
-    -ldflags="-X github.com/prometheus/common/version.Version=$(cat charts/values.global.yaml | yq '.version' ) \
-    -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \
+    -ldflags="-X github.com/prometheus/common/version.Version=${VERSION} -X github.com/prometheus/common/version.BuildDate=${BUILD_DATE}" \
     -o operator \
     cmd/operator/*.go
diff --git a/cmd/rule-evaluator/Dockerfile b/cmd/rule-evaluator/Dockerfile
index abfa3820cc..4231d57407 100644
--- a/cmd/rule-evaluator/Dockerfile
+++ b/cmd/rule-evaluator/Dockerfile
@@ -26,8 +26,6 @@ COPY cmd cmd
 COPY pkg pkg
 COPY internal internal
 
-RUN GO111MODULE=on go get github.com/mikefarah/yq/v3 # Install yq tool.
-
 ENV GOEXPERIMENT=boringcrypto
 ENV CGO_ENABLED=1
 ENV GOFIPS140=off
@@ -39,10 +37,11 @@ RUN if [ "${TARGETARCH}" = "arm64" ] && [ "${BUILDARCH}" != "arm64" ]; then \
        gcc-aarch64-linux-gnu libc6-dev-arm64-cross; \
        CC=aarch64-linux-gnu-gcc; \
    fi && \
+    VERSION=$(awk -F': ' '/^version:/ {print $2}' charts/values.global.yaml) && \
+    BUILD_DATE=$(date --iso-8601=seconds) && \
     GOOS=${TARGETOS} GOARCH=${TARGETARCH} CC=${CC} \
     go build \
-    -ldflags="-X github.com/prometheus/common/version.Version=$(cat charts/values.global.yaml | yq '.version' ) \
-    -X github.com/prometheus/common/version.BuildDate=$(date --iso-8601=seconds)" \
+    -ldflags="-X github.com/prometheus/common/version.Version=${VERSION} -X github.com/prometheus/common/version.BuildDate=${BUILD_DATE}" \
     -o rule-evaluator \
     cmd/rule-evaluator/*.go
diff --git a/e2e/ruler_test.go b/e2e/ruler_test.go
index f72ddcab5e..e87d9cb9da 100644
--- a/e2e/ruler_test.go
+++ b/e2e/ruler_test.go
@@ -561,30 +561,47 @@ func testCreateRules(
     var diff string
     err := wait.PollUntilContextTimeout(ctx, 3*time.Second, 3*time.Minute, true, func(ctx context.Context) (bool, error) {
-        var cm corev1.ConfigMap
-        if err := kubeClient.Get(ctx, client.ObjectKey{Namespace: systemNamespace, Name: "rules-generated"}, &cm); err != nil {
-            if apierrors.IsNotFound(err) {
-                return false, nil
-            }
-            return false, fmt.Errorf("get ConfigMap: %w", err)
-        }
-        data := cm.Data
-        if features.Config.Compression == monitoringv1.CompressionGzip {
-            // When compression is enabled, we expect the config map with recording
-            // rules to be compressed with gzip. Decompress all files for validation.
-            for key, compressedData := range cm.BinaryData {
-                r, err := gzip.NewReader(bytes.NewReader(compressedData))
-                if err != nil {
-                    t.Fatal(err)
+        // Collect data from all three ConfigMaps: rules, clusterrules, globalrules
+        data := make(map[string]string)
+
+        // List of ConfigMaps to check
+        configMapNames := []string{"rules", "clusterrules", "globalrules"}
+
+        for _, cmName := range configMapNames {
+            var cm corev1.ConfigMap
+            if err := kubeClient.Get(ctx, client.ObjectKey{Namespace: systemNamespace, Name: cmName}, &cm); err != nil {
+                if apierrors.IsNotFound(err) {
+                    // ConfigMap doesn't exist yet, continue waiting
+                    continue
                 }
-                decompressed, err := io.ReadAll(r)
-                if err != nil {
-                    t.Fatal(err)
+                return false, fmt.Errorf("get ConfigMap %s: %w", cmName, err)
+            }
+
+            // Merge data from this ConfigMap
+            if features.Config.Compression == monitoringv1.CompressionGzip {
+                // When compression is enabled, decompress all files for validation
+                for key, compressedData := range cm.BinaryData {
+                    r, err := gzip.NewReader(bytes.NewReader(compressedData))
+                    if err != nil {
+                        t.Fatal(err)
+                    }
+                    decompressed, err := io.ReadAll(r)
+                    if err != nil {
+                        t.Fatal(err)
+                    }
+                    if _, ok := data[key]; ok {
+                        t.Errorf("duplicate ConfigMap key %q", key)
+                    }
+                    data[key] = string(decompressed)
                 }
-                if _, ok := data[key]; ok {
-                    t.Errorf("duplicate ConfigMap key %q", key)
+            } else {
+                // Uncompressed data
+                for key, value := range cm.Data {
+                    if _, ok := data[key]; ok {
+                        t.Errorf("duplicate ConfigMap key %q", key)
+                    }
+                    data[key] = value
                 }
-                data[key] = string(decompressed)
             }
         }
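The gzip branch of the new polling loop above decompresses and merges each entry inline; as a standalone reference, that step reduces to a helper along these lines (hypothetical, not part of the change; uses only bytes, compress/gzip, fmt, and io from the standard library):

    // mergeGzipped gunzips every BinaryData entry and merges it into data,
    // rejecting keys that already exist from another ConfigMap.
    func mergeGzipped(binaryData map[string][]byte, data map[string]string) error {
        for key, compressed := range binaryData {
            r, err := gzip.NewReader(bytes.NewReader(compressed))
            if err != nil {
                return fmt.Errorf("gzip reader for %q: %w", key, err)
            }
            decompressed, err := io.ReadAll(r)
            if err != nil {
                return fmt.Errorf("decompress %q: %w", key, err)
            }
            if _, ok := data[key]; ok {
                return fmt.Errorf("duplicate ConfigMap key %q", key)
            }
            data[key] = string(decompressed)
        }
        return nil
    }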
diff --git a/manifests/rule-evaluator.yaml b/manifests/rule-evaluator.yaml
index dd14ce2607..31c972afc0 100644
--- a/manifests/rule-evaluator.yaml
+++ b/manifests/rule-evaluator.yaml
@@ -213,8 +213,21 @@ spec:
       - name: config-out
         emptyDir: {}
       - name: rules
-        configMap:
-          name: rules
+        # Mount exactly one ConfigMap per rule type (3 total)
+        # - rules: namespace-scoped Rules
+        # - clusterrules: cluster-scoped ClusterRules
+        # - globalrules: GlobalRules
+        projected:
+          sources:
+          - configMap:
+              name: rules
+              optional: true
+          - configMap:
+              name: clusterrules
+              optional: true
+          - configMap:
+              name: globalrules
+              optional: true
       - name: rules-out
         emptyDir: {}
       affinity:
diff --git a/pkg/operator/rules.go b/pkg/operator/rules.go
index 283c544d1c..2f6ee1449e 100644
--- a/pkg/operator/rules.go
+++ b/pkg/operator/rules.go
@@ -18,6 +18,8 @@ import (
     "context"
     "errors"
     "fmt"
+    "strings"
+    "time"
 
     "github.com/go-logr/logr"
     appsv1 "k8s.io/api/apps/v1"
@@ -36,7 +38,9 @@ import (
 )
 
 const (
-    nameRulesGenerated = "rules-generated"
+    nameRules        = "rules"
+    nameClusterRules = "clusterrules"
+    nameGlobalRules  = "globalrules"
 )
 
 func setupRulesControllers(op *Operator) error {
@@ -52,10 +56,18 @@ func setupRulesControllers(op *Operator) error {
         namespace: op.opts.PublicNamespace,
         name:      NameOperatorConfig,
     }
-    // Rule-evaluator rules ConfigMap filter.
-    objFilterRulesGenerated := namespacedNamePredicate{
+    // Rule-evaluator rules ConfigMap filters for all three ConfigMaps.
+    objFilterRules := namespacedNamePredicate{
         namespace: op.opts.OperatorNamespace,
-        name:      nameRulesGenerated,
+        name:      nameRules,
+    }
+    objFilterClusterRules := namespacedNamePredicate{
+        namespace: op.opts.OperatorNamespace,
+        name:      nameClusterRules,
+    }
+    objFilterGlobalRules := namespacedNamePredicate{
+        namespace: op.opts.OperatorNamespace,
+        name:      nameGlobalRules,
     }
     // Reconcile the generated rules that are used by the rule-evaluator deployment.
@@ -82,11 +94,11 @@ func setupRulesControllers(op *Operator) error {
             &monitoringv1.Rules{},
             enqueueConst(objRequest),
         ).
-        // The configuration we generate for the rule-evaluator.
+        // The configuration we generate for the rule-evaluator (three ConfigMaps).
         Watches(
             &corev1.ConfigMap{},
             enqueueConst(objRequest),
-            builder.WithPredicates(objFilterRulesGenerated),
+            builder.WithPredicates(predicate.Or(objFilterRules, objFilterClusterRules, objFilterGlobalRules)),
         ).
         Complete(newRulesReconciler(op.manager.GetClient(), op.opts))
     if err != nil {
@@ -221,38 +233,30 @@ func hasGlobalRules(ctx context.Context, c client.Client) (bool, error) {
 }
 
-// ensureRuleConfigs updates the Prometheus Rules ConfigMap.
+// RulesConfigUpdateStatus records the per-ConfigMap outcome of the most
+// recent rules reconciliation.
+type RulesConfigUpdateStatus struct {
+    ConfigMapResults map[string]error
+}
+
+// retryOperation runs op up to maxRetries times, sleeping delay after each
+// failure, and returns the last error if no attempt succeeds.
+func retryOperation(op func() error, maxRetries int, delay time.Duration) error {
+    var lastErr error
+    for range maxRetries {
+        if err := op(); err != nil {
+            lastErr = err
+            time.Sleep(delay)
+            continue
+        }
+        return nil
+    }
+    return lastErr
+}
+
+// ensureRuleConfigs updates the rules, clusterrules, and globalrules
+// ConfigMaps loaded by the rule-evaluator.
 func (r *rulesReconciler) ensureRuleConfigs(ctx context.Context, projectID, location, cluster string, configCompression monitoringv1.CompressionType) error {
     logger, _ := logr.FromContext(ctx)
 
-    // Re-generate the configmap that's loaded by the rule-evaluator.
-    cm := &corev1.ConfigMap{
-        ObjectMeta: metav1.ObjectMeta{
-            Namespace: r.opts.OperatorNamespace,
-            Name:      nameRulesGenerated,
-            Labels: map[string]string{
-                LabelAppName: NameRuleEvaluator,
-            },
-        },
-        // Ensure there's always at least an empty, uncompressed dummy file as the evaluator
-        // expects at least one match.
-        Data: map[string]string{
-            "empty.yaml": "",
-        },
-    }
+    const maxRetries = 3
+    const retryDelay = 500 * time.Millisecond
 
-    // Generate a final rule file for each Rules resource.
-    //
-    // Depending on the scope level (global, cluster, namespace) the rules will be generated
-    // so that queries are constrained to the appropriate project_id, cluster, and namespace
-    // labels and that they are preserved through query aggregations and appear on the
-    // output data.
-    //
-    // The location is not scoped as it's not a meaningful boundary for "human access"
-    // to data as clusters may span locations.
-    var rulesList monitoringv1.RulesList
-    if err := r.client.List(ctx, &rulesList); err != nil {
-        return fmt.Errorf("list rules: %w", err)
-    }
+    updateStatus := &RulesConfigUpdateStatus{ConfigMapResults: make(map[string]error)}
 
     now := metav1.Now()
     conditionSuccess := &monitoringv1.MonitoringCondition{
@@ -261,6 +265,17 @@ func (r *rulesReconciler) ensureRuleConfigs(ctx context.Context, projectID, loca
     }
     var statusUpdates []monitoringv1.MonitoringCRD
 
+    // Create one ConfigMap per rule type (no splitting)
+    // - rules (namespace-scoped Rules)
+    // - clusterrules (cluster-scoped ClusterRules)
+    // - globalrules (GlobalRules)
+
+    // Process namespace-scoped Rules -> single "rules" ConfigMap
+    var rulesList monitoringv1.RulesList
+    if err := r.client.List(ctx, &rulesList); err != nil {
+        return fmt.Errorf("list rules: %w", err)
+    }
+    rulesData := make(map[string]string)
     for i := range rulesList.Items {
         rs := &rulesList.Items[i]
         result, err := rs.RuleGroupsConfig(projectID, location, cluster)
@@ -278,19 +293,26 @@ func (r *rulesReconciler) ensureRuleConfigs(ctx context.Context, projectID, loca
             continue
         }
         filename := fmt.Sprintf("rules__%s__%s.yaml", rs.Namespace, rs.Name)
-        if err := setConfigMapData(cm, configCompression, filename, result); err != nil {
+        var buf strings.Builder
+        if err := setConfigMapDataRaw(&buf, configCompression, result); err != nil {
             return err
         }
+        rulesData[filename] = buf.String()
         if rs.Status.SetMonitoringCondition(rs.GetGeneration(), now, conditionSuccess) {
             statusUpdates = append(statusUpdates, rs)
         }
     }
+    if err := r.createOrUpdateConfigMap(ctx, "rules", rulesData, maxRetries, retryDelay, updateStatus); err != nil {
+        return err
+    }
 
+    // Process cluster-scoped ClusterRules -> single "clusterrules" ConfigMap
     var clusterRulesList monitoringv1.ClusterRulesList
     if err := r.client.List(ctx, &clusterRulesList); err != nil {
         return fmt.Errorf("list cluster rules: %w", err)
     }
+    clusterRulesData := make(map[string]string)
     for i := range clusterRulesList.Items {
         rs := &clusterRulesList.Items[i]
         result, err := rs.RuleGroupsConfig(projectID, location, cluster)
@@ -308,19 +330,26 @@ func (r *rulesReconciler) ensureRuleConfigs(ctx context.Context, projectID, loca
             continue
         }
         filename := fmt.Sprintf("clusterrules__%s.yaml", rs.Name)
-        if err := setConfigMapData(cm, configCompression, filename, result); err != nil {
+        var buf strings.Builder
+        if err := setConfigMapDataRaw(&buf, configCompression, result); err != nil {
             return err
         }
+        clusterRulesData[filename] = buf.String()
         if rs.Status.SetMonitoringCondition(rs.GetGeneration(), now, conditionSuccess) {
             statusUpdates = append(statusUpdates, rs)
         }
     }
+    if err := r.createOrUpdateConfigMap(ctx, "clusterrules", clusterRulesData, maxRetries, retryDelay, updateStatus); err != nil {
+        return err
+    }
 
+    // Process GlobalRules -> single "globalrules" ConfigMap
     var globalRulesList monitoringv1.GlobalRulesList
     if err := r.client.List(ctx, &globalRulesList); err != nil {
         return fmt.Errorf("list global rules: %w", err)
     }
+    globalRulesData := make(map[string]string)
     for i := range globalRulesList.Items {
         rs := &globalRulesList.Items[i]
         result, err := rs.RuleGroupsConfig()
@@ -338,24 +367,28 @@ func (r *rulesReconciler) ensureRuleConfigs(ctx context.Context, projectID, loca
             continue
         }
         filename := fmt.Sprintf("globalrules__%s.yaml", rs.Name)
-        if err := setConfigMapData(cm, configCompression, filename, result); err != nil {
+        var buf strings.Builder
+        if err := setConfigMapDataRaw(&buf, configCompression, result); err != nil {
             return err
         }
+        globalRulesData[filename] = buf.String()
         if rs.Status.SetMonitoringCondition(rs.GetGeneration(), now, conditionSuccess) {
             statusUpdates = append(statusUpdates, rs)
         }
     }
+    if err := r.createOrUpdateConfigMap(ctx, "globalrules", globalRulesData, maxRetries, retryDelay, updateStatus); err != nil {
+        return err
+    }
 
-    // Create or update generated rule ConfigMap.
-    if err := r.client.Update(ctx, cm); apierrors.IsNotFound(err) {
-        if err := r.client.Create(ctx, cm); err != nil {
-            return fmt.Errorf("create generated rules: %w", err)
+    // Log partial update status
+    for name, err := range updateStatus.ConfigMapResults {
+        if err != nil {
+            logger.Error(err, "ConfigMap update failed", "configmap", name)
         }
-    } else if err != nil {
-        return fmt.Errorf("update generated rules: %w", err)
     }
 
+    // Update status for all processed rule objects
     var errs []error
     for _, obj := range statusUpdates {
         if err := patchMonitoringStatus(ctx, r.client, obj, obj.GetMonitoringStatus()); err != nil {
@@ -363,5 +396,58 @@ func (r *rulesReconciler) ensureRuleConfigs(ctx context.Context, projectID, loca
         }
     }
 
+    // Return an error if any ConfigMap operation failed
+    for _, err := range updateStatus.ConfigMapResults {
+        if err != nil {
+            errs = append(errs, err)
+        }
+    }
     return errors.Join(errs...)
 }
+
+// createOrUpdateConfigMap creates or updates a single ConfigMap for a rule type.
+func (r *rulesReconciler) createOrUpdateConfigMap(
+    ctx context.Context,
+    name string,
+    data map[string]string,
+    maxRetries int,
+    retryDelay time.Duration,
+    updateStatus *RulesConfigUpdateStatus,
+) error {
+    // If no data, create an empty ConfigMap
+    if len(data) == 0 {
+        data = map[string]string{}
+    }
+
+    cm := &corev1.ConfigMap{
+        ObjectMeta: metav1.ObjectMeta{
+            Namespace: r.opts.OperatorNamespace,
+            Name:      name,
+            Labels:    map[string]string{LabelAppName: NameRuleEvaluator},
+        },
+        Data: data,
+    }
+
+    // Create or update with retry
+    op := func() error {
+        if err := r.client.Update(ctx, cm); apierrors.IsNotFound(err) {
+            if err := r.client.Create(ctx, cm); err != nil {
+                return fmt.Errorf("create %s configmap: %w", name, err)
+            }
+        } else if err != nil {
+            return fmt.Errorf("update %s configmap: %w", name, err)
+        }
+        return nil
+    }
+    updateStatus.ConfigMapResults[name] = retryOperation(op, maxRetries, retryDelay)
+    return updateStatus.ConfigMapResults[name]
+}
+
+// setConfigMapDataRaw writes one rule file's content into buf; gzip-compressed
+// output is not supported on this path yet.
+func setConfigMapDataRaw(buf *strings.Builder, compression monitoringv1.CompressionType, data string) error {
+    if compression == monitoringv1.CompressionGzip {
+        return errors.New("gzip compression not implemented in setConfigMapDataRaw")
+    }
+    buf.WriteString(data)
+    return nil
+}
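The retryOperation helper above is deliberately simple: a fixed delay, no backoff or jitter, and the last error wins. Its contract in miniature (an illustrative snippet, not part of the change):

    // The third attempt succeeds, so retryOperation returns nil; had all
    // three attempts failed, the error of the final attempt would be returned.
    attempts := 0
    err := retryOperation(func() error {
        attempts++
        if attempts < 3 {
            return fmt.Errorf("transient failure %d", attempts)
        }
        return nil
    }, 3, 10*time.Millisecond)
    // here: err == nil, attempts == 3

Two consequences of the code as written are worth noting: time.Sleep does not observe context cancellation, so a shutdown can block for up to maxRetries*delay per ConfigMap, and setConfigMapDataRaw rejects CompressionGzip outright, so gzip-compressed rule configs are not yet supported on this path even though the e2e test still exercises a gzip branch.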
diff --git a/pkg/operator/rules_test.go b/pkg/operator/rules_test.go
index 7da0e290f0..df7fb62929 100644
--- a/pkg/operator/rules_test.go
+++ b/pkg/operator/rules_test.go
@@ -629,3 +629,154 @@ func applyScale(obj client.Object, scale *autoscalingv1.Scale) error {
     }
     return nil
 }
+
+// flakyClient simulates a client that fails the first update/create, then succeeds.
+type flakyClient struct {
+    client.Client
+    failOnce map[string]bool
+}
+
+// Update wraps the client Update to fail once per ConfigMap name.
+func (fc *flakyClient) Update(ctx context.Context, obj client.Object, opts ...client.UpdateOption) error {
+    cm, ok := obj.(*corev1.ConfigMap)
+    if ok && !fc.failOnce[cm.Name] {
+        fc.failOnce[cm.Name] = true
+        return fmt.Errorf("simulated update failure for %s", cm.Name)
+    }
+    return fc.Client.Update(ctx, obj, opts...)
+}
+
+// Create wraps the client Create to fail once per ConfigMap name.
+func (fc *flakyClient) Create(ctx context.Context, obj client.Object, opts ...client.CreateOption) error {
+    cm, ok := obj.(*corev1.ConfigMap)
+    if ok && !fc.failOnce[cm.Name] {
+        fc.failOnce[cm.Name] = true
+        return fmt.Errorf("simulated create failure for %s", cm.Name)
+    }
+    return fc.Client.Create(ctx, obj, opts...)
+}
+
+func TestEnsureRuleConfigs_SplitConfigMaps(t *testing.T) {
+    // Create two rules to verify they both land in the single "rules" ConfigMap.
+    rule1 := &monitoringv1.Rules{
+        TypeMeta:   metav1.TypeMeta{APIVersion: "monitoring.googleapis.com/v1", Kind: "Rules"},
+        ObjectMeta: metav1.ObjectMeta{Namespace: "ns1", Name: "r1"},
+        Spec: monitoringv1.RulesSpec{
+            Groups: []monitoringv1.RuleGroup{{
+                Name:  "g1",
+                Rules: []monitoringv1.Rule{{Record: "r", Expr: "vector(1)"}},
+            }},
+        },
+    }
+    rule2 := &monitoringv1.Rules{
+        TypeMeta:   metav1.TypeMeta{APIVersion: "monitoring.googleapis.com/v1", Kind: "Rules"},
+        ObjectMeta: metav1.ObjectMeta{Namespace: "ns2", Name: "r2"},
+        Spec: monitoringv1.RulesSpec{
+            Groups: []monitoringv1.RuleGroup{{
+                Name:  "g2",
+                Rules: []monitoringv1.Rule{{Record: "r2", Expr: "vector(2)"}},
+            }},
+        },
+    }
+
+    c := newFakeClientBuilder().WithObjects(rule1, rule2).Build()
+    reconciler := &rulesReconciler{
+        client: c,
+        opts:   Options{OperatorNamespace: "gmp-system"},
+    }
+    err := reconciler.ensureRuleConfigs(t.Context(), "proj", "loc", "cluster", monitoringv1.CompressionNone)
+    if err != nil {
+        t.Fatalf("unexpected error: %v", err)
+    }
+
+    var cmList corev1.ConfigMapList
+    if err := c.List(t.Context(), &cmList); err != nil {
+        t.Fatalf("listing configmaps: %v", err)
+    }
+
+    // Should have exactly 3 ConfigMaps: rules, clusterrules, globalrules
+    if len(cmList.Items) != 3 {
+        t.Fatalf("expected 3 configmaps (rules, clusterrules, globalrules), got %d", len(cmList.Items))
+    }
+
+    // Find the "rules" ConfigMap
+    var rulesConfigMap *corev1.ConfigMap
+    for i := range cmList.Items {
+        cm := &cmList.Items[i]
+        if cm.Name == "rules" {
+            rulesConfigMap = cm
+            break
+        }
+    }
+    if rulesConfigMap == nil {
+        t.Fatalf("rules configmap not found")
+    }
+
+    // Verify both rule files are in the single "rules" ConfigMap
+    found := map[string]bool{"rules__ns1__r1.yaml": false, "rules__ns2__r2.yaml": false}
+    for key := range rulesConfigMap.Data {
+        if _, ok := found[key]; ok {
+            found[key] = true
+        }
+    }
+    for k, v := range found {
+        if !v {
+            t.Errorf("expected rule file %s not found in rules configmap", k)
+        }
+    }
+}
+
+func TestEnsureRuleConfigs_InterruptionRecovery(t *testing.T) {
+    rule := &monitoringv1.Rules{
+        TypeMeta:   metav1.TypeMeta{APIVersion: "monitoring.googleapis.com/v1", Kind: "Rules"},
+        ObjectMeta: metav1.ObjectMeta{Namespace: "ns1", Name: "r1"},
+        Spec: monitoringv1.RulesSpec{
+            Groups: []monitoringv1.RuleGroup{{
+                Name:  "g1",
+                Rules: []monitoringv1.Rule{{Record: "r", Expr: "vector(1)"}},
+            }},
+        },
+    }
+
+    baseClient := newFakeClientBuilder().WithObjects(rule).Build()
+    fc := &flakyClient{Client: baseClient, failOnce: make(map[string]bool)}
+    reconciler := &rulesReconciler{
+        client: fc,
+        opts:   Options{OperatorNamespace: "gmp-system"},
+    }
+
+    // First call: will fail once per ConfigMap, but retry and succeed
+    err := reconciler.ensureRuleConfigs(t.Context(), "proj", "loc", "cluster", monitoringv1.CompressionNone)
+    if err != nil {
+        t.Fatalf("unexpected error: %v", err)
+    }
+
+    var cmList corev1.ConfigMapList
+    if err := fc.List(t.Context(), &cmList); err != nil {
+        t.Fatalf("listing configmaps: %v", err)
+    }
+    // Should have 3 ConfigMaps: rules, clusterrules, globalrules
+    if len(cmList.Items) != 3 {
+        t.Fatalf("expected 3 configmaps (rules, clusterrules, globalrules), got %d", len(cmList.Items))
+    }
+
+    // Find the rules ConfigMap
+    var rulesConfigMap *corev1.ConfigMap
+    for i := range cmList.Items {
+        cm := &cmList.Items[i]
+        if cm.Name == "rules" {
+            rulesConfigMap = cm
+            break
+        }
+    }
+    if rulesConfigMap == nil {
+        t.Fatalf("rules configmap not found")
+    }
+
+    if _, ok := rulesConfigMap.Data["rules__ns1__r1.yaml"]; !ok {
+        t.Errorf("expected rule file not found after recovery")
+    }
+}
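The two tests above locate a ConfigMap by name with the same index loop; if the suite grows, a small helper like the following sketch (not part of the change) would remove the duplication:

    // findConfigMap returns a pointer to the ConfigMap with the given name,
    // or nil if the list does not contain it.
    func findConfigMap(items []corev1.ConfigMap, name string) *corev1.ConfigMap {
        for i := range items {
            if items[i].Name == name {
                return &items[i]
            }
        }
        return nil
    }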