diff --git a/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml b/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml index b4ab00e104..f291a34661 100644 --- a/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml +++ b/charts/operator/crds/monitoring.googleapis.com_operatorconfigs.yaml @@ -90,12 +90,43 @@ spec: description: Filter limits which metric data is sent to Cloud Monitoring (it doesn't apply to additional exports). properties: + enableMatchOneOf: + description: |- + EnableMatchOneOf allows additional control over MatchOneOf filtering. + + Available settings: + * not set: The MatchOneOf settings are ignored, default is used. + * false: MatchOneOf feature is explicitly disabled; export is forced to match all series. + * true: The MatchOneOf settings are used, overwriting a default. + + See MatchOneOf IMPORTANT section to learn about the MatchOneOf default. + type: boolean matchOneOf: description: |- - A list of Prometheus time series matchers. Every time series must match at least one - of the matchers to be exported. This field can be used equivalently to the match[] - parameter of the Prometheus federation endpoint to selectively export data. + MatchOneOf, if EnableMatchOneOf is true, controls the export filtering setting. + + MatchOneOf expects a list of Prometheus time series matchers. Every time series + must match at least one of the matchers to be exported. This field can be used + equivalently to the match[] parameter of the Prometheus federation endpoint to + selectively export data. + Example: `["{job!='foobar'}", "{__name__!~'container_foo.*|container_bar.*'}"]` + + IMPORTANT: MatchOneOf is guarded by the additional flag (EnableMatchOneOf) + and removed from the public docs. Replacements: https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#filter-metrics. + + Rationales: + * This option is prone to misconfiguration, e.g. 
you need to manually + match important built-in metrics (up, scrape_samples_added, etc.). + * Unmatched metrics are still in the local collector memory + (this filters on export only). + * Typically, the default (empty array), means match all metrics. However, + for the clusters that were using MatchOneOf filtering before the GMP 0.14.x, + version, the default is what has been configured back then in the orphaned + collector DaemonSet EXTRA_ARGS environment variable. If you are impacted: + * use EnableMatchOneOf = false to reset the default + * use EnableMatchOneOf = true to apply this configuration's MatchOneOf + * Remove EXTRA_ARGS environment variable from the collector DaemonSet (it's safe to do so). items: type: string type: array diff --git a/charts/values.global.yaml b/charts/values.global.yaml index b9345b2107..c6f8da5c1b 100644 --- a/charts/values.global.yaml +++ b/charts/values.global.yaml @@ -16,30 +16,30 @@ commonLabels: false namespace: public: gmp-public system: gmp-system -version: 0.15.4 +version: 0.17.2 images: # NOTE: All tags have to be quoted otherwise they might be treated as a number. 
bash: image: gke.gcr.io/gke-distroless/bash - tag: "gke_distroless_20250407.00_p0" + tag: gke_distroless_20251107.00_p0 alertmanager: image: gke.gcr.io/prometheus-engine/alertmanager - tag: "v0.27.0-gmp.3-gke.0" + tag: "v0.27.0-gmp.4-gke.4" prometheus: image: gke.gcr.io/prometheus-engine/prometheus - tag: "v2.53.4-gmp.0-gke.1" + tag: "v2.53.5-gmp.1-gke.2" configReloader: image: gke.gcr.io/prometheus-engine/config-reloader - tag: "v0.15.4-gke.0" + tag: "v0.17.2-gke.2" operator: image: gke.gcr.io/prometheus-engine/operator - tag: "v0.15.4-gke.0" + tag: "v0.17.2-gke.2" ruleEvaluator: image: gke.gcr.io/prometheus-engine/rule-evaluator - tag: "v0.15.4-gke.0" + tag: "v0.17.2-gke.2" datasourceSyncer: image: gke.gcr.io/prometheus-engine/datasource-syncer - tag: "v0.15.4-gke.0" + tag: "v0.17.2-gke.2" resources: alertManager: limits: @@ -98,4 +98,4 @@ ruleEvaluator: create: true serviceAccount: create: true - name: rule-evaluator + name: rule-evaluator \ No newline at end of file diff --git a/cmd/datasource-syncer/datasource-syncer.yaml b/cmd/datasource-syncer/datasource-syncer.yaml index 783d35df05..27df3892e4 100644 --- a/cmd/datasource-syncer/datasource-syncer.yaml +++ b/cmd/datasource-syncer/datasource-syncer.yaml @@ -41,7 +41,7 @@ spec: - linux containers: - name: datasource-syncer-init - image: gke.gcr.io/prometheus-engine/datasource-syncer:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/datasource-syncer:v0.17.2-gke.2 args: - "--datasource-uids=$DATASOURCE_UIDS" - "--grafana-api-endpoint=$GRAFANA_API_ENDPOINT" @@ -79,7 +79,7 @@ spec: - linux containers: - name: datasource-syncer - image: gke.gcr.io/prometheus-engine/datasource-syncer:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/datasource-syncer:v0.17.2-gke.2 args: - "--datasource-uids=$DATASOURCE_UIDS" - "--grafana-api-endpoint=$GRAFANA_API_ENDPOINT" diff --git a/cmd/rule-evaluator/README.md b/cmd/rule-evaluator/README.md index 713e1f3a5c..dd8276d98f 100644 --- a/cmd/rule-evaluator/README.md +++ 
b/cmd/rule-evaluator/README.md @@ -80,14 +80,6 @@ Flags: targets. Prefer setting the external label "cluster" in the Prometheus configuration if not using the auto-discovered default. - --export.match= ... A Prometheus time series matcher. Can be - repeated. Every time series must match at - least one of the matchers to be exported. - This flag can be used equivalently to the - match[] parameter of the Prometheus federation - endpoint to selectively export data. - (Example: --export.match='{job="prometheus"}' - --export.match='{__name__=~"job:.*"}) --export.debug.metric-prefix="prometheus.googleapis.com" Google Cloud Monitoring metric prefix to use. --[no-]export.debug.disable-auth diff --git a/doc/api.md b/doc/api.md index 0acf696578..9e576cfcc6 100644 --- a/doc/api.md +++ b/doc/api.md @@ -1009,16 +1009,48 @@ ClusterPodMonitoring, PodMonitoring, GlobalRules, ClusterRules, and/or Rules.

+enableMatchOneOf
+ +bool + + + +

EnableMatchOneOf allows additional control over MatchOneOf filtering.

+

Available settings: +* not set: The MatchOneOf settings are ignored; the default is used. +* false: The MatchOneOf feature is explicitly disabled; export is forced to match all series. +* true: The MatchOneOf settings are used, overriding the default.

+

See MatchOneOf IMPORTANT section to learn about the MatchOneOf default.

+ + + + matchOneOf
[]string -

A list of Prometheus time series matchers. Every time series must match at least one -of the matchers to be exported. This field can be used equivalently to the match[] -parameter of the Prometheus federation endpoint to selectively export data. -Example: ["{job!='foobar'}", "{__name__!~'container_foo.*|container_bar.*'}"]

+

MatchOneOf, if EnableMatchOneOf is true, controls the export filtering setting.

+

MatchOneOf expects a list of Prometheus time series matchers. Every time series +must match at least one of the matchers to be exported. This field can be used +equivalently to the match[] parameter of the Prometheus federation endpoint to +selectively export data.

+

Example: ["{job!='foobar'}", "{__name__!~'container_foo.*|container_bar.*'}"]

+

IMPORTANT: MatchOneOf is guarded by an additional flag (EnableMatchOneOf) +and has been removed from the public docs. For replacements, see https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#filter-metrics.

+

Rationale: +* This option is prone to misconfiguration, e.g. you need to manually +match important built-in metrics (up, scrape_samples_added, etc.). +* Unmatched metrics are still kept in the local collector memory +(this filters on export only). +* Typically, the default (empty array) means match all metrics. However, +for clusters that were using MatchOneOf filtering before GMP version 0.14.x, +the default is whatever was configured back then in the orphaned +collector DaemonSet EXTRA_ARGS environment variable. If you are impacted: +* use EnableMatchOneOf = false to reset the default +* use EnableMatchOneOf = true to apply this configuration’s MatchOneOf +* remove the EXTRA_ARGS environment variable from the collector DaemonSet (it’s safe to do so).

diff --git a/e2e/collector_filter_test.go b/e2e/collector_filter_test.go index e27286d5ce..738594921d 100644 --- a/e2e/collector_filter_test.go +++ b/e2e/collector_filter_test.go @@ -22,6 +22,7 @@ import ( "github.com/GoogleCloudPlatform/prometheus-engine/e2e/deploy" "github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator" monitoringv1 "github.com/GoogleCloudPlatform/prometheus-engine/pkg/operator/apis/monitoring/v1" + "google.golang.org/protobuf/proto" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" @@ -54,52 +55,58 @@ var collectorPodMonitoring = &monitoringv1.PodMonitoring{ } type filterState struct { - match string + container string } var ( stateEmpty = filterState{} - stateA = filterState{match: "{__name__='go_goroutines',container='prometheus'}"} - stateB = filterState{match: "{__name__='go_goroutines',container='config-reloader'}"} + stateA = filterState{container: "prometheus"} + stateB = filterState{container: "config-reloader"} ) -func (f filterState) expectedMatchConfigEntry(t testing.TB) string { - switch f { - case stateEmpty: +// expectedCollectorExportConfigEntry returns expected prometheus config.GoogleCloudExportConfig +// serialized entry for the given filter state. +func (f filterState) expectedCollectorExportConfigEntry(enabled *bool) string { + var entry string + switch { + case enabled == nil: return "" - case stateA: - return ` - match: - - '{__name__=''go_goroutines'',container=''prometheus''}'` - case stateB: - return ` - match: - - '{__name__=''go_goroutines'',container=''config-reloader''}'` + case *enabled: + entry += ` + enable_match: true` + case !*enabled: + entry += ` + enable_match: false` default: - t.Fatalf("invalid filter state: %s", f) - return "" + panic("unexpected enabled state") + } + + // We only add matchers if it's enabled, no point doing this otherwise. 
+ if *enabled && f.container != "" { + entry += fmt.Sprintf(` + match: + - '{__name__=''go_goroutines'',container=''%s''}'`, f.container) } + return entry } -func (f filterState) filters(t testing.TB) []string { - switch f { - case stateEmpty: - return nil - case stateA, stateB: - return []string{f.match} - default: - t.Fatalf("invalid filter state: %s", f) - return nil +func (f filterState) toMatcher() string { + if f.container != "" { + return fmt.Sprintf("{__name__='go_goroutines',container='%s'}", f.container) } + return "" } -// testValidateApplied fails the test if the current filtering state is not applied to "f" -// within the context deadline. This test assumes: +// testFiltering fails the test if the current filtering state is applied by +// querying GCM expecting only f.container metrics to be present. If f.container +// is empty, it means GCM should see both container's metrics. +// +// This test assumes: // * collectors are running. // * collectorPodMonitoring is applied. // * prometheus and config-reloader expose 'go_goroutines' metric. -// * OperatorConfig as "external_key"=$externalValue label configured (as well as default ones like project, etc.). -func (f filterState) testValidateApplied(ctx context.Context, kubeClient client.Client, externalValue string) func(*testing.T) { +// * OperatorConfig as "external_key"=$externalValue label configured (on top default labels like project, etc.). +func (f filterState) testFiltering(ctx context.Context, kubeClient client.Client, externalValue string) func(*testing.T) { return func(t *testing.T) { metricClient, err := newMetricClient(ctx) if err != nil { @@ -129,21 +136,6 @@ func (f filterState) testValidateApplied(ctx context.Context, kubeClient client. 
for _, pod := range pods.Items { t.Run(pod.Name, func(t *testing.T) { - var promMatch, configReloaderMatch bool - switch f { - case stateEmpty: - promMatch = true - configReloaderMatch = true - case stateA: - promMatch = true - configReloaderMatch = false - case stateB: - promMatch = false - configReloaderMatch = true - default: - t.Fatalf("invalid filter state: %s", f) - } - t.Run("prometheus", testValidateGCMMetric(ctx, metricClient, listTimeSeriesFilter{ metricType: "prometheus.googleapis.com/go_goroutines/gauge", job: collectorPodMonitoring.Name, @@ -152,7 +144,7 @@ func (f filterState) testValidateApplied(ctx context.Context, kubeClient client. container: "prometheus", externalValue: externalValue, namespace: operator.DefaultOperatorNamespace, - }, metricExpectation{noPoints: !promMatch})) + }, metricExpectation{noPoints: f.container != "" && f.container != "prometheus"})) t.Run("config-reloader", testValidateGCMMetric(ctx, metricClient, listTimeSeriesFilter{ metricType: "prometheus.googleapis.com/go_goroutines/gauge", @@ -162,41 +154,104 @@ func (f filterState) testValidateApplied(ctx context.Context, kubeClient client. container: "config-reloader", externalValue: externalValue, namespace: operator.DefaultOperatorNamespace, - }, metricExpectation{noPoints: !configReloaderMatch})) + }, metricExpectation{noPoints: f.container != "" && f.container != "config-reloader"})) }) } } } type filterCase struct { - filter filterState + name string + filter filterState // OperatorConfig.collection.filter.matchOneOf. + enableMatchOneOf *bool // Opt in flag, so OperatorConfig.collection.filter.enableMatchOneOf. + expectedFilter filterState // What we expect to be applied. } // Regression tests against go/gmp:matchstuck. -// See go/gmp:matchstuck for 0, A, B, C case definition. -func TestCollectorMatch0toACase(t *testing.T) { +// NOTE: TestCollectorMatch_NoFiltering takes ~1m per case, add sequential cases carefully. 
+func TestCollectorMatch_NoFiltering(t *testing.T) { if skipGCM { t.Skip("this test requires GCM integration") } testCollectorMatch(t, stateEmpty, []filterCase{ - // 0 { - filter: stateEmpty, + name: "no filtering", + filter: stateEmpty, + + expectedFilter: stateEmpty, + }, + { + name: "no filtering/enable=true", + filter: stateEmpty, + enableMatchOneOf: proto.Bool(true), + + expectedFilter: stateEmpty, + }, + { + name: "no filtering/enable=false", + filter: stateEmpty, + enableMatchOneOf: proto.Bool(false), + expectedFilter: stateEmpty, }, - // A + }) +} + +// Regression tests against go/gmp:matchstuck. +// NOTE: TestCollectorMatch_NewFilter takes ~1m per case, add sequential cases carefully. +func TestCollectorMatch_NewFilter(t *testing.T) { + if skipGCM { + t.Skip("this test requires GCM integration") + } + testCollectorMatch(t, stateEmpty, []filterCase{ { + name: "filtering stuck", filter: stateA, - // Given the go/gmp:matchstuck we expect the noop behaviour. - expectedFilter: stateEmpty, // TODO: Add fix, so it's stateA (when forced). + + // Given the go/gmp:matchstuck we expect the noop behaviour, without opt-in. + expectedFilter: stateEmpty, }, { - filter: stateB, + name: "filtering/enable=true", + filter: stateA, + enableMatchOneOf: proto.Bool(true), + + expectedFilter: stateA, + }, + { + name: "filtering/enable=false", + filter: stateA, + enableMatchOneOf: proto.Bool(false), + + expectedFilter: stateEmpty, + }, + }) +} + +// Regression tests against go/gmp:matchstuck. +// NOTE: TestCollectorMatch_NewFilter_ThenRemoved takes ~1m per case, add sequential cases carefully. 
+func TestCollectorMatch_NewFilter_ThenRemoved(t *testing.T) { + if skipGCM { + t.Skip("this test requires GCM integration") + } + testCollectorMatch(t, stateEmpty, []filterCase{ + { + name: "filtering/enable=true", + filter: stateA, + enableMatchOneOf: proto.Bool(true), + + expectedFilter: stateA, + }, + { + name: "filtering stuck again", + filter: stateA, + // Given the go/gmp:matchstuck we expect the noop behaviour. - expectedFilter: stateEmpty, // TODO: Add fix, so it's stateB (when forced). + expectedFilter: stateEmpty, }, { + name: "no filtering again", filter: stateEmpty, expectedFilter: stateEmpty, }, @@ -204,27 +259,34 @@ func TestCollectorMatch0toACase(t *testing.T) { } // Regression tests against go/gmp:matchstuck. -// See go/gmp:matchstuck for 0, A, B, C case definition. -func TestCollectorMatchBtoCCase(t *testing.T) { +// NOTE: TestCollectorMatch_StuckFilter takes some time per case, add cases carefully. +func TestCollectorMatch_StuckFilter(t *testing.T) { if skipGCM { t.Skip("this test requires GCM integration") } + + // --export.match=stateB. testCollectorMatch(t, stateB, []filterCase{ { + name: "filtering stuck", filter: stateA, + // Given the go/gmp:matchstuck we expect the orphaned setting applied. - expectedFilter: stateB, // TODO: Add fix, so it's stateA (when forced). + expectedFilter: stateB, }, - // B-2 { - filter: stateEmpty, - // Given the go/gmp:matchstuck we expect the orphaned setting applied. - expectedFilter: stateB, // TODO: Add fix, `so it's stateEmpty (when forced). 
+ name: "filtering/enable=true", + filter: stateA, + enableMatchOneOf: proto.Bool(true), + + expectedFilter: stateA, }, - // C { - filter: stateB, - expectedFilter: stateB, + name: "filtering/enable=disabled", + filter: stateA, + enableMatchOneOf: proto.Bool(false), + + expectedFilter: stateEmpty, }, }) } @@ -238,7 +300,7 @@ func testCollectorMatch(t *testing.T, explicitFilter filterState, filterCases [] var dOpts []deploy.DeployOption if explicitFilter != stateEmpty { - dOpts = append(dOpts, deploy.WithExplicitCollectorFilter(explicitFilter.match)) + dOpts = append(dOpts, deploy.WithExplicitCollectorFilter(explicitFilter.toMatcher())) } kubeClient, restConfig, err := setupCluster(ctx, t, dOpts...) if err != nil { @@ -250,16 +312,19 @@ func testCollectorMatch(t *testing.T, explicitFilter filterState, filterCases [] t.Run("self-podmonitoring-ready", testEnsurePodMonitoringReady(ctx, kubeClient, collectorPodMonitoring)) for i, fcase := range filterCases { - // Ensure a unique external label value so we are sure the existence checks are accurate. - externalValue := fmt.Sprintf("filter%d", i) - - // Setup OperatorConfig with an intput filtering state (filter.matchOneOf). - t.Run("collector-operatorconfig", testCollectorOperatorConfigWithParams( - ctx, - kubeClient, - externalValue, - fcase.filter, - )) - t.Run("filter-applied-gcm", fcase.expectedFilter.testValidateApplied(ctx, kubeClient, externalValue)) + t.Run(fcase.name, func(t *testing.T) { + // Ensure a unique external label value so we are sure the existence checks are accurate. + externalValue := fmt.Sprintf("filter%d", i) + + // Setup OperatorConfig with an input filtering state (filter.matchOneOf). 
+ t.Run("collector-operatorconfig", testCollectorOperatorConfigWithParams( + ctx, + kubeClient, + externalValue, + fcase.filter, + fcase.enableMatchOneOf, + )) + t.Run("filter-applied-gcm", fcase.expectedFilter.testFiltering(ctx, kubeClient, externalValue)) + }) } } diff --git a/e2e/collector_test.go b/e2e/collector_test.go index 8e1fda208d..5ab9d2d1ec 100644 --- a/e2e/collector_test.go +++ b/e2e/collector_test.go @@ -176,10 +176,16 @@ func testCollectorDeployed(ctx context.Context, restConfig *rest.Config, kubeCli } func testCollectorOperatorConfig(ctx context.Context, kubeClient client.Client) func(*testing.T) { - return testCollectorOperatorConfigWithParams(ctx, kubeClient, "external_val", stateEmpty) + return testCollectorOperatorConfigWithParams(ctx, kubeClient, "external_val", stateEmpty, nil) } -func testCollectorOperatorConfigWithParams(ctx context.Context, kubeClient client.Client, externalValue string, filter filterState) func(*testing.T) { +func testCollectorOperatorConfigWithParams( + ctx context.Context, + kubeClient client.Client, + externalValue string, + matchOneOf filterState, + enableMatchOneOf *bool, +) func(*testing.T) { return func(t *testing.T) { t.Log("checking collector is configured") @@ -194,8 +200,13 @@ func testCollectorOperatorConfigWithParams(ctx context.Context, kubeClient clien } // Test propagation of the custom options. 
+ var matchers []string + if m := matchOneOf.toMatcher(); m != "" { + matchers = []string{m} + } config.Collection.Filter = monitoringv1.ExportFilters{ - MatchOneOf: filter.filters(t), + MatchOneOf: matchers, + EnableMatchOneOf: enableMatchOneOf, } config.Collection.Compression = monitoringv1.CompressionGzip config.Collection.ExternalLabels = map[string]string{ @@ -221,7 +232,7 @@ func testCollectorOperatorConfigWithParams(ctx context.Context, kubeClient clien return fmt.Sprintf(` credentials: %s`, collectorExplicitCredentials()) }(), - "{expectedMatchEntry}", filter.expectedMatchConfigEntry(t), + "{expectedMatchEntry}", matchOneOf.expectedCollectorExportConfigEntry(enableMatchOneOf), ).Replace(s) } want := map[string]string{ @@ -505,6 +516,7 @@ func testValidateGCMMetric(ctx context.Context, metricClient *gcm.MetricClient, return func(t *testing.T) { filter := f.Filter(t) t.Log("checking for metric in Cloud Monitoring", filter) + now := time.Now() if err := wait.PollUntilContextCancel(ctx, pollDuration, false, func(ctx context.Context) (bool, error) { endTime := time.Now() // Always check for fresh data, so we don't have a potential race between collector starting to send data vs this timestamp. @@ -549,8 +561,9 @@ func testValidateGCMMetric(ctx context.Context, metricClient *gcm.MetricClient, } return true, nil }); err != nil { - t.Fatalf("waiting for collector metric to appear in GCM failed: %s; filter: %v", err, filter) + t.Fatalf("waiting for collector metric to appear in GCM failed after %v; err: %s; filter: %v", time.Since(now), err, filter) } + t.Log("found metric after", time.Since(now)) } } diff --git a/go.mod b/go.mod index 70bc17d51e..86b4421486 100644 --- a/go.mod +++ b/go.mod @@ -379,7 +379,8 @@ replace ( // Remove once this version moves to newer Prometheus. github.com/prometheus/common => github.com/prometheus/common v0.61.0 // See go/gmp:fork-toil for rationales of this entry. 
- github.com/prometheus/prometheus => github.com/GoogleCloudPlatform/prometheus v0.0.0-20250822124349-98e3120b1750 // v2.53.5-gmp.0-rc.6 + // v2.53.5-gmp.0-rc.8 + github.com/prometheus/prometheus => github.com/GoogleCloudPlatform/prometheus v0.0.0-20251124145432-3a83a91a9e4f // v2.53.5-gmp.1-rc.0 ) tool ( diff --git a/go.sum b/go.sum index ef55266833..ed9ba15db0 100644 --- a/go.sum +++ b/go.sum @@ -53,8 +53,8 @@ github.com/Code-Hex/go-generics-cache v1.5.1 h1:6vhZGc5M7Y/YD8cIUcY8kcuQLB4cHR7U github.com/Code-Hex/go-generics-cache v1.5.1/go.mod h1:qxcC9kRVrct9rHeiYpFWSoW1vxyillCVzX13KZG8dl4= github.com/Djarvur/go-err113 v0.1.1 h1:eHfopDqXRwAi+YmCUas75ZE0+hoBHJ2GQNLYRSxao4g= github.com/Djarvur/go-err113 v0.1.1/go.mod h1:IaWJdYFLg76t2ihfflPZnM1LIQszWOsFDh2hhhAVF6k= -github.com/GoogleCloudPlatform/prometheus v0.0.0-20250822124349-98e3120b1750 h1:xuD+UwWYcwPqUvHVoyowUEy49UnW+n+0DCDpwhUL548= -github.com/GoogleCloudPlatform/prometheus v0.0.0-20250822124349-98e3120b1750/go.mod h1:KJY4lbAwOWwFJ9qgAPDYo3KVfXKokl7gU9WsMrNIdNk= +github.com/GoogleCloudPlatform/prometheus v0.0.0-20251124145432-3a83a91a9e4f h1:gKRiqqc1rkAtT8GrpbV3MlYHv6iKYG62YM714HZ0s58= +github.com/GoogleCloudPlatform/prometheus v0.0.0-20251124145432-3a83a91a9e4f/go.mod h1:Rjg7i0YEph5JUInO6E3W2yDUjyxA6qzWlHwXqb7WRHM= github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= diff --git a/manifests/operator.yaml b/manifests/operator.yaml index 5a8e50c5be..cb30ff61df 100644 --- a/manifests/operator.yaml +++ b/manifests/operator.yaml @@ -347,7 +347,7 @@ spec: labels: app: managed-prometheus-collector app.kubernetes.io/name: collector - app.kubernetes.io/version: 0.15.4 + app.kubernetes.io/version: 0.17.2 annotations: # The emptyDir for the storage and config directories prevents cluster # autoscaling unless 
this annotation is set. @@ -359,7 +359,7 @@ spec: priorityClassName: gmp-critical initContainers: - name: config-init - image: gke.gcr.io/gke-distroless/bash:gke_distroless_20250407.00_p0 + image: gke.gcr.io/gke-distroless/bash:gke_distroless_20251107.00_p0 command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml'] volumeMounts: - name: config-out @@ -373,7 +373,7 @@ spec: readOnlyRootFilesystem: true containers: - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/config-reloader:v0.17.2-gke.2 args: - --config-file=/prometheus/config/config.yaml - --config-file-output=/prometheus/config_out/config.yaml @@ -409,7 +409,7 @@ spec: privileged: false readOnlyRootFilesystem: true - name: prometheus - image: gke.gcr.io/prometheus-engine/prometheus:v2.53.4-gmp.0-gke.1 + image: gke.gcr.io/prometheus-engine/prometheus:v2.53.5-gmp.1-gke.2 args: - --config.file=/prometheus/config_out/config.yaml - --enable-feature=exemplar-storage @@ -545,14 +545,14 @@ spec: app.kubernetes.io/component: operator app.kubernetes.io/name: gmp-operator app.kubernetes.io/part-of: gmp - app.kubernetes.io/version: 0.15.4 + app.kubernetes.io/version: 0.17.2 spec: serviceAccountName: operator automountServiceAccountToken: true priorityClassName: gmp-critical containers: - name: operator - image: gke.gcr.io/prometheus-engine/operator:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/operator:v0.17.2-gke.2 args: - "--operator-namespace=gmp-system" - "--public-namespace=gmp-public" @@ -648,7 +648,7 @@ spec: labels: app.kubernetes.io/name: rule-evaluator app: managed-prometheus-rule-evaluator - app.kubernetes.io/version: 0.15.4 + app.kubernetes.io/version: 0.17.2 annotations: # The emptyDir for the storage and config directories prevents cluster # autoscaling unless this annotation is set. 
@@ -660,7 +660,7 @@ spec: priorityClassName: gmp-critical initContainers: - name: config-init - image: gke.gcr.io/gke-distroless/bash:gke_distroless_20250407.00_p0 + image: gke.gcr.io/gke-distroless/bash:gke_distroless_20251107.00_p0 command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml'] volumeMounts: - name: config-out @@ -674,7 +674,7 @@ spec: readOnlyRootFilesystem: true containers: - name: evaluator - image: gke.gcr.io/prometheus-engine/rule-evaluator:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/rule-evaluator:v0.17.2-gke.2 args: - --config.file=/prometheus/config_out/config.yaml - --web.listen-address=:19092 @@ -716,7 +716,7 @@ spec: privileged: false readOnlyRootFilesystem: true - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/config-reloader:v0.17.2-gke.2 args: - --config-file=/prometheus/config/config.yaml - --config-file-output=/prometheus/config_out/config.yaml @@ -823,7 +823,7 @@ spec: labels: app: managed-prometheus-alertmanager app.kubernetes.io/name: alertmanager - app.kubernetes.io/version: 0.15.4 + app.kubernetes.io/version: 0.17.2 annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "true" components.gke.io/component-name: managed_prometheus @@ -832,7 +832,7 @@ spec: automountServiceAccountToken: false initContainers: - name: config-init - image: gke.gcr.io/gke-distroless/bash:gke_distroless_20250407.00_p0 + image: gke.gcr.io/gke-distroless/bash:gke_distroless_20251107.00_p0 command: ['/bin/bash', '-c', 'touch /alertmanager/config_out/config.yaml && echo -e "receivers:\n - name: noop\nroute:\n receiver: noop" > alertmanager/config_out/config.yaml'] volumeMounts: - name: alertmanager-config @@ -846,7 +846,7 @@ spec: readOnlyRootFilesystem: true containers: - name: alertmanager - image: gke.gcr.io/prometheus-engine/alertmanager:v0.27.0-gmp.3-gke.0 + image: gke.gcr.io/prometheus-engine/alertmanager:v0.27.0-gmp.4-gke.4 args: - 
--config.file=/alertmanager/config_out/config.yaml - --storage.path=/alertmanager-data @@ -882,7 +882,7 @@ spec: privileged: false readOnlyRootFilesystem: true - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/config-reloader:v0.17.2-gke.2 args: - --config-file=/alertmanager/config.yaml - --config-file-output=/alertmanager/config_out/config.yaml diff --git a/manifests/rule-evaluator.yaml b/manifests/rule-evaluator.yaml index 8757fb3551..d7e8edfb1a 100644 --- a/manifests/rule-evaluator.yaml +++ b/manifests/rule-evaluator.yaml @@ -118,20 +118,20 @@ spec: metadata: labels: app.kubernetes.io/name: rule-evaluator - app.kubernetes.io/version: 0.15.4 + app.kubernetes.io/version: 0.17.2 spec: serviceAccountName: rule-evaluator automountServiceAccountToken: true initContainers: - name: config-init - image: gke.gcr.io/gke-distroless/bash:gke_distroless_20250407.00_p0 + image: gke.gcr.io/gke-distroless/bash:gke_distroless_20251107.00_p0 command: ['/bin/bash', '-c', 'touch /prometheus/config_out/config.yaml'] volumeMounts: - name: config-out mountPath: /prometheus/config_out containers: - name: evaluator - image: gke.gcr.io/prometheus-engine/rule-evaluator:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/rule-evaluator:v0.17.2-gke.2 args: - "--config.file=/prometheus/config_out/config.yaml" - "--web.listen-address=:9092" @@ -169,7 +169,7 @@ spec: privileged: false readOnlyRootFilesystem: true - name: config-reloader - image: gke.gcr.io/prometheus-engine/config-reloader:v0.15.4-gke.0 + image: gke.gcr.io/prometheus-engine/config-reloader:v0.17.2-gke.2 args: - --config-file=/prometheus/config/config.yaml - --config-file-output=/prometheus/config_out/config.yaml diff --git a/manifests/setup.yaml b/manifests/setup.yaml index d2ca1405a9..4be92d240b 100644 --- a/manifests/setup.yaml +++ b/manifests/setup.yaml @@ -1924,12 +1924,43 @@ spec: filter: description: Filter limits which metric data is sent to 
Cloud Monitoring (it doesn't apply to additional exports). properties: + enableMatchOneOf: + description: |- + EnableMatchOneOf allows additional control over MatchOneOf filtering. + + Available settings: + * not set: The MatchOneOf settings are ignored, default is used. + * false: MatchOneOf feature is explicitly disabled; export is forced to match all series. + * true: The MatchOneOf settings are used, overwriting a default. + + See MatchOneOf IMPORTANT section to learn about the MatchOneOf default. + type: boolean matchOneOf: description: |- - A list of Prometheus time series matchers. Every time series must match at least one - of the matchers to be exported. This field can be used equivalently to the match[] - parameter of the Prometheus federation endpoint to selectively export data. + MatchOneOf, if EnableMatchOneOf is true, controls the export filtering setting. + + MatchOneOf expects a list of Prometheus time series matchers. Every time series + must match at least one of the matchers to be exported. This field can be used + equivalently to the match[] parameter of the Prometheus federation endpoint to + selectively export data. + Example: `["{job!='foobar'}", "{__name__!~'container_foo.*|container_bar.*'}"]` + + IMPORTANT: MatchOneOf is guarded by the additional flag (EnableMatchOneOf) + and removed from the public docs. Replacements: https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#filter-metrics. + + Rationales: + * This option is prone to misconfiguration, e.g. you need to manually + match important built-in metrics (up, scrape_samples_added, etc.). + * Unmatched metrics are still in the local collector memory + (this filters on export only). + * Typically, the default (empty array), means match all metrics. However, + for the clusters that were using MatchOneOf filtering before the GMP 0.14.x, + version, the default is what has been configured back then in the orphaned + collector DaemonSet EXTRA_ARGS environment variable. 
If you are impacted: + * use EnableMatchOneOf = false to reset the default + * use EnableMatchOneOf = true to apply this configuration's MatchOneOf + * Remove EXTRA_ARGS environment variable from the collector DaemonSet (it's safe to do so). items: type: string type: array diff --git a/pkg/operator/apis/monitoring/v1/operator_types.go b/pkg/operator/apis/monitoring/v1/operator_types.go index f2c2bf0db0..a3fb792188 100644 --- a/pkg/operator/apis/monitoring/v1/operator_types.go +++ b/pkg/operator/apis/monitoring/v1/operator_types.go @@ -232,10 +232,40 @@ type KubeletScraping struct { // ExportFilters provides mechanisms to filter the scraped data that's sent to GMP. type ExportFilters struct { - // A list of Prometheus time series matchers. Every time series must match at least one - // of the matchers to be exported. This field can be used equivalently to the match[] - // parameter of the Prometheus federation endpoint to selectively export data. + // EnableMatchOneOf allows additional control over MatchOneOf filtering. + // + // Available settings: + // * not set: The MatchOneOf settings are ignored, default is used. + // * false: MatchOneOf feature is explicitly disabled; export is forced to match all series. + // * true: The MatchOneOf settings are used, overwriting a default. + // + // See MatchOneOf IMPORTANT section to learn about the MatchOneOf default. + EnableMatchOneOf *bool `json:"enableMatchOneOf,omitempty"` + + // MatchOneOf, if EnableMatchOneOf is true, controls the export filtering setting. + // + // MatchOneOf expects a list of Prometheus time series matchers. Every time series + // must match at least one of the matchers to be exported. This field can be used + // equivalently to the match[] parameter of the Prometheus federation endpoint to + // selectively export data. 
+ // // Example: `["{job!='foobar'}", "{__name__!~'container_foo.*|container_bar.*'}"]` + // + // IMPORTANT: MatchOneOf is guarded by the additional flag (EnableMatchOneOf) + // and removed from the public docs. Replacements: https://cloud.google.com/stackdriver/docs/managed-prometheus/setup-managed#filter-metrics. + // + // Rationales: + // * This option is prone to misconfiguration, e.g. you need to manually + // match important built-in metrics (up, scrape_samples_added, etc.). + // * Unmatched metrics are still in the local collector memory + // (this filters on export only). + // * Typically, the default (empty array), means match all metrics. However, + // for the clusters that were using MatchOneOf filtering before the GMP 0.14.x, + // version, the default is what has been configured back then in the orphaned + // collector DaemonSet EXTRA_ARGS environment variable. If you are impacted: + // * use EnableMatchOneOf = false to reset the default + // * use EnableMatchOneOf = true to apply this configuration's MatchOneOf + // * Remove EXTRA_ARGS environment variable from the collector DaemonSet (it's safe to do so). MatchOneOf []string `json:"matchOneOf,omitempty"` } diff --git a/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go b/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go index eda798b774..c6a5d372f2 100644 --- a/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go +++ b/pkg/operator/apis/monitoring/v1/zz_generated.deepcopy.go @@ -484,6 +484,11 @@ func (in *ConfigSpec) DeepCopy() *ConfigSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ExportFilters) DeepCopyInto(out *ExportFilters) { *out = *in + if in.EnableMatchOneOf != nil { + in, out := &in.EnableMatchOneOf, &out.EnableMatchOneOf + *out = new(bool) + **out = **in + } if in.MatchOneOf != nil { in, out := &in.MatchOneOf, &out.MatchOneOf *out = make([]string, len(*in)) diff --git a/pkg/operator/collection.go b/pkg/operator/collection.go index f92bd7acdc..45e2f78ecb 100644 --- a/pkg/operator/collection.go +++ b/pkg/operator/collection.go @@ -242,7 +242,7 @@ func gzipData(data []byte) ([]byte, error) { return b.Bytes(), nil } -func setConfigMapData(cm *corev1.ConfigMap, c monitoringv1.CompressionType, key string, data string) error { +func setConfigMapData(cm *corev1.ConfigMap, c monitoringv1.CompressionType, key, data string) error { // Thanos config-reloader detects gzip compression automatically, so no sync with // config-reloaders is needed when switching between these. switch c { @@ -274,9 +274,16 @@ func (r *collectionReconciler) ensureCollectorConfig(ctx context.Context, spec * return fmt.Errorf("generate Prometheus config: %w", err) } - // NOTE(bwplotka): Match logic will be removed in https://github.com/GoogleCloudPlatform/prometheus-engine/pull/1688 - // nolint:staticcheck - cfg.GoogleCloud.Export.Match = spec.Filter.MatchOneOf + if spec.Filter.EnableMatchOneOf != nil { + // NOTE: nil, false and true mean something else, see EnableMatchOneOf. + cfg.GoogleCloud.Export.EnableMatch = spec.Filter.EnableMatchOneOf + if *spec.Filter.EnableMatchOneOf { + // As per EnableMatchOneOf logic, it makes only sense to pass things through + // on EnableMatchOneOf = true. + cfg.GoogleCloud.Export.Match = spec.Filter.MatchOneOf + } + } + if string(spec.Compression) != "" { cfg.GoogleCloud.Export.Compression = string(spec.Compression) }