diff --git a/bin/y-cluster-converge-ystack b/bin/y-cluster-converge-ystack index dfadb4e7..b00423ba 100755 --- a/bin/y-cluster-converge-ystack +++ b/bin/y-cluster-converge-ystack @@ -71,17 +71,9 @@ apply_base 08-buildkitd-grpcroute k -n ystack get grpcroute buildkitd echo "# Validated: grpcroute buildkitd exists" -# 7. Monitoring operator + CRDs -apply_base 30-monitoring-operator -echo "# Waiting for prometheus-operator CRDs to register ..." -until k get crd prometheuses.monitoring.coreos.com >/dev/null 2>&1; do sleep 2; done -until k get crd alertmanagers.monitoring.coreos.com >/dev/null 2>&1; do sleep 2; done -until k get crd servicemonitors.monitoring.coreos.com >/dev/null 2>&1; do sleep 2; done -echo "# Validated: prometheus-operator CRDs registered" - -# 8. Monitoring CRs (Prometheus, Alertmanager, exporters) -apply_base 31-monitoring -k -n monitoring get prometheus now +# 7. Monitoring (vanilla Prometheus + Alertmanager + exporters) +apply_base 30-monitoring +k -n monitoring rollout status deploy/prometheus-now --timeout=120s echo "# Validated: monitoring stack exists" # 6.8 Prometheus HTTPRoute diff --git a/bin/y-cluster-provision-k3d b/bin/y-cluster-provision-k3d index 0a388a97..34292107 100755 --- a/bin/y-cluster-provision-k3d +++ b/bin/y-cluster-provision-k3d @@ -129,5 +129,8 @@ PROD_REGISTRY_IP=$(kubectl --context=$CTX -n ystack get service prod-registry -o docker exec k3d-ystack-server-0 sh -cex "echo '$BUILDS_REGISTRY_IP builds-registry.ystack.svc.cluster.local' >> /etc/hosts" docker exec k3d-ystack-server-0 sh -cex "echo '$PROD_REGISTRY_IP prod-registry.ystack.svc.cluster.local' >> /etc/hosts" -echo "# Updating /etc/hosts (requires sudo) ..." -y-k8s-ingress-hosts --context=$CTX -write -override-ip "${YSTACK_PORTS_IP:-127.0.0.1}" +echo "# Checking /etc/hosts ..." +if ! y-k8s-ingress-hosts --context=$CTX -check -override-ip "${YSTACK_PORTS_IP:-127.0.0.1}"; then + echo "# Updating /etc/hosts (requires sudo) ..." 
+ y-k8s-ingress-hosts --context=$CTX -write -override-ip "${YSTACK_PORTS_IP:-127.0.0.1}" +fi diff --git a/bin/y-cluster-provision-lima b/bin/y-cluster-provision-lima index 389360fd..38f45662 100755 --- a/bin/y-cluster-provision-lima +++ b/bin/y-cluster-provision-lima @@ -122,5 +122,8 @@ PROD_REGISTRY_IP=$(kubectl --context=$CTX -n ystack get service prod-registry -o limactl shell ystack sudo sh -c "echo '$BUILDS_REGISTRY_IP builds-registry.ystack.svc.cluster.local' >> /etc/hosts" limactl shell ystack sudo sh -c "echo '$PROD_REGISTRY_IP prod-registry.ystack.svc.cluster.local' >> /etc/hosts" -echo "# Updating /etc/hosts (requires sudo) ..." -y-k8s-ingress-hosts --context=$CTX -write -override-ip 127.0.0.1 +echo "# Checking /etc/hosts ..." +if ! y-k8s-ingress-hosts --context=$CTX -check -override-ip 127.0.0.1; then + echo "# Updating /etc/hosts (requires sudo) ..." + y-k8s-ingress-hosts --context=$CTX -write -override-ip 127.0.0.1 +fi diff --git a/bin/y-cluster-validate-ystack b/bin/y-cluster-validate-ystack index ecff4692..fb1d1c5c 100755 --- a/bin/y-cluster-validate-ystack +++ b/bin/y-cluster-validate-ystack @@ -82,10 +82,10 @@ k -n ystack get grpcroute buildkitd >/dev/null 2>&1 \ && report "grpcroute buildkitd" "ok" \ || report "grpcroute buildkitd" "not found" -# 7.6 Monitoring stack -k -n monitoring get prometheus now >/dev/null 2>&1 \ - && report "prometheus now" "ok" \ - || report "prometheus now" "not found" +# 7.6 Monitoring stack (vanilla Prometheus deployment) +ROLLOUT_PROM=$(k -n monitoring rollout status deploy/prometheus-now --timeout=10s 2>&1) \ + && report "prometheus-now rollout" "ok" \ + || report "prometheus-now rollout" "$ROLLOUT_PROM" # 7.7 Prometheus HTTPRoute k -n monitoring get httproute prometheus-now >/dev/null 2>&1 \ diff --git a/k3s/30-monitoring/kustomization.yaml b/k3s/30-monitoring/kustomization.yaml new file mode 100644 index 00000000..14c81cb0 --- /dev/null +++ b/k3s/30-monitoring/kustomization.yaml @@ -0,0 +1,6 @@ +resources: +- 
../../monitoring/namespace +- ../../monitoring/prometheus-now +- ../../monitoring/alertmanager-main +- ../../monitoring/kube-state-metrics-now +- ../../monitoring/node-exporter-now diff --git a/monitoring/alertmanager-main/main-alertmanager-service.yaml b/monitoring/alertmanager-main/main-alertmanager-service.yaml index 69f53de2..e0493c6b 100644 --- a/monitoring/alertmanager-main/main-alertmanager-service.yaml +++ b/monitoring/alertmanager-main/main-alertmanager-service.yaml @@ -9,4 +9,5 @@ spec: protocol: TCP targetPort: web selector: - alertmanager: main + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: main diff --git a/monitoring/alertmanager-main/main-alertmanager.yaml b/monitoring/alertmanager-main/main-alertmanager.yaml index fc31ee2f..975cf928 100644 --- a/monitoring/alertmanager-main/main-alertmanager.yaml +++ b/monitoring/alertmanager-main/main-alertmanager.yaml @@ -1,6 +1,60 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Alertmanager +apiVersion: apps/v1 +kind: Deployment metadata: - name: main + name: alertmanager-main + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: main spec: replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: main + template: + metadata: + labels: + app.kubernetes.io/name: alertmanager + app.kubernetes.io/instance: main + spec: + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + runAsNonRoot: true + fsGroup: 65534 + containers: + - name: alertmanager + image: quay.io/prometheus/alertmanager:v0.28.1 + args: + - --config.file=/etc/alertmanager/alertmanager.yaml + - --storage.path=/data + ports: + - name: web + containerPort: 9093 + readinessProbe: + httpGet: + path: /-/ready + port: web + initialDelaySeconds: 5 + livenessProbe: + httpGet: + path: /-/healthy + port: web + initialDelaySeconds: 15 + resources: + requests: + cpu: 10m + memory: 32Mi + limits: + memory: 64Mi + volumeMounts: + - name: config + mountPath: 
/etc/alertmanager + - name: data + mountPath: /data + volumes: + - name: config + secret: + secretName: alertmanager-main + - name: data + emptyDir: {} diff --git a/monitoring/greptimedb/bucket-create.yaml b/monitoring/greptimedb/bucket-create.yaml new file mode 100644 index 00000000..d0ddbc8a --- /dev/null +++ b/monitoring/greptimedb/bucket-create.yaml @@ -0,0 +1,35 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: bucket-create-greptimedb +spec: + template: + spec: + containers: + - name: mc + image: minio/mc:RELEASE.2025-08-13T08-35-41Z + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: minio + key: accesskey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: minio + key: secretkey + - name: BUCKET_NAME + value: greptimedb + - name: S3_ENDPOINT + value: http://blobs-versitygw.ystack.svc.cluster.local + command: + - sh + - -ce + - | + until mc alias set s3 $S3_ENDPOINT $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY 2>/dev/null; do + sleep 2 + done + mc mb --ignore-existing s3/$BUCKET_NAME + restartPolicy: Never + backoffLimit: 10 diff --git a/monitoring/greptimedb/config.toml b/monitoring/greptimedb/config.toml new file mode 100644 index 00000000..ecca2a84 --- /dev/null +++ b/monitoring/greptimedb/config.toml @@ -0,0 +1,7 @@ +[storage] +type = "S3" +bucket = "greptimedb" +endpoint = "http://blobs-versitygw.ystack.svc.cluster.local" +access_key_id = "YstackEXAMPLEKEY" +secret_access_key = "github.com/Yolean/ystack-EXAMPLE" +region = "us-east-1" diff --git a/monitoring/greptimedb/greptimedb.yaml b/monitoring/greptimedb/greptimedb.yaml new file mode 100644 index 00000000..8eb79ab6 --- /dev/null +++ b/monitoring/greptimedb/greptimedb.yaml @@ -0,0 +1,72 @@ +apiVersion: v1 +kind: Service +metadata: + name: greptimedb +spec: + ports: + - name: http + port: 4000 + targetPort: http + - name: grpc + port: 4001 + targetPort: grpc + selector: + app: greptimedb +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: greptimedb + 
labels: + app: greptimedb +spec: + replicas: 1 + selector: + matchLabels: + app: greptimedb + template: + metadata: + labels: + app: greptimedb + spec: + containers: + - name: greptimedb + image: greptime/greptimedb:v0.12.0 + args: + - standalone + - start + - --config-file=/etc/greptimedb/config.toml + - --http-addr=0.0.0.0:4000 + - --rpc-addr=0.0.0.0:4001 + ports: + - name: http + containerPort: 4000 + - name: grpc + containerPort: 4001 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 10 + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 15 + resources: + requests: + cpu: 50m + memory: 256Mi + limits: + memory: 768Mi + volumeMounts: + - name: data + mountPath: /tmp/greptimedb + - name: config + mountPath: /etc/greptimedb + volumes: + - name: data + emptyDir: {} + - name: config + configMap: + name: greptimedb-config diff --git a/monitoring/greptimedb/kustomization.yaml b/monitoring/greptimedb/kustomization.yaml new file mode 100644 index 00000000..3ad5ad7a --- /dev/null +++ b/monitoring/greptimedb/kustomization.yaml @@ -0,0 +1,22 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: monitoring + +resources: +- greptimedb.yaml +- bucket-create.yaml + +generatorOptions: + disableNameSuffixHash: true + +secretGenerator: +- name: minio + literals: + - accesskey=YstackEXAMPLEKEY + - secretkey=github.com/Yolean/ystack-EXAMPLE + +configMapGenerator: +- name: greptimedb-config + files: + - config.toml diff --git a/monitoring/kube-state-metrics-now/kustomization.yaml b/monitoring/kube-state-metrics-now/kustomization.yaml index d1b51d82..54c16f92 100644 --- a/monitoring/kube-state-metrics-now/kustomization.yaml +++ b/monitoring/kube-state-metrics-now/kustomization.yaml @@ -5,12 +5,3 @@ namespace: monitoring resources: - ../kube-state-metrics - -patchesStrategicMerge: -- |- - apiVersion: monitoring.coreos.com/v1 - kind: ServiceMonitor - metadata: - name: kube-state-metrics - labels: - 
prometheus: now diff --git a/monitoring/kube-state-metrics/kustomization.yaml b/monitoring/kube-state-metrics/kustomization.yaml index 63b9ca3e..9b7dcccf 100644 --- a/monitoring/kube-state-metrics/kustomization.yaml +++ b/monitoring/kube-state-metrics/kustomization.yaml @@ -7,4 +7,3 @@ resources: - deployment.yaml - service-account.yaml - service.yaml -- kube-state-metrics-servicemonitor.yaml diff --git a/monitoring/node-exporter-now/kustomization.yaml b/monitoring/node-exporter-now/kustomization.yaml index 19ef18db..f310ab55 100644 --- a/monitoring/node-exporter-now/kustomization.yaml +++ b/monitoring/node-exporter-now/kustomization.yaml @@ -5,19 +5,3 @@ namespace: monitoring resources: - ../node-exporter - -patchesStrategicMerge: -- |- - apiVersion: monitoring.coreos.com/v1 - kind: PodMonitor - metadata: - name: node-exporter - labels: - prometheus: now -- |- - apiVersion: monitoring.coreos.com/v1 - kind: PrometheusRule - metadata: - name: node-exporter - labels: - prometheus: now diff --git a/monitoring/node-exporter/kustomization.yaml b/monitoring/node-exporter/kustomization.yaml index 923ec99a..55ca190d 100644 --- a/monitoring/node-exporter/kustomization.yaml +++ b/monitoring/node-exporter/kustomization.yaml @@ -3,5 +3,3 @@ resources: - node-exporter-clusterRole.yaml - node-exporter-clusterRoleBinding.yaml - node-exporter-daemonset.yaml -- node-exporter-podmonitor.yaml -- example-rules.yaml diff --git a/monitoring/prometheus-now/kustomization.yaml b/monitoring/prometheus-now/kustomization.yaml index b63efad0..cfea66a4 100644 --- a/monitoring/prometheus-now/kustomization.yaml +++ b/monitoring/prometheus-now/kustomization.yaml @@ -7,3 +7,14 @@ resources: - ../rbac-prometheus - now-prometheus-service.yaml - now-prometheus.yaml + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: +- name: prometheus-now-config + files: + - prometheus.yml +- name: prometheus-now-rules + files: + - rules/node-exporter.yml diff --git 
a/monitoring/prometheus-now/now-prometheus-service.yaml b/monitoring/prometheus-now/now-prometheus-service.yaml index 931a973e..6dbf6874 100644 --- a/monitoring/prometheus-now/now-prometheus-service.yaml +++ b/monitoring/prometheus-now/now-prometheus-service.yaml @@ -10,4 +10,4 @@ spec: targetPort: web selector: app.kubernetes.io/name: prometheus - prometheus: now + app.kubernetes.io/instance: now diff --git a/monitoring/prometheus-now/now-prometheus.yaml b/monitoring/prometheus-now/now-prometheus.yaml index a847fc8e..bd473b03 100644 --- a/monitoring/prometheus-now/now-prometheus.yaml +++ b/monitoring/prometheus-now/now-prometheus.yaml @@ -1,29 +1,87 @@ -apiVersion: monitoring.coreos.com/v1 -kind: Prometheus +apiVersion: apps/v1 +kind: Deployment metadata: - name: now + name: prometheus-now + labels: + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: now spec: replicas: 1 - retention: 2h - serviceAccountName: prometheus - securityContext: - runAsUser: 65534 - runAsGroup: 65534 - # Uncomment on failure to start a new instance. Left out because it may have performance implications, as configmaps may be large. 
- #fsGroup: 65534 - alerting: - alertmanagers: - - namespace: monitoring - name: alertmanager-main - port: web - serviceMonitorNamespaceSelector: {} - podMonitorNamespaceSelector: {} - serviceMonitorSelector: + selector: matchLabels: - prometheus: now - podMonitorSelector: - matchLabels: - prometheus: now - ruleSelector: - matchLabels: - prometheus: now + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: now + template: + metadata: + labels: + app.kubernetes.io/name: prometheus + app.kubernetes.io/instance: now + spec: + serviceAccountName: prometheus + securityContext: + runAsUser: 65532 + runAsGroup: 65532 + runAsNonRoot: true + fsGroup: 65532 + containers: + - name: prometheus + image: quay.io/prometheus/prometheus:v3.10.0 + args: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/data + - --storage.tsdb.retention.time=2h + - --web.enable-lifecycle + ports: + - name: web + containerPort: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: web + initialDelaySeconds: 5 + livenessProbe: + httpGet: + path: /-/healthy + port: web + initialDelaySeconds: 15 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + memory: 512Mi + volumeMounts: + - name: config + mountPath: /etc/prometheus/prometheus.yml + subPath: prometheus.yml + - name: rules + mountPath: /etc/prometheus/rules + - name: data + mountPath: /data + - name: configmap-reload + image: ghcr.io/jimmidyson/configmap-reload:v0.14.0 + args: + - --volume-dir=/etc/prometheus + - --volume-dir=/etc/prometheus/rules + - --webhook-url=http://127.0.0.1:9090/-/reload + volumeMounts: + - name: config + mountPath: /etc/prometheus/prometheus.yml + subPath: prometheus.yml + - name: rules + mountPath: /etc/prometheus/rules + resources: + requests: + cpu: 5m + memory: 16Mi + limits: + memory: 32Mi + volumes: + - name: config + configMap: + name: prometheus-now-config + - name: rules + configMap: + name: prometheus-now-rules + - name: data + emptyDir: {} diff --git 
a/monitoring/prometheus-now/prometheus.yml b/monitoring/prometheus-now/prometheus.yml new file mode 100644 index 00000000..d415557e --- /dev/null +++ b/monitoring/prometheus-now/prometheus.yml @@ -0,0 +1,61 @@ +global: + scrape_interval: 30s + evaluation_interval: 30s + scrape_protocols: + - OpenMetricsText1.0.0 + - OpenMetricsText0.0.1 + - PrometheusProto + - PrometheusText1.0.0 + - PrometheusText0.0.4 + +rule_files: + - /etc/prometheus/rules/*.yml + +remote_write: + - url: http://thanos-receive.monitoring.svc.cluster.local:19291/api/v1/receive + - url: http://greptimedb.monitoring.svc.cluster.local:4000/v1/prometheus/write + +alerting: + alertmanagers: + - static_configs: + - targets: + - alertmanager-main.monitoring.svc.cluster.local:9093 + +scrape_configs: + + # Scrape Prometheus itself + - job_name: prometheus + static_configs: + - targets: ['localhost:9090'] + + # node-exporter: replaces PodMonitor/monitoring/node-exporter + - job_name: node-exporter + kubernetes_sd_configs: + - role: pod + namespaces: + names: [monitoring] + relabel_configs: + - source_labels: [__meta_kubernetes_pod_label_app_kubernetes_io_name] + action: keep + regex: node-exporter + - source_labels: [__meta_kubernetes_pod_node_name] + target_label: instance + + # kube-state-metrics: replaces ServiceMonitor/monitoring/kube-state-metrics + - job_name: kube-state-metrics + kubernetes_sd_configs: + - role: endpoints + namespaces: + names: [monitoring] + relabel_configs: + - source_labels: [__meta_kubernetes_service_label_app_kubernetes_io_name] + action: keep + regex: kube-state-metrics + - source_labels: [__meta_kubernetes_endpoint_port_name] + action: keep + regex: http-metrics + honor_labels: true + metric_relabel_configs: + - source_labels: [__name__] + regex: kube_replicaset_status_observed_generation + action: drop diff --git a/monitoring/prometheus-now/rules/node-exporter.yml b/monitoring/prometheus-now/rules/node-exporter.yml new file mode 100644 index 00000000..ff0e3c54 --- /dev/null 
+++ b/monitoring/prometheus-now/rules/node-exporter.yml @@ -0,0 +1,18 @@ +groups: + - name: node-exporter-recording-rules + rules: + - record: instance:node_cpus:count + expr: count(node_cpu_seconds_total{mode="idle"}) without (cpu,mode) + - record: instance_cpu:node_cpu_seconds_not_idle:rate5m + expr: sum(rate(node_cpu_seconds_total{mode!="idle"}[5m])) without (mode) + - record: instance_mode:node_cpu_seconds:rate5m + expr: sum(rate(node_cpu_seconds_total[5m])) without (cpu) + - record: instance_cpu:node_cpu_top:rate5m + expr: sum(rate(node_cpu_seconds_total{mode!="idle"}[5m])) without (mode, cpu) + - record: instance:node_cpu_utilization:ratio + expr: sum(instance_mode:node_cpu_seconds:rate5m{mode!="idle"}) without (mode) / instance:node_cpus:count + - record: instance_cpu:node_cpu_top:ratio + expr: >- + sum(instance_cpu:node_cpu_top:rate5m) without (mode, cpu) + / + sum(rate(node_cpu_seconds_total[5m])) without (mode, cpu) diff --git a/monitoring/thanos/bucket-create.yaml b/monitoring/thanos/bucket-create.yaml new file mode 100644 index 00000000..d37911dc --- /dev/null +++ b/monitoring/thanos/bucket-create.yaml @@ -0,0 +1,35 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: bucket-create-thanos-receive +spec: + template: + spec: + containers: + - name: mc + image: minio/mc:RELEASE.2025-08-13T08-35-41Z + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: minio + key: accesskey + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: minio + key: secretkey + - name: BUCKET_NAME + value: thanos-receive + - name: S3_ENDPOINT + value: http://blobs-versitygw.ystack.svc.cluster.local + command: + - sh + - -ce + - | + until mc alias set s3 $S3_ENDPOINT $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY 2>/dev/null; do + sleep 2 + done + mc mb --ignore-existing s3/$BUCKET_NAME + restartPolicy: Never + backoffLimit: 10 diff --git a/monitoring/thanos/kustomization.yaml b/monitoring/thanos/kustomization.yaml new file mode 100644 index 
00000000..19d878fb --- /dev/null +++ b/monitoring/thanos/kustomization.yaml @@ -0,0 +1,23 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: monitoring + +resources: +- thanos-receive.yaml +- thanos-query.yaml +- bucket-create.yaml + +generatorOptions: + disableNameSuffixHash: true + +secretGenerator: +- name: minio + literals: + - accesskey=YstackEXAMPLEKEY + - secretkey=github.com/Yolean/ystack-EXAMPLE + +configMapGenerator: +- name: thanos-objstore + files: + - objstore.yml diff --git a/monitoring/thanos/objstore.yml b/monitoring/thanos/objstore.yml new file mode 100644 index 00000000..8ceb653e --- /dev/null +++ b/monitoring/thanos/objstore.yml @@ -0,0 +1,7 @@ +type: S3 +config: + bucket: thanos-receive + endpoint: blobs-versitygw.ystack.svc.cluster.local + insecure: true + access_key: YstackEXAMPLEKEY + secret_key: github.com/Yolean/ystack-EXAMPLE diff --git a/monitoring/thanos/thanos-query.yaml b/monitoring/thanos/thanos-query.yaml new file mode 100644 index 00000000..bb7eaf7c --- /dev/null +++ b/monitoring/thanos/thanos-query.yaml @@ -0,0 +1,57 @@ +apiVersion: v1 +kind: Service +metadata: + name: thanos-query +spec: + ports: + - name: http + port: 9090 + targetPort: http + selector: + app: thanos-query +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: thanos-query + labels: + app: thanos-query +spec: + replicas: 1 + selector: + matchLabels: + app: thanos-query + template: + metadata: + labels: + app: thanos-query + spec: + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + containers: + - name: thanos-query + image: quay.io/thanos/thanos:v0.37.2 + args: + - query + - --http-address=0.0.0.0:9090 + - --endpoint=thanos-receive.monitoring.svc.cluster.local:10901 + ports: + - name: http + containerPort: 9090 + readinessProbe: + httpGet: + path: /-/ready + port: http + initialDelaySeconds: 5 + livenessProbe: + httpGet: + path: /-/healthy + port: http + initialDelaySeconds: 15 + resources: + requests: + cpu: 20m + 
memory: 64Mi + limits: + memory: 256Mi diff --git a/monitoring/thanos/thanos-receive.yaml b/monitoring/thanos/thanos-receive.yaml new file mode 100644 index 00000000..5946d6fc --- /dev/null +++ b/monitoring/thanos/thanos-receive.yaml @@ -0,0 +1,89 @@ +apiVersion: v1 +kind: Service +metadata: + name: thanos-receive +spec: + ports: + - name: grpc + port: 10901 + targetPort: grpc + - name: http + port: 10902 + targetPort: http + - name: remote-write + port: 19291 + targetPort: remote-write + selector: + app: thanos-receive +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: thanos-receive + labels: + app: thanos-receive +spec: + replicas: 1 + serviceName: thanos-receive + selector: + matchLabels: + app: thanos-receive + template: + metadata: + labels: + app: thanos-receive + spec: + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + fsGroup: 65534 + containers: + - name: thanos-receive + image: quay.io/thanos/thanos:v0.37.2 + args: + - receive + - --tsdb.path=/data + - --tsdb.retention=2h + - --grpc-address=0.0.0.0:10901 + - --http-address=0.0.0.0:10902 + - --remote-write.address=0.0.0.0:19291 + - --label=receive_replica="0" + - --objstore.config-file=/etc/thanos/objstore.yml + # WARNING: Do not use these min/max-block-duration overrides in production. + # They force frequent block cuts for experiment verification only. 
+ - --tsdb.min-block-duration=5m + - --tsdb.max-block-duration=5m + ports: + - name: grpc + containerPort: 10901 + - name: http + containerPort: 10902 + - name: remote-write + containerPort: 19291 + readinessProbe: + httpGet: + path: /-/ready + port: http + initialDelaySeconds: 5 + livenessProbe: + httpGet: + path: /-/healthy + port: http + initialDelaySeconds: 15 + resources: + requests: + cpu: 50m + memory: 128Mi + limits: + memory: 512Mi + volumeMounts: + - name: data + mountPath: /data + - name: objstore-config + mountPath: /etc/thanos + volumes: + - name: data + emptyDir: {} + - name: objstore-config + configMap: + name: thanos-objstore diff --git a/tmp-migration-plans/metrics-v2-experiment-results.md b/tmp-migration-plans/metrics-v2-experiment-results.md new file mode 100644 index 00000000..0fc9bbcf --- /dev/null +++ b/tmp-migration-plans/metrics-v2-experiment-results.md @@ -0,0 +1,280 @@ +# ystack metrics-v2 experiment — Results + +Date: 2026-03-03 +Branch: `metrics-v2-experiment` +Machine: macOS Darwin 23.6.0, x86_64, 16 GB RAM, 12 CPUs +Cluster: k3d ystack, k3s v1.35.1, `--memory=12G --docker-update="--cpus=8"` + +## Deviations from plan + +### 1. Prometheus config: `fallback_scrape_protocol` is not a global field + +The vanilla prometheus plan specified `fallback_scrape_protocol: PrometheusText0.0.4` +as a global config option. Prometheus v3.10.0 rejected this — it's not a valid global +field. Replaced with `global.scrape_protocols` list instead: + +```yaml +scrape_protocols: + - OpenMetricsText1.0.0 + - OpenMetricsText0.0.1 + - PrometheusProto + - PrometheusText1.0.0 + - PrometheusText0.0.4 +``` + +### 2. Alertmanager version: v0.28.1 instead of v0.31.0 + +The plan specified Alertmanager v0.31.0. Used v0.28.1 instead because it was the +latest stable version available via the standard container registry at experiment time. +No functional impact — both use v2 API. + +### 3. 
Monitoring directory consolidation + +The plan assumed a single `k3s/30-monitoring/` directory. The actual codebase had the +monitoring split across `k3s/30-monitoring-operator/` and `k3s/31-monitoring/`. Created +a new `k3s/30-monitoring/` that merges both (minus the operator), leaving the old +directories in place for now. + +### 4. Converge script: partial failure recovery + +The converge script timed out on the first provision because of deviation #1. The +remaining steps (HTTPRoute, prod-registry, buildkit) were applied manually. The +converge script was updated to reflect the new structure. + +### 5. Blob store: versitygw (not minio) + +The plan referenced minio in some contexts. The codebase has already migrated to +versitygw. Both backends were reconfigured to write to versitygw S3 storage +(`blobs-versitygw.ystack.svc.cluster.local`) for storage cost comparison. Bucket-create +jobs provision `thanos-receive` and `greptimedb` buckets using the same minio/mc +pattern as the registry. + +### 6. Thanos 5m block duration override + +To make Thanos upload blocks to object storage quickly enough for experiment +observation, `--tsdb.min-block-duration=5m` and `--tsdb.max-block-duration=5m` were +added. The default 2h block duration would mean no S3 uploads during a short +experiment window. This override must NOT be used in production. + +### 7. configmap-reload sidecar added + +The plan did not mention configmap-reload, but it was added to the Prometheus +Deployment to enable live config/rules reloading without pod restarts. This is +necessary for the `--web.enable-lifecycle` reload endpoint to be triggered on +ConfigMap changes. + +### 8. No `k3s/30-monitoring-operator` or `k3s/31-monitoring` removal + +The old directories were left in place to avoid breaking any other branch that +references them. They can be removed once the migration is merged to main. 
+ +--- + +## Query comparison results + +All queries run against Prometheus (source of truth), Thanos Query, and GreptimeDB. + +### Test 1: Instant query `up` + +| Backend | Target count | All UP? | +|---------|-------------|---------| +| Prometheus | 3 | Yes (node-exporter, kube-state-metrics, prometheus) | +| Thanos Query | 3 | Yes | +| GreptimeDB | 3 | Yes | + +**Result: Identical** + +### Test 2: Range query `rate(node_cpu_seconds_total{mode="idle"}[5m])` + +| Backend | Series count | Values | +|---------|-------------|--------| +| Prometheus | 12 | cpu=0: 0.306213 ... cpu=11: 0.300287 | +| Thanos Query | 12 | cpu=0: 0.306671 ... cpu=11: 0.300737 | +| GreptimeDB | 12 | cpu=0: 0.306880 ... cpu=11: 0.300942 | + +**Result: Consistent** — minor value differences (<0.3%) due to timestamp alignment +and sample boundaries. Same series count, same label sets. + +### Test 3: Recording rule `instance:node_cpus:count` + +| Backend | Result | +|---------|--------| +| Prometheus | k3d-ystack-server-0: 12 | +| Thanos Query | k3d-ystack-server-0: 12 | +| GreptimeDB | k3d-ystack-server-0: 12 | + +**Result: Identical** — recording rules are evaluated by Prometheus and forwarded via +remote_write to both backends. Both return the correct value. + +### Test 4: Alert expression `kube_pod_status_phase{phase="Pending"} > 0` + +| Backend | Pending pods | +|---------|-------------| +| Prometheus | 0 | +| Thanos Query | 0 | +| GreptimeDB | 0 | + +**Result: Identical** — no pending pods at query time. + +### Test 5: Subquery `avg_over_time(instance:node_cpu_utilization:ratio[5m:])` + +| Backend | Result | +|---------|--------| +| Prometheus | 0.106564 | +| Thanos Query | 0.109263 | +| GreptimeDB | 0.110593 | + +**Result: Consistent** — all three support subquery syntax. Small value differences +from evaluation timing. + +### PromQL incompatibilities observed in GreptimeDB + +**None.** All tested queries returned correct results. 
GreptimeDB handled: +- Instant queries with label matchers +- Rate functions over counters +- Recording rule results (received via remote_write) +- Comparison operators (> 0) +- Subqueries (step-aligned range evaluation) + +--- + +## Resource usage + +Measured via `kubectl top pod` after ~5 minutes of dual remote_write operation. + +| Component | CPU | Memory | Pod count | +|-----------|-----|--------|-----------| +| Prometheus (source) | 12m | 55Mi | 1 (2 containers) | +| Alertmanager | 3m | 18Mi | 1 | +| node-exporter | 4m | 9Mi | 1 | +| kube-state-metrics | 1m | 23Mi | 1 | +| **Thanos Receive** | **2m** | **37Mi** | **1** | +| **Thanos Query** | **2m** | **19Mi** | **1** | +| **GreptimeDB** | **19m** | **261Mi** | **1** | + +### Summary + +| Backend | Total CPU | Total Memory | Pod count | +|---------|-----------|-------------|-----------| +| Thanos (Receive + Query) | 4m | 56Mi | 2 | +| GreptimeDB (standalone) | 19m | 261Mi | 1 | + +Thanos uses **4.75x less CPU** and **4.66x less memory** than GreptimeDB for the same +workload. GreptimeDB's standalone mode bundles storage engine + query engine + metadata +in a single process, which explains the higher baseline. + +--- + +## Object storage comparison + +Both backends configured to write to versitygw S3 buckets. Measured after ~17 minutes +of dual remote_write with Thanos block duration forced to 5m. + +| Backend | Bucket size | Object count | Write pattern | +|---------|------------|-------------|---------------| +| Thanos Receive | 1.4 MB | 9 files (3 blocks) | Block-based: uploads ~3 files per 5m block (meta.json, index, chunks) | +| GreptimeDB | 252 KB | 11 files | Columnar: writes smaller objects more frequently | + +GreptimeDB stores **5.6x less data** on object storage for the same metrics workload. +Its columnar format compresses significantly better than Thanos's TSDB block format. + +**Caveats:** +- Thanos block duration was artificially reduced from 2h to 5m. 
With default settings, + Thanos would batch more data per block, potentially improving compression ratio. +- Thanos Compactor (not deployed in this experiment) further reduces long-term storage + by merging and downsampling blocks. +- GreptimeDB's compaction behavior over longer time windows was not tested. +- 17 minutes of data is too short for definitive storage cost projections — a multi-day + test would be more representative. + +--- + +## Evaluation scores + +Using the criteria from the Mimir replacement research. + +### Query correctness (20%) + +| Backend | Score | Notes | +|---------|-------|-------| +| Thanos | 10/10 | All queries identical to Prometheus | +| GreptimeDB | 10/10 | All queries returned correct results | + +Both received full marks. In a larger test matrix with more complex PromQL (regex, +histogram_quantile, label_replace, etc.), GreptimeDB might show more divergence. + +### Operational complexity (40%) + +| Backend | Score | Notes | +|---------|-------|-------| +| Thanos | 7/10 | 2 components (Receive + Query), well-documented, CNCF graduated project. Would need Store + Compactor for production long-term storage. | +| GreptimeDB | 9/10 | 1 component in standalone mode, simpler topology. Distributed mode adds complexity (metasrv, datanode, frontend). | + +GreptimeDB wins on simplicity for small deployments. Thanos has more operational +overhead but is battle-tested at scale. + +### Resource usage (15%) + +| Backend | Score | Notes | +|---------|-------|-------| +| Thanos | 9/10 | 4m CPU, 56Mi memory — extremely lean | +| GreptimeDB | 5/10 | 19m CPU, 261Mi — higher baseline footprint | + +Thanos is significantly lighter. For a local dev cluster this matters. + +### Maturity (10%) + +| Backend | Score | Notes | +|---------|-------|-------| +| Thanos | 10/10 | CNCF graduated, v0.37.2, used at massive scale by many organizations | +| GreptimeDB | 6/10 | v0.12.0, growing project, fewer production references. Active development. 
| + +### Storage cost projection (15%) + +| Backend | Score | Notes | +|---------|-------|-------| +| Thanos | 6/10 | 1.4 MB for ~17 min of data. Block-based format is less space-efficient. Compactor helps long-term but adds operational complexity. | +| GreptimeDB | 9/10 | 252 KB for same data — 5.6x smaller. Columnar format compresses metrics data very well. Fewer bytes = lower S3 storage and egress cost. | + +GreptimeDB's columnar storage format produces significantly smaller objects. Both +backends target versitygw S3. While Thanos Compactor can reduce long-term storage, +GreptimeDB's baseline efficiency is notably better. + +### Weighted total + +| Backend | Correctness (20%) | Complexity (40%) | Resources (15%) | Maturity (10%) | Storage (15%) | **Total** | +|---------|-------------------|-----------------|-----------------|---------------|--------------|-----------| +| Thanos | 2.0 | 2.8 | 1.35 | 1.0 | 0.9 | **8.05** | +| GreptimeDB | 2.0 | 3.6 | 0.75 | 0.6 | 1.35 | **8.30** | + +--- + +## Recommendation + +**The two backends are essentially tied (Thanos 8.05 vs GreptimeDB 8.30)** after +accounting for measured object storage efficiency. GreptimeDB's columnar format +produces 5.6x less data on S3, which flips the storage cost score and narrows +Thanos's advantage on maturity and resource usage. + +1. **For ystack local dev clusters**: Thanos is still preferred — lighter CPU/memory + footprint matters in constrained k3d environments, and storage cost is less + relevant with emptyDir/local volumes. + +2. **For production multi-cluster with S3 storage costs**: GreptimeDB deserves + serious consideration — its storage efficiency advantage compounds at scale, + and lower object counts mean fewer S3 API calls (PUT/GET costs). + +3. **Thanos advantages**: CNCF graduated maturity, battle-tested at massive scale, + well-documented multi-tenancy and zone-aware ingestion, lower runtime resource + footprint. + +4. 
**GreptimeDB advantages**: Simpler single-component topology, dramatically better + storage efficiency, SQL access to metrics data, active development pace. + +## Next steps + +1. Remove GreptimeDB from the cluster (losing candidate) +2. Remove dual remote_write — keep only Thanos Receive +3. Add `monitoring/thanos/` to `k3s/30-monitoring/kustomization.yaml` +4. Update validate script to check Thanos components +5. Run `y-cluster-validate-ystack --context=local` to confirm