Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 3 additions & 11 deletions bin/y-cluster-converge-ystack
Original file line number Diff line number Diff line change
Expand Up @@ -71,17 +71,9 @@ apply_base 08-buildkitd-grpcroute
k -n ystack get grpcroute buildkitd
echo "# Validated: grpcroute buildkitd exists"

# 7. Monitoring operator + CRDs
apply_base 30-monitoring-operator
echo "# Waiting for prometheus-operator CRDs to register ..."
until k get crd prometheuses.monitoring.coreos.com >/dev/null 2>&1; do sleep 2; done
until k get crd alertmanagers.monitoring.coreos.com >/dev/null 2>&1; do sleep 2; done
until k get crd servicemonitors.monitoring.coreos.com >/dev/null 2>&1; do sleep 2; done
echo "# Validated: prometheus-operator CRDs registered"

# 8. Monitoring CRs (Prometheus, Alertmanager, exporters)
apply_base 31-monitoring
k -n monitoring get prometheus now
# 7. Monitoring (vanilla Prometheus + Alertmanager + exporters)
apply_base 30-monitoring
k -n monitoring rollout status deploy/prometheus-now --timeout=120s
echo "# Validated: monitoring stack exists"

# 6.8 Prometheus HTTPRoute
Expand Down
7 changes: 5 additions & 2 deletions bin/y-cluster-provision-k3d
Original file line number Diff line number Diff line change
Expand Up @@ -129,5 +129,8 @@ PROD_REGISTRY_IP=$(kubectl --context=$CTX -n ystack get service prod-registry -o
docker exec k3d-ystack-server-0 sh -cex "echo '$BUILDS_REGISTRY_IP builds-registry.ystack.svc.cluster.local' >> /etc/hosts"
docker exec k3d-ystack-server-0 sh -cex "echo '$PROD_REGISTRY_IP prod-registry.ystack.svc.cluster.local' >> /etc/hosts"

echo "# Updating /etc/hosts (requires sudo) ..."
y-k8s-ingress-hosts --context=$CTX -write -override-ip "${YSTACK_PORTS_IP:-127.0.0.1}"
echo "# Checking /etc/hosts ..."
if ! y-k8s-ingress-hosts --context=$CTX -check -override-ip "${YSTACK_PORTS_IP:-127.0.0.1}"; then
echo "# Updating /etc/hosts (requires sudo) ..."
y-k8s-ingress-hosts --context=$CTX -write -override-ip "${YSTACK_PORTS_IP:-127.0.0.1}"
fi
7 changes: 5 additions & 2 deletions bin/y-cluster-provision-lima
Original file line number Diff line number Diff line change
Expand Up @@ -122,5 +122,8 @@ PROD_REGISTRY_IP=$(kubectl --context=$CTX -n ystack get service prod-registry -o
limactl shell ystack sudo sh -c "echo '$BUILDS_REGISTRY_IP builds-registry.ystack.svc.cluster.local' >> /etc/hosts"
limactl shell ystack sudo sh -c "echo '$PROD_REGISTRY_IP prod-registry.ystack.svc.cluster.local' >> /etc/hosts"

echo "# Updating /etc/hosts (requires sudo) ..."
y-k8s-ingress-hosts --context=$CTX -write -override-ip 127.0.0.1
echo "# Checking /etc/hosts ..."
if ! y-k8s-ingress-hosts --context=$CTX -check -override-ip 127.0.0.1; then
echo "# Updating /etc/hosts (requires sudo) ..."
y-k8s-ingress-hosts --context=$CTX -write -override-ip 127.0.0.1
fi
8 changes: 4 additions & 4 deletions bin/y-cluster-validate-ystack
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,10 @@ k -n ystack get grpcroute buildkitd >/dev/null 2>&1 \
&& report "grpcroute buildkitd" "ok" \
|| report "grpcroute buildkitd" "not found"

# 7.6 Monitoring stack
k -n monitoring get prometheus now >/dev/null 2>&1 \
&& report "prometheus now" "ok" \
|| report "prometheus now" "not found"
# 7.6 Monitoring stack (vanilla Prometheus deployment)
ROLLOUT_PROM=$(k -n monitoring rollout status deploy/prometheus-now --timeout=10s 2>&1) \
&& report "prometheus-now rollout" "ok" \
|| report "prometheus-now rollout" "$ROLLOUT_PROM"

# 7.7 Prometheus HTTPRoute
k -n monitoring get httproute prometheus-now >/dev/null 2>&1 \
Expand Down
6 changes: 6 additions & 0 deletions k3s/30-monitoring/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
resources:
- ../../monitoring/namespace
- ../../monitoring/prometheus-now
- ../../monitoring/alertmanager-main
- ../../monitoring/kube-state-metrics-now
- ../../monitoring/node-exporter-now
3 changes: 2 additions & 1 deletion monitoring/alertmanager-main/main-alertmanager-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ spec:
protocol: TCP
targetPort: web
selector:
alertmanager: main
app.kubernetes.io/name: alertmanager
app.kubernetes.io/instance: main
60 changes: 57 additions & 3 deletions monitoring/alertmanager-main/main-alertmanager.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,60 @@
apiVersion: monitoring.coreos.com/v1
kind: Alertmanager
apiVersion: apps/v1
kind: Deployment
metadata:
name: main
name: alertmanager-main
labels:
app.kubernetes.io/name: alertmanager
app.kubernetes.io/instance: main
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: alertmanager
app.kubernetes.io/instance: main
template:
metadata:
labels:
app.kubernetes.io/name: alertmanager
app.kubernetes.io/instance: main
spec:
securityContext:
runAsUser: 65534
runAsGroup: 65534
runAsNonRoot: true
fsGroup: 65534
containers:
- name: alertmanager
image: quay.io/prometheus/alertmanager:v0.28.1
args:
- --config.file=/etc/alertmanager/alertmanager.yaml
- --storage.path=/data
ports:
- name: web
containerPort: 9093
readinessProbe:
httpGet:
path: /-/ready
port: web
initialDelaySeconds: 5
livenessProbe:
httpGet:
path: /-/healthy
port: web
initialDelaySeconds: 15
resources:
requests:
cpu: 10m
memory: 32Mi
limits:
memory: 64Mi
volumeMounts:
- name: config
mountPath: /etc/alertmanager
- name: data
mountPath: /data
volumes:
- name: config
secret:
secretName: alertmanager-main
- name: data
emptyDir: {}
35 changes: 35 additions & 0 deletions monitoring/greptimedb/bucket-create.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
apiVersion: batch/v1
kind: Job
metadata:
name: bucket-create-greptimedb
spec:
template:
spec:
containers:
- name: mc
image: minio/mc:RELEASE.2025-08-13T08-35-41Z
env:
- name: AWS_ACCESS_KEY_ID
valueFrom:
secretKeyRef:
name: minio
key: accesskey
- name: AWS_SECRET_ACCESS_KEY
valueFrom:
secretKeyRef:
name: minio
key: secretkey
- name: BUCKET_NAME
value: greptimedb
- name: S3_ENDPOINT
value: http://blobs-versitygw.ystack.svc.cluster.local
command:
- sh
- -ce
- |
until mc alias set s3 $S3_ENDPOINT $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY 2>/dev/null; do
sleep 2
done
mc mb --ignore-existing s3/$BUCKET_NAME
restartPolicy: Never
backoffLimit: 10
7 changes: 7 additions & 0 deletions monitoring/greptimedb/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[storage]
type = "S3"
bucket = "greptimedb"
endpoint = "http://blobs-versitygw.ystack.svc.cluster.local"
access_key_id = "YstackEXAMPLEKEY"
secret_access_key = "github.com/Yolean/ystack-EXAMPLE"
region = "us-east-1"
72 changes: 72 additions & 0 deletions monitoring/greptimedb/greptimedb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
apiVersion: v1
kind: Service
metadata:
name: greptimedb
spec:
ports:
- name: http
port: 4000
targetPort: http
- name: grpc
port: 4001
targetPort: grpc
selector:
app: greptimedb
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: greptimedb
labels:
app: greptimedb
spec:
replicas: 1
selector:
matchLabels:
app: greptimedb
template:
metadata:
labels:
app: greptimedb
spec:
containers:
- name: greptimedb
image: greptime/greptimedb:v0.12.0
args:
- standalone
- start
- --config-file=/etc/greptimedb/config.toml
- --http-addr=0.0.0.0:4000
- --rpc-addr=0.0.0.0:4001
ports:
- name: http
containerPort: 4000
- name: grpc
containerPort: 4001
readinessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 10
livenessProbe:
httpGet:
path: /health
port: http
initialDelaySeconds: 15
resources:
requests:
cpu: 50m
memory: 256Mi
limits:
memory: 768Mi
volumeMounts:
- name: data
mountPath: /tmp/greptimedb
- name: config
mountPath: /etc/greptimedb
volumes:
- name: data
emptyDir: {}
- name: config
configMap:
name: greptimedb-config
22 changes: 22 additions & 0 deletions monitoring/greptimedb/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: monitoring

resources:
- greptimedb.yaml
- bucket-create.yaml

generatorOptions:
disableNameSuffixHash: true

secretGenerator:
- name: minio
literals:
- accesskey=YstackEXAMPLEKEY
- secretkey=github.com/Yolean/ystack-EXAMPLE

configMapGenerator:
- name: greptimedb-config
files:
- config.toml
9 changes: 0 additions & 9 deletions monitoring/kube-state-metrics-now/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,3 @@ namespace: monitoring

resources:
- ../kube-state-metrics

patchesStrategicMerge:
- |-
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: kube-state-metrics
labels:
prometheus: now
1 change: 0 additions & 1 deletion monitoring/kube-state-metrics/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,3 @@ resources:
- deployment.yaml
- service-account.yaml
- service.yaml
- kube-state-metrics-servicemonitor.yaml
16 changes: 0 additions & 16 deletions monitoring/node-exporter-now/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,3 @@ namespace: monitoring

resources:
- ../node-exporter

patchesStrategicMerge:
- |-
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: node-exporter
labels:
prometheus: now
- |-
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
name: node-exporter
labels:
prometheus: now
2 changes: 0 additions & 2 deletions monitoring/node-exporter/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,3 @@ resources:
- node-exporter-clusterRole.yaml
- node-exporter-clusterRoleBinding.yaml
- node-exporter-daemonset.yaml
- node-exporter-podmonitor.yaml
- example-rules.yaml
11 changes: 11 additions & 0 deletions monitoring/prometheus-now/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,14 @@ resources:
- ../rbac-prometheus
- now-prometheus-service.yaml
- now-prometheus.yaml

generatorOptions:
disableNameSuffixHash: true

configMapGenerator:
- name: prometheus-now-config
files:
- prometheus.yml
- name: prometheus-now-rules
files:
- rules/node-exporter.yml
2 changes: 1 addition & 1 deletion monitoring/prometheus-now/now-prometheus-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,4 @@ spec:
targetPort: web
selector:
app.kubernetes.io/name: prometheus
prometheus: now
app.kubernetes.io/instance: now
Loading