From 99668faea8c33ba7184ee0d4227f401c7c70d335 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 25 Nov 2025 15:15:35 -0600 Subject: [PATCH 01/33] feat: add dnsdist and dnscollector --- .gitignore | 2 +- Makefile | 2 +- config/agent/dnscollector-config.yaml | 55 +++++++++++++++++++++++ config/agent/dnsdist-config.yaml | 63 +++++++++++++++++++++++++++ config/agent/kustomization.yaml | 6 +++ config/agent/manager.yaml | 62 ++++++++++++++++++++++++-- config/agent/pdns-service.yaml | 4 ++ 7 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 config/agent/dnscollector-config.yaml create mode 100644 config/agent/dnsdist-config.yaml diff --git a/.gitignore b/.gitignore index 401eae5..cfc6196 100644 --- a/.gitignore +++ b/.gitignore @@ -35,5 +35,5 @@ go.work.sum .env # Editor/IDE -# .idea/ +.idea/ # .vscode/ diff --git a/Makefile b/Makefile index 2df25c9..5bbcfed 100644 --- a/Makefile +++ b/Makefile @@ -469,7 +469,7 @@ set -e; \ package=$(2)@$(3) ;\ echo "Downloading $${package}" ;\ rm -f $(1) ;\ -GOBIN=$(LOCALBIN) go install $${package} ;\ +CGO_ENABLED=0 GOOS=$$(go env GOOS) GOARCH=$$(go env GOARCH) GOBIN=$(LOCALBIN) go install $${package} ;\ mv $(1) $(1)-$(3) ;\ } ;\ ln -sf $$(realpath $(1)-$(3)) $(1) diff --git a/config/agent/dnscollector-config.yaml b/config/agent/dnscollector-config.yaml new file mode 100644 index 0000000..e50b369 --- /dev/null +++ b/config/agent/dnscollector-config.yaml @@ -0,0 +1,55 @@ +################################################ +# global configuration +# more details: https://github.com/dmachard/DNS-collector/blob/main/docs/configuration.md#global +################################################ +global: + trace: + verbose: true + log-malformed: true + filename: "" + max-size: 10 + max-backups: 10 + server-identity: "dns-collector" + pid-file: "" + text-format: "timestamp-rfc3339ns identity operation rcode queryip queryport family protocol length-unit qname qtype latency" + text-format-delimiter: " " + text-format-boundary: 
"\"" + text-jinja: "" + worker: + interval-monitor: 10 + buffer-size: 8192 + telemetry: + enabled: false + web-path: "/metrics" + web-listen: ":9165" + prometheus-prefix: "dnscollector_exporter" + tls-support: false + tls-cert-file: "" + tls-key-file: "" + client-ca-file: "" + basic-auth-enable: false + basic-auth-login: admin + basic-auth-pwd: changeme + +################################################ +# Pipelining configuration +# more details: https://github.com/dmachard/DNS-collector/blob/main/docs/running_mode.md#pipelining +# workers: https://github.com/dmachard/DNS-collector/blob/main/docs/workers.md +# transformers: https://github.com/dmachard/DNS-collector/blob/main/docs/transformers.md +################################################ +pipelines: + - name: tap + dnstap: + listen-ip: 0.0.0.0 + listen-port: 6000 + transforms: + normalize: + qname-lowercase: true + qname-replace-nonprintable: true + routing-policy: + forward: [ console ] + dropped: [ ] + + - name: console + stdout: + mode: text \ No newline at end of file diff --git a/config/agent/dnsdist-config.yaml b/config/agent/dnsdist-config.yaml new file mode 100644 index 0000000..2efc843 --- /dev/null +++ b/config/agent/dnsdist-config.yaml @@ -0,0 +1,63 @@ +acl: + - 0.0.0.0/0 + +binds: + - listen_address: "0.0.0.0:53" + reuseport: true + protocol: Do53 + threads: 2 + +packet_caches: + - name: cache + size: 100 + +pools: + - name: default + packet_cache: cache + +backends: + - address: "127.0.0.1:5300" + protocol: Do53 + pools: + - default + +remote_logging: + dnstap_loggers: + - name: remote_logging + transport: tcp + address: "127.0.0.1:6000" + connection_count: 2 + +query_rules: + - name: "log all queries" + selector: + type: All + action: + type: DnstapLog + identity: dnsdist_v2 + logger_name: remote_logging + + - name: "default rule" + selector: + type: All + action: + type: Pool + pool_name: default + +response_rules: + - name: log all responses + selector: + type: All + action: + type: 
DnstapLog + identity: dnsdist_v2 + logger_name: remote_logging + +cache_hit_response_rules: + - name: log all responses from cache + selector: + type: All + action: + type: DnstapLog + identity: dnsdist_v2_cache + logger_name: remote_logging \ No newline at end of file diff --git a/config/agent/kustomization.yaml b/config/agent/kustomization.yaml index 08a0a34..797a914 100644 --- a/config/agent/kustomization.yaml +++ b/config/agent/kustomization.yaml @@ -21,6 +21,12 @@ configMapGenerator: - name: pdns-config files: - pdns.conf +- name: dnsdist-config + files: + - dnsdist.yml=dnsdist-config.yaml +- name: dnscollector-config + files: + - config.yaml=dnscollector-config.yaml images: - name: ghcr.io/datum-cloud/dns-operator diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index e949830..9969249 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -127,14 +127,15 @@ spec: mountPath: /config - name: pdns-shared mountPath: /run/pdns + - name: pdns image: powerdns/pdns-auth-51:latest imagePullPolicy: IfNotPresent ports: - - containerPort: 53 + - containerPort: 5300 name: dns protocol: UDP - - containerPort: 53 + - containerPort: 5300 name: dns-tcp protocol: TCP - containerPort: 8082 @@ -153,7 +154,10 @@ spec: - | set -eu; exec pdns_server \ - --api-key="$(cat /run/pdns/api-key)" --api=yes --webserver-port=8082 + --local-port=5300 \ + --api-key="$(cat /run/pdns/api-key)" \ + --api=yes \ + --webserver-port=8082 securityContext: runAsUser: 953 runAsGroup: 953 @@ -163,6 +167,7 @@ spec: drop: - "ALL" add: ["NET_BIND_SERVICE"] + - name: lightningstream image: powerdns/lightningstream:main imagePullPolicy: IfNotPresent @@ -220,7 +225,50 @@ spec: mountPath: /lmdb - name: lightningstream-config mountPath: /etc/lightningstream - + + - name: dnsdist + image: powerdns/dnsdist-21:latest + args: ["--config", "/etc/dnsdist/dnsdist.yml"] + imagePullPolicy: IfNotPresent + ports: + - containerPort: 53 + name: dnsdist-udp + protocol: UDP + - 
containerPort: 53 + name: dnsdist-tcp + protocol: TCP + - containerPort: 8083 + name: dnsdist-metrics + protocol: TCP + volumeMounts: + - name: pdns-shared + mountPath: /run/pdns + - name: dnsdist-config + mountPath: /etc/dnsdist + readOnly: true + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - "ALL" + add: ["NET_BIND_SERVICE"] + + - name: dnstap-collector + image: dmachard/dnscollector:latest + imagePullPolicy: IfNotPresent + args: ["-config", "/etc/dnscollector/config.yaml"] + volumeMounts: + - name: dnscollector-config + mountPath: /etc/dnscollector + readOnly: true + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + capabilities: + drop: + - "ALL" + volumes: - name: server-config configMap: @@ -233,5 +281,11 @@ spec: - name: pdns-config configMap: name: pdns-config + - name: dnsdist-config + configMap: + name: dnsdist-config + - name: dnscollector-config + configMap: + name: dnscollector-config serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/config/agent/pdns-service.yaml b/config/agent/pdns-service.yaml index cd1ff9b..c2022cf 100644 --- a/config/agent/pdns-service.yaml +++ b/config/agent/pdns-service.yaml @@ -18,3 +18,7 @@ spec: port: 8082 targetPort: 8082 protocol: TCP + - name: metrics + port: 8083 + targetPort: 8083 + protocol: TCP \ No newline at end of file From 81776101224a4c00aab1980b1f927c8e58be9c0e Mon Sep 17 00:00:00 2001 From: cc-datum Date: Wed, 26 Nov 2025 13:04:21 -0600 Subject: [PATCH 02/33] feat: add vector configuration --- config/agent/dnsdist-config.yaml | 27 +++++++- config/agent/kustomization.yaml | 3 + config/agent/manager.yaml | 30 +++++++++ config/agent/vector-config.yaml | 105 +++++++++++++++++++++++++++++++ 4 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 config/agent/vector-config.yaml diff --git a/config/agent/dnsdist-config.yaml b/config/agent/dnsdist-config.yaml index 
2efc843..d5864ac 100644 --- a/config/agent/dnsdist-config.yaml +++ b/config/agent/dnsdist-config.yaml @@ -27,6 +27,10 @@ remote_logging: transport: tcp address: "127.0.0.1:6000" connection_count: 2 + - name: vector_remote_logging + transport: tcp + address: "127.0.0.1:6001" + connection_count: 2 query_rules: - name: "log all queries" @@ -36,6 +40,13 @@ query_rules: type: DnstapLog identity: dnsdist_v2 logger_name: remote_logging + - name: "log all queries to vector" + selector: + type: All + action: + type: DnstapLog + identity: dnsdist_v2 + logger_name: vector_remote_logging - name: "default rule" selector: @@ -52,6 +63,13 @@ response_rules: type: DnstapLog identity: dnsdist_v2 logger_name: remote_logging + - name: log all responses to vector + selector: + type: All + action: + type: DnstapLog + identity: dnsdist_v2 + logger_name: vector_remote_logging cache_hit_response_rules: - name: log all responses from cache @@ -60,4 +78,11 @@ cache_hit_response_rules: action: type: DnstapLog identity: dnsdist_v2_cache - logger_name: remote_logging \ No newline at end of file + logger_name: remote_logging + - name: log all responses from cache to vector + selector: + type: All + action: + type: DnstapLog + identity: dnsdist_v2_cache + logger_name: vector_remote_logging \ No newline at end of file diff --git a/config/agent/kustomization.yaml b/config/agent/kustomization.yaml index 797a914..fa52af3 100644 --- a/config/agent/kustomization.yaml +++ b/config/agent/kustomization.yaml @@ -27,6 +27,9 @@ configMapGenerator: - name: dnscollector-config files: - config.yaml=dnscollector-config.yaml +- name: vector-config + files: + - vector-config.yaml images: - name: ghcr.io/datum-cloud/dns-operator diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index 9969249..4224fd1 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -269,6 +269,31 @@ spec: drop: - "ALL" + - name: vector + image: timberio/vector:0.51.1-distroless-static + args: + - 
--log-format=json + - --verbose + - --watch-config + - --config-dir + - /etc/vector/ + volumeMounts: + - name: vector-config + mountPath: /etc/vector/vector-config.yaml + subPath: vector-config.yaml + - name: vector-config-volume + mountPath: /etc/vector + ports: + - containerPort: 9598 + name: vector-metrics + protocol: TCP + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi volumes: - name: server-config configMap: @@ -287,5 +312,10 @@ spec: - name: dnscollector-config configMap: name: dnscollector-config + - name: vector-config + configMap: + name: vector-config + - name: vector-config-volume + emptyDir: {} serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml new file mode 100644 index 0000000..414ea3f --- /dev/null +++ b/config/agent/vector-config.yaml @@ -0,0 +1,105 @@ +data_dir: /var/lib/vector + +api: + enabled: false + +sources: + dnstap_tcp: + type: dnstap + address: 0.0.0.0:6001 + mode: tcp + internal_metrics: + type: internal_metrics + +transforms: + dnstap_enriched: + type: remap + inputs: + - dnstap_tcp + source: | + .message_type = to_string(.messageType) ?? to_string(.message_type) ?? "unknown" + .proto = to_string(.socketProtocol) ?? to_string(.protocol) ?? "unknown" + .family = to_string(.socketFamily) ?? to_string(.network_family) ?? "unknown" + .opcode = to_string(.requestData.header.opcode) ?? to_string(.responseData.header.opcode) ?? to_string(.opcode) ?? "unknown" + .rcode = to_string(.responseData.rcodeName) ?? to_string(.response_code) ?? to_string(.responseData.header.rcode) ?? "unknown" + .server_id = to_string(.serverId) ?? "unknown" + .qname = to_string(.question[0].domainName) ?? to_string(.query_name) ?? "unknown" + .qtype = to_string(.question[0].questionType) ?? to_string(.question[0].questionTypeId) ?? to_string(.query_type) ?? 
"unknown" + .hit = 1 + if exists(.requestData.time) && exists(.time) { + req, err_req = to_int(.requestData.time) + resp, err_resp = to_int(.time) + if !is_null(err_req) { req = null } + if !is_null(err_resp) { resp = null } + if is_integer(req) && is_integer(resp) { + diff = to_int!(resp) - to_int!(req) + if diff >= 0 { + .latency_ns = diff + .latency_seconds = to_float(.latency_ns) / 1000000000.0 + } + } + } + + dnstap_metrics: + type: log_to_metric + inputs: + - dnstap_enriched + metrics: + - type: counter + name: dns_queries_total + field: hit + condition: '.message_type == "ClientQuery"' + tags: + family: "{{family}}" + proto: "{{proto}}" + opcode: "{{opcode}}" + qtype: "{{qtype}}" + - type: counter + name: dns_responses_total + field: hit + condition: '.message_type == "ClientResponse"' + tags: + family: "{{family}}" + proto: "{{proto}}" + rcode: "{{rcode}}" + server: "{{server_id}}" + - type: counter + name: dns_nxdomain_total + field: hit + condition: '.message_type == "ClientResponse" && .rcode == "NXDomain"' + tags: + proto: "{{proto}}" + server: "{{server_id}}" + - type: counter + name: dns_servfail_total + field: hit + condition: '.message_type == "ClientResponse" && .rcode == "ServFail"' + tags: + proto: "{{proto}}" + server: "{{server_id}}" + - type: histogram + name: dns_response_latency_seconds + field: latency_seconds + condition: '.message_type == "ClientResponse" && exists(.latency_seconds)' + bins: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5] + tags: + proto: "{{proto}}" + qtype: "{{qtype}}" + rcode: "{{rcode}}" + server: "{{server_id}}" + +sinks: + console: + type: console + target: stdout + encoding: + codec: json + inputs: + - dnstap_tcp + + prometheus: + type: prometheus_exporter + inputs: + - dnstap_metrics + - internal_metrics + address: 0.0.0.0:9598 From db1d504cf1ce179e15980f56768d5263d16de5fa Mon Sep 17 00:00:00 2001 From: cc-datum Date: Thu, 27 Nov 2025 17:04:37 -0600 Subject: [PATCH 03/33] feat: update 
dnscollector and dnsdist configurations --- config/agent/dnscollector-config.yaml | 70 +++++++++++++++++++++++++-- config/agent/dnsdist-config.yaml | 10 ++++ config/agent/manager.yaml | 10 ++++ config/agent/pdns-service.yaml | 14 +++++- config/agent/vector-config.yaml | 17 +++++++ 5 files changed, 117 insertions(+), 4 deletions(-) diff --git a/config/agent/dnscollector-config.yaml b/config/agent/dnscollector-config.yaml index e50b369..727445d 100644 --- a/config/agent/dnscollector-config.yaml +++ b/config/agent/dnscollector-config.yaml @@ -19,14 +19,18 @@ global: interval-monitor: 10 buffer-size: 8192 telemetry: - enabled: false + enabled: true web-path: "/metrics" web-listen: ":9165" prometheus-prefix: "dnscollector_exporter" + + # Optional TLS configuration tls-support: false tls-cert-file: "" tls-key-file: "" client-ca-file: "" + + # Optional authentication basic-auth-enable: false basic-auth-login: admin basic-auth-pwd: changeme @@ -46,10 +50,70 @@ pipelines: normalize: qname-lowercase: true qname-replace-nonprintable: true + latency: + measure-latency: true routing-policy: - forward: [ console ] + forward: [ console, prometheus, loki ] dropped: [ ] - name: console stdout: - mode: text \ No newline at end of file + mode: text + + - name: prometheus + prometheus: + listen-ip: 0.0.0.0 + listen-port: 8084 + basic-auth-enable: false + basic-auth-login: admin + basic-auth-pwd: changeme + tls-support: false + tls-mutual: false + tls-min-version: 1.2 + cert-file: "" + key-file: "" + prometheus-prefix: "dnscollector" + top-n: 10 + chan-buffer-size: 0 + histogram-metrics-enabled: true + requesters-metrics-enabled: true + domains-metrics-enabled: true + noerror-metrics-enabled: true + servfail-metrics-enabled: true + nonexistent-metrics-enabled: true + timeout-metrics-enabled: true + prometheus-labels: ["stream_id"] + requesters-cache-size: 250000 + requesters-cache-ttl: 3600 + domains-cache-size: 500000 + domains-cache-ttl: 3600 + noerror-domains-cache-size: 100000 + 
noerror-domains-cache-ttl: 3600 + servfail-domains-cache-size: 10000 + servfail-domains-cache-ttl: 3600 + nonexistent-domains-cache-size: 10000 + nonexistent-domains-cache-ttl: 3600 + default-domains-cache-size: 1000 + default-domains-cache-ttl: 3600 + + - name: loki + lokiclient: + server-url: "http://loki.dns-monitoring.svc:3100/loki/api/v1/push" + job-name: "dnscollector" + mode: "text" + flush-interval: 5 + batch-size: 1048576 + retry-interval: 10 + text-format: "" + proxy-url: "" + tls-insecure: false + tls-min-version: 1.2 + ca-file: "" + cert-file: "" + key-file: "" + basic-auth-login: "" + basic-auth-pwd: "" + basic-auth-pwd-file: "" + tenant-id: "" + relabel-configs: [] + chan-buffer-size: 0 \ No newline at end of file diff --git a/config/agent/dnsdist-config.yaml b/config/agent/dnsdist-config.yaml index d5864ac..90f45ce 100644 --- a/config/agent/dnsdist-config.yaml +++ b/config/agent/dnsdist-config.yaml @@ -1,3 +1,13 @@ +webserver: + listen_address: 0.0.0.0:8083 + password: "" + api_key: "" + acl: + - 0.0.0.0/0 + api_requires_authentication: false + stats_require_authentication: false + dashboard_requires_authentication: false + acl: - 0.0.0.0/0 diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index 4224fd1..f4908f7 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -258,6 +258,16 @@ spec: image: dmachard/dnscollector:latest imagePullPolicy: IfNotPresent args: ["-config", "/etc/dnscollector/config.yaml"] + ports: + - containerPort: 6000 + name: dnstap + protocol: TCP + - containerPort: 9165 + name: dnscolt-metrics + protocol: TCP + - containerPort: 8084 + name: dnscol-metrics + protocol: TCP volumeMounts: - name: dnscollector-config mountPath: /etc/dnscollector diff --git a/config/agent/pdns-service.yaml b/config/agent/pdns-service.yaml index c2022cf..5093c2b 100644 --- a/config/agent/pdns-service.yaml +++ b/config/agent/pdns-service.yaml @@ -21,4 +21,16 @@ spec: - name: metrics port: 8083 targetPort: 8083 - 
protocol: TCP \ No newline at end of file + protocol: TCP + - name: dnscol-metrics + port: 8084 + targetPort: 8084 + protocol: TCP + - name: dnscolt-metrics + port: 9165 + targetPort: 9165 + protocol: TCP + - name: vector-metrics + port: 9598 + targetPort: 9598 + protocol: TCP diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index 414ea3f..e0f9d3d 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -103,3 +103,20 @@ sinks: - dnstap_metrics - internal_metrics address: 0.0.0.0:9598 + + loki: + type: loki + inputs: + - dnstap_enriched + endpoint: http://loki.dns-monitoring.svc:3100 + out_of_order_action: accept + encoding: + codec: json + labels: + job: dnsdist + app: dnsdist + component: vector + server: "{{server_id}}" + proto: "{{proto}}" + family: "{{family}}" + message_type: "{{message_type}}" From 4e33cc92e5d5e109fd46f74678e3cf8d27028eb0 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Sat, 29 Nov 2025 12:44:53 -0600 Subject: [PATCH 04/33] feat: add Grafana, Prometheus, and Loki configurations for DNS observability --- config/monitoring/README.md | 50 ++++++ .../grafana/dashboards/dashboards.yaml | 10 ++ .../grafana/dashboards/dns_observability.json | 153 ++++++++++++++++++ .../grafana/datasources/datasources.yaml | 17 ++ config/monitoring/grafana/deployment.yaml | 81 ++++++++++ config/monitoring/grafana/kustomization.yaml | 26 +++ config/monitoring/grafana/service.yaml | 15 ++ config/monitoring/kustomization.yaml | 10 ++ config/monitoring/loki/config/loki.yaml | 41 +++++ config/monitoring/loki/deployment.yaml | 58 +++++++ config/monitoring/loki/kustomization.yaml | 14 ++ config/monitoring/loki/service.yaml | 14 ++ config/monitoring/namespace.yaml | 6 + .../prometheus/config/prometheus.yml | 43 +++++ config/monitoring/prometheus/deployment.yaml | 47 ++++++ .../monitoring/prometheus/kustomization.yaml | 14 ++ config/monitoring/prometheus/service.yaml | 14 ++ 17 files changed, 613 insertions(+) create 
mode 100644 config/monitoring/README.md create mode 100644 config/monitoring/grafana/dashboards/dashboards.yaml create mode 100644 config/monitoring/grafana/dashboards/dns_observability.json create mode 100644 config/monitoring/grafana/datasources/datasources.yaml create mode 100644 config/monitoring/grafana/deployment.yaml create mode 100644 config/monitoring/grafana/kustomization.yaml create mode 100644 config/monitoring/grafana/service.yaml create mode 100644 config/monitoring/kustomization.yaml create mode 100644 config/monitoring/loki/config/loki.yaml create mode 100644 config/monitoring/loki/deployment.yaml create mode 100644 config/monitoring/loki/kustomization.yaml create mode 100644 config/monitoring/loki/service.yaml create mode 100644 config/monitoring/namespace.yaml create mode 100644 config/monitoring/prometheus/config/prometheus.yml create mode 100644 config/monitoring/prometheus/deployment.yaml create mode 100644 config/monitoring/prometheus/kustomization.yaml create mode 100644 config/monitoring/prometheus/service.yaml diff --git a/config/monitoring/README.md b/config/monitoring/README.md new file mode 100644 index 0000000..e2e6278 --- /dev/null +++ b/config/monitoring/README.md @@ -0,0 +1,50 @@ +# Telemetry & Observability + +## Overview + +Lightweight observability stack for the DNS agent components. + +## Components +- **Grafana**: Pre-provisioned Prometheus and Loki data sources. +- **Prometheus**: Single instance scraping dnsdist, dnscollector, and vector + metrics out of the box. +- **Loki**: Single-binary log storage for dnstap/log forwarding from vector. +- **Namespace**: `dns-monitoring` is created automatically. + +## Deploy +Apply the full stack: +```bash +kubectl apply -k config/monitoring +``` + +Grafana credentials are `admin` / `admin` (stored in +`Secret/grafana-admin`). 
+ +Port-forward to reach the UI: +```bash +kubectl -n dns-monitoring port-forward svc/grafana 3000:80 +open http://localhost:3000 +``` + +## Data sources +- Prometheus URL: `${PROMETHEUS_URL}` (default + `http://prometheus.dns-monitoring.svc:9090`) +- Loki URL: `${LOKI_URL}` (default `http://loki.dns-monitoring.svc:3100`) + +If you want to use an existing cluster Prometheus instead of the bundled one, +patch `PROMETHEUS_URL` (an env var on the Grafana Deployment) and remove the `prometheus` entry from +`config/monitoring/kustomization.yaml` before applying. + +## Prometheus scraping +The bundled Prometheus scrapes: +- `dnsdist` at `pdns-auth.dns-agent-system.svc:8083` (`/metrics`) +- `dnscollector_exporter` at `pdns-auth.dns-agent-system.svc:9165` (`/metrics`) +- `dnscollector` at `pdns-auth.dns-agent-system.svc:8084` (`/metrics`) +- `vector` at `pdns-auth.dns-agent-system.svc:9598` (`/metrics`) + +## Metrics and logs wiring +- `config/agent/pdns-service.yaml` exposes metrics ports for dnsdist (8083), + dnscollector (8084 and 9165), and vector (9598). +- `config/agent/dnscollector-config.yaml` enables telemetry (9165) and the `prometheus` pipeline (8084). +- `config/agent/vector-config.yaml` streams enriched dnstap events to Loki at + `loki.dns-monitoring.svc:3100`. 
diff --git a/config/monitoring/grafana/dashboards/dashboards.yaml b/config/monitoring/grafana/dashboards/dashboards.yaml new file mode 100644 index 0000000..8923c60 --- /dev/null +++ b/config/monitoring/grafana/dashboards/dashboards.yaml @@ -0,0 +1,10 @@ +apiVersion: 1 +providers: + - name: dns-observability + orgId: 1 + folder: DNS + type: file + disableDeletion: true + editable: true + options: + path: /var/lib/grafana/dashboards \ No newline at end of file diff --git a/config/monitoring/grafana/dashboards/dns_observability.json b/config/monitoring/grafana/dashboards/dns_observability.json new file mode 100644 index 0000000..8fa48ae --- /dev/null +++ b/config/monitoring/grafana/dashboards/dns_observability.json @@ -0,0 +1,153 @@ +{ + "id": null, + "title": "DNS Observability", + "tags": ["dnsdist", "vector", "dnscollector"], + "timezone": "browser", + "schemaVersion": 39, + "version": 1, + "refresh": "30s", + "time": { + "from": "now-15m", + "to": "now" + }, + "templating": { "list": [] }, + "panels": [ + { + "id": 1, + "type": "timeseries", + "title": "DNS Queries per Second", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum by (proto) (rate(dns_queries_total{job=\"vector\"}[1m])) or on() vector(0)", + "legendFormat": "{{proto}}", + "refId": "A" + } + ], + "options": { + "legend": { "showLegend": true } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 } + }, + { + "id": 2, + "type": "timeseries", + "title": "Responses by RCODE", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum by (rcode) (rate(dns_responses_total{job=\"vector\"}[5m])) or on() vector(0)", + "legendFormat": "{{rcode}}", + "refId": "A" + } + ], + "options": { + "legend": { "showLegend": true } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 } + }, + 
{ + "id": 3, + "type": "timeseries", + "title": "NXDOMAIN / SERVFAIL rate", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(rate(dns_nxdomain_total{job=\"vector\"}[5m])) or on() vector(0)", + "legendFormat": "nxdomain", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "sum(rate(dns_servfail_total{job=\"vector\"}[5m])) or on() vector(0)", + "legendFormat": "servfail", + "refId": "B" + } + ], + "gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 } + }, + { + "id": 4, + "type": "timeseries", + "title": "Latency (p50/p95/p99)", + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.50, sum(rate(dns_response_latency_seconds_bucket{job=\"vector\"}[5m])) by (le))", + "legendFormat": "p50", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.95, sum(rate(dns_response_latency_seconds_bucket{job=\"vector\"}[5m])) by (le))", + "legendFormat": "p95", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "histogram_quantile(0.99, sum(rate(dns_response_latency_seconds_bucket{job=\"vector\"}[5m])) by (le))", + "legendFormat": "p99", + "refId": "C" + } + ], + "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 } + }, + { + "id": 5, + "type": "logs", + "title": "DNS Logs (Loki)", + "datasource": { + "type": "loki", + "uid": "loki" + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "{app=\"dnsdist\"} | json", + "refId": "A" + } + ], + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 14 } + } + ] +} \ No newline at end of file diff --git a/config/monitoring/grafana/datasources/datasources.yaml 
b/config/monitoring/grafana/datasources/datasources.yaml new file mode 100644 index 0000000..39bfe51 --- /dev/null +++ b/config/monitoring/grafana/datasources/datasources.yaml @@ -0,0 +1,17 @@ +apiVersion: 1 +datasources: + - name: Prometheus + uid: prometheus + type: prometheus + access: proxy + url: ${PROMETHEUS_URL} + isDefault: true + editable: true + jsonData: + timeInterval: 30s + - name: Loki + uid: loki + type: loki + access: proxy + url: ${LOKI_URL} + editable: true \ No newline at end of file diff --git a/config/monitoring/grafana/deployment.yaml b/config/monitoring/grafana/deployment.yaml new file mode 100644 index 0000000..0a4b2d9 --- /dev/null +++ b/config/monitoring/grafana/deployment.yaml @@ -0,0 +1,81 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: grafana + labels: + app.kubernetes.io/name: grafana +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: grafana + template: + metadata: + labels: + app.kubernetes.io/name: grafana + spec: + containers: + - name: grafana + image: grafana/grafana:10.4.2 + imagePullPolicy: IfNotPresent + ports: + - containerPort: 3000 + name: http + protocol: TCP + env: + - name: GF_SECURITY_ADMIN_USER + valueFrom: + secretKeyRef: + name: grafana-admin + key: admin-user + - name: GF_SECURITY_ADMIN_PASSWORD + valueFrom: + secretKeyRef: + name: grafana-admin + key: admin-password + - name: PROMETHEUS_URL + value: http://prometheus.dns-monitoring.svc:9090 + - name: LOKI_URL + value: http://loki.dns-monitoring.svc:3100 + volumeMounts: + - name: datasources + mountPath: /etc/grafana/provisioning/datasources/datasources.yaml + subPath: datasources.yaml + - name: dashboard-providers + mountPath: /etc/grafana/provisioning/dashboards/dashboards.yaml + subPath: dashboards.yaml + - name: dashboards + mountPath: /var/lib/grafana/dashboards + - name: storage + mountPath: /var/lib/grafana + readinessProbe: + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 10 + periodSeconds: 10 + 
livenessProbe: + httpGet: + path: /api/health + port: 3000 + initialDelaySeconds: 30 + periodSeconds: 30 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 1Gi + volumes: + - name: datasources + configMap: + name: grafana-datasources + - name: dashboard-providers + configMap: + name: grafana-dashboard-providers + - name: dashboards + configMap: + name: grafana-dashboards + - name: storage + emptyDir: {} diff --git a/config/monitoring/grafana/kustomization.yaml b/config/monitoring/grafana/kustomization.yaml new file mode 100644 index 0000000..ea8fb6d --- /dev/null +++ b/config/monitoring/grafana/kustomization.yaml @@ -0,0 +1,26 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - deployment.yaml + - service.yaml + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: grafana-dashboard-providers + files: + - dashboards/dashboards.yaml + - name: grafana-dashboards + files: + - dashboards/dns_observability.json + - name: grafana-datasources + files: + - datasources/datasources.yaml + +secretGenerator: + - name: grafana-admin + literals: + - admin-user=admin + - admin-password=admin diff --git a/config/monitoring/grafana/service.yaml b/config/monitoring/grafana/service.yaml new file mode 100644 index 0000000..59145d4 --- /dev/null +++ b/config/monitoring/grafana/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: grafana + labels: + app.kubernetes.io/name: grafana +spec: + type: ClusterIP + selector: + app.kubernetes.io/name: grafana + ports: + - name: http + port: 80 + targetPort: 3000 + protocol: TCP diff --git a/config/monitoring/kustomization.yaml b/config/monitoring/kustomization.yaml new file mode 100644 index 0000000..7113c0c --- /dev/null +++ b/config/monitoring/kustomization.yaml @@ -0,0 +1,10 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: dns-monitoring + +resources: + - namespace.yaml + - grafana + - 
prometheus + - loki diff --git a/config/monitoring/loki/config/loki.yaml b/config/monitoring/loki/config/loki.yaml new file mode 100644 index 0000000..b871983 --- /dev/null +++ b/config/monitoring/loki/config/loki.yaml @@ -0,0 +1,41 @@ +auth_enabled: false +server: + http_listen_port: 3100 +common: + path_prefix: /var/loki + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + replication_factor: 1 +schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h +storage_config: + boltdb_shipper: + active_index_directory: /var/loki/index + cache_location: /var/loki/cache + shared_store: filesystem + filesystem: + directory: /var/loki/chunks +ingester: + wal: + dir: /var/loki/wal + chunk_idle_period: 5m + chunk_retain_period: 30s + max_chunk_age: 1h +limits_config: + retention_period: 168h + reject_old_samples: true + reject_old_samples_max_age: 168h +compactor: + working_directory: /var/loki/compactor + shared_store: filesystem +analytics: + reporting_enabled: false \ No newline at end of file diff --git a/config/monitoring/loki/deployment.yaml b/config/monitoring/loki/deployment.yaml new file mode 100644 index 0000000..1ed8144 --- /dev/null +++ b/config/monitoring/loki/deployment.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: loki + labels: + app.kubernetes.io/name: loki +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: loki + template: + metadata: + labels: + app.kubernetes.io/name: loki + spec: + securityContext: + fsGroup: 10001 + containers: + - name: loki + image: grafana/loki:2.9.4 + imagePullPolicy: IfNotPresent + args: + - -config.file=/etc/loki/loki.yaml + ports: + - containerPort: 3100 + name: http + protocol: TCP + readinessProbe: + httpGet: + path: /ready + port: 3100 + initialDelaySeconds: 10 + periodSeconds: 10 + livenessProbe: + httpGet: + path: /ready + port: 3100 + initialDelaySeconds: 30 + 
periodSeconds: 30 + resources: + requests: + cpu: 200m + memory: 512Mi + limits: + cpu: 1 + memory: 2Gi + volumeMounts: + - name: config + mountPath: /etc/loki + - name: data + mountPath: /var/loki + volumes: + - name: config + configMap: + name: loki-config + - name: data + emptyDir: {} diff --git a/config/monitoring/loki/kustomization.yaml b/config/monitoring/loki/kustomization.yaml new file mode 100644 index 0000000..0b1acb8 --- /dev/null +++ b/config/monitoring/loki/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - deployment.yaml + - service.yaml + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: loki-config + files: + - config/loki.yaml diff --git a/config/monitoring/loki/service.yaml b/config/monitoring/loki/service.yaml new file mode 100644 index 0000000..48c0c86 --- /dev/null +++ b/config/monitoring/loki/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: loki + labels: + app.kubernetes.io/name: loki +spec: + selector: + app.kubernetes.io/name: loki + ports: + - name: http + port: 3100 + targetPort: 3100 + protocol: TCP diff --git a/config/monitoring/namespace.yaml b/config/monitoring/namespace.yaml new file mode 100644 index 0000000..cf27890 --- /dev/null +++ b/config/monitoring/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: dns-monitoring + labels: + app.kubernetes.io/name: dns-monitoring diff --git a/config/monitoring/prometheus/config/prometheus.yml b/config/monitoring/prometheus/config/prometheus.yml new file mode 100644 index 0000000..e331975 --- /dev/null +++ b/config/monitoring/prometheus/config/prometheus.yml @@ -0,0 +1,43 @@ +global: + scrape_interval: 30s + evaluation_interval: 30s + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: + - localhost:9090 + + - job_name: dnsdist + metrics_path: /metrics + static_configs: + - targets: + - 
pdns-auth.dns-agent-system.svc:8083 + labels: + app: dnsdist + + - job_name: dnscollector_exporter + metrics_path: /metrics + scrape_interval: 5s + static_configs: + - targets: + - pdns-auth.dns-agent-system.svc:9165 + labels: + app: dnscollector_exporter + + - job_name: dnscollector + metrics_path: /metrics + scrape_interval: 5s + static_configs: + - targets: + - pdns-auth.dns-agent-system.svc:8084 + labels: + app: dnscollector + + - job_name: vector + metrics_path: /metrics + static_configs: + - targets: + - pdns-auth.dns-agent-system.svc:9598 + labels: + app: vector \ No newline at end of file diff --git a/config/monitoring/prometheus/deployment.yaml b/config/monitoring/prometheus/deployment.yaml new file mode 100644 index 0000000..aa4686a --- /dev/null +++ b/config/monitoring/prometheus/deployment.yaml @@ -0,0 +1,47 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: prometheus + labels: + app.kubernetes.io/name: prometheus +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: prometheus + template: + metadata: + labels: + app.kubernetes.io/name: prometheus + spec: + containers: + - name: prometheus + image: prom/prometheus:v2.52.0 + imagePullPolicy: IfNotPresent + args: + - --config.file=/etc/prometheus/prometheus.yml + - --storage.tsdb.path=/prometheus + - --web.enable-lifecycle + ports: + - name: http + containerPort: 9090 + protocol: TCP + volumeMounts: + - name: prometheus-config + mountPath: /etc/prometheus/prometheus.yml + subPath: prometheus.yml + - name: prometheus-storage + mountPath: /prometheus + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: prometheus-config + configMap: + name: prometheus-config + - name: prometheus-storage + emptyDir: {} diff --git a/config/monitoring/prometheus/kustomization.yaml b/config/monitoring/prometheus/kustomization.yaml new file mode 100644 index 0000000..2e6380d --- /dev/null +++ 
b/config/monitoring/prometheus/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - deployment.yaml + - service.yaml + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: prometheus-config + files: + - config/prometheus.yml diff --git a/config/monitoring/prometheus/service.yaml b/config/monitoring/prometheus/service.yaml new file mode 100644 index 0000000..732d3f1 --- /dev/null +++ b/config/monitoring/prometheus/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: prometheus + labels: + app.kubernetes.io/name: prometheus +spec: + selector: + app.kubernetes.io/name: prometheus + ports: + - name: http + port: 9090 + targetPort: http + protocol: TCP From 79c100ee62e2d9404fb17d785e42f4a1df7c3825 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Thu, 4 Dec 2025 17:52:14 -0600 Subject: [PATCH 05/33] feat: enhance DNS observability with GeoIP integration and new Grafana dashboards --- config/agent/dnscollector-config.yaml | 49 +- config/agent/kustomization.yaml | 6 + config/agent/manager.yaml | 29 + .../grafana/dashboards/grafana_exporter.json | 922 +++++++++ .../grafana/dashboards/grafana_loki.json | 833 ++++++++ .../dashboards/grafana_prometheus.json | 1692 +++++++++++++++++ config/monitoring/grafana/kustomization.yaml | 3 + 7 files changed, 3532 insertions(+), 2 deletions(-) create mode 100644 config/monitoring/grafana/dashboards/grafana_exporter.json create mode 100644 config/monitoring/grafana/dashboards/grafana_loki.json create mode 100644 config/monitoring/grafana/dashboards/grafana_prometheus.json diff --git a/config/agent/dnscollector-config.yaml b/config/agent/dnscollector-config.yaml index 727445d..d8e9595 100644 --- a/config/agent/dnscollector-config.yaml +++ b/config/agent/dnscollector-config.yaml @@ -48,17 +48,62 @@ pipelines: listen-port: 6000 transforms: normalize: + enable: true qname-lowercase: true + rr-lowercase: true 
qname-replace-nonprintable: true + add-tld: true + add-tld-plus-one: true + quiet-text: false + reordering: + enable: true + flush-interval: 30 + max-buffer-size: 100 + suspicious: + enable: true + threshold-qname-len: 100 + threshold-packet-len: 1000 + threshold-slow: 1.0 + common-qtypes: + - A + - AAAA + - TXT + - CNAME + - PTR + - NAPTR + - DNSKEY + - SRV + - SOA + - NS + - MX + - DS + - HTTPS + unallowed-chars: + - '"' + - '==' + - '/' + - ':' + threshold-max-labels: 10 + whitelist-domains: + - '\.ip6\.arpa' latency: + enable: true measure-latency: true + unanswered-queries: true + queries-timeout: 2 + geoip: + enable: true + mmdb-country-file: /mmdb/GeoLite2-Country.mmdb + mmdb-city-file: /mmdb/GeoLite2-City.mmdb + mmdb-asn-file: /mmdb/GeoLite2-ASN.mmdb + lookup-ecs: true routing-policy: forward: [ console, prometheus, loki ] dropped: [ ] - name: console stdout: - mode: text + mode: json - name: prometheus prometheus: @@ -100,7 +145,7 @@ pipelines: lokiclient: server-url: "http://loki.dns-monitoring.svc:3100/loki/api/v1/push" job-name: "dnscollector" - mode: "text" + mode: "json" flush-interval: 5 batch-size: 1048576 retry-interval: 10 diff --git a/config/agent/kustomization.yaml b/config/agent/kustomization.yaml index fa52af3..0c8074c 100644 --- a/config/agent/kustomization.yaml +++ b/config/agent/kustomization.yaml @@ -31,6 +31,12 @@ configMapGenerator: files: - vector-config.yaml +secretGenerator: + - name: geoip-credentials + literals: + - account_id=changeme + - license_key=changeme + images: - name: ghcr.io/datum-cloud/dns-operator newName: ghcr.io/datum-cloud/dns-operator diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index f4908f7..a52f437 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -69,6 +69,30 @@ spec: mountPath: /lmdb - name: pdns-shared mountPath: /run/pdns + - name: init-geoip-data + image: ghcr.io/maxmind/geoipupdate:v7.1.1 + imagePullPolicy: IfNotPresent + env: + - name: GEOIPUPDATE_DB_DIR + 
value: /mmdb + - name: GEOIPUPDATE_EDITION_IDS + value: GeoLite2-ASN GeoLite2-City GeoLite2-Country + - name: GEOIPUPDATE_ACCOUNT_ID + valueFrom: + secretKeyRef: + name: geoip-credentials + key: account_id + - name: GEOIPUPDATE_LICENSE_KEY + valueFrom: + secretKeyRef: + name: geoip-credentials + key: license_key + securityContext: + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + - name: dnscollector-mmdb + mountPath: /mmdb containers: - command: - /manager @@ -272,6 +296,9 @@ spec: - name: dnscollector-config mountPath: /etc/dnscollector readOnly: true + - name: dnscollector-mmdb + mountPath: /mmdb + readOnly: true securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true @@ -322,6 +349,8 @@ spec: - name: dnscollector-config configMap: name: dnscollector-config + - name: dnscollector-mmdb + emptyDir: {} - name: vector-config configMap: name: vector-config diff --git a/config/monitoring/grafana/dashboards/grafana_exporter.json b/config/monitoring/grafana/dashboards/grafana_exporter.json new file mode 100644 index 0000000..d1f870c --- /dev/null +++ b/config/monitoring/grafana/dashboards/grafana_exporter.json @@ -0,0 +1,922 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 4, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 9, + "panels": [], + "title": "Go", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "go_goroutines{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "dnscollector", + "range": true, + "refId": "A" + } + ], + "title": "Goroutines", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "go_memstats_sys_bytes{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "dnscollector", + "range": true, + "refId": "A" + } + ], + "title": "Total Used Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "rate(process_cpu_seconds_total{job=\"$job\"}[2m])", + "legendFormat": "dnscollector", + "range": true, + "refId": "A" + } + ], + "title": "Process cpu", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 7, + "panels": [], + "title": "Workers", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "tap" + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 11 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + 
"showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "increase(dnscollector_exporter_worker_ingress_traffic_total{job=~\"$job\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{worker}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Worker - Ingress traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 11 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"increase(dnscollector_exporter_worker_egress_traffic_total{job=~\"$job\"}[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{worker}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Worker - Egress traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 11 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "increase(dnscollector_exporter_worker_discarded_traffic_total{job=~\"$job\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{worker}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + 
"title": "Worker - Discarded", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 8, + "panels": [], + "title": "Policies", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "increase(dnscollector_exporter_policy_forwarded_total{job=~\"$job\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{worker}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Policy - Forwarded", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, 
+ "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "maxHeight": 600, + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "increase(dnscollector_exporter_policy_dropped_total{job=~\"$job\"}[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{worker}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Policy - Dropped", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "dnscollector_exporter", + "value": "dnscollector_exporter" + }, + "description": "", + "hide": 0, + "label": "job", + "name": "job", + "options": [ + { + "selected": true, + "text": "dnscollector_exporter", + "value": 
"dnscollector_exporter" + } + ], + "query": "dnscollector_exporter", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "DNScollector - Exporter", + "uid": "bdo8oaa6fq7lse", + "version": 2, + "weekStart": "" +} \ No newline at end of file diff --git a/config/monitoring/grafana/dashboards/grafana_loki.json b/config/monitoring/grafana/dashboards/grafana_loki.json new file mode 100644 index 0000000..3d4c078 --- /dev/null +++ b/config/monitoring/grafana/dashboards/grafana_loki.json @@ -0,0 +1,833 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 15415, + "graphTooltip": 0, + "id": 7, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Frequency", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, 
+ "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 19, + "x": 0, + "y": 1 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "rate( ({job=\"$job\", identity=\"$identity\"} |~ \"$domain\" |~ \"$ip\" |~ \"$custom\")[5s] )", + "hide": false, + "instant": false, + "legendFormat": "", + "range": true, + "refId": "B" + } + ], + "title": "Frequency", + "type": "timeseries" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 19, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "sum(count_over_time({job=\"$job\", identity=\"$identity\"} |= \"$domain\" |= \"$ip\" |= \"$custom\" [$__range]))", + "instant": false, + "range": true, + "refId": 
"A" + } + ], + "title": "Total Events", + "type": "stat" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 12, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Logs", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "gridPos": { + "h": 17, + "w": 22, + "x": 0, + "y": 8 + }, + "id": 2, + "options": { + "dedupStrategy": "none", + "enableLogDetails": true, + "prettifyLogMessage": false, + "showCommonLabels": false, + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": false + }, + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "{job=\"$job\", identity=\"$identity\"} |~ \"$domain\" |~ \"$ip\" |~ \"$custom\"", + "instant": false, + "range": true, + "refId": "A" + } + ], + "title": "DNS Logs", + "type": "logs" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 14, + "panels": [], + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "refId": "A" + } + ], + "title": "Top", + "type": "row" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "qname" + }, + "properties": [ + { + "id": "custom.width", + "value": 553 + } + ] + }, + { + "matcher": { + "id": "byName", + 
"options": "IP" + }, + "properties": [ + { + "id": "custom.width", + "value": 195 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Domains" + }, + "properties": [ + { + "id": "custom.width", + "value": 668 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 11, + "x": 0, + "y": 26 + }, + "id": 16, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "topk(15, sum(count_over_time( ({job=\"$job\"} | pattern \"<_> <_> <_> <_> <_> <_> <_> <_> <_>\" |~ \"NOERROR\" |~ \"$domain\" |~ \"$ip\" |~ \"$custom\" )[$__range] )) by (qname))", + "instant": true, + "range": false, + "refId": "A" + } + ], + "title": "Top Domain Names - NOERROR", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Value #A": "Hits", + "domain": "Domain Name", + "qname": "Domains", + "queryip": "IP" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Hits" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "qname" + }, + "properties": [ + { + "id": "custom.width", + "value": 553 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "IP" + }, + "properties": [ + { + "id": "custom.width", + "value": 195 + } + ] + }, 
+ { + "matcher": { + "id": "byName", + "options": "Domains" + }, + "properties": [ + { + "id": "custom.width", + "value": 668 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 11, + "x": 11, + "y": 26 + }, + "id": 17, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "topk(15, sum(count_over_time( ({job=\"$job\"} | pattern \"<_> <_> <_> <_> <_> <_> <_> <_> <_>\" |~ \"NXDOMAIN\" |~ \"$domain\" |~ \"$ip\" |~ \"$custom\" )[$__range] )) by (qname))", + "instant": true, + "range": false, + "refId": "A" + } + ], + "title": "Top Domain Names - NXDOMAIN", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Value #A": "Hits", + "domain": "Domain Name", + "qname": "Domains", + "queryip": "IP" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Hits" + } + ] + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "qname" + }, + "properties": [ + { + "id": "custom.width", + "value": 553 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "IP" + }, + "properties": [ + { + "id": "custom.width", + "value": 503 + } + ] + } + ] + }, + "gridPos": { + "h": 16, + "w": 11, + "x": 0, + "y": 42 + }, + "id": 8, + 
"options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "loki", + "uid": "loki" + }, + "expr": "topk(15, sum(count_over_time( ({job=\"$job\"} | pattern \"<_> <_> <_> <_> <_> <_> <_> <_> <_>\" |~ \"$domain\" |~ \"$ip\" |~ \"$custom\" )[$__range] )) by (queryip))", + "instant": true, + "range": false, + "refId": "A" + } + ], + "title": "Top IP", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true + }, + "indexByName": {}, + "renameByName": { + "Value #A": "Hits", + "domain": "Domain Name", + "queryip": "IP" + } + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "desc": true, + "field": "Hits" + } + ] + } + } + ], + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "dnscollector", + "value": "dnscollector" + }, + "datasource": { + "type": "loki", + "uid": "loki" + }, + "definition": "label_values(job)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "job", + "options": [], + "query": "label_values(job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "dnsdist_v2", + "value": "dnsdist_v2" + }, + "datasource": { + "type": "loki", + "uid": "loki" + }, + "definition": "label_values(identity)", + "hide": 0, + "includeAll": false, + "multi": false, + "name": "identity", + "options": [], + "query": "label_values(identity)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "hide": 0, + "label": "Domain Name", + "name": "domain", + "options": [ + { + "selected": true, + "text": 
"", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "hide": 0, + "label": "Query IP", + "name": "ip", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + }, + { + "current": { + "selected": true, + "text": "", + "value": "" + }, + "hide": 0, + "label": "Custom Search", + "name": "custom", + "options": [ + { + "selected": true, + "text": "", + "value": "" + } + ], + "query": "", + "skipUrlSync": false, + "type": "textbox" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "DnsCollector - Loki", + "uid": "dlsw4AY7k", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/config/monitoring/grafana/dashboards/grafana_prometheus.json b/config/monitoring/grafana/dashboards/grafana_prometheus.json new file mode 100644 index 0000000..c8fbaae --- /dev/null +++ b/config/monitoring/grafana/dashboards/grafana_prometheus.json @@ -0,0 +1,1692 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "gnetId": 16630, + "graphTooltip": 0, + "id": 8, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-purple", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 
0, + "y": 0 + }, + "id": 79, + "maxDataPoints": 100, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^version$/", + "values": false + }, + "showPercentChange": false, + "text": { + "valueSize": 15 + }, + "textMode": "value", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "${prom_prefix}_build_info{job=~\"$job\"}", + "format": "table", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "DNScollector", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-orange", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 0 + }, + "id": 56, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "vertical", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "count(sum(${prom_prefix}_dnsmessages_total{job=~\"$job\", stream_id=~\"$stream_id\"}) by (stream_id))", + "format": "time_series", + "hide": false, 
+ "interval": "", + "intervalFactor": 1, + "legendFormat": " ", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "Total Streams", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-orange", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 4, + "y": 0 + }, + "id": 40, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "value_and_name", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (${prom_prefix}_throughput_ops{job=~\"$job\", stream_id=~\"$stream_id\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": " ", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "Throughput", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "light-orange", + "value": null + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 6, + "y": 0 + }, + "id": 78, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + 
"orientation": "auto", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (${prom_prefix}_throughput_ops_max{job=~\"$job\", stream_id=~\"$stream_id\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "Maximum", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "Maximum", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 8, + "y": 0 + }, + "id": 77, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (${prom_prefix}_total_requesters_lru{job=~\"$job\", stream_id=~\"$stream_id\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": " ", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "Total Clients", + "type": "stat" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 11, + "y": 0 + }, + "id": 27, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (${prom_prefix}_total_domains_lru{job=~\"$job\", stream_id=~\"$stream_id\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": " ", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "Total Domains", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "value": null + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 14, + "y": 0 + }, + "id": 81, + "maxDataPoints": 100, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "horizontal", + "percentChangeColorMode": "standard", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "text": {}, + "textMode": "auto", + 
"wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (${prom_prefix}_bytes_total{job=~\"$job\", stream_id=~\"$stream_id\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": " ", + "range": true, + "refId": "A", + "step": 240 + } + ], + "title": "Total Bytes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 84, + "options": { + "legend": { + "calcs": [ + "mean", + "min", + "max" + ], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum (increase(${prom_prefix}_dnsmessages_total{job=~\"$job\",stream_id=~\"$stream_id\"}[5m]))", 
+ "interval": "", + "legendFormat": "Pkts [5m] rate", + "range": true, + "refId": "A" + } + ], + "title": "Packets [rate-5m]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 3 + }, + "id": 93, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (operation) (increase(${prom_prefix}_operations_total{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range])) != 0", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "__auto", + "refId": "A" + } + ], + "title": "DNS messages per operations", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 4, + "y": 3 + }, + "id": 74, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], 
+ "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (return_code) (increase(${prom_prefix}_rcodes_total{job=~\"$job\", stream_id=~\"$stream_id\", return_code!=\"-\"}[$__range])) != 0", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{query_type}}", + "refId": "A" + } + ], + "title": "Replies per return code", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 8, + "y": 3 + }, + "id": 70, + "options": { + "displayLabels": [ + "percent" + ], + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (query_type) (increase(${prom_prefix}_qtypes_total{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range])) != 0", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{query_type}}", + "refId": "A" + } + ], + "title": "Queries per type", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 13, + "y": 3 + }, + "id": 76, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (net_transport) (increase(${prom_prefix}_ipprotocol_total{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range])) != 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{net_transport}}", + "refId": "B" + } + ], + "title": "Transports", + "type": "piechart" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": 
{ + "h": 5, + "w": 8, + "x": 16, + "y": 6 + }, + "id": 92, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.2.6", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(20,sum(rate(${prom_prefix}_top_requesters{job=~\"$job\", stream_id=~\"$stream_id\"}[5m])) by (ip))", + "interval": "", + "legendFormat": "{{ip}}", + "range": true, + "refId": "A" + } + ], + "title": "Top Clients [rate-5m] ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [] + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "INET" + }, + "properties": [ + { + "id": "displayName", + "value": "IPv4" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 13, + "y": 7 + }, + "id": 75, + "options": { + "legend": { + "displayMode": "table", + "placement": "right", + "showLegend": true, + "values": [ + "percent" + ] + }, + "pieType": "donut", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.2.5", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum by (net_family) (increase(${prom_prefix}_ipversion_total{job=~\"$job\", stream_id=~\"$stream_id\", return_code != \"-\"}[$__range])) != 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{net_family}}", + "refId": "B" + } + ], + "title": "IP Protocols", + "type": "piechart" + }, + { 
+ "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "filterable": true, + "inspect": false + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Domains" + }, + "properties": [ + { + "id": "custom.width", + "value": 1040 + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 16, + "x": 0, + "y": 11 + }, + "id": 62, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "enablePagination": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 0, + "showHeader": false, + "sortBy": [] + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_noerror_domains{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "range": false, + "refId": "NOERROR" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_nonexistent_domains{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "NXDOMAIN" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_servfail_domains{job=~\"$job\", 
stream_id=~\"$stream_id\"}[$__range]))) != 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "SERVFAIL" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_suspicious{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "Suspicious" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(20, sum by(suffix) (increase(${prom_prefix}_top_tlds{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "TLDS" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "topk(20, sum by(ip) (increase(${prom_prefix}_top_requesters{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "__auto", + "refId": "CLIENTS" + } + ], + "title": "Top", + "transformations": [ + { + "id": "renameByRegex", + "options": { + "regex": "Value #NOERROR", + "renamePattern": "NoError Hits" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "Value #NXDOMAIN", + "renamePattern": "NonExistent Hits" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "Value #SERVFAIL", + "renamePattern": "ServFail Hits" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "domain", + "renamePattern": "Top Domains" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "Value #Suspicious", + "renamePattern": "Suspicious Hits" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": 
"Value #TLDS", + "renamePattern": "TLDs Hits" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "Value #CLIENTS", + "renamePattern": "Clients Hits" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "ip", + "renamePattern": "Top Query IPs" + } + }, + { + "id": "renameByRegex", + "options": { + "regex": "suffix", + "renamePattern": "Top TLDs" + } + }, + { + "id": "groupBy", + "options": { + "fields": { + "Clients Hits": { + "aggregations": [], + "operation": "groupby" + }, + "NoError Hits": { + "aggregations": [], + "operation": "groupby" + }, + "NonExistent Hits": { + "aggregations": [], + "operation": "groupby" + }, + "ServFail Hits": { + "aggregations": [], + "operation": "groupby" + }, + "Suspicious Hits": { + "aggregations": [], + "operation": "groupby" + }, + "TLDs Hits": { + "aggregations": [], + "operation": "groupby" + }, + "Top Domains": { + "aggregations": [], + "operation": "groupby" + }, + "Top Query IPs": { + "aggregations": [], + "operation": "groupby" + }, + "Top TLDs": { + "aggregations": [], + "operation": "groupby" + } + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 11, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 11 + }, + "id": 88, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(${prom_prefix}_qtypes_total{job=~\"$job\",stream_id=~\"$stream_id\"}[5m])) by (query_type)", + "interval": "", + "legendFormat": "{{ query_type }}", + "range": true, + "refId": "A" + } + ], + "title": "Packet by Qtype [rate-5m]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 11, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 17 + }, + "id": 90, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + 
"showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": true, + "expr": "sum(rate(${prom_prefix}_rcodes_total{job=~\"$job\", stream_id=~\"$stream_id\", return_code!=\"-\"}[5m])) by ( return_code )", + "interval": "", + "legendFormat": "{{ return_code }}", + "range": true, + "refId": "A" + } + ], + "title": "Packet by Return Code [rate-5m]", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "hide": 2, + "name": "prom_prefix", + "query": "dnscollector", + "skipUrlSync": false, + "type": "constant" + }, + { + "current": { + "selected": false, + "text": "dnscollector", + "value": "dnscollector" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(job)", + "hide": 0, + "includeAll": false, + "label": "Job", + "multi": false, + "name": "job", + "options": [], + "query": { + "query": "label_values(job)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/dnscollector.*/", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values({job=\"$job\"},stream_id)", + "hide": 0, + "includeAll": true, + "label": "Stream", + "multi": true, + "name": "stream_id", + "options": [], + "query": { + "query": "label_values({job=\"$job\"},stream_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "DNScollector - Prometheus", + "uid": "6uPErf6nz", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git 
a/config/monitoring/grafana/kustomization.yaml b/config/monitoring/grafana/kustomization.yaml index ea8fb6d..654caa6 100644 --- a/config/monitoring/grafana/kustomization.yaml +++ b/config/monitoring/grafana/kustomization.yaml @@ -15,6 +15,9 @@ configMapGenerator: - name: grafana-dashboards files: - dashboards/dns_observability.json + - dashboards/grafana_exporter.json + - dashboards/grafana_loki.json + - dashboards/grafana_prometheus.json - name: grafana-datasources files: - datasources/datasources.yaml From 166f332595ac5c1e87c94afd205ac9b81f85ed02 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Fri, 5 Dec 2025 16:09:59 -0600 Subject: [PATCH 06/33] chore: update configurations --- config/agent/dnscollector-config.yaml | 77 ++++------------------ config/agent/dnsdist-config.yaml | 27 +------- config/agent/manager.yaml | 29 --------- config/agent/vector-config.yaml | 92 +++++++++++++++++++-------- 4 files changed, 77 insertions(+), 148 deletions(-) diff --git a/config/agent/dnscollector-config.yaml b/config/agent/dnscollector-config.yaml index d8e9595..5db4407 100644 --- a/config/agent/dnscollector-config.yaml +++ b/config/agent/dnscollector-config.yaml @@ -10,11 +10,6 @@ global: max-size: 10 max-backups: 10 server-identity: "dns-collector" - pid-file: "" - text-format: "timestamp-rfc3339ns identity operation rcode queryip queryport family protocol length-unit qname qtype latency" - text-format-delimiter: " " - text-format-boundary: "\"" - text-jinja: "" worker: interval-monitor: 10 buffer-size: 8192 @@ -91,74 +86,24 @@ pipelines: measure-latency: true unanswered-queries: true queries-timeout: 2 - geoip: - enable: true - mmdb-country-file: /mmdb/GeoLite2-Country.mmdb - mmdb-city-file: /mmdb/GeoLite2-City.mmdb - mmdb-asn-file: /mmdb/GeoLite2-ASN.mmdb - lookup-ecs: true routing-policy: - forward: [ console, prometheus, loki ] + forward: [ vector ] dropped: [ ] - - name: console - stdout: - mode: json - - - name: prometheus - prometheus: - listen-ip: 0.0.0.0 - 
listen-port: 8084 - basic-auth-enable: false - basic-auth-login: admin - basic-auth-pwd: changeme - tls-support: false - tls-mutual: false - tls-min-version: 1.2 - cert-file: "" - key-file: "" - prometheus-prefix: "dnscollector" - top-n: 10 - chan-buffer-size: 0 - histogram-metrics-enabled: true - requesters-metrics-enabled: true - domains-metrics-enabled: true - noerror-metrics-enabled: true - servfail-metrics-enabled: true - nonexistent-metrics-enabled: true - timeout-metrics-enabled: true - prometheus-labels: ["stream_id"] - requesters-cache-size: 250000 - requesters-cache-ttl: 3600 - domains-cache-size: 500000 - domains-cache-ttl: 3600 - noerror-domains-cache-size: 100000 - noerror-domains-cache-ttl: 3600 - servfail-domains-cache-size: 10000 - servfail-domains-cache-ttl: 3600 - nonexistent-domains-cache-size: 10000 - nonexistent-domains-cache-ttl: 3600 - default-domains-cache-size: 1000 - default-domains-cache-ttl: 3600 - - - name: loki - lokiclient: - server-url: "http://loki.dns-monitoring.svc:3100/loki/api/v1/push" - job-name: "dnscollector" - mode: "json" - flush-interval: 5 - batch-size: 1048576 + - name: vector + tcpclient: + transport: tcp + remote-address: 127.0.0.1 + remote-port: 6001 + connect-timeout: 5 retry-interval: 10 - text-format: "" - proxy-url: "" + flush-interval: 30 tls-insecure: false tls-min-version: 1.2 ca-file: "" cert-file: "" key-file: "" - basic-auth-login: "" - basic-auth-pwd: "" - basic-auth-pwd-file: "" - tenant-id: "" - relabel-configs: [] + mode: json + text-format: "" + buffer-size: 100 chan-buffer-size: 0 \ No newline at end of file diff --git a/config/agent/dnsdist-config.yaml b/config/agent/dnsdist-config.yaml index 90f45ce..81254f6 100644 --- a/config/agent/dnsdist-config.yaml +++ b/config/agent/dnsdist-config.yaml @@ -37,10 +37,6 @@ remote_logging: transport: tcp address: "127.0.0.1:6000" connection_count: 2 - - name: vector_remote_logging - transport: tcp - address: "127.0.0.1:6001" - connection_count: 2 query_rules: - 
name: "log all queries" @@ -50,13 +46,6 @@ query_rules: type: DnstapLog identity: dnsdist_v2 logger_name: remote_logging - - name: "log all queries to vector" - selector: - type: All - action: - type: DnstapLog - identity: dnsdist_v2 - logger_name: vector_remote_logging - name: "default rule" selector: @@ -73,13 +62,6 @@ response_rules: type: DnstapLog identity: dnsdist_v2 logger_name: remote_logging - - name: log all responses to vector - selector: - type: All - action: - type: DnstapLog - identity: dnsdist_v2 - logger_name: vector_remote_logging cache_hit_response_rules: - name: log all responses from cache @@ -88,11 +70,4 @@ cache_hit_response_rules: action: type: DnstapLog identity: dnsdist_v2_cache - logger_name: remote_logging - - name: log all responses from cache to vector - selector: - type: All - action: - type: DnstapLog - identity: dnsdist_v2_cache - logger_name: vector_remote_logging \ No newline at end of file + logger_name: remote_logging \ No newline at end of file diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index a52f437..f4908f7 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -69,30 +69,6 @@ spec: mountPath: /lmdb - name: pdns-shared mountPath: /run/pdns - - name: init-geoip-data - image: ghcr.io/maxmind/geoipupdate:v7.1.1 - imagePullPolicy: IfNotPresent - env: - - name: GEOIPUPDATE_DB_DIR - value: /mmdb - - name: GEOIPUPDATE_EDITION_IDS - value: GeoLite2-ASN GeoLite2-City GeoLite2-Country - - name: GEOIPUPDATE_ACCOUNT_ID - valueFrom: - secretKeyRef: - name: geoip-credentials - key: account_id - - name: GEOIPUPDATE_LICENSE_KEY - valueFrom: - secretKeyRef: - name: geoip-credentials - key: license_key - securityContext: - runAsUser: 0 - runAsGroup: 0 - volumeMounts: - - name: dnscollector-mmdb - mountPath: /mmdb containers: - command: - /manager @@ -296,9 +272,6 @@ spec: - name: dnscollector-config mountPath: /etc/dnscollector readOnly: true - - name: dnscollector-mmdb - mountPath: /mmdb - readOnly: 
true securityContext: allowPrivilegeEscalation: false readOnlyRootFilesystem: true @@ -349,8 +322,6 @@ spec: - name: dnscollector-config configMap: name: dnscollector-config - - name: dnscollector-mmdb - emptyDir: {} - name: vector-config configMap: name: vector-config diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index e0f9d3d..b873033 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -5,9 +5,11 @@ api: sources: dnstap_tcp: - type: dnstap + type: socket address: 0.0.0.0:6001 mode: tcp + decoding: + codec: json internal_metrics: type: internal_metrics @@ -17,29 +19,36 @@ transforms: inputs: - dnstap_tcp source: | - .message_type = to_string(.messageType) ?? to_string(.message_type) ?? "unknown" - .proto = to_string(.socketProtocol) ?? to_string(.protocol) ?? "unknown" - .family = to_string(.socketFamily) ?? to_string(.network_family) ?? "unknown" - .opcode = to_string(.requestData.header.opcode) ?? to_string(.responseData.header.opcode) ?? to_string(.opcode) ?? "unknown" - .rcode = to_string(.responseData.rcodeName) ?? to_string(.response_code) ?? to_string(.responseData.header.rcode) ?? "unknown" - .server_id = to_string(.serverId) ?? "unknown" - .qname = to_string(.question[0].domainName) ?? to_string(.query_name) ?? "unknown" - .qtype = to_string(.question[0].questionType) ?? to_string(.question[0].questionTypeId) ?? to_string(.query_type) ?? "unknown" + # Reuse dns-collector enrichment; only normalize for metrics + .message_type = upcase(to_string(.dnstap.operation) ?? "UNKNOWN") + .server_id = to_string(.dnstap.identity) ?? "unknown" + .stream_id = .server_id + .proto = to_string(.network.protocol) ?? "unknown" + .family = to_string(.network.family) ?? "unknown" + .opcode = to_string(.dns.opcode) ?? "unknown" + .qname = downcase(to_string(.dns.qname) ?? "unknown") + .qtype = to_string(.dns.qtype) ?? "unknown" + .rcode = upcase(to_string(.dns.rcode) ?? 
"UNKNOWN") + .zone = downcase(to_string(.publicsuffix."etld+1") ?? .qname) .hit = 1 - if exists(.requestData.time) && exists(.time) { - req, err_req = to_int(.requestData.time) - resp, err_resp = to_int(.time) - if !is_null(err_req) { req = null } - if !is_null(err_resp) { resp = null } - if is_integer(req) && is_integer(resp) { - diff = to_int!(resp) - to_int!(req) - if diff >= 0 { - .latency_ns = diff - .latency_seconds = to_float(.latency_ns) / 1000000000.0 - } + + if exists(.dnstap.latency) { + lat, err = to_float(.dnstap.latency) + if err == null { + .latency_seconds = lat } } + # Cache heuristic: dnsdist identity naming (e.g., *_cache) + .cache_hit = contains(.server_id, "_cache") + + # DNSSEC status via AD bit and SERVFAIL heuristic + ad = to_bool(.dns.flags.ad) ?? false + status = "insecure" + if ad { status = "secure" } + if .rcode == "SERVFAIL" { status = "bogus" } + .dnssec_status = status + dnstap_metrics: type: log_to_metric inputs: @@ -48,45 +57,74 @@ transforms: - type: counter name: dns_queries_total field: hit - condition: '.message_type == "ClientQuery"' + condition: '.message_type == "CLIENT_QUERY"' tags: family: "{{family}}" proto: "{{proto}}" opcode: "{{opcode}}" qtype: "{{qtype}}" + zone: "{{zone}}" + server: "{{server_id}}" + stream: "{{stream_id}}" - type: counter name: dns_responses_total field: hit - condition: '.message_type == "ClientResponse"' + condition: '.message_type == "CLIENT_RESPONSE"' tags: family: "{{family}}" proto: "{{proto}}" + qtype: "{{qtype}}" rcode: "{{rcode}}" server: "{{server_id}}" + zone: "{{zone}}" + stream: "{{stream_id}}" - type: counter - name: dns_nxdomain_total + name: dns_cache_hits_total field: hit - condition: '.message_type == "ClientResponse" && .rcode == "NXDomain"' + condition: '.message_type == "CLIENT_RESPONSE" && .cache_hit == true' tags: proto: "{{proto}}" server: "{{server_id}}" + zone: "{{zone}}" + qtype: "{{qtype}}" + - type: counter + name: dns_dnssec_status_total + field: hit + condition: 
'.message_type == "CLIENT_RESPONSE"' + tags: + proto: "{{proto}}" + server: "{{server_id}}" + zone: "{{zone}}" + dnssec_status: "{{dnssec_status}}" - type: counter name: dns_servfail_total field: hit - condition: '.message_type == "ClientResponse" && .rcode == "ServFail"' + condition: '.message_type == "CLIENT_RESPONSE" && .rcode == "SERVFAIL"' tags: proto: "{{proto}}" server: "{{server_id}}" + zone: "{{zone}}" + qtype: "{{qtype}}" + - type: counter + name: dns_nxdomain_total + field: hit + condition: '.message_type == "CLIENT_RESPONSE" && .rcode == "NXDOMAIN"' + tags: + proto: "{{proto}}" + server: "{{server_id}}" + zone: "{{zone}}" + qtype: "{{qtype}}" - type: histogram name: dns_response_latency_seconds field: latency_seconds - condition: '.message_type == "ClientResponse" && exists(.latency_seconds)' + condition: '.message_type == "CLIENT_RESPONSE" && exists(.latency_seconds)' bins: [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5] tags: proto: "{{proto}}" qtype: "{{qtype}}" rcode: "{{rcode}}" server: "{{server_id}}" + zone: "{{zone}}" sinks: console: @@ -107,7 +145,7 @@ sinks: loki: type: loki inputs: - - dnstap_enriched + - dnstap_tcp endpoint: http://loki.dns-monitoring.svc:3100 out_of_order_action: accept encoding: From 786c5df7df7ac68b479003b5cc06f5ac354fbc40 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Fri, 5 Dec 2025 16:15:43 -0600 Subject: [PATCH 07/33] feat: add DNS Workbench dashboard --- config/agent/kustomization.yaml | 6 - config/agent/pdns-service.yaml | 4 - config/agent/vector-config.yaml | 4 +- .../grafana/dashboards/dns_observability.json | 153 -- .../grafana/dashboards/dns_workbench.json | 480 +++++ .../grafana/dashboards/grafana_exporter.json | 922 --------- .../grafana/dashboards/grafana_loki.json | 833 -------- .../dashboards/grafana_prometheus.json | 1692 ----------------- config/monitoring/grafana/kustomization.yaml | 5 +- .../prometheus/config/prometheus.yml | 9 - 10 files changed, 483 insertions(+), 3625 deletions(-) 
delete mode 100644 config/monitoring/grafana/dashboards/dns_observability.json create mode 100644 config/monitoring/grafana/dashboards/dns_workbench.json delete mode 100644 config/monitoring/grafana/dashboards/grafana_exporter.json delete mode 100644 config/monitoring/grafana/dashboards/grafana_loki.json delete mode 100644 config/monitoring/grafana/dashboards/grafana_prometheus.json diff --git a/config/agent/kustomization.yaml b/config/agent/kustomization.yaml index 0c8074c..fa52af3 100644 --- a/config/agent/kustomization.yaml +++ b/config/agent/kustomization.yaml @@ -31,12 +31,6 @@ configMapGenerator: files: - vector-config.yaml -secretGenerator: - - name: geoip-credentials - literals: - - account_id=changeme - - license_key=changeme - images: - name: ghcr.io/datum-cloud/dns-operator newName: ghcr.io/datum-cloud/dns-operator diff --git a/config/agent/pdns-service.yaml b/config/agent/pdns-service.yaml index 5093c2b..f2edc98 100644 --- a/config/agent/pdns-service.yaml +++ b/config/agent/pdns-service.yaml @@ -23,10 +23,6 @@ spec: targetPort: 8083 protocol: TCP - name: dnscol-metrics - port: 8084 - targetPort: 8084 - protocol: TCP - - name: dnscolt-metrics port: 9165 targetPort: 9165 protocol: TCP diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index b873033..ed8dac5 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -151,8 +151,8 @@ sinks: encoding: codec: json labels: - job: dnsdist - app: dnsdist + job: vector + app: vector component: vector server: "{{server_id}}" proto: "{{proto}}" diff --git a/config/monitoring/grafana/dashboards/dns_observability.json b/config/monitoring/grafana/dashboards/dns_observability.json deleted file mode 100644 index 8fa48ae..0000000 --- a/config/monitoring/grafana/dashboards/dns_observability.json +++ /dev/null @@ -1,153 +0,0 @@ -{ - "id": null, - "title": "DNS Observability", - "tags": ["dnsdist", "vector", "dnscollector"], - "timezone": "browser", - "schemaVersion": 
39, - "version": 1, - "refresh": "30s", - "time": { - "from": "now-15m", - "to": "now" - }, - "templating": { "list": [] }, - "panels": [ - { - "id": 1, - "type": "timeseries", - "title": "DNS Queries per Second", - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "sum by (proto) (rate(dns_queries_total{job=\"vector\"}[1m])) or on() vector(0)", - "legendFormat": "{{proto}}", - "refId": "A" - } - ], - "options": { - "legend": { "showLegend": true } - }, - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 } - }, - { - "id": 2, - "type": "timeseries", - "title": "Responses by RCODE", - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "sum by (rcode) (rate(dns_responses_total{job=\"vector\"}[5m])) or on() vector(0)", - "legendFormat": "{{rcode}}", - "refId": "A" - } - ], - "options": { - "legend": { "showLegend": true } - }, - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 } - }, - { - "id": 3, - "type": "timeseries", - "title": "NXDOMAIN / SERVFAIL rate", - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "sum(rate(dns_nxdomain_total{job=\"vector\"}[5m])) or on() vector(0)", - "legendFormat": "nxdomain", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "sum(rate(dns_servfail_total{job=\"vector\"}[5m])) or on() vector(0)", - "legendFormat": "servfail", - "refId": "B" - } - ], - "gridPos": { "h": 6, "w": 12, "x": 0, "y": 8 } - }, - { - "id": 4, - "type": "timeseries", - "title": "Latency (p50/p95/p99)", - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": 
"histogram_quantile(0.50, sum(rate(dns_response_latency_seconds_bucket{job=\"vector\"}[5m])) by (le))", - "legendFormat": "p50", - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.95, sum(rate(dns_response_latency_seconds_bucket{job=\"vector\"}[5m])) by (le))", - "legendFormat": "p95", - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "expr": "histogram_quantile(0.99, sum(rate(dns_response_latency_seconds_bucket{job=\"vector\"}[5m])) by (le))", - "legendFormat": "p99", - "refId": "C" - } - ], - "gridPos": { "h": 6, "w": 12, "x": 12, "y": 8 } - }, - { - "id": 5, - "type": "logs", - "title": "DNS Logs (Loki)", - "datasource": { - "type": "loki", - "uid": "loki" - }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "{app=\"dnsdist\"} | json", - "refId": "A" - } - ], - "gridPos": { "h": 8, "w": 24, "x": 0, "y": 14 } - } - ] -} \ No newline at end of file diff --git a/config/monitoring/grafana/dashboards/dns_workbench.json b/config/monitoring/grafana/dashboards/dns_workbench.json new file mode 100644 index 0000000..ca10d9e --- /dev/null +++ b/config/monitoring/grafana/dashboards/dns_workbench.json @@ -0,0 +1,480 @@ +{ + "title": "DNS Workbench", + "schemaVersion": 39, + "version": 1, + "refresh": "30s", + "tags": [ + "dns", + "dnstap", + "workbench" + ], + "timezone": "", + "panels": [ + { + "type": "stat", + "title": "Total Queries", + "id": 1, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(increase(dns_queries_total[$__range]))" + } + ], + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + } + } + }, + { + "type": "stat", + "title": "Avg Latency", + "id": 2, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, 
+ "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "1000 * (sum(rate(dns_response_latency_seconds_sum[5m])) / sum(rate(dns_response_latency_seconds_count[5m])))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ms" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + } + }, + { + "type": "stat", + "title": "Cache Hit Rate", + "id": 3, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(dns_cache_hits_total[5m])) / sum(rate(dns_responses_total[5m]))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + } + }, + { + "type": "stat", + "title": "NXDOMAIN Rate", + "id": 4, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(dns_responses_total{rcode=\"NXDOMAIN\"}[5m])) / sum(rate(dns_responses_total[5m]))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "percentunit" + }, + "overrides": [] + }, + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + } + }, + { + "type": "bargauge", + "title": "Query Type Distribution", + "id": 5, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 4 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(dns_queries_total[5m])) by (qtype)" + } + ], + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", 
+ "values": false + } + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + } + }, + { + "type": "bargauge", + "title": "Response Code Distribution", + "id": 6, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 4 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(dns_responses_total[5m])) by (rcode)" + } + ], + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + }, + "fieldConfig": { + "defaults": { + "unit": "short" + }, + "overrides": [] + } + }, + { + "type": "stat", + "title": "DNSSEC Validation", + "id": 7, + "gridPos": { + "h": 4, + "w": 12, + "x": 0, + "y": 12 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(dns_dnssec_status_total[5m])) by (dnssec_status)" + } + ], + "options": { + "colorMode": "value", + "graphMode": "none", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "value_and_name" + } + }, + { + "type": "table", + "title": "Top Zones", + "id": 8, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "topk(10, sum(rate(dns_queries_total[5m])) by (zone))" + } + ], + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Value" + } + ] + } + }, + { + "type": "bargauge", + "title": "Query Source Distribution", + "id": 9, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "sum(rate(dns_queries_total[5m])) by (stream)" + } + ], + "options": { + "displayMode": "lcd", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + } + }, + { + 
"type": "stat", + "title": "p50 Latency", + "id": 10, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 24 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "1000 * histogram_quantile(0.5, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ms" + } + }, + "options": { + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + } + } + }, + { + "type": "stat", + "title": "p75 Latency", + "id": 11, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 24 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "1000 * histogram_quantile(0.75, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ms" + } + } + }, + { + "type": "stat", + "title": "p90 Latency", + "id": 12, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 24 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "1000 * histogram_quantile(0.9, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ms" + } + } + }, + { + "type": "stat", + "title": "p95 Latency", + "id": 13, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 24 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "1000 * histogram_quantile(0.95, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ms" + } + } + }, + { + "type": "stat", + "title": "p99 Latency", + "id": 14, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 24 + }, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "targets": [ + { + "refId": "A", + "expr": "1000 * histogram_quantile(0.99, 
sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + } + ], + "fieldConfig": { + "defaults": { + "unit": "ms" + } + } + } + ], + "templating": { + "list": [ + { + "name": "DS_PROMETHEUS", + "type": "datasource", + "label": "Prometheus", + "query": "prometheus", + "current": { + "text": "Prometheus", + "value": "Prometheus" + } + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h" + ] + } +} diff --git a/config/monitoring/grafana/dashboards/grafana_exporter.json b/config/monitoring/grafana/dashboards/grafana_exporter.json deleted file mode 100644 index d1f870c..0000000 --- a/config/monitoring/grafana/dashboards/grafana_exporter.json +++ /dev/null @@ -1,922 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "grafana", - "uid": "-- Grafana --" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 4, - "links": [], - "panels": [ - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 9, - "panels": [], - "title": "Go", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": 
"A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 0, - "y": 1 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "go_goroutines{job=~\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "dnscollector", - "range": true, - "refId": "A" - } - ], - "title": "Goroutines", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 8, - "y": 1 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - 
"placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "go_memstats_sys_bytes{job=~\"$job\"}", - "format": "time_series", - "intervalFactor": 1, - "legendFormat": "dnscollector", - "range": true, - "refId": "A" - } - ], - "title": "Total Used Memory", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 8, - "x": 16, - "y": 1 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "multi", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "expr": "rate(process_cpu_seconds_total{job=\"$job\"}[2m])", - "legendFormat": "dnscollector", - "range": true, - "refId": "A" - } - ], - "title": "Process cpu", - 
"type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 7, - "panels": [], - "title": "Workers", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "__systemRef": "hideSeriesFrom", - "matcher": { - "id": "byNames", - "options": { - "mode": "exclude", - "names": [ - "tap" - ], - "prefix": "All except:", - "readOnly": true - } - }, - "properties": [ - { - "id": "custom.hideFrom", - "value": { - "legend": false, - "tooltip": false, - "viz": true - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 11 - }, - "id": 3, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "increase(dnscollector_exporter_worker_ingress_traffic_total{job=~\"$job\"}[$__rate_interval])", - "fullMetaSearch": false, - 
"includeNullMetadata": false, - "instant": false, - "legendFormat": "{{worker}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Worker - Ingress traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 11 - }, - "id": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "increase(dnscollector_exporter_worker_egress_traffic_total{job=~\"$job\"}[$__rate_interval])", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{worker}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Worker - Egress traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 11 - }, - "id": 5, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "increase(dnscollector_exporter_worker_discarded_traffic_total{job=~\"$job\"}[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{worker}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Worker - Discarded", - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 8, - "panels": [], - "title": "Policies", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 20 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "increase(dnscollector_exporter_policy_forwarded_total{job=~\"$job\"}[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{worker}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Policy - Forwarded", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, 
- "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 20 - }, - "id": 4, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "maxHeight": 600, - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "increase(dnscollector_exporter_policy_dropped_total{job=~\"$job\"}[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{worker}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Policy - Dropped", - "type": "timeseries" - } - ], - "refresh": "", - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": false, - "text": "dnscollector_exporter", - "value": "dnscollector_exporter" - }, - "description": "", - "hide": 0, - "label": "job", - "name": "job", - "options": [ - { - "selected": true, - "text": "dnscollector_exporter", - "value": "dnscollector_exporter" - } - ], - "query": "dnscollector_exporter", - "skipUrlSync": false, - "type": "textbox" - } - ] - }, - "time": { - "from": "now-6h", - "to": "now" - }, - "timepicker": {}, - "timezone": "browser", - "title": "DNScollector - Exporter", - "uid": "bdo8oaa6fq7lse", - "version": 2, - "weekStart": "" -} \ No newline at end of file diff --git a/config/monitoring/grafana/dashboards/grafana_loki.json 
b/config/monitoring/grafana/dashboards/grafana_loki.json deleted file mode 100644 index 3d4c078..0000000 --- a/config/monitoring/grafana/dashboards/grafana_loki.json +++ /dev/null @@ -1,833 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 15415, - "graphTooltip": 0, - "id": 7, - "links": [], - "liveNow": false, - "panels": [ - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 10, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A" - } - ], - "title": "Frequency", - "type": "row" - }, - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "bars", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - 
"gridPos": { - "h": 6, - "w": 19, - "x": 0, - "y": 1 - }, - "id": 6, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.2.6", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "rate( ({job=\"$job\", identity=\"$identity\"} |~ \"$domain\" |~ \"$ip\" |~ \"$custom\")[5s] )", - "hide": false, - "instant": false, - "legendFormat": "", - "range": true, - "refId": "B" - } - ], - "title": "Frequency", - "type": "timeseries" - }, - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 3, - "x": 19, - "y": 1 - }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "sum(count_over_time({job=\"$job\", identity=\"$identity\"} |= \"$domain\" |= \"$ip\" |= \"$custom\" [$__range]))", - "instant": false, - "range": true, - "refId": "A" - } - ], - "title": "Total Events", - "type": "stat" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "id": 12, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A" - } - ], - "title": "Logs", - "type": "row" - }, - { - 
"datasource": { - "type": "loki", - "uid": "loki" - }, - "gridPos": { - "h": 17, - "w": 22, - "x": 0, - "y": 8 - }, - "id": 2, - "options": { - "dedupStrategy": "none", - "enableLogDetails": true, - "prettifyLogMessage": false, - "showCommonLabels": false, - "showLabels": false, - "showTime": false, - "sortOrder": "Descending", - "wrapLogMessage": false - }, - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "{job=\"$job\", identity=\"$identity\"} |~ \"$domain\" |~ \"$ip\" |~ \"$custom\"", - "instant": false, - "range": true, - "refId": "A" - } - ], - "title": "DNS Logs", - "type": "logs" - }, - { - "collapsed": false, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 25 - }, - "id": 14, - "panels": [], - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "refId": "A" - } - ], - "title": "Top", - "type": "row" - }, - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "qname" - }, - "properties": [ - { - "id": "custom.width", - "value": 553 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "IP" - }, - "properties": [ - { - "id": "custom.width", - "value": 195 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Domains" - }, - "properties": [ - { - "id": "custom.width", - "value": 668 - } - ] - } - ] - }, - "gridPos": { - "h": 16, - "w": 11, - "x": 0, - "y": 26 - }, - "id": 16, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - 
"reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "topk(15, sum(count_over_time( ({job=\"$job\"} | pattern \"<_> <_> <_> <_> <_> <_> <_> <_> <_>\" |~ \"NOERROR\" |~ \"$domain\" |~ \"$ip\" |~ \"$custom\" )[$__range] )) by (qname))", - "instant": true, - "range": false, - "refId": "A" - } - ], - "title": "Top Domain Names - NOERROR", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": {}, - "renameByName": { - "Value #A": "Hits", - "domain": "Domain Name", - "qname": "Domains", - "queryip": "IP" - } - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "desc": true, - "field": "Hits" - } - ] - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "qname" - }, - "properties": [ - { - "id": "custom.width", - "value": 553 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "IP" - }, - "properties": [ - { - "id": "custom.width", - "value": 195 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Domains" - }, - "properties": [ - { - "id": "custom.width", - "value": 668 - } - ] - } - ] - }, - "gridPos": { - "h": 16, - "w": 11, - "x": 11, - "y": 26 - }, - "id": 17, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [] - }, - 
"pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "topk(15, sum(count_over_time( ({job=\"$job\"} | pattern \"<_> <_> <_> <_> <_> <_> <_> <_> <_>\" |~ \"NXDOMAIN\" |~ \"$domain\" |~ \"$ip\" |~ \"$custom\" )[$__range] )) by (qname))", - "instant": true, - "range": false, - "refId": "A" - } - ], - "title": "Top Domain Names - NXDOMAIN", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": {}, - "renameByName": { - "Value #A": "Hits", - "domain": "Domain Name", - "qname": "Domains", - "queryip": "IP" - } - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "desc": true, - "field": "Hits" - } - ] - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "qname" - }, - "properties": [ - { - "id": "custom.width", - "value": 553 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "IP" - }, - "properties": [ - { - "id": "custom.width", - "value": 503 - } - ] - } - ] - }, - "gridPos": { - "h": 16, - "w": 11, - "x": 0, - "y": 42 - }, - "id": 8, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "loki", - "uid": "loki" - }, - "expr": "topk(15, sum(count_over_time( ({job=\"$job\"} | pattern \"<_> <_> <_> <_> <_> <_> <_> <_> <_>\" |~ \"$domain\" |~ 
\"$ip\" |~ \"$custom\" )[$__range] )) by (queryip))", - "instant": true, - "range": false, - "refId": "A" - } - ], - "title": "Top IP", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true - }, - "indexByName": {}, - "renameByName": { - "Value #A": "Hits", - "domain": "Domain Name", - "queryip": "IP" - } - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "desc": true, - "field": "Hits" - } - ] - } - } - ], - "type": "table" - } - ], - "refresh": "10s", - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ - { - "current": { - "selected": true, - "text": "dnscollector", - "value": "dnscollector" - }, - "datasource": { - "type": "loki", - "uid": "loki" - }, - "definition": "label_values(job)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "job", - "options": [], - "query": "label_values(job)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": { - "selected": false, - "text": "dnsdist_v2", - "value": "dnsdist_v2" - }, - "datasource": { - "type": "loki", - "uid": "loki" - }, - "definition": "label_values(identity)", - "hide": 0, - "includeAll": false, - "multi": false, - "name": "identity", - "options": [], - "query": "label_values(identity)", - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": { - "selected": true, - "text": "", - "value": "" - }, - "hide": 0, - "label": "Domain Name", - "name": "domain", - "options": [ - { - "selected": true, - "text": "", - "value": "" - } - ], - "query": "", - "skipUrlSync": false, - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": "", - "value": "" - }, - "hide": 0, - "label": "Query IP", - "name": "ip", - "options": [ - { - "selected": true, - "text": "", - "value": "" - } - ], - "query": "", - "skipUrlSync": false, - "type": "textbox" - }, - { - "current": { - "selected": true, - "text": "", 
- "value": "" - }, - "hide": 0, - "label": "Custom Search", - "name": "custom", - "options": [ - { - "selected": true, - "text": "", - "value": "" - } - ], - "query": "", - "skipUrlSync": false, - "type": "textbox" - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "DnsCollector - Loki", - "uid": "dlsw4AY7k", - "version": 1, - "weekStart": "" -} \ No newline at end of file diff --git a/config/monitoring/grafana/dashboards/grafana_prometheus.json b/config/monitoring/grafana/dashboards/grafana_prometheus.json deleted file mode 100644 index c8fbaae..0000000 --- a/config/monitoring/grafana/dashboards/grafana_prometheus.json +++ /dev/null @@ -1,1692 +0,0 @@ -{ - "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": { - "type": "datasource", - "uid": "grafana" - }, - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] - }, - "editable": true, - "fiscalYearStartMonth": 0, - "gnetId": 16630, - "graphTooltip": 0, - "id": 8, - "links": [], - "liveNow": false, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "light-purple", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 0, - "y": 0 - }, - "id": 79, - "maxDataPoints": 100, - "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "/^version$/", - "values": false - }, - "showPercentChange": false, - "text": { - "valueSize": 15 - }, - "textMode": "value", - 
"wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "${prom_prefix}_build_info{job=~\"$job\"}", - "format": "table", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "DNScollector", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "light-orange", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 2, - "y": 0 - }, - "id": 56, - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "vertical", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "count(sum(${prom_prefix}_dnsmessages_total{job=~\"$job\", stream_id=~\"$stream_id\"}) by (stream_id))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": " ", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Total Streams", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [], - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "dark-orange", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 4, - "y": 0 - }, - "id": 40, - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "value_and_name", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum (${prom_prefix}_throughput_ops{job=~\"$job\", stream_id=~\"$stream_id\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": " ", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Throughput", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "light-orange", - "value": null - } - ] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 6, - "y": 0 - }, - "id": 78, - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "auto", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - 
"exemplar": true, - "expr": "sum (${prom_prefix}_throughput_ops_max{job=~\"$job\", stream_id=~\"$stream_id\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "Maximum", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Maximum", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 8, - "y": 0 - }, - "id": 77, - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum (${prom_prefix}_total_requesters_lru{job=~\"$job\", stream_id=~\"$stream_id\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": " ", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Total Clients", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 3, - "x": 11, - "y": 0 - }, - "id": 27, - 
"maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum (${prom_prefix}_total_domains_lru{job=~\"$job\", stream_id=~\"$stream_id\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": " ", - "range": true, - "refId": "A", - "step": 240 - } - ], - "title": "Total Domains", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "value": null - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 3, - "w": 2, - "x": 14, - "y": 0 - }, - "id": 81, - "maxDataPoints": 100, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "center", - "orientation": "horizontal", - "percentChangeColorMode": "standard", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showPercentChange": false, - "text": {}, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum (${prom_prefix}_bytes_total{job=~\"$job\", stream_id=~\"$stream_id\"})", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": " ", - "range": 
true, - "refId": "A", - "step": 240 - } - ], - "title": "Total Bytes", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 0 - }, - "id": 84, - "options": { - "legend": { - "calcs": [ - "mean", - "min", - "max" - ], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.2.6", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum (increase(${prom_prefix}_dnsmessages_total{job=~\"$job\",stream_id=~\"$stream_id\"}[5m]))", - "interval": "", - "legendFormat": "Pkts [5m] rate", - "range": true, - "refId": "A" - } - ], - "title": "Packets [rate-5m]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false 
- } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 0, - "y": 3 - }, - "id": 93, - "options": { - "displayLabels": [ - "percent" - ], - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "values": [ - "percent" - ] - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum by (operation) (increase(${prom_prefix}_operations_total{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range])) != 0", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "__auto", - "refId": "A" - } - ], - "title": "DNS messages per operations", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 4, - "y": 3 - }, - "id": 74, - "options": { - "displayLabels": [ - "percent" - ], - "legend": { - "displayMode": "list", - "placement": "bottom", - "showLegend": true, - "values": [ - "percent" - ] - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum by (return_code) (increase(${prom_prefix}_rcodes_total{job=~\"$job\", stream_id=~\"$stream_id\", 
return_code!=\"-\"}[$__range])) != 0", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "{{query_type}}", - "refId": "A" - } - ], - "title": "Replies per return code", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [], - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 5, - "x": 8, - "y": 3 - }, - "id": 70, - "options": { - "displayLabels": [ - "percent" - ], - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true, - "values": [ - "percent" - ] - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum by (query_type) (increase(${prom_prefix}_qtypes_total{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range])) != 0", - "format": "time_series", - "instant": false, - "interval": "", - "legendFormat": "{{query_type}}", - "refId": "A" - } - ], - "title": "Queries per type", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [] - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 13, - "y": 3 - }, - "id": 76, - "options": { - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true, - "values": [ - "percent" - ] - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - 
"lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum by (net_transport) (increase(${prom_prefix}_ipprotocol_total{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range])) != 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{net_transport}}", - "refId": "B" - } - ], - "title": "Transports", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 6 - }, - "id": 92, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.2.6", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": 
"code", - "exemplar": true, - "expr": "topk(20,sum(rate(${prom_prefix}_top_requesters{job=~\"$job\", stream_id=~\"$stream_id\"}[5m])) by (ip))", - "interval": "", - "legendFormat": "{{ip}}", - "range": true, - "refId": "A" - } - ], - "title": "Top Clients [rate-5m] ", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "mappings": [] - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "INET" - }, - "properties": [ - { - "id": "displayName", - "value": "IPv4" - } - ] - } - ] - }, - "gridPos": { - "h": 4, - "w": 3, - "x": 13, - "y": 7 - }, - "id": 75, - "options": { - "legend": { - "displayMode": "table", - "placement": "right", - "showLegend": true, - "values": [ - "percent" - ] - }, - "pieType": "donut", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.2.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum by (net_family) (increase(${prom_prefix}_ipversion_total{job=~\"$job\", stream_id=~\"$stream_id\", return_code != \"-\"}[$__range])) != 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{net_family}}", - "refId": "B" - } - ], - "title": "IP Protocols", - "type": "piechart" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" - }, - "filterable": true, - "inspect": false - }, - "decimals": 0, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": 
[ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Domains" - }, - "properties": [ - { - "id": "custom.width", - "value": 1040 - } - ] - } - ] - }, - "gridPos": { - "h": 11, - "w": 16, - "x": 0, - "y": 11 - }, - "id": 62, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "enablePagination": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "frameIndex": 0, - "showHeader": false, - "sortBy": [] - }, - "pluginVersion": "10.4.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": false, - "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_noerror_domains{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "range": false, - "refId": "NOERROR" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_nonexistent_domains{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "__auto", - "refId": "NXDOMAIN" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "topk(20, sum by(domain) (increase(${prom_prefix}_top_servfail_domains{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "__auto", - "refId": "SERVFAIL" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "topk(20, sum by(domain) 
(increase(${prom_prefix}_top_suspicious{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "__auto", - "refId": "Suspicious" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "topk(20, sum by(suffix) (increase(${prom_prefix}_top_tlds{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "__auto", - "refId": "TLDS" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "topk(20, sum by(ip) (increase(${prom_prefix}_top_requesters{job=~\"$job\", stream_id=~\"$stream_id\"}[$__range]))) != 0", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "__auto", - "refId": "CLIENTS" - } - ], - "title": "Top", - "transformations": [ - { - "id": "renameByRegex", - "options": { - "regex": "Value #NOERROR", - "renamePattern": "NoError Hits" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "Value #NXDOMAIN", - "renamePattern": "NonExistent Hits" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "Value #SERVFAIL", - "renamePattern": "ServFail Hits" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "domain", - "renamePattern": "Top Domains" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "Value #Suspicious", - "renamePattern": "Suspicious Hits" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "Value #TLDS", - "renamePattern": "TLDs Hits" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "Value #CLIENTS", - "renamePattern": "Clients Hits" - } - }, - { - "id": "renameByRegex", - "options": { - "regex": "ip", - "renamePattern": "Top Query IPs" - } - }, - { - "id": "renameByRegex", - "options": { - 
"regex": "suffix", - "renamePattern": "Top TLDs" - } - }, - { - "id": "groupBy", - "options": { - "fields": { - "Clients Hits": { - "aggregations": [], - "operation": "groupby" - }, - "NoError Hits": { - "aggregations": [], - "operation": "groupby" - }, - "NonExistent Hits": { - "aggregations": [], - "operation": "groupby" - }, - "ServFail Hits": { - "aggregations": [], - "operation": "groupby" - }, - "Suspicious Hits": { - "aggregations": [], - "operation": "groupby" - }, - "TLDs Hits": { - "aggregations": [], - "operation": "groupby" - }, - "Top Domains": { - "aggregations": [], - "operation": "groupby" - }, - "Top Query IPs": { - "aggregations": [], - "operation": "groupby" - }, - "Top TLDs": { - "aggregations": [], - "operation": "groupby" - } - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 11, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 6, - "w": 8, - "x": 16, - "y": 11 - }, - "id": 88, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - 
"showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(${prom_prefix}_qtypes_total{job=~\"$job\",stream_id=~\"$stream_id\"}[5m])) by (query_type)", - "interval": "", - "legendFormat": "{{ query_type }}", - "range": true, - "refId": "A" - } - ], - "title": "Packet by Qtype [rate-5m]", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 11, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 17 - }, - "id": 90, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "editorMode": "code", - "exemplar": true, - "expr": "sum(rate(${prom_prefix}_rcodes_total{job=~\"$job\", stream_id=~\"$stream_id\", 
return_code!=\"-\"}[5m])) by ( return_code )", - "interval": "", - "legendFormat": "{{ return_code }}", - "range": true, - "refId": "A" - } - ], - "title": "Packet by Return Code [rate-5m]", - "type": "timeseries" - } - ], - "refresh": "5s", - "schemaVersion": 39, - "tags": [], - "templating": { - "list": [ - { - "hide": 2, - "name": "prom_prefix", - "query": "dnscollector", - "skipUrlSync": false, - "type": "constant" - }, - { - "current": { - "selected": false, - "text": "dnscollector", - "value": "dnscollector" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "label_values(job)", - "hide": 0, - "includeAll": false, - "label": "Job", - "multi": false, - "name": "job", - "options": [], - "query": { - "query": "label_values(job)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "/dnscollector.*/", - "skipUrlSync": false, - "sort": 0, - "type": "query" - }, - { - "current": { - "selected": false, - "text": "All", - "value": "$__all" - }, - "datasource": { - "type": "prometheus", - "uid": "prometheus" - }, - "definition": "label_values({job=\"$job\"},stream_id)", - "hide": 0, - "includeAll": true, - "label": "Stream", - "multi": true, - "name": "stream_id", - "options": [], - "query": { - "query": "label_values({job=\"$job\"},stream_id)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" - } - ] - }, - "time": { - "from": "now-5m", - "to": "now" - }, - "timepicker": {}, - "timezone": "", - "title": "DNScollector - Prometheus", - "uid": "6uPErf6nz", - "version": 1, - "weekStart": "" -} \ No newline at end of file diff --git a/config/monitoring/grafana/kustomization.yaml b/config/monitoring/grafana/kustomization.yaml index 654caa6..2c1e3cc 100644 --- a/config/monitoring/grafana/kustomization.yaml +++ b/config/monitoring/grafana/kustomization.yaml @@ -14,10 +14,7 @@ configMapGenerator: - dashboards/dashboards.yaml - name: 
grafana-dashboards files: - - dashboards/dns_observability.json - - dashboards/grafana_exporter.json - - dashboards/grafana_loki.json - - dashboards/grafana_prometheus.json + - dashboards/dns_workbench.json - name: grafana-datasources files: - datasources/datasources.yaml diff --git a/config/monitoring/prometheus/config/prometheus.yml b/config/monitoring/prometheus/config/prometheus.yml index e331975..5847a45 100644 --- a/config/monitoring/prometheus/config/prometheus.yml +++ b/config/monitoring/prometheus/config/prometheus.yml @@ -25,15 +25,6 @@ scrape_configs: labels: app: dnscollector_exporter - - job_name: dnscollector - metrics_path: /metrics - scrape_interval: 5s - static_configs: - - targets: - - pdns-auth.dns-agent-system.svc:8084 - labels: - app: dnscollector - - job_name: vector metrics_path: /metrics static_configs: From bcb3b5fe319d1c988c71c73174e740940407e6fc Mon Sep 17 00:00:00 2001 From: cc-datum Date: Fri, 5 Dec 2025 16:24:30 -0600 Subject: [PATCH 08/33] chore: clean up configurations --- config/agent/dnsdist-config.yaml | 6 +++--- config/agent/manager.yaml | 3 --- config/agent/vector-config.yaml | 17 ----------------- 3 files changed, 3 insertions(+), 23 deletions(-) diff --git a/config/agent/dnsdist-config.yaml b/config/agent/dnsdist-config.yaml index 81254f6..8adfcda 100644 --- a/config/agent/dnsdist-config.yaml +++ b/config/agent/dnsdist-config.yaml @@ -44,7 +44,7 @@ query_rules: type: All action: type: DnstapLog - identity: dnsdist_v2 + identity: dnsdist logger_name: remote_logging - name: "default rule" @@ -60,7 +60,7 @@ response_rules: type: All action: type: DnstapLog - identity: dnsdist_v2 + identity: dnsdist logger_name: remote_logging cache_hit_response_rules: @@ -69,5 +69,5 @@ cache_hit_response_rules: type: All action: type: DnstapLog - identity: dnsdist_v2_cache + identity: dnsdist_cache logger_name: remote_logging \ No newline at end of file diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index f4908f7..61e75e1 
100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -263,9 +263,6 @@ spec: name: dnstap protocol: TCP - containerPort: 9165 - name: dnscolt-metrics - protocol: TCP - - containerPort: 8084 name: dnscol-metrics protocol: TCP volumeMounts: diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index ed8dac5..a58a69e 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -141,20 +141,3 @@ sinks: - dnstap_metrics - internal_metrics address: 0.0.0.0:9598 - - loki: - type: loki - inputs: - - dnstap_tcp - endpoint: http://loki.dns-monitoring.svc:3100 - out_of_order_action: accept - encoding: - codec: json - labels: - job: vector - app: vector - component: vector - server: "{{server_id}}" - proto: "{{proto}}" - family: "{{family}}" - message_type: "{{message_type}}" From 83a8fe394906513688b049eead971ad42038b98f Mon Sep 17 00:00:00 2001 From: cc-datum Date: Fri, 5 Dec 2025 18:21:10 -0600 Subject: [PATCH 09/33] feat: add metrics verification --- Makefile | 2 ++ config/monitoring/README.md | 8 ------- test/e2e/chainsaw-test.yaml | 44 +++++++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 5bbcfed..16da69a 100644 --- a/Makefile +++ b/Makefile @@ -260,6 +260,8 @@ bootstrap-downstream: ## Create kind downstream and deploy agent with embedded P CONTEXT=kind-$(DOWNSTREAM_CLUSTER_NAME) KUSTOMIZE_DIR=config/overlays/agent-powerdns $(MAKE) kustomize-apply # Export external kubeconfig for downstream cluster (reachable from host/other containers) CLUSTER=$(DOWNSTREAM_CLUSTER_NAME) OUT=dev/kind.downstream.kubeconfig $(MAKE) export-kind-kubeconfig-raw + # Install monitoring stack into downstream + CONTEXT=kind-$(DOWNSTREAM_CLUSTER_NAME) KUSTOMIZE_DIR=config/monitoring $(MAKE) kustomize-apply .PHONY: bootstrap-upstream bootstrap-upstream: ## Create kind upstream and deploy replicator pointing to downstream diff --git 
a/config/monitoring/README.md b/config/monitoring/README.md index e2e6278..05ea0e9 100644 --- a/config/monitoring/README.md +++ b/config/monitoring/README.md @@ -39,12 +39,4 @@ patch `PROMETHEUS_URL` and remove the `prometheus` entry from The bundled Prometheus scrapes: - `dnsdist` at `pdns-auth.dns-agent-system.svc:8083` (`/metrics`) - `dnscollector_exporter` at `pdns-auth.dns-agent-system.svc:9165` (`/metrics`) -- `dnscollector` at `pdns-auth.dns-agent-system.svc:8084` (`/metrics`) - `vector` at `pdns-auth.dns-agent-system.svc:9598` (`/metrics`) - -## Metrics and logs wiring -- `config/agent/pdns-service.yaml` exposes metrics ports for dnsdist (8083), - dnscollector (9165), and vector (9598). -- `config/agent/dnscollector-config.yaml` enables the telemetry endpoint. -- `config/agent/vector-config.yaml` streams enriched dnstap events to Loki at - `loki.dns-monitoring.svc:3100`. diff --git a/test/e2e/chainsaw-test.yaml b/test/e2e/chainsaw-test.yaml index 7d98d25..50b6a28 100644 --- a/test/e2e/chainsaw-test.yaml +++ b/test/e2e/chainsaw-test.yaml @@ -404,3 +404,47 @@ spec: kind: DNSZoneClass check: ($error == null): true + + - name: Verify Metrics - curl into vector pod and check for metrics + try: + - create: + cluster: downstream + resource: + apiVersion: v1 + kind: Pod + metadata: + name: vector-metrics-check + namespace: default + spec: + restartPolicy: Never + containers: + - name: curl + image: curlimages/curl:8.5.0 + command: ["sleep", "3600"] + - assert: + cluster: downstream + resource: + apiVersion: v1 + kind: Pod + metadata: + name: vector-metrics-check + namespace: default + status: + phase: Running + - sleep: + duration: 10s + - script: + cluster: downstream + content: | + set -euo pipefail + kubectl -n default exec pod/vector-metrics-check -- sh -c ' + metrics=$(curl -fsS http://pdns-auth.dns-agent-system.svc.cluster.local:9598/metrics) + echo "Prometheus metrics:" + echo "$metrics" | awk "/^dns_/ { split(\$0,a,\"{\"); print a[1] }" | sort -u || true + 
total=$(echo "$metrics" | awk "/^dns_queries_total/ {print \$2; exit}") + if [ -z "${total:-}" ] || [ "$total" = "0" ]; then + echo "dns_queries_total is zero or missing" >&2 + exit 1 + fi + echo "dns_queries_total: $total" + ' From 7918776eb78e34e6fe8da76c38ed0cdd76a1c602 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Fri, 5 Dec 2025 19:21:11 -0600 Subject: [PATCH 10/33] feat: update DNS Workbench dashboard --- .../grafana/dashboards/dns_workbench.json | 876 +++++++++++++----- 1 file changed, 648 insertions(+), 228 deletions(-) diff --git a/config/monitoring/grafana/dashboards/dns_workbench.json b/config/monitoring/grafana/dashboards/dns_workbench.json index ca10d9e..715b1dc 100644 --- a/config/monitoring/grafana/dashboards/dns_workbench.json +++ b/config/monitoring/grafana/dashboards/dns_workbench.json @@ -1,172 +1,304 @@ { + "timezone": "", "title": "DNS Workbench", + "uid": "", "schemaVersion": 39, "version": 1, - "refresh": "30s", - "tags": [ - "dns", - "dnstap", - "workbench" - ], - "timezone": "", + "weekStart": "", + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], "panels": [ { - "type": "stat", - "title": "Total Queries", - "id": 1, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "targets": [ - { - "refId": "A", - "expr": "sum(increase(dns_queries_total[$__range]))" - } - ], + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 1, "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto", + 
"orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "sum(increase(dns_queries_total[$__range]))", + "refId": "A" } - } + ], + "title": "Total Queries", + "type": "stat" }, { - "type": "stat", - "title": "Avg Latency", - "id": 2, - "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 0 - }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "targets": [ - { - "refId": "A", - "expr": "1000 * (sum(rate(dns_response_latency_seconds_sum[5m])) / sum(rate(dns_response_latency_seconds_count[5m])))" - } - ], "fieldConfig": { "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "ms" }, "overrides": [] }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 2, "options": { "colorMode": "value", "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ] + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "1000 * (sum(rate(dns_response_latency_seconds_sum[5m])) / sum(rate(dns_response_latency_seconds_count[5m])))", + "refId": "A" } - } + ], + "title": "Avg Latency", + "type": "stat" }, { - "type": "stat", - "title": "Cache Hit Rate", - "id": 3, - "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 0 - }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "targets": [ - { - "refId": "A", - "expr": "sum(rate(dns_cache_hits_total[5m])) / sum(rate(dns_responses_total[5m]))" - } - ], "fieldConfig": { "defaults": { + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + 
"value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "percentunit" }, "overrides": [] }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 3, "options": { "colorMode": "value", "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ] + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "sum(rate(dns_cache_hits_total[5m])) / sum(rate(dns_responses_total[5m]))", + "refId": "A" } - } + ], + "title": "Cache Hit Rate", + "type": "stat" }, { - "type": "stat", - "title": "NXDOMAIN Rate", - "id": 4, - "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 0 - }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "targets": [ - { - "refId": "A", - "expr": "sum(rate(dns_responses_total{rcode=\"NXDOMAIN\"}[5m])) / sum(rate(dns_responses_total[5m]))" - } - ], "fieldConfig": { "defaults": { + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "percentunit" }, "overrides": [] }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 4, "options": { "colorMode": "value", "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ] + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "sum(rate(dns_responses_total{rcode=\"NXDOMAIN\"}[5m])) / sum(rate(dns_responses_total[5m]))", + "refId": "A" } - } + ], + "title": "NXDOMAIN Rate", + "type": "stat" }, { - "type": "bargauge", - "title": "Query Type Distribution", - "id": 5, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + 
"fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "targets": [ - { - "refId": "A", - "expr": "sum(rate(dns_queries_total[5m])) by (qtype)" - } - ], + "id": 5, "options": { "displayMode": "lcd", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ @@ -174,103 +306,245 @@ ], "fields": "", "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "sum(rate(dns_queries_total[5m])) by (qtype)", + "refId": "A" } + ], + "title": "Query Type Distribution", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "short" }, "overrides": [] - } - }, - { - "type": "bargauge", - "title": "Response Code Distribution", - "id": 6, + }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "targets": [ - { - "refId": "A", - "expr": "sum(rate(dns_responses_total[5m])) by (rcode)" - } - ], + "id": 6, "options": { "displayMode": "lcd", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ] + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + 
"expr": "sum(rate(dns_responses_total[5m])) by (rcode)", + "refId": "A" } + ], + "title": "Response Code Distribution", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { - "unit": "short" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } }, "overrides": [] - } - }, - { - "type": "stat", - "title": "DNSSEC Validation", - "id": 7, + }, "gridPos": { "h": 4, "w": 12, "x": 0, "y": 12 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "value_and_name", + "wideLayout": true }, + "pluginVersion": "10.4.2", "targets": [ { - "refId": "A", - "expr": "sum(rate(dns_dnssec_status_total[5m])) by (dnssec_status)" + "expr": "sum(rate(dns_dnssec_status_total[5m])) by (dnssec_status)", + "refId": "A" } ], + "title": "DNSSEC Validation", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 12, + "x": 12, + "y": 12 + }, + "id": 9, "options": { - "colorMode": "value", - "graphMode": "none", + "displayMode": "lcd", + "maxVizHeight": 300, + "minVizHeight": 16, + "minVizWidth": 8, + "namePlacement": "auto", + "orientation": "horizontal", "reduceOptions": { "calcs": [ "lastNotNull" - ] + ], + "fields": "", + "values": false }, - "textMode": "value_and_name" - } + "showUnfilled": true, + "sizing": "auto", + 
"valueMode": "color" + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "sum(rate(dns_queries_total[5m])) by (stream)", + "refId": "A" + } + ], + "title": "Query Source Distribution", + "type": "bargauge" }, { - "type": "table", - "title": "Top Zones", - "id": 8, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" - }, - "targets": [ - { - "refId": "A", - "expr": "topk(10, sum(rate(dns_queries_total[5m])) by (zone))" - } - ], + "id": 8, "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, "sortBy": [ { @@ -278,187 +552,333 @@ "displayName": "Value" } ] - } - }, - { - "type": "bargauge", - "title": "Query Source Distribution", - "id": 9, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 16 - }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" }, + "pluginVersion": "10.4.2", "targets": [ { - "refId": "A", - "expr": "sum(rate(dns_queries_total[5m])) by (stream)" + "expr": "topk(10, sum(rate(dns_queries_total[5m])) by (zone))", + "refId": "A" } ], - "options": { - "displayMode": "lcd", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ] - } - } + "title": "Top Zones", + "type": "table" }, { - "type": "stat", - "title": "p50 Latency", - "id": 10, - "gridPos": { - "h": 3, - "w": 4, - "x": 0, - "y": 24 - }, "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "targets": [ - { - "refId": "A", - "expr": "1000 * 
histogram_quantile(0.5, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" - } - ], "fieldConfig": { "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "ms" - } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 24 }, + "id": 10, "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" - ] + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.2", + "targets": [ + { + "expr": "1000 * histogram_quantile(0.5, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))", + "refId": "A" } - } + ], + "title": "p50 Latency", + "type": "stat" }, { - "type": "stat", - "title": "p75 Latency", - "id": 11, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, "gridPos": { "h": 3, "w": 4, "x": 4, "y": 24 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "10.4.2", "targets": [ { - "refId": "A", - "expr": "1000 * histogram_quantile(0.75, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + "expr": "1000 * histogram_quantile(0.75, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))", + "refId": "A" } ], + "title": 
"p75 Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "ms" - } - } - }, - { - "type": "stat", - "title": "p90 Latency", - "id": 12, + }, + "overrides": [] + }, "gridPos": { "h": 3, "w": 4, "x": 8, "y": 24 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "id": 12, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "10.4.2", "targets": [ { - "refId": "A", - "expr": "1000 * histogram_quantile(0.9, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + "expr": "1000 * histogram_quantile(0.9, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))", + "refId": "A" } ], + "title": "p90 Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "ms" - } - } - }, - { - "type": "stat", - "title": "p95 Latency", - "id": 13, + }, + "overrides": [] + }, "gridPos": { "h": 3, "w": 4, "x": 12, "y": 24 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "id": 13, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": 
"10.4.2", "targets": [ { - "refId": "A", - "expr": "1000 * histogram_quantile(0.95, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + "expr": "1000 * histogram_quantile(0.95, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))", + "refId": "A" } ], + "title": "p95 Latency", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, "fieldConfig": { "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, "unit": "ms" - } - } - }, - { - "type": "stat", - "title": "p99 Latency", - "id": 14, + }, + "overrides": [] + }, "gridPos": { "h": 3, "w": 4, "x": 16, "y": 24 }, - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS}" + "id": 14, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true }, + "pluginVersion": "10.4.2", "targets": [ { - "refId": "A", - "expr": "1000 * histogram_quantile(0.99, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))" + "expr": "1000 * histogram_quantile(0.99, sum(rate(dns_response_latency_seconds_bucket[5m])) by (le))", + "refId": "A" } ], - "fieldConfig": { - "defaults": { - "unit": "ms" - } - } + "title": "p99 Latency", + "type": "stat" } ], + "refresh": "30s", + "tags": [ + "dns", + "dnstap", + "workbench" + ], "templating": { "list": [ { - "name": "DS_PROMETHEUS", - "type": "datasource", - "label": "Prometheus", - "query": "prometheus", "current": { + "selected": false, "text": "Prometheus", - "value": "Prometheus" - } + "value": "prometheus" + }, + "hide": 0, + "includeAll": false, + "label": "Prometheus", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + 
"regex": "", + "skipUrlSync": false, + "type": "datasource" } ] }, @@ -477,4 +897,4 @@ "1h" ] } -} +} \ No newline at end of file From 6fb523e85c4c921a2fbd330b6287582fe3c231da Mon Sep 17 00:00:00 2001 From: cc-datum Date: Mon, 5 Jan 2026 19:08:43 -0600 Subject: [PATCH 11/33] chore: empty commit From dd51be186c3501960c06fc8d2ad916f2850f907e Mon Sep 17 00:00:00 2001 From: cc-datum Date: Sat, 10 Jan 2026 00:36:32 -0600 Subject: [PATCH 12/33] feat: implement a metrics push model --- .gitignore | 2 +- config/agent/manager.yaml | 3 + config/agent/vector-config.yaml | 8 ++- config/monitoring/README.md | 4 ++ config/monitoring/kustomization.yaml | 1 + config/monitoring/prometheus/deployment.yaml | 1 + .../vector/config/vector-config.yaml | 18 ++++++ config/monitoring/vector/deployment.yaml | 56 +++++++++++++++++++ config/monitoring/vector/kustomization.yaml | 14 +++++ config/monitoring/vector/service.yaml | 14 +++++ 10 files changed, 119 insertions(+), 2 deletions(-) create mode 100644 config/monitoring/vector/config/vector-config.yaml create mode 100644 config/monitoring/vector/deployment.yaml create mode 100644 config/monitoring/vector/kustomization.yaml create mode 100644 config/monitoring/vector/service.yaml diff --git a/.gitignore b/.gitignore index cfc6196..781e094 100644 --- a/.gitignore +++ b/.gitignore @@ -23,7 +23,7 @@ config/**/charts coverage.* *.coverprofile profile.cov - +test/e2e/kubeconfig-* # Dependency directories (remove the comment below to include it) # vendor/ diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index 61e75e1..e9245cb 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -284,6 +284,9 @@ spec: - --watch-config - --config-dir - /etc/vector/ + env: + - name: VECTOR_CENTRAL_ADDRESS + value: vector.dns-monitoring.svc:9000 volumeMounts: - name: vector-config mountPath: /etc/vector/vector-config.yaml diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index a58a69e..4b8c549 100644 
--- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -134,10 +134,16 @@ sinks: codec: json inputs: - dnstap_tcp + - dnstap_metrics + + central_vector: + type: vector + inputs: + - dnstap_metrics + address: ${VECTOR_CENTRAL_ADDRESS} prometheus: type: prometheus_exporter inputs: - - dnstap_metrics - internal_metrics address: 0.0.0.0:9598 diff --git a/config/monitoring/README.md b/config/monitoring/README.md index 05ea0e9..47631fb 100644 --- a/config/monitoring/README.md +++ b/config/monitoring/README.md @@ -9,6 +9,8 @@ Lightweight observability stack for the DNS agent components. - **Prometheus**: Single instance scraping dnsdist, dnscollector, and vector metrics out of the box. - **Loki**: Single-binary log storage for dnstap/log forwarding from vector. +- **Vector (central)**: Receives metrics from edge Vector and forwards to the + consumer-facing Victoria Metrics remote-write endpoint. - **Namespace**: `dns-monitoring` is created automatically. ## Deploy @@ -30,6 +32,8 @@ open http://localhost:3000 - Prometheus URL: `${PROMETHEUS_URL}` (default `http://prometheus.dns-monitoring.svc:9090`) - Loki URL: `${LOKI_URL}` (default `http://loki.dns-monitoring.svc:3100`) +The central Vector instance uses `REMOTE_WRITE_ENDPOINT` (default +`http://prometheus.dns-monitoring.svc:9090/api/v1/write`). 
If you want to use an existing cluster Prometheus instead of the bundled one, patch `PROMETHEUS_URL` and remove the `prometheus` entry from diff --git a/config/monitoring/kustomization.yaml b/config/monitoring/kustomization.yaml index 7113c0c..c3fdd34 100644 --- a/config/monitoring/kustomization.yaml +++ b/config/monitoring/kustomization.yaml @@ -8,3 +8,4 @@ resources: - grafana - prometheus - loki + - vector diff --git a/config/monitoring/prometheus/deployment.yaml b/config/monitoring/prometheus/deployment.yaml index aa4686a..8ecfbc2 100644 --- a/config/monitoring/prometheus/deployment.yaml +++ b/config/monitoring/prometheus/deployment.yaml @@ -22,6 +22,7 @@ spec: - --config.file=/etc/prometheus/prometheus.yml - --storage.tsdb.path=/prometheus - --web.enable-lifecycle + - --web.enable-remote-write-receiver ports: - name: http containerPort: 9090 diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml new file mode 100644 index 0000000..9c6228c --- /dev/null +++ b/config/monitoring/vector/config/vector-config.yaml @@ -0,0 +1,18 @@ +data_dir: /var/lib/vector + +api: + enabled: false + +sources: + edge_metrics: + type: vector + address: 0.0.0.0:9000 + +sinks: + victoria_metrics: + type: prometheus_remote_write + inputs: + - edge_metrics + endpoint: ${REMOTE_WRITE_ENDPOINT} + healthcheck: + enabled: false \ No newline at end of file diff --git a/config/monitoring/vector/deployment.yaml b/config/monitoring/vector/deployment.yaml new file mode 100644 index 0000000..bdac1f7 --- /dev/null +++ b/config/monitoring/vector/deployment.yaml @@ -0,0 +1,56 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector + labels: + app.kubernetes.io/name: vector +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: vector + template: + metadata: + labels: + app.kubernetes.io/name: vector + spec: + containers: + - name: vector + image: timberio/vector:0.51.1-distroless-static + imagePullPolicy: 
IfNotPresent + args: + - --log-format=json + - --verbose + - --watch-config + - --config-dir + - /etc/vector/ + env: + - name: REMOTE_WRITE_ENDPOINT + value: http://prometheus.dns-monitoring.svc:9090/api/v1/write + ports: + - containerPort: 9000 + name: vector + protocol: TCP + volumeMounts: + - name: vector-config + mountPath: /etc/vector/vector-config.yaml + subPath: vector-config.yaml + - name: vector-config-volume + mountPath: /etc/vector + - name: vector-data + mountPath: /var/lib/vector + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 500m + memory: 512Mi + volumes: + - name: vector-config + configMap: + name: vector-config + - name: vector-config-volume + emptyDir: {} + - name: vector-data + emptyDir: {} diff --git a/config/monitoring/vector/kustomization.yaml b/config/monitoring/vector/kustomization.yaml new file mode 100644 index 0000000..f8c1cc0 --- /dev/null +++ b/config/monitoring/vector/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: + - deployment.yaml + - service.yaml + +generatorOptions: + disableNameSuffixHash: true + +configMapGenerator: + - name: vector-config + files: + - config/vector-config.yaml diff --git a/config/monitoring/vector/service.yaml b/config/monitoring/vector/service.yaml new file mode 100644 index 0000000..b77149a --- /dev/null +++ b/config/monitoring/vector/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: vector + labels: + app.kubernetes.io/name: vector +spec: + selector: + app.kubernetes.io/name: vector + ports: + - name: vector + port: 9000 + targetPort: vector + protocol: TCP From bd8073dcc9ef8588a09af2269a2e05f130c6a550 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Sun, 11 Jan 2026 22:41:52 -0600 Subject: [PATCH 13/33] feat: add enrichment table for zone and project mapping --- config/monitoring/README.md | 2 +- .../vector/config/vector-config.yaml | 36 +++++++++++- config/monitoring/vector/deployment.yaml 
| 53 ++++++++++++++++- config/monitoring/vector/kustomization.yaml | 4 ++ config/monitoring/vector/rbac.yaml | 26 +++++++++ .../vector/scripts/build-zone-project.sh | 57 +++++++++++++++++++ .../dnszone_replicator_controller.go | 3 + 7 files changed, 177 insertions(+), 4 deletions(-) create mode 100644 config/monitoring/vector/rbac.yaml create mode 100644 config/monitoring/vector/scripts/build-zone-project.sh diff --git a/config/monitoring/README.md b/config/monitoring/README.md index 47631fb..c3f4b26 100644 --- a/config/monitoring/README.md +++ b/config/monitoring/README.md @@ -32,7 +32,7 @@ open http://localhost:3000 - Prometheus URL: `${PROMETHEUS_URL}` (default `http://prometheus.dns-monitoring.svc:9090`) - Loki URL: `${LOKI_URL}` (default `http://loki.dns-monitoring.svc:3100`) -The central Vector instance uses `REMOTE_WRITE_ENDPOINT` (default +The central Vector instance uses `VICTORIA_METRICS_REMOTE_WRITE_URL` (default `http://prometheus.dns-monitoring.svc:9090/api/v1/write`). If you want to use an existing cluster Prometheus instead of the bundled one, diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml index 9c6228c..77bbe30 100644 --- a/config/monitoring/vector/config/vector-config.yaml +++ b/config/monitoring/vector/config/vector-config.yaml @@ -3,16 +3,48 @@ data_dir: /var/lib/vector api: enabled: false +enrichment_tables: + zone_project: + type: file + file: + path: /etc/vector/enrichment/zone_project.csv + encoding: + type: csv + delimiter: "," + schema: + zone: string + project: string + sources: edge_metrics: type: vector address: 0.0.0.0:9000 +transforms: + add_project_tags: + type: remap + inputs: + - edge_metrics + source: | + # Get zone from metric tags + zone = to_string(.tags.zone) ?? 
null + + if zone != null { + + # Lookup project from enrichment table + record = get_enrichment_table_record!("zone_project", {"zone": zone}) + + # Add project tag if found + if record != null && exists(record.project) { + .tags.project = to_string(record.project) ?? record.project + } + } + sinks: victoria_metrics: type: prometheus_remote_write inputs: - - edge_metrics - endpoint: ${REMOTE_WRITE_ENDPOINT} + - add_project_tags + endpoint: ${VICTORIA_METRICS_REMOTE_WRITE_URL} healthcheck: enabled: false \ No newline at end of file diff --git a/config/monitoring/vector/deployment.yaml b/config/monitoring/vector/deployment.yaml index bdac1f7..2ea15d8 100644 --- a/config/monitoring/vector/deployment.yaml +++ b/config/monitoring/vector/deployment.yaml @@ -14,6 +14,22 @@ spec: labels: app.kubernetes.io/name: vector spec: + serviceAccountName: vector + initContainers: + - name: init-enrichment-table + image: busybox:1.36 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + set -eu + mkdir -p /enrichment/configmaps + # create a valid CSV with headers so Vector can compile VRL at startup + printf 'zone,project\n' > /enrichment/zone_project.csv + volumeMounts: + - name: vector-enrichment + mountPath: /enrichment containers: - name: vector image: timberio/vector:0.51.1-distroless-static @@ -25,7 +41,7 @@ spec: - --config-dir - /etc/vector/ env: - - name: REMOTE_WRITE_ENDPOINT + - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write ports: - containerPort: 9000 @@ -37,6 +53,9 @@ spec: subPath: vector-config.yaml - name: vector-config-volume mountPath: /etc/vector + - name: vector-enrichment + mountPath: /etc/vector/enrichment + readOnly: true - name: vector-data mountPath: /var/lib/vector resources: @@ -46,11 +65,43 @@ spec: limits: cpu: 500m memory: 512Mi + - name: zone-accounting-sidecar + image: kiwigrid/k8s-sidecar:1.30.9 + imagePullPolicy: IfNotPresent + env: + - name: RESOURCE + value: configmap + - name: 
NAMESPACE + value: datum-downstream-dnszone-accounting + - name: LABEL + value: datum.net/dnszone-accounting + - name: LABEL_VALUE + value: enabled + - name: FOLDER + value: /tmp/ + - name: UNIQUE_FILENAMES + value: "true" + - name: METHOD + value: WATCH + - name: SCRIPT + value: /scripts/build-zone-project.sh + volumeMounts: + - name: vector-enrichment + mountPath: /enrichment + - name: vector-enrichment-scripts + mountPath: /scripts + readOnly: true volumes: - name: vector-config configMap: name: vector-config - name: vector-config-volume emptyDir: {} + - name: vector-enrichment + emptyDir: {} + - name: vector-enrichment-scripts + configMap: + name: vector-enrichment-scripts + defaultMode: 0555 - name: vector-data emptyDir: {} diff --git a/config/monitoring/vector/kustomization.yaml b/config/monitoring/vector/kustomization.yaml index f8c1cc0..6f11db5 100644 --- a/config/monitoring/vector/kustomization.yaml +++ b/config/monitoring/vector/kustomization.yaml @@ -4,6 +4,7 @@ kind: Kustomization resources: - deployment.yaml - service.yaml + - rbac.yaml generatorOptions: disableNameSuffixHash: true @@ -12,3 +13,6 @@ configMapGenerator: - name: vector-config files: - config/vector-config.yaml + - name: vector-enrichment-scripts + files: + - scripts/build-zone-project.sh diff --git a/config/monitoring/vector/rbac.yaml b/config/monitoring/vector/rbac.yaml new file mode 100644 index 0000000..0a60106 --- /dev/null +++ b/config/monitoring/vector/rbac.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vector +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: vector-configmap-reader +rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: vector-configmap-reader +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: vector-configmap-reader +subjects: + - kind: 
ServiceAccount + name: vector + namespace: dns-monitoring diff --git a/config/monitoring/vector/scripts/build-zone-project.sh b/config/monitoring/vector/scripts/build-zone-project.sh new file mode 100644 index 0000000..f57cdb1 --- /dev/null +++ b/config/monitoring/vector/scripts/build-zone-project.sh @@ -0,0 +1,57 @@ +#!/bin/sh +# Build zone_project.csv from datum-downstream-dnszone-accounting ConfigMaps. +# +# Input file example: +# namespace_.configmap_.owner (content: //<...>) +# +# Output CSV: +# zone,project +# , + +set -eu + +SRC_DIR="${ENRICHMENT_CONFIGMAP_DIR:-/tmp}" +OUT_FILE="${ENRICHMENT_OUTPUT_FILE:-/enrichment/zone_project.csv}" + +mkdir -p "$(dirname "$OUT_FILE")" + +tmp="$(mktemp)" +trap 'rm -f "$tmp"' EXIT + +# Always produce a valid CSV (header at minimum). +printf 'zone,project\n' > "$tmp" + +# Nothing to do if input directory doesn't exist. +[ -d "$SRC_DIR" ] || { mv "$tmp" "$OUT_FILE"; exit 0; } + +# Collect rows, then sort+dedupe for stable output. +rows="$(mktemp)" +trap 'rm -f "$tmp" "$rows"' EXIT +: > "$rows" + +for f in "$SRC_DIR"/*.owner; do + [ -f "$f" ] || continue + + base="$(basename "$f")" + # Expect: namespace_.configmap_.owner -> extract + zone="${base#*configmap_}" + zone="${zone%.owner}" + + # Skip if we can't infer zone. + [ -n "$zone" ] && [ "$zone" != "$base" ] || continue + + owner="$(tr -d '\r\n' < "$f")" + owner="${owner#/}" # trim leading / + project="${owner%%/*}" # first segment + + [ -n "$project" ] || continue + + printf '%s,%s\n' "$zone" "$project" >> "$rows" +done + +# Append sorted unique rows. 
+if [ -s "$rows" ]; then + sort "$rows" | uniq >> "$tmp" +fi + +mv "$tmp" "$OUT_FILE" \ No newline at end of file diff --git a/internal/controller/dnszone_replicator_controller.go b/internal/controller/dnszone_replicator_controller.go index 4fb5446..615971a 100644 --- a/internal/controller/dnszone_replicator_controller.go +++ b/internal/controller/dnszone_replicator_controller.go @@ -323,6 +323,9 @@ func (r *DNSZoneReplicator) ensureZoneAccounting(ctx context.Context, upstream * newCM.Data = map[string]string{ "owner": owner, } + newCM.Labels = map[string]string{ + "datum.net/dnszone-accounting": "enabled", + } if cerr := r.DownstreamClient.Create(ctx, &newCM); cerr != nil { // A race can occur; if created by another, treat as not owned and let next reconcile decide if apierrors.IsAlreadyExists(cerr) { From 582ee3c1ea4b2d6fe19cbe58761f799891de7992 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Mon, 12 Jan 2026 09:49:21 -0600 Subject: [PATCH 14/33] feat: update buffer configuration --- config/agent/vector-config.yaml | 4 ++++ config/monitoring/vector/config/vector-config.yaml | 13 ++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index 4b8c549..873d4a0 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -141,6 +141,10 @@ sinks: inputs: - dnstap_metrics address: ${VECTOR_CENTRAL_ADDRESS} + buffer: + type: disk + max_size: 268435488 + when_full: block prometheus: type: prometheus_exporter diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml index 77bbe30..09694fd 100644 --- a/config/monitoring/vector/config/vector-config.yaml +++ b/config/monitoring/vector/config/vector-config.yaml @@ -26,16 +26,19 @@ transforms: inputs: - edge_metrics source: | + # Add tags if missing + if !exists(.tags) { .tags = {} } + # Get zone from metric tags zone = to_string(.tags.zone) ?? 
null if zone != null { # Lookup project from enrichment table - record = get_enrichment_table_record!("zone_project", {"zone": zone}) + record, err = get_enrichment_table_record("zone_project", {"zone": zone}) # Add project tag if found - if record != null && exists(record.project) { + if err == null && record != null && exists(record.project) { .tags.project = to_string(record.project) ?? record.project } } @@ -47,4 +50,8 @@ sinks: - add_project_tags endpoint: ${VICTORIA_METRICS_REMOTE_WRITE_URL} healthcheck: - enabled: false \ No newline at end of file + enabled: false + buffer: + type: disk + max_size: 268435488 + when_full: block From 2d80e5ed36ebaee44b6a6f9e517c95aa2743bb61 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 13 Jan 2026 15:02:25 -0600 Subject: [PATCH 15/33] feat: add Helm repository and release configuration for Vector metrics gateway --- Makefile | 1 + config/agent/manager.yaml | 2 +- .../helmrepository.yaml | 8 + .../vector-metrics-gateway/kustomization.yaml | 6 + .../vector-metrics-gateway/vector-hr.yaml | 314 ++++++++++++++++++ config/monitoring/kustomization.yaml | 1 - .../vector-metrics-gateway/kustomization.yaml | 7 + 7 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 config/components/vector-metrics-gateway/helmrepository.yaml create mode 100644 config/components/vector-metrics-gateway/kustomization.yaml create mode 100644 config/components/vector-metrics-gateway/vector-hr.yaml create mode 100644 config/overlays/vector-metrics-gateway/kustomization.yaml diff --git a/Makefile b/Makefile index 16da69a..ca8e8b4 100644 --- a/Makefile +++ b/Makefile @@ -262,6 +262,7 @@ bootstrap-downstream: ## Create kind downstream and deploy agent with embedded P CLUSTER=$(DOWNSTREAM_CLUSTER_NAME) OUT=dev/kind.downstream.kubeconfig $(MAKE) export-kind-kubeconfig-raw # Install monitoring stack into downstream CONTEXT=kind-$(DOWNSTREAM_CLUSTER_NAME) KUSTOMIZE_DIR=config/monitoring $(MAKE) kustomize-apply + 
CONTEXT=kind-$(DOWNSTREAM_CLUSTER_NAME) KUSTOMIZE_DIR=config/overlays/vector-metrics-gateway $(MAKE) kustomize-apply .PHONY: bootstrap-upstream bootstrap-upstream: ## Create kind upstream and deploy replicator pointing to downstream diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index e9245cb..c1a298b 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -286,7 +286,7 @@ spec: - /etc/vector/ env: - name: VECTOR_CENTRAL_ADDRESS - value: vector.dns-monitoring.svc:9000 + value: vector:9000 volumeMounts: - name: vector-config mountPath: /etc/vector/vector-config.yaml diff --git a/config/components/vector-metrics-gateway/helmrepository.yaml b/config/components/vector-metrics-gateway/helmrepository.yaml new file mode 100644 index 0000000..602bb21 --- /dev/null +++ b/config/components/vector-metrics-gateway/helmrepository.yaml @@ -0,0 +1,8 @@ +apiVersion: source.toolkit.fluxcd.io/v1 +kind: HelmRepository +metadata: + name: vector + namespace: dns-agent-system +spec: + interval: 1h + url: https://helm.vector.dev diff --git a/config/components/vector-metrics-gateway/kustomization.yaml b/config/components/vector-metrics-gateway/kustomization.yaml new file mode 100644 index 0000000..cf3a2a7 --- /dev/null +++ b/config/components/vector-metrics-gateway/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +resources: + - helmrepository.yaml + - vector-hr.yaml diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml new file mode 100644 index 0000000..d09b740 --- /dev/null +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -0,0 +1,314 @@ +--- +apiVersion: helm.toolkit.fluxcd.io/v2 +kind: HelmRelease +metadata: + name: vector-metrics-gateway + namespace: dns-agent-system +spec: + interval: 5m + timeout: 1m + + chart: + spec: + chart: vector + version: 0.49.x + sourceRef: + kind: HelmRepository + name: vector + 
namespace: dns-agent-system + interval: 1h + values: + fullnameOverride: vector + # Role: Aggregator (stateless, can run as Deployment) + role: Stateless-Aggregator + replicas: 2 + resources: + requests: + cpu: 200m + memory: 256Mi + limits: + cpu: 1000m + memory: 1Gi + podDisruptionBudget: + enabled: true + minAvailable: 1 + autoscaling: + enabled: true + minReplicas: 2 + maxReplicas: 5 + targetCPUUtilizationPercentage: 70 + targetMemoryUtilizationPercentage: 80 + service: + enabled: true + type: ClusterIP + ports: + - name: vector + port: 9000 + targetPort: vector + protocol: TCP + podMonitor: + enabled: true + port: internal-promet + + serviceAccount: + create: true + name: vector + + containerPorts: + - name: vector + containerPort: 9000 + protocol: TCP + + serviceHeadless: + enabled: false + + defaultVolumes: [] + defaultVolumeMounts: [] + + extraVolumes: + - name: vector-enrichment + emptyDir: {} + - name: vector-enrichment-scripts + configMap: + name: vector-enrichment-scripts + defaultMode: 0555 + - name: vector-data + emptyDir: {} + + extraVolumeMounts: + - name: vector-enrichment + mountPath: /etc/vector/enrichment + readOnly: true + - name: vector-data + mountPath: /var/lib/vector + + initContainers: + - name: init-enrichment-table + image: busybox:1.36 + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + set -eu + mkdir -p /enrichment/configmaps + # create a valid CSV with headers so Vector can compile VRL at startup + printf 'zone,project\n' > /enrichment/zone_project.csv + volumeMounts: + - name: vector-enrichment + mountPath: /enrichment + + extraContainers: + - name: zone-accounting-sidecar + image: kiwigrid/k8s-sidecar:1.30.9 + imagePullPolicy: IfNotPresent + env: + - name: RESOURCE + value: configmap + - name: NAMESPACE + value: datum-downstream-dnszone-accounting + - name: LABEL + value: datum.net/dnszone-accounting + - name: LABEL_VALUE + value: enabled + - name: FOLDER + value: /tmp/ + - name: UNIQUE_FILENAMES + value: "true" + - 
name: METHOD + value: WATCH + - name: SCRIPT + value: /scripts/build-zone-project.sh + volumeMounts: + - name: vector-enrichment + mountPath: /enrichment + - name: vector-enrichment-scripts + mountPath: /scripts + readOnly: true + + customConfig: + data_dir: /var/lib/vector + api: + enabled: false + enrichment_tables: + zone_project: + type: file + file: + path: /etc/vector/enrichment/zone_project.csv + encoding: + type: csv + delimiter: "," + schema: + zone: string + project: string + sources: + edge_metrics: + type: vector + address: 0.0.0.0:9000 + internal_metrics: + type: internal_metrics + namespace: vector + transforms: + add_project_tags: + type: remap + inputs: + - edge_metrics + source: | + # Add tags if missing + if !exists(.tags) { .tags = {} } + + # Get zone from metric tags + zone = to_string(.tags.zone) ?? null + + if zone != null { + + # Lookup project from enrichment table + record, err = get_enrichment_table_record("zone_project", {"zone": zone}) + + # Add project tag if found + if err == null && record != null && exists(record.project) { + .tags.project = to_string(record.project) ?? 
record.project + } + } + sinks: + victoria_metrics: + type: prometheus_remote_write + inputs: + - add_project_tags + endpoint: ${VICTORIA_METRICS_REMOTE_WRITE_URL} + batch: + max_bytes: 10485760 + max_events: 10000 + timeout_secs: 10 + buffer: + type: disk + max_size: 10737418240 + when_full: block + request: + retry_attempts: 9999999 + retry_initial_backoff_secs: 1 + retry_max_duration_secs: 300 + timeout_secs: 60 + healthcheck: + enabled: true + compression: gzip + + # Export metrics over prometheus + internal_prometheus: + type: prometheus_exporter + inputs: + - internal_metrics + address: 0.0.0.0:9091 + default_namespace: vector_internal + + extraObjects: + - apiVersion: v1 + kind: ConfigMap + metadata: + name: vector-enrichment-scripts + namespace: "{{ .Release.Namespace }}" + data: + build-zone-project.sh: | + #!/bin/sh + # Build zone_project.csv from datum-downstream-dnszone-accounting ConfigMaps. + # + # Input file example: + # namespace_.configmap_.owner (content: //<...>) + # + # Output CSV: + # zone,project + # , + + set -eu + + SRC_DIR="${ENRICHMENT_CONFIGMAP_DIR:-/tmp}" + OUT_FILE="${ENRICHMENT_OUTPUT_FILE:-/enrichment/zone_project.csv}" + + mkdir -p "$(dirname "$OUT_FILE")" + + tmp="$(mktemp)" + trap 'rm -f "$tmp"' EXIT + + # Always produce a valid CSV (header at minimum). + printf 'zone,project\n' > "$tmp" + + # Nothing to do if input directory doesn't exist. + [ -d "$SRC_DIR" ] || { mv "$tmp" "$OUT_FILE"; exit 0; } + + # Collect rows, then sort+dedupe for stable output. + rows="$(mktemp)" + trap 'rm -f "$tmp" "$rows"' EXIT + : > "$rows" + + for f in "$SRC_DIR"/*.owner; do + [ -f "$f" ] || continue + + base="$(basename "$f")" + # Expect: namespace_.configmap_.owner -> extract + zone="${base#*configmap_}" + zone="${zone%.owner}" + + # Skip if we can't infer zone. 
+ [ -n "$zone" ] && [ "$zone" != "$base" ] || continue + + owner="$(tr -d '\r\n' < "$f")" + owner="${owner#/}" # trim leading / + project="${owner%%/*}" # first segment + + [ -n "$project" ] || continue + + printf '%s,%s\n' "$zone" "$project" >> "$rows" + done + + # Append sorted unique rows. + if [ -s "$rows" ]; then + sort "$rows" | uniq >> "$tmp" + fi + + mv "$tmp" "$OUT_FILE" + - apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRole + metadata: + name: vector-configmap-reader + rules: + - apiGroups: [""] + resources: ["configmaps"] + verbs: ["get", "list", "watch"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + name: vector-configmap-reader + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: vector-configmap-reader + subjects: + - kind: ServiceAccount + name: vector + namespace: "{{ .Release.Namespace }}" + + env: + - name: VICTORIA_METRICS_REMOTE_WRITE_URL + value: http://prometheus.dns-monitoring.svc:9090/api/v1/write + - name: VECTOR_LOG + value: "info" + - name: VECTOR_LOG_FORMAT + value: "json" + + # Persistence for buffer + persistence: + enabled: true + storageClassName: "" # Use default storage class + accessModes: + - ReadWriteOnce + size: 10Gi + + install: + crds: Create + createNamespace: false + + upgrade: + crds: CreateReplace + + uninstall: + keepHistory: false diff --git a/config/monitoring/kustomization.yaml b/config/monitoring/kustomization.yaml index c3fdd34..7113c0c 100644 --- a/config/monitoring/kustomization.yaml +++ b/config/monitoring/kustomization.yaml @@ -8,4 +8,3 @@ resources: - grafana - prometheus - loki - - vector diff --git a/config/overlays/vector-metrics-gateway/kustomization.yaml b/config/overlays/vector-metrics-gateway/kustomization.yaml new file mode 100644 index 0000000..bdbc976 --- /dev/null +++ b/config/overlays/vector-metrics-gateway/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: 
dns-agent-system + +resources: + - ../../monitoring/vector From 02288c32f373aea626b90c41b01074536c408ec0 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 13 Jan 2026 18:11:57 -0600 Subject: [PATCH 16/33] feat: add HTTP basic authentication for Vector configuration --- config/agent/manager.yaml | 12 ++++++++++- config/agent/vector-config.yaml | 14 +++++++++++-- .../vector-metrics-gateway/vector-hr.yaml | 20 ++++++++++++++++++- .../vector/config/vector-config.yaml | 10 +++++++++- config/monitoring/vector/deployment.yaml | 10 ++++++++++ config/monitoring/vector/kustomization.yaml | 6 ++++++ 6 files changed, 67 insertions(+), 5 deletions(-) diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index c1a298b..b99c86c 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -286,7 +286,17 @@ spec: - /etc/vector/ env: - name: VECTOR_CENTRAL_ADDRESS - value: vector:9000 + value: http://vector:9000 + - name: VECTOR_HTTP_AUTH_USERNAME + valueFrom: + secretKeyRef: + name: vector-http-auth + key: username + - name: VECTOR_HTTP_AUTH_PASSWORD + valueFrom: + secretKeyRef: + name: vector-http-auth + key: password volumeMounts: - name: vector-config mountPath: /etc/vector/vector-config.yaml diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index 873d4a0..be6ff4d 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -137,10 +137,20 @@ sinks: - dnstap_metrics central_vector: - type: vector + type: http inputs: - dnstap_metrics - address: ${VECTOR_CENTRAL_ADDRESS} + uri: ${VECTOR_CENTRAL_ADDRESS} + encoding: + codec: native_json + framing: + method: newline_delimited + healthcheck: + enabled: true + auth: + strategy: basic + user: ${VECTOR_HTTP_AUTH_USERNAME} + password: ${VECTOR_HTTP_AUTH_PASSWORD} buffer: type: disk max_size: 268435488 diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index d09b740..d8c1fc6 100644 
--- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -143,8 +143,16 @@ spec: project: string sources: edge_metrics: - type: vector + type: http_server address: 0.0.0.0:9000 + decoding: + codec: native_json + framing: + method: newline_delimited + auth: + strategy: basic + username: ${VECTOR_HTTP_AUTH_USERNAME} + password: ${VECTOR_HTTP_AUTH_PASSWORD} internal_metrics: type: internal_metrics namespace: vector @@ -290,6 +298,16 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write + - name: VECTOR_HTTP_AUTH_USERNAME + valueFrom: + secretKeyRef: + name: vector-http-auth + key: username + - name: VECTOR_HTTP_AUTH_PASSWORD + valueFrom: + secretKeyRef: + name: vector-http-auth + key: password - name: VECTOR_LOG value: "info" - name: VECTOR_LOG_FORMAT diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml index 09694fd..bbc6fa3 100644 --- a/config/monitoring/vector/config/vector-config.yaml +++ b/config/monitoring/vector/config/vector-config.yaml @@ -17,8 +17,16 @@ enrichment_tables: sources: edge_metrics: - type: vector + type: http_server address: 0.0.0.0:9000 + decoding: + codec: native_json + framing: + method: newline_delimited + auth: + strategy: basic + username: ${VECTOR_HTTP_AUTH_USERNAME} + password: ${VECTOR_HTTP_AUTH_PASSWORD} transforms: add_project_tags: diff --git a/config/monitoring/vector/deployment.yaml b/config/monitoring/vector/deployment.yaml index 2ea15d8..a259920 100644 --- a/config/monitoring/vector/deployment.yaml +++ b/config/monitoring/vector/deployment.yaml @@ -43,6 +43,16 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write + - name: VECTOR_HTTP_AUTH_USERNAME + valueFrom: + secretKeyRef: + name: vector-http-auth + key: username + - name: VECTOR_HTTP_AUTH_PASSWORD + valueFrom: + 
secretKeyRef: + name: vector-http-auth + key: password ports: - containerPort: 9000 name: vector diff --git a/config/monitoring/vector/kustomization.yaml b/config/monitoring/vector/kustomization.yaml index 6f11db5..d85b39a 100644 --- a/config/monitoring/vector/kustomization.yaml +++ b/config/monitoring/vector/kustomization.yaml @@ -16,3 +16,9 @@ configMapGenerator: - name: vector-enrichment-scripts files: - scripts/build-zone-project.sh + +secretGenerator: + - name: vector-http-auth + literals: + - username=admin + - password=admin \ No newline at end of file From 7849f745c87cb0d6f21f2bd78559b3527d06cc67 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 13 Jan 2026 18:22:26 -0600 Subject: [PATCH 17/33] feat: add console sink for project tags --- config/monitoring/vector/config/vector-config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml index bbc6fa3..d115c86 100644 --- a/config/monitoring/vector/config/vector-config.yaml +++ b/config/monitoring/vector/config/vector-config.yaml @@ -52,6 +52,14 @@ transforms: } sinks: + console: + type: console + target: stdout + encoding: + codec: json + inputs: + - add_project_tags + victoria_metrics: type: prometheus_remote_write inputs: From c2cfd021288854b52912edbb39b1cbb5a3dd06db Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 13 Jan 2026 19:01:54 -0600 Subject: [PATCH 18/33] feat: update authentication configuration for Vector metrics gateway --- config/agent/manager.yaml | 11 +++-------- config/agent/vector-config.yaml | 6 +++--- .../vector-metrics-gateway/vector-hr.yaml | 13 ++++--------- config/monitoring/vector/config/vector-config.yaml | 4 ++-- config/monitoring/vector/deployment.yaml | 9 ++------- config/monitoring/vector/kustomization.yaml | 5 ++--- 6 files changed, 16 insertions(+), 32 deletions(-) diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index b99c86c..32eb3b7 100644 
--- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -285,17 +285,12 @@ spec: - --config-dir - /etc/vector/ env: - - name: VECTOR_CENTRAL_ADDRESS + - name: VECTOR_METRICS_GATEWAY_ADDRESS value: http://vector:9000 - - name: VECTOR_HTTP_AUTH_USERNAME + - name: VECTOR_METRICS_GATEWAY_PASSWORD valueFrom: secretKeyRef: - name: vector-http-auth - key: username - - name: VECTOR_HTTP_AUTH_PASSWORD - valueFrom: - secretKeyRef: - name: vector-http-auth + name: vector-metrics-gateway-password key: password volumeMounts: - name: vector-config diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index be6ff4d..c7bb768 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -140,7 +140,7 @@ sinks: type: http inputs: - dnstap_metrics - uri: ${VECTOR_CENTRAL_ADDRESS} + uri: ${VECTOR_METRICS_GATEWAY_ADDRESS} encoding: codec: native_json framing: @@ -149,8 +149,8 @@ sinks: enabled: true auth: strategy: basic - user: ${VECTOR_HTTP_AUTH_USERNAME} - password: ${VECTOR_HTTP_AUTH_PASSWORD} + user: vector + password: ${VECTOR_METRICS_GATEWAY_PASSWORD} buffer: type: disk max_size: 268435488 diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index d8c1fc6..c16fc04 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -151,8 +151,8 @@ spec: method: newline_delimited auth: strategy: basic - username: ${VECTOR_HTTP_AUTH_USERNAME} - password: ${VECTOR_HTTP_AUTH_PASSWORD} + username: vector + password: ${VECTOR_METRICS_GATEWAY_PASSWORD} internal_metrics: type: internal_metrics namespace: vector @@ -298,15 +298,10 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write - - name: VECTOR_HTTP_AUTH_USERNAME + - name: VECTOR_METRICS_GATEWAY_PASSWORD valueFrom: secretKeyRef: - name: vector-http-auth - key: username - - 
name: VECTOR_HTTP_AUTH_PASSWORD - valueFrom: - secretKeyRef: - name: vector-http-auth + name: vector-metrics-gateway-password key: password - name: VECTOR_LOG value: "info" diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml index d115c86..89a84dd 100644 --- a/config/monitoring/vector/config/vector-config.yaml +++ b/config/monitoring/vector/config/vector-config.yaml @@ -25,8 +25,8 @@ sources: method: newline_delimited auth: strategy: basic - username: ${VECTOR_HTTP_AUTH_USERNAME} - password: ${VECTOR_HTTP_AUTH_PASSWORD} + username: vector + password: ${VECTOR_METRICS_GATEWAY_PASSWORD} transforms: add_project_tags: diff --git a/config/monitoring/vector/deployment.yaml b/config/monitoring/vector/deployment.yaml index a259920..99226d0 100644 --- a/config/monitoring/vector/deployment.yaml +++ b/config/monitoring/vector/deployment.yaml @@ -43,15 +43,10 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write - - name: VECTOR_HTTP_AUTH_USERNAME + - name: VECTOR_METRICS_GATEWAY_PASSWORD valueFrom: secretKeyRef: - name: vector-http-auth - key: username - - name: VECTOR_HTTP_AUTH_PASSWORD - valueFrom: - secretKeyRef: - name: vector-http-auth + name: vector-metrics-gateway-password key: password ports: - containerPort: 9000 diff --git a/config/monitoring/vector/kustomization.yaml b/config/monitoring/vector/kustomization.yaml index d85b39a..3ab1c7d 100644 --- a/config/monitoring/vector/kustomization.yaml +++ b/config/monitoring/vector/kustomization.yaml @@ -18,7 +18,6 @@ configMapGenerator: - scripts/build-zone-project.sh secretGenerator: - - name: vector-http-auth + - name: vector-metrics-gateway-password literals: - - username=admin - - password=admin \ No newline at end of file + - password=password \ No newline at end of file From d073598747ce2ab72b326031682dbd79ef7b8205 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Mon, 19 Jan 2026 13:17:26 -0600 
Subject: [PATCH 19/33] chore: update vector configuration --- config/agent/manager.yaml | 7 +------ config/agent/vector-config.yaml | 12 ++---------- .../vector-metrics-gateway/vector-hr.yaml | 17 ++--------------- .../monitoring/vector/config/vector-config.yaml | 10 +--------- config/monitoring/vector/deployment.yaml | 7 +------ config/monitoring/vector/kustomization.yaml | 5 ----- .../controller/dnszone_replicator_controller.go | 2 +- 7 files changed, 8 insertions(+), 52 deletions(-) diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index 32eb3b7..1139f38 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -286,12 +286,7 @@ spec: - /etc/vector/ env: - name: VECTOR_METRICS_GATEWAY_ADDRESS - value: http://vector:9000 - - name: VECTOR_METRICS_GATEWAY_PASSWORD - valueFrom: - secretKeyRef: - name: vector-metrics-gateway-password - key: password + value: vector:9000 volumeMounts: - name: vector-config mountPath: /etc/vector/vector-config.yaml diff --git a/config/agent/vector-config.yaml b/config/agent/vector-config.yaml index c7bb768..aae9015 100644 --- a/config/agent/vector-config.yaml +++ b/config/agent/vector-config.yaml @@ -137,20 +137,12 @@ sinks: - dnstap_metrics central_vector: - type: http + type: vector inputs: - dnstap_metrics - uri: ${VECTOR_METRICS_GATEWAY_ADDRESS} - encoding: - codec: native_json - framing: - method: newline_delimited + address: ${VECTOR_METRICS_GATEWAY_ADDRESS} healthcheck: enabled: true - auth: - strategy: basic - user: vector - password: ${VECTOR_METRICS_GATEWAY_PASSWORD} buffer: type: disk max_size: 268435488 diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index c16fc04..0635cf5 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -110,7 +110,7 @@ spec: - name: LABEL value: datum.net/dnszone-accounting - name: LABEL_VALUE - value: enabled + 
value: "" - name: FOLDER value: /tmp/ - name: UNIQUE_FILENAMES @@ -143,16 +143,8 @@ spec: project: string sources: edge_metrics: - type: http_server + type: vector address: 0.0.0.0:9000 - decoding: - codec: native_json - framing: - method: newline_delimited - auth: - strategy: basic - username: vector - password: ${VECTOR_METRICS_GATEWAY_PASSWORD} internal_metrics: type: internal_metrics namespace: vector @@ -298,11 +290,6 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write - - name: VECTOR_METRICS_GATEWAY_PASSWORD - valueFrom: - secretKeyRef: - name: vector-metrics-gateway-password - key: password - name: VECTOR_LOG value: "info" - name: VECTOR_LOG_FORMAT diff --git a/config/monitoring/vector/config/vector-config.yaml b/config/monitoring/vector/config/vector-config.yaml index 89a84dd..999fca5 100644 --- a/config/monitoring/vector/config/vector-config.yaml +++ b/config/monitoring/vector/config/vector-config.yaml @@ -17,16 +17,8 @@ enrichment_tables: sources: edge_metrics: - type: http_server + type: vector address: 0.0.0.0:9000 - decoding: - codec: native_json - framing: - method: newline_delimited - auth: - strategy: basic - username: vector - password: ${VECTOR_METRICS_GATEWAY_PASSWORD} transforms: add_project_tags: diff --git a/config/monitoring/vector/deployment.yaml b/config/monitoring/vector/deployment.yaml index 99226d0..7e22751 100644 --- a/config/monitoring/vector/deployment.yaml +++ b/config/monitoring/vector/deployment.yaml @@ -43,11 +43,6 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL value: http://prometheus.dns-monitoring.svc:9090/api/v1/write - - name: VECTOR_METRICS_GATEWAY_PASSWORD - valueFrom: - secretKeyRef: - name: vector-metrics-gateway-password - key: password ports: - containerPort: 9000 name: vector @@ -81,7 +76,7 @@ spec: - name: LABEL value: datum.net/dnszone-accounting - name: LABEL_VALUE - value: enabled + value: "" - name: FOLDER value: /tmp/ - name: 
UNIQUE_FILENAMES diff --git a/config/monitoring/vector/kustomization.yaml b/config/monitoring/vector/kustomization.yaml index 3ab1c7d..6f11db5 100644 --- a/config/monitoring/vector/kustomization.yaml +++ b/config/monitoring/vector/kustomization.yaml @@ -16,8 +16,3 @@ configMapGenerator: - name: vector-enrichment-scripts files: - scripts/build-zone-project.sh - -secretGenerator: - - name: vector-metrics-gateway-password - literals: - - password=password \ No newline at end of file diff --git a/internal/controller/dnszone_replicator_controller.go b/internal/controller/dnszone_replicator_controller.go index 615971a..a89c6d0 100644 --- a/internal/controller/dnszone_replicator_controller.go +++ b/internal/controller/dnszone_replicator_controller.go @@ -324,7 +324,7 @@ func (r *DNSZoneReplicator) ensureZoneAccounting(ctx context.Context, upstream * "owner": owner, } newCM.Labels = map[string]string{ - "datum.net/dnszone-accounting": "enabled", + "datum.net/dnszone-owner": owner, } if cerr := r.DownstreamClient.Create(ctx, &newCM); cerr != nil { // A race can occur; if created by another, treat as not owned and let next reconcile decide From 7ba5d6212be1618e8314ef2e1a0853035a982e73 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 20 Jan 2026 19:44:51 -0600 Subject: [PATCH 20/33] chore: update vector-hr configuration --- config/components/vector-metrics-gateway/vector-hr.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index 0635cf5..a0203b8 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -72,15 +72,11 @@ spec: configMap: name: vector-enrichment-scripts defaultMode: 0555 - - name: vector-data - emptyDir: {} extraVolumeMounts: - name: vector-enrichment mountPath: /etc/vector/enrichment readOnly: true - - name: vector-data - mountPath: /var/lib/vector initContainers: 
- name: init-enrichment-table From 0a934a83427963b9f5cf411d0eca3f848608864f Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 20 Jan 2026 20:55:05 -0600 Subject: [PATCH 21/33] chore: update vector-hr configuration --- .../components/vector-metrics-gateway/vector-hr.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index a0203b8..37df0df 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -172,10 +172,10 @@ spec: inputs: - add_project_tags endpoint: ${VICTORIA_METRICS_REMOTE_WRITE_URL} - batch: - max_bytes: 10485760 - max_events: 10000 - timeout_secs: 10 +# batch: +# max_bytes: 10485760 +# max_events: 10000 +# timeout_secs: 10 buffer: type: disk max_size: 10737418240 @@ -187,7 +187,7 @@ spec: timeout_secs: 60 healthcheck: enabled: true - compression: gzip +# compression: gzip # Export metrics over prometheus internal_prometheus: From a642c242c1fe1d7c60de447386c4ddd14e0bf1be Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 20 Jan 2026 21:03:42 -0600 Subject: [PATCH 22/33] feat: update fullnameOverride for vector metrics gateway --- config/components/vector-metrics-gateway/vector-hr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index 37df0df..9e63f76 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -18,7 +18,7 @@ spec: namespace: dns-agent-system interval: 1h values: - fullnameOverride: vector + fullnameOverride: vector-metrics-gateway # Role: Aggregator (stateless, can run as Deployment) role: Stateless-Aggregator replicas: 2 From f653b2ba5c7c1649599e26c24b5ee073e4de5869 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 
20 Jan 2026 22:02:53 -0600 Subject: [PATCH 23/33] feat: update resolver port for ALIAS expansion --- config/agent/pdns.conf | 3 +-- config/agent/recursor.conf | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/config/agent/pdns.conf b/config/agent/pdns.conf index 278d685..fa04b6f 100644 --- a/config/agent/pdns.conf +++ b/config/agent/pdns.conf @@ -23,7 +23,7 @@ api=yes # api-key will be passed via CLI using /run/pdns/api-key # Required for ALIAS expansion: point to an in-pod resolver/recursor. -resolver=127.0.0.1:5300 +resolver=127.0.0.1:5301 expand-alias=yes zone-cache-refresh-interval=0 @@ -37,4 +37,3 @@ lmdb-random-ids=yes lmdb-flag-deleted=yes lmdb-map-size=1000 lmdb-lightning-stream=yes - diff --git a/config/agent/recursor.conf b/config/agent/recursor.conf index 182f4cd..3f0670e 100644 --- a/config/agent/recursor.conf +++ b/config/agent/recursor.conf @@ -14,8 +14,8 @@ recursor: incoming: # Only serve recursion to the local pod; auth will use this for ALIAS expansion. 
listen: - - "127.0.0.1:5300" - - "[::1]:5300" + - "127.0.0.1:5301" + - "[::1]:5301" allow_from: - "127.0.0.1/32" - "::1/128" From 1b0b4222042937dc9b8f24987c02c8c308209909 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 20 Jan 2026 23:35:42 -0600 Subject: [PATCH 24/33] feat: add console sink for project tags in vector-hr configuration --- config/components/vector-metrics-gateway/vector-hr.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index 9e63f76..dd4e660 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -167,6 +167,15 @@ spec: } } sinks: + + console: + type: console + target: stdout + encoding: + codec: json + inputs: + - add_project_tags + victoria_metrics: type: prometheus_remote_write inputs: From 60b00ee3dbb3270d25f3c5acf84de8fa48fa8acb Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 20 Jan 2026 23:45:59 -0600 Subject: [PATCH 25/33] feat: update VECTOR_METRICS_GATEWAY_ADDRESS --- config/agent/manager.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/agent/manager.yaml b/config/agent/manager.yaml index b2d1ac4..1d7ed5d 100644 --- a/config/agent/manager.yaml +++ b/config/agent/manager.yaml @@ -325,7 +325,7 @@ spec: - /etc/vector/ env: - name: VECTOR_METRICS_GATEWAY_ADDRESS - value: vector:9000 + value: vector-metrics-gateway:9000 volumeMounts: - name: vector-config mountPath: /etc/vector/vector-config.yaml From 01d9de74cb205d0bb03e2248c5464c0dab2ffffb Mon Sep 17 00:00:00 2001 From: cc-datum Date: Wed, 21 Jan 2026 00:46:35 -0600 Subject: [PATCH 26/33] feat: add TLS container port for vector service --- config/components/vector-metrics-gateway/vector-hr.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml 
b/config/components/vector-metrics-gateway/vector-hr.yaml index dd4e660..5ad7885 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -58,6 +58,9 @@ spec: - name: vector containerPort: 9000 protocol: TCP + - name: vector-tls + containerPort: 9443 + protocol: TCP serviceHeadless: enabled: false From 8d154349613ea83a8547f659cf688b7669c5ab6a Mon Sep 17 00:00:00 2001 From: cc-datum Date: Wed, 21 Jan 2026 01:01:28 -0600 Subject: [PATCH 27/33] feat: remove TLS container port configuration for vector service --- config/components/vector-metrics-gateway/vector-hr.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index 5ad7885..dd4e660 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -58,9 +58,6 @@ spec: - name: vector containerPort: 9000 protocol: TCP - - name: vector-tls - containerPort: 9443 - protocol: TCP serviceHeadless: enabled: false From 427f0574b65db7a1024c2f78d3ce7deaf4558355 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Wed, 21 Jan 2026 02:02:10 -0600 Subject: [PATCH 28/33] feat: update batch configuration and timeout settings in vector-hr --- .../components/vector-metrics-gateway/vector-hr.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index dd4e660..e338831 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -181,10 +181,10 @@ spec: inputs: - add_project_tags endpoint: ${VICTORIA_METRICS_REMOTE_WRITE_URL} -# batch: -# max_bytes: 10485760 -# max_events: 10000 -# timeout_secs: 10 + batch: + max_bytes: 1048576 + max_events: 2000 + timeout_secs: 10 
buffer: type: disk max_size: 10737418240 @@ -193,10 +193,10 @@ spec: retry_attempts: 9999999 retry_initial_backoff_secs: 1 retry_max_duration_secs: 300 - timeout_secs: 60 + timeout_secs: 120 healthcheck: enabled: true -# compression: gzip + compression: gzip # Export metrics over prometheus internal_prometheus: From 2dba126bcf729f6c4541517dd5f458191b3771c0 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Wed, 21 Jan 2026 02:29:53 -0600 Subject: [PATCH 29/33] feat: update batch configuration and timeout settings in vector-hr --- config/components/vector-metrics-gateway/vector-hr.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index e338831..bd376c5 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -182,9 +182,9 @@ spec: - add_project_tags endpoint: ${VICTORIA_METRICS_REMOTE_WRITE_URL} batch: - max_bytes: 1048576 - max_events: 2000 - timeout_secs: 10 + max_bytes: 262144 + max_events: 500 + timeout_secs: 5 buffer: type: disk max_size: 10737418240 @@ -193,7 +193,7 @@ spec: retry_attempts: 9999999 retry_initial_backoff_secs: 1 retry_max_duration_secs: 300 - timeout_secs: 120 + timeout_secs: 300 healthcheck: enabled: true compression: gzip From 6f36474ee80f51d39de2854c2d5b852579b571ad Mon Sep 17 00:00:00 2001 From: cc-datum Date: Mon, 26 Jan 2026 20:39:18 -0600 Subject: [PATCH 30/33] feat: change compression method from gzip to snappy in vector-hr configuration --- config/components/vector-metrics-gateway/vector-hr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index bd376c5..78f2c99 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml 
@@ -196,7 +196,7 @@ spec: timeout_secs: 300 healthcheck: enabled: true - compression: gzip + compression: snappy # Export metrics over prometheus internal_prometheus: From 1b50144b09334d12cefedaed4da568730707861a Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 27 Jan 2026 23:18:53 -0600 Subject: [PATCH 31/33] feat: update label for dnszone in vector-hr configuration --- config/components/vector-metrics-gateway/vector-hr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index 78f2c99..750cada 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -104,7 +104,7 @@ spec: - name: NAMESPACE value: datum-downstream-dnszone-accounting - name: LABEL - value: datum.net/dnszone-accounting + value: datum.net/dnszone-owner - name: LABEL_VALUE value: "" - name: FOLDER From 05f935b0161f17bc8e60e11ac892b8277d2533f5 Mon Sep 17 00:00:00 2001 From: cc-datum Date: Tue, 27 Jan 2026 23:40:58 -0600 Subject: [PATCH 32/33] feat: update vector configuration with image details --- .../vector-metrics-gateway/vector-hr.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/config/components/vector-metrics-gateway/vector-hr.yaml b/config/components/vector-metrics-gateway/vector-hr.yaml index 750cada..634d1a5 100644 --- a/config/components/vector-metrics-gateway/vector-hr.yaml +++ b/config/components/vector-metrics-gateway/vector-hr.yaml @@ -22,6 +22,16 @@ spec: # Role: Aggregator (stateless, can run as Deployment) role: Stateless-Aggregator replicas: 2 + image: + repository: timberio/vector + tag: 0.52.0-distroless-libc + pullPolicy: IfNotPresent + args: + - --log-format=json + - --verbose + - --watch-config + - --config-dir + - /etc/vector/ resources: requests: cpu: 200m @@ -295,10 +305,6 @@ spec: env: - name: VICTORIA_METRICS_REMOTE_WRITE_URL 
value: http://prometheus.dns-monitoring.svc:9090/api/v1/write - - name: VECTOR_LOG - value: "info" - - name: VECTOR_LOG_FORMAT - value: "json" # Persistence for buffer persistence: From 79309465da28ff867eb5b2279fe375e7cab24046 Mon Sep 17 00:00:00 2001 From: Zach Smith Date: Wed, 4 Feb 2026 11:12:10 -0800 Subject: [PATCH 33/33] fix: listener addresses --- config/agent/dnsdist-config.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/agent/dnsdist-config.yaml b/config/agent/dnsdist-config.yaml index 8adfcda..bfb472f 100644 --- a/config/agent/dnsdist-config.yaml +++ b/config/agent/dnsdist-config.yaml @@ -1,5 +1,7 @@ +# TODO(cristhian): Make sure we block api access from outside the cluster. webserver: - listen_address: 0.0.0.0:8083 + listen_addresses: + - "0.0.0.0:8083" password: "" api_key: "" acl: