From 6d5799e637dd1e5c6dc5d03ac6f0d8daa13f4a68 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 10:06:26 -0600 Subject: [PATCH 01/32] feat(grafana): improve Running Version panel display in Posit Team Overview dashboard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Grafana transformations and field overrides to the Running Version panel to improve readability and presentation: - Hide unnecessary "Value" and "Time" columns using organize transformation - Rename columns to user-friendly names (Site, Product, Version, Cluster) - Set logical column order (Site → Product → Version → Cluster) - Configure appropriate column widths (Site: 200px, Product: 150px, Version: 120px, Cluster: 150px) This change is presentation-only and does not modify the underlying Prometheus query. The dashboard will now display a cleaner, more operator-friendly table view. Co-Authored-By: Claude Sonnet 4.5 --- .../posit_team_overview.json | 235 ++++++++++++++++++ 1 file changed, 235 insertions(+) create mode 100644 python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json new file mode 100644 index 0000000..dc0628d --- /dev/null +++ b/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json @@ -0,0 +1,235 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 48, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [], + "title": "Workbench", + "type": "row" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Site" + }, + "properties": [ + { + "id": "custom.width", + "value": 200 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Product" + }, + "properties": [ + { + "id": "custom.width", + "value": 150 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Version" + }, + "properties": [ + { + "id": "custom.width", + "value": 120 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Cluster" + }, + "properties": [ + { + "id": "custom.width", + "value": 150 + } + ] + } + ] + }, + "gridPos": { + "h": 14, + "w": 13, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": [], + "reducer": [ + "sum" + ], + "show": false + }, + "frameIndex": 2, + "showHeader": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "exemplar": false, + "expr": "sum by(release_name, version, cluster, ptd_site) (last_over_time(pwb_build_info{cluster=~\"$cluster_name\"}[10m]))", + "format": "table", + "fullMetaSearch": false, + "includeNullMetadata": true, + "instant": true, + "legendFormat": "{{ptd_site}}: {{release_name}} ({{version}})", + "range": false, + "refId": "A", + "useBackend": false + } + ], + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Value": true, + "Time": true + }, + "indexByName": { + "ptd_site": 0, + "release_name": 1, + "version": 2, + "cluster": 3 + }, + 
"renameByName": { + "ptd_site": "Site", + "release_name": "Product", + "version": "Version", + "cluster": "Cluster" + } + } + } + ], + "title": "Running Version", + "type": "table" + } + ], + "refresh": "", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default_duplicado03-staging-20250411-control-plane", + "value": "default_duplicado03-staging-20250411-control-plane" + }, + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "definition": "label_values(kube_node_info,cluster)", + "description": "", + "hide": 0, + "includeAll": true, + "label": "Cluster", + "multi": false, + "name": "cluster_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(kube_node_info,cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Posit Team Overview", + "uid": "", + "version": 1, + "weekStart": "" +} \ No newline at end of file From 404be07c018997f5165171009b0393c6c81f6b9c Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 10:14:05 -0600 Subject: [PATCH 02/32] fix(grafana): correct dashboard provisioning settings for posit_team_overview Fix three provisioning issues in the Posit Team Overview dashboard: 1. Set dashboard ID to null (was hardcoded to 48) to prevent import conflicts across different Grafana instances. Grafana auto-assigns IDs on import. 2. Add meaningful UID "posit_team_overview" (was empty string) for programmatic references and consistency with other dashboards. 3. Default cluster_name template variable to "All" (was hardcoded test cluster "default_duplicado03-staging-20250411-control-plane") to prevent exposure of internal cluster names and provide better default behavior. 
These changes align the dashboard with provisioning best practices used in other dashboards (alerts_dashboard.json, k8s-views-global.json). Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit_team_overview.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json index dc0628d..81341ea 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 48, + "id": null, "links": [], "liveNow": false, "panels": [ @@ -194,8 +194,8 @@ { "current": { "selected": true, - "text": "default_duplicado03-staging-20250411-control-plane", - "value": "default_duplicado03-staging-20250411-control-plane" + "text": "All", + "value": "$__all" }, "datasource": { "type": "prometheus", @@ -229,7 +229,7 @@ "timepicker": {}, "timezone": "", "title": "Posit Team Overview", - "uid": "", + "uid": "posit_team_overview", "version": 1, "weekStart": "" } \ No newline at end of file From 5abff7980f5e587d072b4efed610ea5ffd67c5f9 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 10:15:27 -0600 Subject: [PATCH 03/32] Rename file --- .../{posit_team_overview.json => posit-team-overview.json} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python-pulumi/src/ptd/grafana_dashboards/{posit_team_overview.json => posit-team-overview.json} (100%) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json similarity index 100% rename from python-pulumi/src/ptd/grafana_dashboards/posit_team_overview.json rename to python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json From 4d3354e775ee5aa94b9bca92a0cf145cb184f570 Mon Sep 17 00:00:00 2001 
From: Tim Margheim Date: Tue, 10 Mar 2026 10:22:05 -0600 Subject: [PATCH 04/32] Simplify table --- .../posit-team-overview.json | 73 ++++--------------- 1 file changed, 13 insertions(+), 60 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 81341ea..b00584d 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -50,6 +50,7 @@ "cellOptions": { "type": "auto" }, + "filterable": false, "inspect": false }, "mappings": [], @@ -67,60 +68,11 @@ ] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "Site" - }, - "properties": [ - { - "id": "custom.width", - "value": 200 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Product" - }, - "properties": [ - { - "id": "custom.width", - "value": 150 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Version" - }, - "properties": [ - { - "id": "custom.width", - "value": 120 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "Cluster" - }, - "properties": [ - { - "id": "custom.width", - "value": 150 - } - ] - } - ] + "overrides": [] }, "gridPos": { "h": 14, - "w": 13, + "w": 10, "x": 0, "y": 1 }, @@ -129,6 +81,7 @@ "cellHeight": "sm", "footer": { "countRows": false, + "enablePagination": true, "fields": [], "reducer": [ "sum" @@ -148,7 +101,7 @@ "disableTextWrap": false, "editorMode": "builder", "exemplar": false, - "expr": "sum by(release_name, version, cluster, ptd_site) (last_over_time(pwb_build_info{cluster=~\"$cluster_name\"}[10m]))", + "expr": "sum by(release_name, version, ptd_site) (last_over_time(pwb_build_info{cluster=~\"$cluster_name\"}[10m]))", "format": "table", "fullMetaSearch": false, "includeNullMetadata": true, @@ -159,30 +112,30 @@ "useBackend": false } ], + "title": "Running Version", "transformations": [ { "id": "organize", "options": { 
"excludeByName": { - "Value": true, - "Time": true + "Time": true, + "Value": true }, "indexByName": { + "cluster": 3, "ptd_site": 0, "release_name": 1, - "version": 2, - "cluster": 3 + "version": 2 }, "renameByName": { + "cluster": "Cluster", "ptd_site": "Site", "release_name": "Product", - "version": "Version", - "cluster": "Cluster" + "version": "Version" } } } ], - "title": "Running Version", "type": "table" } ], @@ -230,6 +183,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit_team_overview", - "version": 1, + "version": 2, "weekStart": "" } \ No newline at end of file From 5539a594daa9fdb9dfbb45b42851543471ee3f34 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 10:32:34 -0600 Subject: [PATCH 05/32] fix(grafana): remove orphaned cluster references from posit-team-overview transformations The PromQL query at line 104 groups by (release_name, version, ptd_site) and does not include cluster, but the transformation configuration still referenced cluster in both indexByName and renameByName. This mismatch would cause the Cluster column to be missing or empty in the rendered table. Removed orphaned references: - Deleted "cluster": 3 from indexByName - Deleted "cluster": "Cluster" from renameByName The transformation now correctly matches the query output, displaying only Site, Product, and Version columns. 
Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index b00584d..7cd47cd 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -122,13 +122,11 @@ "Value": true }, "indexByName": { - "cluster": 3, "ptd_site": 0, "release_name": 1, "version": 2 }, "renameByName": { - "cluster": "Cluster", "ptd_site": "Site", "release_name": "Product", "version": "Version" From 7ae773235f53a4566972e118b243aeda247b9763 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 11:20:59 -0600 Subject: [PATCH 06/32] fix(grafana): handle division by zero and fix labels in License Consumption gauge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed two display issues in the "License Consumption by Site" gauge panel: 1. Division by Zero: Added value mappings to display "No Limit" (blue) for infinite values when license_seats is 0 (unlimited licenses), and "No Data" for NaN values when metrics are unavailable. This replaces the confusing infinity symbol (∞) with clear text. 2. Gauge Labels: Added field override using byName matcher to display only the site name (e.g., "site1") instead of showing the expression reference prefix (e.g., "C site1"). 
Changes to posit-team-overview.json: - Added special value mappings in fieldConfig.defaults.mappings for inf and null+nan - Replaced byValue override with byName override targeting expression "C" - Set displayName to ${__field.labels.ptd_site} to extract only the site label Co-Authored-By: Claude Sonnet 4.5 --- .../posit-team-overview.json | 146 +++++++++++++++++- 1 file changed, 142 insertions(+), 4 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 7cd47cd..0a3b094 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -18,17 +18,155 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, + "id": 49, "links": [], "liveNow": false, "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "type": "special", + "options": { + "match": "null+nan", + "result": { + "text": "No Data", + "color": "text" + } + } + }, + { + "type": "special", + "options": { + "match": "inf", + "result": { + "text": "No Limit", + "color": "blue" + } + } + } + ], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "C" + }, + "properties": [ + { + "id": "displayName", + "value": "${__field.labels.ptd_site}" + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 3, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + 
"showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "name": "Expression", + "type": "__expr__", + "uid": "__expr__" + }, + "expression": "($A / $B) * 100", + "hide": false, + "refId": "C", + "type": "math" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max by(ptd_site) (last_over_time(pwb_license_active_users{cluster=\"$cluster_name\"}[$__interval]))", + "fullMetaSearch": false, + "hide": true, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max by(ptd_site) (last_over_time(pwb_license_user_seats{cluster=\"$cluster_name\"}[$__interval]))", + "fullMetaSearch": false, + "hide": true, + "includeNullMetadata": true, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "License Consumption by Site", + "type": "gauge" + }, { "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 0 + "y": 8 }, "id": 2, "panels": [], @@ -74,7 +212,7 @@ "h": 14, "w": 10, "x": 0, - "y": 1 + "y": 9 }, "id": 1, "options": { @@ -181,6 +319,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit_team_overview", - "version": 2, + "version": 6, "weekStart": "" } \ No newline at end of file From f18abf81baa00622114f88d6fb45779e8b1bb12a Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 11:28:46 -0600 Subject: [PATCH 07/32] fix(grafana): - update dashboard ID to null - correct UID format in Posit Team Overview - correct infinity value mapping case --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git 
a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 0a3b094..b929d8a 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -18,7 +18,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 49, + "id": null, "links": [], "liveNow": false, "panels": [ @@ -44,10 +44,9 @@ } }, { - "type": "special", + "type": "value", "options": { - "match": "inf", - "result": { + "Infinity": { "text": "No Limit", "color": "blue" } @@ -318,7 +317,7 @@ "timepicker": {}, "timezone": "", "title": "Posit Team Overview", - "uid": "posit_team_overview", + "uid": "posit-team-overview", "version": 6, "weekStart": "" } \ No newline at end of file From 74a696494f7d6e87b52d26e001a49a721fff07be Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Tue, 10 Mar 2026 15:23:23 -0600 Subject: [PATCH 08/32] feat(grafana): add cluster filter to all Posit Team Overview dashboard panels Added cluster=~"$cluster_name" filter to all non-library panel queries to enable per-cluster filtering in the Posit Team Overview dashboard. This allows users to view metrics for specific clusters or all clusters using the dashboard variable. 
Updated panels: - Avg session start time (24h) - Active IDE sessions - Active user sessions - Registered users - Licensed users - License expires - Build info (both queries) - Requests/min(1m) - Avg resp secs (1m) - Response size /min (kb) - Request time quantiles (all 4 histogram queries) - Active sessions Co-Authored-By: Claude Sonnet 4.5 --- .../posit-team-overview.json | 1044 ++++++++++++++--- 1 file changed, 907 insertions(+), 137 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index b929d8a..327e44d 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -27,76 +27,630 @@ "type": "prometheus", "uid": "mimir" }, + "description": "", "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "mappings": [ - { - "type": "special", - "options": { - "match": "null+nan", - "result": { - "text": "No Data", - "color": "text" - } + "mappings": [], + "max": 150, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 30 + }, + { + "color": "red", + "value": 120 } - }, - { - "type": "value", - "options": { - "Infinity": { - "text": "No Limit", - "color": "blue" - } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 24, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum by (type) (rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\"}[24h] @ end())) /\nsum by (type) 
(rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\"}[24h] @ end()))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Avg session start time (24h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 1000, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 500 + }, + { + "color": "red", + "value": 750 } - } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 20, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "pwb_sessions{cluster=~\"$cluster_name\",status=\"Running\"}", + "instant": true, + "interval": "", + "legendFormat": "{{type}}", + "range": false, + "refId": "A" + } + ], + "title": "Active IDE sessions", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 32, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.2.2", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "pwb_active_user_sessions{cluster=~\"$cluster_name\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Active user sessions", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "The number of unlocked users that have logged in to workbench. ", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 18, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "pwb_license_active_users{cluster=~\"$cluster_name\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Registered users", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 26, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.2.2", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "pwb_license_user_seats{cluster=~\"$cluster_name\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Licensed users", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeAsLocal" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 30, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 12 + }, + "textMode": "auto" + }, + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "pwb_license_expiry{cluster=~\"$cluster_name\"}*1000", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "License expires", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 21, + "y": 2 + }, + "id": 16, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "titleSize": 
2, + "valueSize": 12 + }, + "textMode": "name" + }, + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "pwb_build_info{cluster=~\"$cluster_name\"}", + "instant": true, + "legendFormat": "{{release_name}}", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "pwb_build_info{cluster=~\"$cluster_name\"}", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "{{version}}", + "range": false, + "refId": "B" + } + ], + "title": "Build info", + "type": "stat" + }, + { + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 6, + "libraryPanel": { + "uid": "VF2ADjw4z", + "name": "Requests/min" + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "max": 50000, + "min": 0, "thresholds": { - "mode": "percentage", + "mode": "absolute", "steps": [ { "color": "green", "value": null }, { - "color": "yellow", - "value": 80 + "color": "#EAB839", + "value": 20000 }, { - "color": "dark-red", - "value": 90 + "color": "red", + "value": 40000 } ] - }, - "unit": "percent" + } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "C" - }, - "properties": [ + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 4 + }, + "id": 28, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum without(code,uri,method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\"}[1m]))", + "legendFormat": 
"__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests/min(1m)", + "type": "gauge" + }, + { + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 4 + }, + "id": 2, + "libraryPanel": { + "uid": "3OZDDjQVz", + "name": "Avg request duration (secs)" + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Average response time in latency for all requests over the last 5 minutes.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "max": 2, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ { - "id": "displayName", - "value": "${__field.labels.ptd_site}" + "color": "green", + "value": null + }, + { + "color": "#EAB839", + "value": 0.5 + }, + { + "color": "red", + "value": 1.5 } ] } - ] + }, + "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 0 + "h": 3, + "w": 3, + "x": 12, + "y": 7 }, - "id": 3, + "id": 22, "options": { - "minVizHeight": 75, - "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -108,88 +662,308 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "10.2.2", + "pluginVersion": "9.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum without (code,method,uri)(rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\"}[1m] @ end())) / \nsum without (code,method,uri)(rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\"}[1m] @ end()))\n\n", + "format": "time_series", + "instant": true, + "interval": "", + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Avg resp secs (1m)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + 
"axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "increase(pwb_http_response_size_bytes_total{cluster=~\"$cluster_name\"}[1m]) / 1024", + "interval": "", + "legendFormat": "{{uri}}", + "range": true, + "refId": "A" + } + ], + "title": "Response size /min (kb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "Shows the response time threshold that partitions 99.9%, 99%, 95%, and 50% of the requests, to understand SLA agreements and how response times are distributed.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 12, + "y": 10 + }, + "id": 14, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, "targets": [ { "datasource": { - "name": "Expression", - "type": "__expr__", - "uid": "__expr__" + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.999, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "interval": "", + "legendFormat": "99.9%", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" }, - "expression": "($A / $B) * 100", + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", "hide": false, - "refId": "C", - "type": "math" + "interval": "", + "legendFormat": "99%", + "range": true, + "refId": "B" }, { "datasource": { "type": "prometheus", "uid": "mimir" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "max by(ptd_site) (last_over_time(pwb_license_active_users{cluster=\"$cluster_name\"}[$__interval]))", - "fullMetaSearch": false, - "hide": true, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "hide": false, + "interval": "", + "legendFormat": "95%", "range": true, - "refId": 
"A", - "useBackend": false + "refId": "C" }, { "datasource": { "type": "prometheus", "uid": "mimir" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "max by(ptd_site) (last_over_time(pwb_license_user_seats{cluster=\"$cluster_name\"}[$__interval]))", - "fullMetaSearch": false, - "hide": true, - "includeNullMetadata": true, - "instant": false, - "legendFormat": "__auto", + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "hide": false, + "interval": "", + "legendFormat": "50%", "range": true, - "refId": "B", - "useBackend": false + "refId": "D" } ], - "title": "License Consumption by Site", - "type": "gauge" + "title": "Request time quantiles (1m)", + "type": "timeseries" }, { - "collapsed": false, "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 + "h": 6, + "w": 5, + "x": 19, + "y": 10 }, - "id": 2, - "panels": [], - "title": "Workbench", - "type": "row" + "id": 10, + "libraryPanel": { + "uid": "OZ7ovCw4k", + "name": "Requests/handlers in progress" + } }, { "datasource": { "type": "prometheus", "uid": "mimir" }, + "description": "Includes running,, pending, starting", "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, "custom": { - "align": "auto", - "cellOptions": { - "type": "auto" + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "filterable": false, - "inspect": false + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, + "decimals": 0, "mappings": [], "thresholds": { 
"mode": "absolute", @@ -208,70 +982,66 @@ "overrides": [] }, "gridPos": { - "h": 14, - "w": 10, + "h": 5, + "w": 12, "x": 0, - "y": 9 + "y": 16 }, - "id": 1, + "id": 12, "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "enablePagination": true, - "fields": [], - "reducer": [ - "sum" - ], - "show": false + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "frameIndex": 2, - "showHeader": true + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "pluginVersion": "10.2.2", "targets": [ { "datasource": { "type": "prometheus", "uid": "mimir" }, - "disableTextWrap": false, - "editorMode": "builder", - "exemplar": false, - "expr": "sum by(release_name, version, ptd_site) (last_over_time(pwb_build_info{cluster=~\"$cluster_name\"}[10m]))", - "format": "table", - "fullMetaSearch": false, - "includeNullMetadata": true, - "instant": true, - "legendFormat": "{{ptd_site}}: {{release_name}} ({{version}})", - "range": false, - "refId": "A", - "useBackend": false - } - ], - "title": "Running Version", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true - }, - "indexByName": { - "ptd_site": 0, - "release_name": 1, - "version": 2 - }, - "renameByName": { - "ptd_site": "Site", - "release_name": "Product", - "version": "Version" - } - } + "editorMode": "code", + "expr": "pwb_sessions{cluster=~\"$cluster_name\",status=~\"Running|Pending|Starting\"}", + "interval": "", + "legendFormat": "{{type}} - {{cluster}}", + "range": true, + "refId": "A" } ], - "type": "table" + "title": "Active sessions", + "type": "timeseries" + }, + { + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 16 + }, + "id": 4, + "libraryPanel": { + "uid": "C7lvvCQ4z", + "name": "Session start secs" + } + }, + { + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 16 + }, + "id": 8, + "libraryPanel": { + "uid": "oSsbDjw4k", + "name": "Session start to join secs" + } } ], 
"refresh": "", From 83e18275e28e71fa9081f2a54d1229ef3bf3b1d3 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 08:59:20 -0600 Subject: [PATCH 09/32] Unlink panels --- .../posit-team-overview.json | 581 ++++++++++++++++-- 1 file changed, 546 insertions(+), 35 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 327e44d..a5532b5 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -18,7 +18,6 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": null, "links": [], "liveNow": false, "panels": [ @@ -64,6 +63,8 @@ }, "id": 24, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -75,7 +76,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -133,6 +134,8 @@ }, "id": 20, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -144,7 +147,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -200,6 +203,8 @@ }, "id": 32, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -211,7 +216,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -266,6 +271,8 @@ }, "id": 18, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -277,7 +284,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ 
-330,6 +337,8 @@ }, "id": 26, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -341,7 +350,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -405,9 +414,10 @@ "text": { "valueSize": 12 }, - "textMode": "auto" + "textMode": "auto", + "wideLayout": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -477,9 +487,10 @@ "titleSize": 2, "valueSize": 12 }, - "textMode": "name" + "textMode": "name", + "wideLayout": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -514,6 +525,65 @@ "type": "stat" }, { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "gridPos": { "h": 6, "w": 12, @@ -521,10 +591,40 @@ "y": 4 }, "id": 6, - "libraryPanel": { - "uid": "VF2ADjw4z", - "name": "Requests/min" - } + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + 
"placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum without(code, method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\"}[1m]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "{{uri}} ", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Requests/minute", + "type": "timeseries" }, { "datasource": { @@ -567,6 +667,8 @@ }, "id": 28, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -578,7 +680,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -596,6 +698,90 @@ "type": "gauge" }, { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + 
"__systemRef": "hideSeriesFrom", + "matcher": { + "id": "byNames", + "options": { + "mode": "exclude", + "names": [ + "/auth-update-credentials " + ], + "prefix": "All except:", + "readOnly": true + } + }, + "properties": [ + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": false, + "viz": true + } + } + ] + } + ] + }, "gridPos": { "h": 6, "w": 9, @@ -603,10 +789,37 @@ "y": 4 }, "id": 2, - "libraryPanel": { - "uid": "3OZDDjQVz", - "name": "Avg request duration (secs)" - } + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count[30s])))", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{uri}} ", + "range": true, + "refId": "A" + } + ], + "title": "Avg request duration (secs)", + "type": "timeseries" }, { "datasource": { @@ -651,6 +864,8 @@ }, "id": 22, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -662,7 +877,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.2.2", + "pluginVersion": "10.2.2", "targets": [ { "datasource": { @@ -694,6 +909,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -707,6 +923,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -788,6 +1005,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", 
"axisLabel": "", @@ -801,6 +1019,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -910,6 +1129,65 @@ "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "description": "Current number of requests in progress", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "gridPos": { "h": 6, "w": 5, @@ -917,10 +1195,51 @@ "y": 10 }, "id": 10, - "libraryPanel": { - "uid": "OZ7ovCw4k", - "name": "Requests/handlers in progress" - } + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "pwb_http_requests_inflight{cluster=~\"$cluster_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, + "interval": "", + "legendFormat": "requests", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { 
+ "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "editorMode": "code", + "expr": "pwb_http_handlers_inflight", + "hide": false, + "interval": "", + "legendFormat": "handlers", + "range": true, + "refId": "B" + } + ], + "title": "Requests in flight", + "type": "timeseries" }, { "datasource": { @@ -934,6 +1253,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -947,6 +1267,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1018,6 +1339,65 @@ "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "description": "Warning: misses some fast session starts", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "gridPos": { "h": 5, "w": 6, @@ -1025,12 +1405,95 @@ "y": 16 }, "id": 4, - "libraryPanel": { - "uid": "C7lvvCQ4z", - "name": "Session start secs" - } + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": 
"single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "editorMode": "code", + "expr": "rate(pwb_session_startup_duration_seconds_sum[20s]) / rate(pwb_session_startup_duration_seconds_count[20s])", + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Session start secs", + "type": "timeseries" }, { + "datasource": { + "type": "prometheus", + "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "description": "Includes user wait time if auto-join is not used. Measures time till first response from session is proxied by the server that started the session.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, "gridPos": { "h": 5, "w": 6, @@ -1038,10 +1501,34 @@ "y": 16 }, "id": 8, - "libraryPanel": { - "uid": "oSsbDjw4k", - "name": "Session start to join secs" - } + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"ee711a05-f90f-47d1-acad-0a3a79a54662" + }, + "editorMode": "code", + "expr": "rate(pwb_session_startup_and_connect_duration_seconds_sum[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count[1m])", + "interval": "", + "legendFormat": "{{type}}", + "range": true, + "refId": "A" + } + ], + "title": "Session start to join secs", + "type": "timeseries" } ], "refresh": "", @@ -1052,8 +1539,8 @@ { "current": { "selected": true, - "text": "All", - "value": "$__all" + "text": "default_demo01-staging-20250423-control-plane", + "value": "default_demo01-staging-20250423-control-plane" }, "datasource": { "type": "prometheus", @@ -1077,6 +1564,30 @@ "skipUrlSync": false, "sort": 1, "type": "query" + }, + { + "current": { + "selected": true, + "text": "databricks", + "value": "databricks" + }, + "definition": "label_values(pwb_build_info,ptd_site)", + "hide": 0, + "includeAll": false, + "label": "Site", + "multi": false, + "name": "site_name", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(pwb_build_info,ptd_site)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, @@ -1088,6 +1599,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 6, + "version": 1, "weekStart": "" } \ No newline at end of file From 63c07a1453aa00aa1b606e4cad5f2aeee2dc31c1 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 09:15:26 -0600 Subject: [PATCH 10/32] fix(grafana): add missing cluster filters to unlinked panels in Posit Team Overview Addresses review feedback by adding cluster=~"$cluster_name" filter to four unlinked panels that were missing it: - Panel 2: Avg request duration (secs) - Panel 10: Requests in flight (second target) - Panel 4: Session start secs - Panel 8: Session start to join secs Also restores cluster variable default to "All" ($__all) and increments dashboard version from 1 to 7 to 
track evolution properly. These changes ensure consistent cluster filtering behavior across all panels when users select a specific cluster in the dashboard. Co-Authored-By: Claude Sonnet 4.5 --- .../grafana_dashboards/posit-team-overview.json | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index a5532b5..123a00c 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -809,7 +809,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count[30s])))", + "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\"}[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\"}[30s])))", "format": "time_series", "instant": false, "interval": "", @@ -1230,7 +1230,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "pwb_http_handlers_inflight", + "expr": "pwb_http_handlers_inflight{cluster=~\"$cluster_name\"}", "hide": false, "interval": "", "legendFormat": "handlers", @@ -1424,7 +1424,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "rate(pwb_session_startup_duration_seconds_sum[20s]) / rate(pwb_session_startup_duration_seconds_count[20s])", + "expr": "rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\"}[20s]) / rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\"}[20s])", "interval": "", "legendFormat": "{{type}}", "range": true, @@ -1520,7 +1520,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": 
"rate(pwb_session_startup_and_connect_duration_seconds_sum[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count[1m])", + "expr": "rate(pwb_session_startup_and_connect_duration_seconds_sum{cluster=~\"$cluster_name\"}[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count{cluster=~\"$cluster_name\"}[1m])", "interval": "", "legendFormat": "{{type}}", "range": true, @@ -1539,8 +1539,8 @@ { "current": { "selected": true, - "text": "default_demo01-staging-20250423-control-plane", - "value": "default_demo01-staging-20250423-control-plane" + "text": "All", + "value": "$__all" }, "datasource": { "type": "prometheus", @@ -1599,6 +1599,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 1, + "version": 7, "weekStart": "" } \ No newline at end of file From 71d750f9f48bb81dba586ce63c879aec68eab30c Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 10:20:42 -0600 Subject: [PATCH 11/32] Start adding site to queries --- .../posit-team-overview.json | 28 ++++++++++++------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 123a00c..2cd33b5 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -497,13 +497,17 @@ "type": "prometheus", "uid": "mimir" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "pwb_build_info{cluster=~\"$cluster_name\"}", + "expr": "pwb_build_info{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "fullMetaSearch": false, + "includeNullMetadata": true, "instant": true, "legendFormat": "{{release_name}}", "range": false, - "refId": "A" + "refId": "A", + "useBackend": false }, { "datasource": { @@ -512,7 +516,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": 
"pwb_build_info{cluster=~\"$cluster_name\"}", + "expr": "pwb_build_info{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", "hide": false, "instant": true, "interval": "", @@ -1567,20 +1571,24 @@ }, { "current": { - "selected": true, - "text": "databricks", - "value": "databricks" + "selected": false, + "text": "All", + "value": "$__all" }, - "definition": "label_values(pwb_build_info,ptd_site)", + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "definition": "label_values(pwb_build_info{cluster=~\"$cluster_name\"},ptd_site)", "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Site", "multi": false, "name": "site_name", "options": [], "query": { "qryType": 1, - "query": "label_values(pwb_build_info,ptd_site)", + "query": "label_values(pwb_build_info{cluster=~\"$cluster_name\"},ptd_site)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1599,6 +1607,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 7, + "version": 1, "weekStart": "" } \ No newline at end of file From 20c53d62ab2311f565d26441e624ac21d29f07fd Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 10:26:04 -0600 Subject: [PATCH 12/32] fix(grafana): apply site filter consistently and correct version in Posit Team Overview Applied ptd_site="$site_name" filter to all 22 pwb_* metric queries across the dashboard, ensuring the Site dropdown filters all panels consistently (previously only 2 of 24 panels were filtered). Also incremented dashboard version from 1 to 8 to properly continue version tracking. 
Co-Authored-By: Claude Sonnet 4.5 --- .../posit-team-overview.json | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 2cd33b5..4fc9b22 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -84,7 +84,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum by (type) (rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\"}[24h] @ end())) /\nsum by (type) (rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\"}[24h] @ end()))", + "expr": "sum by (type) (rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[24h] @ end())) /\nsum by (type) (rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[24h] @ end()))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -156,7 +156,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_sessions{cluster=~\"$cluster_name\",status=\"Running\"}", + "expr": "pwb_sessions{cluster=~\"$cluster_name\", ptd_site=\"$site_name\",status=\"Running\"}", "instant": true, "interval": "", "legendFormat": "{{type}}", @@ -225,7 +225,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_active_user_sessions{cluster=~\"$cluster_name\"}", + "expr": "pwb_active_user_sessions{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", "instant": true, "legendFormat": "__auto", "range": false, @@ -292,7 +292,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "pwb_license_active_users{cluster=~\"$cluster_name\"}", + "expr": "pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", "legendFormat": "__auto", "range": true, "refId": "A" @@ -358,7 +358,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": 
"pwb_license_user_seats{cluster=~\"$cluster_name\"}", + "expr": "pwb_license_user_seats{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", "legendFormat": "__auto", "range": true, "refId": "A" @@ -426,7 +426,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_license_expiry{cluster=~\"$cluster_name\"}*1000", + "expr": "pwb_license_expiry{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}*1000", "instant": true, "legendFormat": "__auto", "range": false, @@ -616,7 +616,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum without(code, method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\"}[1m]))", + "expr": "sum without(code, method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -692,7 +692,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum without(code,uri,method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\"}[1m]))", + "expr": "sum without(code,uri,method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -813,7 +813,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\"}[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\"}[30s])))", + "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[30s])))", "format": "time_series", "instant": false, "interval": "", @@ -890,7 +890,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum without 
(code,method,uri)(rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\"}[1m] @ end())) / \nsum without (code,method,uri)(rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\"}[1m] @ end()))\n\n", + "expr": "sum without (code,method,uri)(rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m] @ end())) / \nsum without (code,method,uri)(rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m] @ end()))\n\n", "format": "time_series", "instant": true, "interval": "", @@ -987,7 +987,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "increase(pwb_http_response_size_bytes_total{cluster=~\"$cluster_name\"}[1m]) / 1024", + "expr": "increase(pwb_http_response_size_bytes_total{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]) / 1024", "interval": "", "legendFormat": "{{uri}}", "range": true, @@ -1083,7 +1083,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.999, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.999, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", "interval": "", "legendFormat": "99.9%", "range": true, @@ -1095,7 +1095,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.99, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", "hide": false, "interval": "", "legendFormat": "99%", @@ -1108,7 +1108,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.95, 
sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", "hide": false, "interval": "", "legendFormat": "95%", @@ -1121,7 +1121,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.5, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", "hide": false, "interval": "", "legendFormat": "50%", @@ -1219,7 +1219,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "pwb_http_requests_inflight{cluster=~\"$cluster_name\"}", + "expr": "pwb_http_requests_inflight{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "", @@ -1234,7 +1234,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "pwb_http_handlers_inflight{cluster=~\"$cluster_name\"}", + "expr": "pwb_http_handlers_inflight{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", "hide": false, "interval": "", "legendFormat": "handlers", @@ -1332,7 +1332,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "pwb_sessions{cluster=~\"$cluster_name\",status=~\"Running|Pending|Starting\"}", + "expr": "pwb_sessions{cluster=~\"$cluster_name\", ptd_site=\"$site_name\",status=~\"Running|Pending|Starting\"}", "interval": "", "legendFormat": "{{type}} - {{cluster}}", "range": true, @@ -1428,7 +1428,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\"}[20s]) / rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\"}[20s])", + "expr": "rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[20s]) / 
rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[20s])", "interval": "", "legendFormat": "{{type}}", "range": true, @@ -1524,7 +1524,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "rate(pwb_session_startup_and_connect_duration_seconds_sum{cluster=~\"$cluster_name\"}[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count{cluster=~\"$cluster_name\"}[1m])", + "expr": "rate(pwb_session_startup_and_connect_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])", "interval": "", "legendFormat": "{{type}}", "range": true, @@ -1607,6 +1607,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 1, + "version": 8, "weekStart": "" } \ No newline at end of file From e80742aae2aea42e39c4400fc9b386b4c47aac1c Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 10:34:41 -0600 Subject: [PATCH 13/32] fix(grafana): use pattern match operator for site filter in Posit Team Overview Changed all site filters from ptd_site="$site_name" to ptd_site=~"$site_name" to support regex pattern matching, which allows the "All" option in the Site dropdown to work correctly (when $site_name is set to ".*"). Updated all 22 pwb_* metric queries across the dashboard. 
Co-Authored-By: Claude Sonnet 4.5 --- .../posit-team-overview.json | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 4fc9b22..b5f061b 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -84,7 +84,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum by (type) (rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[24h] @ end())) /\nsum by (type) (rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[24h] @ end()))", + "expr": "sum by (type) (rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[24h] @ end())) /\nsum by (type) (rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[24h] @ end()))", "legendFormat": "__auto", "range": true, "refId": "A" @@ -156,7 +156,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_sessions{cluster=~\"$cluster_name\", ptd_site=\"$site_name\",status=\"Running\"}", + "expr": "pwb_sessions{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\",status=\"Running\"}", "instant": true, "interval": "", "legendFormat": "{{type}}", @@ -225,7 +225,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_active_user_sessions{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_active_user_sessions{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", "instant": true, "legendFormat": "__auto", "range": false, @@ -292,7 +292,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", 
"legendFormat": "__auto", "range": true, "refId": "A" @@ -358,7 +358,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "pwb_license_user_seats{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_license_user_seats{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", "legendFormat": "__auto", "range": true, "refId": "A" @@ -426,7 +426,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_license_expiry{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}*1000", + "expr": "pwb_license_expiry{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}*1000", "instant": true, "legendFormat": "__auto", "range": false, @@ -500,7 +500,7 @@ "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "pwb_build_info{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_build_info{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", "fullMetaSearch": false, "includeNullMetadata": true, "instant": true, @@ -516,7 +516,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "pwb_build_info{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_build_info{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", "hide": false, "instant": true, "interval": "", @@ -616,7 +616,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum without(code, method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]))", + "expr": "sum without(code, method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, @@ -692,7 +692,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum without(code,uri,method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]))", + "expr": "sum without(code,uri,method) (increase(pwb_http_requests_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", 
"legendFormat": "__auto", "range": true, "refId": "A" @@ -813,7 +813,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[30s])))", + "expr": "(sum without(code,method) (rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[30s]))) / (sum without(code,method) (rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[30s])))", "format": "time_series", "instant": false, "interval": "", @@ -890,7 +890,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum without (code,method,uri)(rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m] @ end())) / \nsum without (code,method,uri)(rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m] @ end()))\n\n", + "expr": "sum without (code,method,uri)(rate(pwb_http_response_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m] @ end())) / \nsum without (code,method,uri)(rate(pwb_http_response_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m] @ end()))\n\n", "format": "time_series", "instant": true, "interval": "", @@ -987,7 +987,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "increase(pwb_http_response_size_bytes_total{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]) / 1024", + "expr": "increase(pwb_http_response_size_bytes_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]) / 1024", "interval": "", "legendFormat": "{{uri}}", "range": true, @@ -1083,7 +1083,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.999, 
sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.999, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le)) ", "interval": "", "legendFormat": "99.9%", "range": true, @@ -1095,7 +1095,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.99, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le)) ", "hide": false, "interval": "", "legendFormat": "99%", @@ -1108,7 +1108,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.95, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le)) ", "hide": false, "interval": "", "legendFormat": "95%", @@ -1121,7 +1121,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])) by (le)) ", + "expr": "histogram_quantile(0.5, sum(rate(pwb_http_response_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le)) ", "hide": false, "interval": "", "legendFormat": "50%", @@ -1219,7 +1219,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "pwb_http_requests_inflight{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_http_requests_inflight{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", "fullMetaSearch": false, "includeNullMetadata": true, "interval": "", @@ -1234,7 +1234,7 @@ "uid": 
"ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "pwb_http_handlers_inflight{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}", + "expr": "pwb_http_handlers_inflight{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", "hide": false, "interval": "", "legendFormat": "handlers", @@ -1332,7 +1332,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "pwb_sessions{cluster=~\"$cluster_name\", ptd_site=\"$site_name\",status=~\"Running|Pending|Starting\"}", + "expr": "pwb_sessions{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\",status=~\"Running|Pending|Starting\"}", "interval": "", "legendFormat": "{{type}} - {{cluster}}", "range": true, @@ -1428,7 +1428,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[20s]) / rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[20s])", + "expr": "rate(pwb_session_startup_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[20s]) / rate(pwb_session_startup_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[20s])", "interval": "", "legendFormat": "{{type}}", "range": true, @@ -1524,7 +1524,7 @@ "uid": "ee711a05-f90f-47d1-acad-0a3a79a54662" }, "editorMode": "code", - "expr": "rate(pwb_session_startup_and_connect_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=\"$site_name\"}[1m])", + "expr": "rate(pwb_session_startup_and_connect_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]) / \nrate(pwb_session_startup_and_connect_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])", "interval": "", "legendFormat": "{{type}}", "range": true, From 29c44bff82dd331f34544cd23a0bebb9a7ba0cdd Mon Sep 17 
00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 10:42:36 -0600 Subject: [PATCH 14/32] Aggregate user metrics --- .../posit-team-overview.json | 22 +++++++++++++------ 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index b5f061b..b1251f7 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -291,11 +291,15 @@ "type": "prometheus", "uid": "mimir" }, - "editorMode": "code", - "expr": "pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max by(ptd_site, cluster) (pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Registered users", @@ -357,11 +361,15 @@ "type": "prometheus", "uid": "mimir" }, - "editorMode": "code", - "expr": "pwb_license_user_seats{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "max by(cluster, ptd_site) (pwb_license_user_seats{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "fullMetaSearch": false, + "includeNullMetadata": true, "legendFormat": "__auto", "range": true, - "refId": "A" + "refId": "A", + "useBackend": false } ], "title": "Licensed users", @@ -1607,6 +1615,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 8, + "version": 9, "weekStart": "" } \ No newline at end of file From 583b698a3eb1575f8b000cc84c67013cd9680ccf Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 10:45:31 -0600 Subject: [PATCH 15/32] fix(grafana): standardize label ordering in by() 
clauses for license metrics Changed "Registered users" panel from `max by(ptd_site, cluster)` to `max by(cluster, ptd_site)` to match the consistent label ordering pattern used throughout the dashboard and in the "Licensed users" panel. Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index b1251f7..a4c0e24 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -293,7 +293,7 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "max by(ptd_site, cluster) (pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "expr": "max by(cluster, ptd_site) (pwb_license_active_users{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", "fullMetaSearch": false, "includeNullMetadata": true, "legendFormat": "__auto", From 425ac88ba2bf17dd6955e044d652573208963f8a Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 10:56:57 -0600 Subject: [PATCH 16/32] Add grouping for Workbench metrics --- .../posit-team-overview.json | 49 ++++++++++++------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index a4c0e24..35a2000 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -21,6 +21,19 @@ "links": [], "liveNow": false, "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 35, + "panels": [], + "title": "Workbench", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -59,7 +72,7 @@ "h": 
4, "w": 6, "x": 0, - "y": 0 + "y": 1 }, "id": 24, "options": { @@ -130,7 +143,7 @@ "h": 4, "w": 6, "x": 6, - "y": 0 + "y": 1 }, "id": 20, "options": { @@ -199,7 +212,7 @@ "h": 4, "w": 3, "x": 12, - "y": 0 + "y": 1 }, "id": 32, "options": { @@ -267,7 +280,7 @@ "h": 4, "w": 3, "x": 15, - "y": 0 + "y": 1 }, "id": 18, "options": { @@ -337,7 +350,7 @@ "h": 4, "w": 3, "x": 18, - "y": 0 + "y": 1 }, "id": 26, "options": { @@ -404,7 +417,7 @@ "h": 2, "w": 3, "x": 21, - "y": 0 + "y": 1 }, "id": 30, "options": { @@ -476,7 +489,7 @@ "h": 2, "w": 3, "x": 21, - "y": 2 + "y": 3 }, "id": 16, "options": { @@ -600,7 +613,7 @@ "h": 6, "w": 12, "x": 0, - "y": 4 + "y": 5 }, "id": 6, "maxDataPoints": 300, @@ -675,7 +688,7 @@ "h": 3, "w": 3, "x": 12, - "y": 4 + "y": 5 }, "id": 28, "options": { @@ -798,7 +811,7 @@ "h": 6, "w": 9, "x": 15, - "y": 4 + "y": 5 }, "id": 2, "options": { @@ -872,7 +885,7 @@ "h": 3, "w": 3, "x": 12, - "y": 7 + "y": 8 }, "id": 22, "options": { @@ -973,7 +986,7 @@ "h": 6, "w": 12, "x": 0, - "y": 10 + "y": 11 }, "id": 34, "options": { @@ -1069,7 +1082,7 @@ "h": 6, "w": 7, "x": 12, - "y": 10 + "y": 11 }, "id": 14, "options": { @@ -1204,7 +1217,7 @@ "h": 6, "w": 5, "x": 19, - "y": 10 + "y": 11 }, "id": 10, "options": { @@ -1318,7 +1331,7 @@ "h": 5, "w": 12, "x": 0, - "y": 16 + "y": 17 }, "id": 12, "options": { @@ -1414,7 +1427,7 @@ "h": 5, "w": 6, "x": 12, - "y": 16 + "y": 17 }, "id": 4, "options": { @@ -1510,7 +1523,7 @@ "h": 5, "w": 6, "x": 18, - "y": 16 + "y": 17 }, "id": 8, "options": { @@ -1615,6 +1628,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 9, + "version": 10, "weekStart": "" } \ No newline at end of file From 55570596caa363c54791932757334503f8effc2e Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 11:26:27 -0600 Subject: [PATCH 17/32] feat(grafana): add Connect row to Posit Team Overview dashboard Added a complete "Connect" row with 17 panels that mirror the Workbench metrics structure, 
providing visibility into Connect operational metrics. Changes: - Added Connect row at y=23 with 17 panels (IDs 100-117) - Panels include HTTP metrics, user activity, queue metrics, and Shiny sessions - Updated site_name variable query to include both pwb_build_info and go_build_info - Updated dashboard version from 10 to 11 - Total panels increased from 18 to 36 Panel breakdown: - 7 gauge panels for key metrics (content views, active sessions, users) - 2 stat panels (build info, queue jobs) - 8 timeseries panels for detailed monitoring All panels use cluster and ptd_site filters for consistent filtering across both Workbench and Connect sections. Co-Authored-By: Claude Sonnet 4.5 --- .../posit-team-overview.json | 1458 ++++++++++++++++- 1 file changed, 1456 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 35a2000..7cccee6 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -1554,6 +1554,1460 @@ ], "title": "Session start to join secs", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 100, + "panels": [], + "title": "Connect", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 500 + }, + { + "color": "red", + "value": 800 + } + ] + }, + "max": 1000, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 101, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + 
"lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(increase(connect_content_hits_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[24h] @ end())) / (24 * 60)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Content views/min (24h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + "value": 80 + } + ] + }, + "max": 100, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 24 + }, + "id": 102, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(connect_content_app_sessions_current{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active Shiny sessions", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 50 + }, + { + "color": "red", + 
"value": 80 + } + ] + }, + "max": 100, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 24 + }, + "id": 103, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"24h\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active users (24h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 100 + }, + { + "color": "red", + "value": 150 + } + ] + }, + "max": 200, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 24 + }, + "id": 104, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"7d\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active users (7d)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, 
+ "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 150 + }, + { + "color": "red", + "value": 250 + } + ] + }, + "max": 300, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 18, + "y": 24 + }, + "id": 105, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"30d\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active users (30d)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 21, + "y": 24 + }, + "id": 106, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 12 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "go_build_info{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", 
job=~\".*connect.*\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Build info", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 21, + "y": 26 + }, + "id": 107, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 12 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (cluster, ptd_site) (connect_jobs_queue_total_jobs_in_queue{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Queue jobs", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 108, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum without(code, method) (increase(connect_http_request_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", + "legendFormat": "{{route}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests/minute", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 500 + }, + { + "color": "red", + "value": 800 + } + ] + }, + "max": 1000, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 28 + }, + "id": 109, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(rate(connect_http_request_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) * 60", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests/min (1m)", + "type": 
"gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 28 + }, + "id": 110, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum by (route) (rate(connect_http_request_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) /\nsum by (route) (rate(connect_http_request_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", + "legendFormat": "{{route}}", + "range": true, + "refId": "A" + } + ], + "title": "Avg request duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + 
"color": "green", + "value": null + }, + { + "color": "orange", + "value": 2 + }, + { + "color": "red", + "value": 4 + } + ] + }, + "max": 5, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 31 + }, + "id": 111, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(rate(connect_http_request_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) /\nsum(rate(connect_http_request_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Avg resp secs (1m)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + 
"x": 0, + "y": 34 + }, + "id": 112, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum without(code, method) (increase(connect_http_response_size_bytes{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) / 1024", + "legendFormat": "{{route}}", + "range": true, + "refId": "A" + } + ], + "title": "Response size/min (kb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 12, + "y": 34 + }, + "id": 113, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": 
"histogram_quantile(0.999, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "legendFormat": "99.9%", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "legendFormat": "99%", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "legendFormat": "95%", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.5, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "legendFormat": "50%", + "range": true, + "refId": "D" + } + ], + "title": "Request time quantiles", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 5, + "x": 19, + "y": 34 + }, + "id": 114, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "connect_http_request_inflight_gauge{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "legendFormat": "{{route}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests in flight", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 115, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + 
"tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(connect_content_app_sessions_current{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "legendFormat": "Active sessions", + "range": true, + "refId": "A" + } + ], + "title": "Active Shiny sessions", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 12, + "y": 40 + }, + "id": 116, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "connect_jobs_queue_total_jobs_in_queue{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "legendFormat": "{{queue_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Queue depth", + "type": "timeseries" + }, 
+ { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 6, + "x": 18, + "y": 40 + }, + "id": 117, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "connect_jobs_queue_oldest_job_age_seconds{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "legendFormat": "{{queue_name}}", + "range": true, + "refId": "A" + } + ], + "title": "Oldest job age (secs)", + "type": "timeseries" } ], "refresh": "", @@ -1609,7 +3063,7 @@ "options": [], "query": { "qryType": 1, - "query": "label_values(pwb_build_info{cluster=~\"$cluster_name\"},ptd_site)", + "query": "label_values({cluster=~\"$cluster_name\", __name__=~\"pwb_build_info|go_build_info\"},ptd_site)", "refId": "PrometheusVariableQueryEditor-VariableQuery" }, "refresh": 1, @@ -1628,6 +3082,6 @@ "timezone": "", "title": "Posit Team 
Overview", "uid": "posit-team-overview", - "version": 10, + "version": 11, "weekStart": "" } \ No newline at end of file From cbe35f056c340b44f940a71b930df5e7a35a10de Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 11:30:53 -0600 Subject: [PATCH 18/32] fix(grafana): use max aggregation for Connect global metrics Changed aggregation from sum to max for Connect metrics that are reported identically by all pods, preventing double-counting when multiple Connect pods are running. Fixed panels: - Panel 103: Active users (24h) - changed sum to max - Panel 104: Active users (7d) - changed sum to max - Panel 105: Active users (30d) - changed sum to max - Panel 107: Queue jobs - changed sum to max Rationale: connect_users_active and connect_jobs_queue_total_jobs_in_queue represent global system state queried from shared resources (database, queue). All Connect pods report identical values, so sum() incorrectly multiplies by the number of pods. Using max() deduplicates across pods while preserving the correct value. This mirrors the fix in commit 29c44bf for Workbench license metrics. 
Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 7cccee6..a6b3f53 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -1775,7 +1775,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"24h\"})", + "expr": "max by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"24h\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1847,7 +1847,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"7d\"})", + "expr": "max by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"7d\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1919,7 +1919,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"30d\"})", + "expr": "max by (cluster, ptd_site) (connect_users_active{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", window=\"30d\"})", "legendFormat": "__auto", "range": true, "refId": "A" @@ -2054,7 +2054,7 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum by (cluster, ptd_site) (connect_jobs_queue_total_jobs_in_queue{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "expr": "max by (cluster, ptd_site) (connect_jobs_queue_total_jobs_in_queue{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", "instant": true, "legendFormat": "__auto", 
"range": false, From 8ca1d889d47dee4c5ab89df09871ee633f111e0d Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 11:54:23 -0600 Subject: [PATCH 19/32] Change scrape interval for some panels --- .../posit-team-overview.json | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index a6b3f53..7415d7b 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -1561,7 +1561,7 @@ "h": 1, "w": 24, "x": 0, - "y": 23 + "y": 22 }, "id": 100, "panels": [], @@ -1580,6 +1580,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 1000, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1596,9 +1598,7 @@ "value": 800 } ] - }, - "max": 1000, - "min": 0 + } }, "overrides": [] }, @@ -1606,7 +1606,7 @@ "h": 4, "w": 6, "x": 0, - "y": 24 + "y": 23 }, "id": 101, "options": { @@ -1652,6 +1652,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 100, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1668,9 +1670,7 @@ "value": 80 } ] - }, - "max": 100, - "min": 0 + } }, "overrides": [] }, @@ -1678,7 +1678,7 @@ "h": 4, "w": 6, "x": 6, - "y": 24 + "y": 23 }, "id": 102, "options": { @@ -1724,6 +1724,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 100, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1740,9 +1742,7 @@ "value": 80 } ] - }, - "max": 100, - "min": 0 + } }, "overrides": [] }, @@ -1750,7 +1750,7 @@ "h": 4, "w": 3, "x": 12, - "y": 24 + "y": 23 }, "id": 103, "options": { @@ -1796,6 +1796,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 200, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1812,9 +1814,7 @@ "value": 150 } ] - }, - "max": 200, - "min": 0 + } }, "overrides": [] }, @@ -1822,7 +1822,7 @@ "h": 4, "w": 3, "x": 15, - "y": 24 + "y": 23 }, 
"id": 104, "options": { @@ -1868,6 +1868,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 300, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -1884,9 +1886,7 @@ "value": 250 } ] - }, - "max": 300, - "min": 0 + } }, "overrides": [] }, @@ -1894,7 +1894,7 @@ "h": 4, "w": 3, "x": 18, - "y": 24 + "y": 23 }, "id": 105, "options": { @@ -1956,7 +1956,7 @@ "h": 2, "w": 3, "x": 21, - "y": 24 + "y": 23 }, "id": 106, "options": { @@ -2024,7 +2024,7 @@ "h": 2, "w": 3, "x": 21, - "y": 26 + "y": 25 }, "id": 107, "options": { @@ -2128,7 +2128,7 @@ "h": 6, "w": 12, "x": 0, - "y": 28 + "y": 27 }, "id": 108, "maxDataPoints": 300, @@ -2151,13 +2151,13 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum without(code, method) (increase(connect_http_request_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", + "expr": "sum without(code, method) (increase(connect_http_request_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m]))", "legendFormat": "{{route}}", "range": true, "refId": "A" } ], - "title": "Requests/minute", + "title": "Requests/5m", "type": "timeseries" }, { @@ -2172,6 +2172,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 1000, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -2188,9 +2190,7 @@ "value": 800 } ] - }, - "max": 1000, - "min": 0 + } }, "overrides": [] }, @@ -2198,7 +2198,7 @@ "h": 3, "w": 3, "x": 12, - "y": 28 + "y": 27 }, "id": 109, "options": { @@ -2296,7 +2296,7 @@ "h": 6, "w": 9, "x": 15, - "y": 28 + "y": 27 }, "id": 110, "maxDataPoints": 300, @@ -2340,6 +2340,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 5, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -2356,9 +2358,7 @@ "value": 4 } ] - }, - "max": 5, - "min": 0 + } }, "overrides": [] }, @@ -2366,7 +2366,7 @@ "h": 3, "w": 3, "x": 12, - "y": 31 + "y": 30 }, "id": 111, "options": { @@ -2464,7 +2464,7 @@ "h": 6, "w": 12, "x": 0, - "y": 34 + "y": 33 }, "id": 112, "maxDataPoints": 300, @@ -2560,7 +2560,7 @@ "h": 6, 
"w": 7, "x": 12, - "y": 34 + "y": 33 }, "id": 113, "maxDataPoints": 300, @@ -2689,7 +2689,7 @@ "h": 6, "w": 5, "x": 19, - "y": 34 + "y": 33 }, "id": 114, "maxDataPoints": 300, @@ -2785,7 +2785,7 @@ "h": 5, "w": 12, "x": 0, - "y": 40 + "y": 39 }, "id": 115, "maxDataPoints": 300, @@ -2881,7 +2881,7 @@ "h": 5, "w": 6, "x": 12, - "y": 40 + "y": 39 }, "id": 116, "maxDataPoints": 300, @@ -2977,7 +2977,7 @@ "h": 5, "w": 6, "x": 18, - "y": 40 + "y": 39 }, "id": 117, "maxDataPoints": 300, @@ -3082,6 +3082,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 11, + "version": 12, "weekStart": "" } \ No newline at end of file From 65e4cfca1c9618fc8865c2c5d29368375567e0f2 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:01:20 -0600 Subject: [PATCH 20/32] Fix resp bytes metric --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 7415d7b..7930896 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -2487,7 +2487,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum without(code, method) (increase(connect_http_response_size_bytes{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) / 1024", + "expr": "sum without(code, method) (increase(connect_http_response_size_bytes_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) / 1024", "legendFormat": "{{route}}", "range": true, "refId": "A" @@ -3017,7 +3017,7 @@ "list": [ { "current": { - "selected": true, + "selected": false, "text": "All", "value": "$__all" }, @@ -3082,6 +3082,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 12, + "version": 13, "weekStart": "" } \ No newline at end of 
file From c8765d84ff56d429b1bd3366c489478e625b40e0 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:06:17 -0600 Subject: [PATCH 21/32] Fix panel title --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 7930896..8e54b18 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -2493,7 +2493,7 @@ "refId": "A" } ], - "title": "Response size/min (kb)", + "title": "Response size/5m (kb)", "type": "timeseries" }, { From 14a8fa1c3156ba86d15a0e78b5b4df331a0ae97c Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:10:59 -0600 Subject: [PATCH 22/32] Remove empty build info panel, update collection intervals --- .../posit-team-overview.json | 92 +++---------------- 1 file changed, 12 insertions(+), 80 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 8e54b18..93e6956 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -1953,79 +1953,11 @@ "overrides": [] }, "gridPos": { - "h": 2, + "h": 4, "w": 3, "x": 21, "y": 23 }, - "id": 106, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { - "valueSize": 12 - }, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "mimir" - }, - "editorMode": "code", - "exemplar": false, - "expr": "go_build_info{cluster=~\"$cluster_name\", 
ptd_site=~\"$site_name\", job=~\".*connect.*\"}", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "Build info", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "mimir" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 3, - "x": 21, - "y": 25 - }, "id": 107, "options": { "colorMode": "value", @@ -2223,13 +2155,13 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum(rate(connect_http_request_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) * 60", + "expr": "sum(rate(connect_http_request_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) * 60", "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "Requests/min (1m)", + "title": "Requests/5m", "type": "gauge" }, { @@ -2319,7 +2251,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum by (route) (rate(connect_http_request_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) /\nsum by (route) (rate(connect_http_request_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", + "expr": "sum by (route) (rate(connect_http_request_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) /\nsum by (route) (rate(connect_http_request_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m]))", "legendFormat": "{{route}}", "range": true, "refId": "A" @@ -2391,13 +2323,13 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum(rate(connect_http_request_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) /\nsum(rate(connect_http_request_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m]))", + "expr": 
"sum(rate(connect_http_request_duration_seconds_sum{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) /\nsum(rate(connect_http_request_duration_seconds_count{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m]))", "legendFormat": "__auto", "range": true, "refId": "A" } ], - "title": "Avg resp secs (1m)", + "title": "Avg resp secs (5m)", "type": "gauge" }, { @@ -2493,7 +2425,7 @@ "refId": "A" } ], - "title": "Response size/5m (kb)", + "title": "Response size/min (kb)", "type": "timeseries" }, { @@ -2583,7 +2515,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.999, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "expr": "histogram_quantile(0.999, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) by (le))", "legendFormat": "99.9%", "range": true, "refId": "A" @@ -2594,7 +2526,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "expr": "histogram_quantile(0.99, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) by (le))", "legendFormat": "99%", "range": true, "refId": "B" @@ -2605,7 +2537,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "expr": "histogram_quantile(0.95, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) by (le))", "legendFormat": "95%", "range": true, "refId": "C" @@ -2616,7 +2548,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "histogram_quantile(0.5, 
sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[1m])) by (le))", + "expr": "histogram_quantile(0.5, sum(rate(connect_http_request_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) by (le))", "legendFormat": "50%", "range": true, "refId": "D" @@ -3082,6 +3014,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 13, + "version": 14, "weekStart": "" } \ No newline at end of file From a7e6ffbd13833446fd77f7451f593ca64792c70c Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:14:46 -0600 Subject: [PATCH 23/32] fix(grafana): correct Connect panel titles to match query semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed two panel titles where the title did not accurately reflect what the query was calculating: Panel 112 - Response size: - Changed: "Response size/min (kb)" → "Response size/5m (kb)" - Query: increase(...[5m]) / 1024 - Rationale: Query uses increase() over 5m window, which returns total bytes over 5 minutes, not per-minute rate. Title must match. - This reverts the problematic title change from commit 14a8fa1 and restores the correct title from commit c8765d8. Panel 109 - Request rate gauge: - Changed: "Requests/5m" → "Requests/min (5m)" - Query: rate(...[5m]) * 60 - Rationale: Query calculates rate (per-second) * 60 = per-minute rate. The "(5m)" clarifies the lookback window used for the rate calculation. Previous title "Requests/5m" incorrectly suggested total requests over 5 minutes rather than a per-minute rate. 
Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 93e6956..99fc6b5 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -2161,7 +2161,7 @@ "refId": "A" } ], - "title": "Requests/5m", + "title": "Requests/min (5m)", "type": "gauge" }, { @@ -2425,7 +2425,7 @@ "refId": "A" } ], - "title": "Response size/min (kb)", + "title": "Response size/5m (kb)", "type": "timeseries" }, { From 9bcea14edb7e9db1b81ad71312eed48013f9774d Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:20:33 -0600 Subject: [PATCH 24/32] feat(grafana): add Package Manager row to Posit Team Overview dashboard Added a complete "Package Manager" row with 18 panels that provide comprehensive monitoring of Package Manager operational metrics. 
Changes: - Added Package Manager row at y=46 with 18 panels (IDs 200-218) - Dashboard version updated from 14 to 15 - Total panels increased from 36 to 54 Panel Categories: Storage Health (critical for PPM): - Storage used % (gauge) - Calculated percentage of storage consumed - Storage free (GB) (gauge) - Available storage space - Storage used over time (timeseries) - Historical usage by path HTTP Performance (parallel to Workbench/Connect): - Requests/5m (timeseries) - HTTP request volume - Requests/min (5m) (gauge) - Current request rate - Avg response size (timeseries) - Response size by status code - Avg resp size (5m) (gauge) - Current average response size - Response size/5m (kb) (timeseries) - Total response bytes - Requests in flight (stat + timeseries) - Current/historical in-flight requests Package Operations (unique to PPM): - Package downloads/min (24h) (gauge) - Average daily download rate - Package downloads/5m (timeseries) - Download activity by repo Repository Sync Operations: - Sync duration (timeseries) - p95 sync time by source type (CRAN, PyPI, etc.) Binary Routing: - Binary routing fallbacks (timeseries) - Failed binary routing by reason License Status: - License days left (gauge) - Days until license expiry - License expires (stat) - Expiry timestamp - Failed Git builds (stat) - Total failed Git builds - Build info (stat) - Version information All panels use cluster and ptd_site filters for consistent filtering across all three product sections (Workbench, Connect, Package Manager). Key metrics use max() aggregation to prevent double-counting when multiple PPM pods report identical global state (storage, license). 
Co-Authored-By: Claude Sonnet 4.5 --- .../posit-team-overview.json | 1492 ++++++++++++++++- 1 file changed, 1491 insertions(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 99fc6b5..361d618 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -2940,6 +2940,1496 @@ ], "title": "Oldest job age (secs)", "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 200, + "panels": [], + "title": "Package Manager", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 70 + }, + { + "color": "red", + "value": 85 + } + ] + }, + "max": 100, + "min": 0, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 47 + }, + "id": 201, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "(max by (cluster, ptd_site) (ppm_storage_used{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}) / max by (cluster, ptd_site) (ppm_storage_size{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})) * 100", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Storage used %", + "type": "gauge" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 500 + }, + { + "color": "red", + "value": 800 + } + ] + }, + "max": 1000, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 47 + }, + "id": 202, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(increase(ppm_pkg_downloads_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[24h] @ end())) / (24 * 60)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Package downloads/min (24h)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 30 + }, + { + "color": "green", + "value": 90 + } + ] + }, + "max": 365, + "min": 0, + "unit": "d" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 12, + "y": 47 + }, + "id": 203, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + 
"editorMode": "code", + "expr": "max by (cluster, ptd_site) (ppm_license_days_left{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "License days left", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "orange", + "value": 100 + }, + { + "color": "green", + "value": 300 + } + ] + }, + "max": 1000, + "min": 0, + "unit": "decgbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 3, + "x": 15, + "y": 47 + }, + "id": 204, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "max by (cluster, ptd_site) (ppm_storage_free{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Storage free (GB)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 18, + "y": 47 + }, + "id": 205, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false 
+ }, + "text": { + "valueSize": 12 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (cluster, ptd_site) (ppm_git_builds_failed_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Failed Git builds", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 21, + "y": 47 + }, + "id": 206, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 12 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "go_build_info{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", job=~\".*package-manager.*\"}", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Build info", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 18, + "y": 49 + }, + "id": 207, + "options": { 
+ "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 12 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (cluster, ptd_site) (ppm_http_requests_inflight{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "Requests in flight", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "dateTimeAsLocal" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 3, + "x": 21, + "y": 49 + }, + "id": 208, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": { + "valueSize": 12 + }, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "exemplar": false, + "expr": "max by (cluster, ptd_site) (ppm_license_expiry{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}) * 1000", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "License expires", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 51 + }, + "id": 209, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum without(code, method) (increase(ppm_http_requests_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m]))", + "legendFormat": "{{code}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests/5m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 500 + }, + { + "color": "red", + "value": 800 + } + ] + }, + "max": 1000, + "min": 0 + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 51 + }, + "id": 210, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + 
"orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(rate(ppm_http_requests_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) * 60", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Requests/min (5m)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 9, + "x": 15, + "y": 51 + }, + "id": 211, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum by (code) (rate(ppm_http_response_size_bytes_total{cluster=~\"$cluster_name\", 
ptd_site=~\"$site_name\"}[5m])) /\nsum by (code) (rate(ppm_http_requests_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m]))", + "legendFormat": "{{code}}", + "range": true, + "refId": "A" + } + ], + "title": "Avg response size (bytes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 500000 + }, + { + "color": "red", + "value": 800000 + } + ] + }, + "max": 1000000, + "min": 0, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 54 + }, + "id": 212, + "options": { + "minVizHeight": 75, + "minVizWidth": 75, + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "pluginVersion": "10.2.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum(rate(ppm_http_response_size_bytes_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) /\nsum(rate(ppm_http_requests_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Avg resp size (5m)", + "type": "gauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 57 + }, + "id": 213, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "sum without(code, method) (increase(ppm_http_response_size_bytes_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) / 1024", + "legendFormat": "{{code}}", + "range": true, + "refId": "A" + } + ], + "title": "Response size/5m (kb)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 12, + "y": 57 + }, + "id": 214, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "increase(ppm_pkg_downloads_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])", + "legendFormat": "{{repo}}", + "range": true, + "refId": "A" + } + ], + "title": "Package downloads/5m", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 18, + "y": 57 + }, + "id": 215, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "ppm_http_requests_inflight{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "legendFormat": "In flight", + "range": true, + "refId": "A" + } + ], + "title": "Requests in flight", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 0, + "y": 63 + }, + "id": 216, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "ppm_storage_used{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}", + "legendFormat": "{{path}}", + "range": true, + "refId": "A" + } + ], + "title": "Storage used (GB)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { 
+ "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 8, + "y": 63 + }, + "id": 217, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum(rate(ppm_repo_source_sync_duration_seconds_bucket{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])) by (le, type))", + "legendFormat": "{{type}} (p95)", + "range": true, + "refId": "A" + } + ], + "title": "Sync duration (seconds)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 63 + }, + "id": 218, + "maxDataPoints": 300, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "mimir" + }, + "editorMode": "code", + "expr": "increase(ppm_binary_routing_fallback{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[5m])", + "legendFormat": "{{reason}}", + "range": true, + "refId": "A" + } + ], + "title": "Binary routing fallbacks", + "type": "timeseries" } ], "refresh": "", @@ -3014,6 +4504,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 14, + "version": 15, "weekStart": "" } \ No newline at end of file From 0445bf4d8ea88a73cf94fdb3d2b6d6bf50dd1eae Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:24:34 -0600 Subject: [PATCH 25/32] fix(grafana): fix Package Manager panel query and display issues Fixed two issues in Package Manager panels: Panel 202 - Package downloads/min (24h): - Removed `@ end()` modifier from query - Previous: increase(...[24h] @ end()) / (24 * 60) - Fixed: increase(...[24h]) / (24 * 60) - Rationale: The @ end() modifier locks the query to the range end timestamp, breaking historical playback in time-range dashboards and causing incorrect behavior in real-time views. 
For a gauge showing current download rate, the query should evaluate at the current dashboard time, not a fixed endpoint. Panel 216 - Storage used (GB): - Added missing unit configuration: "unit": "decgbytes" - Rationale: Panel 204 (Storage free GB) uses "decgbytes" unit for consistent GB formatting. Panel 216 was displaying raw metric values without unit formatting, creating display inconsistency. Panels 204 and 216 now both show storage metrics with consistent GB display formatting. Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 361d618..ea0fb6d 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -3090,7 +3090,7 @@ "uid": "mimir" }, "editorMode": "code", - "expr": "sum(increase(ppm_pkg_downloads_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[24h] @ end())) / (24 * 60)", + "expr": "sum(increase(ppm_pkg_downloads_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}[24h])) / (24 * 60)", "legendFormat": "__auto", "range": true, "refId": "A" @@ -4199,7 +4199,8 @@ "value": 80 } ] - } + }, + "unit": "decgbytes" }, "overrides": [] }, From c22ec8a2f3d7fcaa28b9bc6a4decf090e57d470f Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:54:02 -0600 Subject: [PATCH 26/32] Minor UI adjustments --- .../posit-team-overview.json | 74 +++++++++---------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index ea0fb6d..0c9f6a3 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++
b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -2947,7 +2947,7 @@ "h": 1, "w": 24, "x": 0, - "y": 46 + "y": 44 }, "id": 200, "panels": [], @@ -2966,6 +2966,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 100, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -2983,8 +2985,6 @@ } ] }, - "max": 100, - "min": 0, "unit": "percent" }, "overrides": [] @@ -2993,7 +2993,7 @@ "h": 4, "w": 6, "x": 0, - "y": 47 + "y": 45 }, "id": 201, "options": { @@ -3039,6 +3039,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 1000, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -3055,9 +3057,7 @@ "value": 800 } ] - }, - "max": 1000, - "min": 0 + } }, "overrides": [] }, @@ -3065,7 +3065,7 @@ "h": 4, "w": 6, "x": 6, - "y": 47 + "y": 45 }, "id": 202, "options": { @@ -3111,6 +3111,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 365, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -3124,12 +3126,10 @@ }, { "color": "green", - "value": 90 + "value": 60 } ] }, - "max": 365, - "min": 0, "unit": "d" }, "overrides": [] @@ -3138,7 +3138,7 @@ "h": 4, "w": 3, "x": 12, - "y": 47 + "y": 45 }, "id": 203, "options": { @@ -3184,6 +3184,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 1000, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -3201,8 +3203,6 @@ } ] }, - "max": 1000, - "min": 0, "unit": "decgbytes" }, "overrides": [] @@ -3211,7 +3211,7 @@ "h": 4, "w": 3, "x": 15, - "y": 47 + "y": 45 }, "id": 204, "options": { @@ -3273,7 +3273,7 @@ "h": 2, "w": 3, "x": 18, - "y": 47 + "y": 45 }, "id": 205, "options": { @@ -3341,7 +3341,7 @@ "h": 2, "w": 3, "x": 21, - "y": 47 + "y": 45 }, "id": 206, "options": { @@ -3409,7 +3409,7 @@ "h": 2, "w": 3, "x": 18, - "y": 49 + "y": 47 }, "id": 207, "options": { @@ -3478,7 +3478,7 @@ "h": 2, "w": 3, "x": 21, - "y": 49 + "y": 47 }, "id": 208, "options": { @@ -3582,7 +3582,7 @@ "h": 6, "w": 12, "x": 0, - "y": 51 + "y": 49 }, "id": 209, "maxDataPoints": 300, @@ -3626,6 +3626,8 
@@ "mode": "thresholds" }, "mappings": [], + "max": 1000, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -3642,9 +3644,7 @@ "value": 800 } ] - }, - "max": 1000, - "min": 0 + } }, "overrides": [] }, @@ -3652,7 +3652,7 @@ "h": 3, "w": 3, "x": 12, - "y": 51 + "y": 49 }, "id": 210, "options": { @@ -3750,7 +3750,7 @@ "h": 6, "w": 9, "x": 15, - "y": 51 + "y": 49 }, "id": 211, "maxDataPoints": 300, @@ -3794,6 +3794,8 @@ "mode": "thresholds" }, "mappings": [], + "max": 1000000, + "min": 0, "thresholds": { "mode": "absolute", "steps": [ @@ -3811,8 +3813,6 @@ } ] }, - "max": 1000000, - "min": 0, "unit": "decbytes" }, "overrides": [] @@ -3821,7 +3821,7 @@ "h": 3, "w": 3, "x": 12, - "y": 54 + "y": 52 }, "id": 212, "options": { @@ -3919,7 +3919,7 @@ "h": 6, "w": 12, "x": 0, - "y": 57 + "y": 55 }, "id": 213, "maxDataPoints": 300, @@ -4015,7 +4015,7 @@ "h": 6, "w": 6, "x": 12, - "y": 57 + "y": 55 }, "id": 214, "maxDataPoints": 300, @@ -4111,7 +4111,7 @@ "h": 6, "w": 6, "x": 18, - "y": 57 + "y": 55 }, "id": 215, "maxDataPoints": 300, @@ -4208,7 +4208,7 @@ "h": 5, "w": 8, "x": 0, - "y": 63 + "y": 61 }, "id": 216, "maxDataPoints": 300, @@ -4304,7 +4304,7 @@ "h": 5, "w": 8, "x": 8, - "y": 63 + "y": 61 }, "id": 217, "maxDataPoints": 300, @@ -4400,7 +4400,7 @@ "h": 5, "w": 8, "x": 16, - "y": 63 + "y": 61 }, "id": 218, "maxDataPoints": 300, @@ -4440,7 +4440,7 @@ "list": [ { "current": { - "selected": false, + "selected": true, "text": "All", "value": "$__all" }, @@ -4469,7 +4469,7 @@ }, { "current": { - "selected": false, + "selected": true, "text": "All", "value": "$__all" }, @@ -4505,6 +4505,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 15, + "version": 16, "weekStart": "" } \ No newline at end of file From 3807177e6ab4e7bff8bcd8d9bea7817fa43987ad Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 13:57:00 -0600 Subject: [PATCH 27/32] fix(grafana): prevent automatic time unit conversion in License days 
left panel Changed panel 203 (License days left) to display the value with a fixed "days" suffix instead of allowing Grafana to auto-convert to larger time units like weeks. Changes: - Unit changed from "d" (days) to "none" - Added custom suffix: " days" Previous behavior: - Value of 43 displayed as "6.14 weeks" - Grafana's "d" unit automatically converts to larger time units Fixed behavior: - Value of 43 displays as "43 days" - Plain numeric value with static " days" suffix This provides clearer, more consistent display of license expiry timing without confusing time unit conversions. Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/posit-team-overview.json | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 0c9f6a3..8bcc56e 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -3130,7 +3130,10 @@ } ] }, - "unit": "d" + "unit": "none", + "custom": { + "suffix": " days" + } }, "overrides": [] }, From f625a2674168df5cbf72d710de66d6860d6a3047 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 14:10:53 -0600 Subject: [PATCH 28/32] Remove some invalid metrics --- .../posit-team-overview.json | 166 ++---------------- 1 file changed, 11 insertions(+), 155 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json index 8bcc56e..ca969fc 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json +++ b/python-pulumi/src/ptd/grafana_dashboards/posit-team-overview.json @@ -3130,10 +3130,7 @@ } ] }, - "unit": "none", - "custom": { - "suffix": " days" - } + "unit": "none" }, "overrides": [] }, @@ -3187,26 +3184,16 @@ "mode": "thresholds" }, "mappings": [], - "max": 1000, 
- "min": 0, "thresholds": { "mode": "absolute", "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "orange", - "value": 100 - }, { "color": "green", - "value": 300 + "value": null } ] }, - "unit": "decgbytes" + "unit": "dateTimeAsLocal" }, "overrides": [] }, @@ -3216,69 +3203,7 @@ "x": 15, "y": 45 }, - "id": 204, - "options": { - "minVizHeight": 75, - "minVizWidth": 75, - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true - }, - "pluginVersion": "10.2.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "mimir" - }, - "editorMode": "code", - "expr": "max by (cluster, ptd_site) (ppm_storage_free{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Storage free (GB)", - "type": "gauge" - }, - { - "datasource": { - "type": "prometheus", - "uid": "mimir" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 3, - "x": 18, - "y": 45 - }, - "id": 205, + "id": 208, "options": { "colorMode": "value", "graphMode": "area", @@ -3306,14 +3231,14 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum by (cluster, ptd_site) (ppm_git_builds_failed_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", + "expr": "max by (cluster, ptd_site) (ppm_license_expiry{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}) * 1000", "instant": true, "legendFormat": "__auto", "range": false, "refId": "A" } ], - "title": "Failed Git builds", + "title": "License expires", "type": "stat" }, { @@ -3343,10 +3268,10 @@ "gridPos": { "h": 2, "w": 3, - "x": 21, + "x": 18, "y": 45 }, - "id": 206, + "id": 205, 
"options": { "colorMode": "value", "graphMode": "area", @@ -3374,14 +3299,14 @@ }, "editorMode": "code", "exemplar": false, - "expr": "go_build_info{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\", job=~\".*package-manager.*\"}", + "expr": "sum by (cluster, ptd_site) (ppm_git_builds_failed_total{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"})", "instant": true, "legendFormat": "__auto", "range": false, "refId": "A" } ], - "title": "Build info", + "title": "Failed Git builds", "type": "stat" }, { @@ -3452,75 +3377,6 @@ "title": "Requests in flight", "type": "stat" }, - { - "datasource": { - "type": "prometheus", - "uid": "mimir" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "dateTimeAsLocal" - }, - "overrides": [] - }, - "gridPos": { - "h": 2, - "w": 3, - "x": 21, - "y": 47 - }, - "id": 208, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": { - "valueSize": 12 - }, - "textMode": "auto", - "wideLayout": true - }, - "pluginVersion": "10.2.2", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "mimir" - }, - "editorMode": "code", - "exemplar": false, - "expr": "max by (cluster, ptd_site) (ppm_license_expiry{cluster=~\"$cluster_name\", ptd_site=~\"$site_name\"}) * 1000", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "License expires", - "type": "stat" - }, { "datasource": { "type": "prometheus", @@ -4508,6 +4364,6 @@ "timezone": "", "title": "Posit Team Overview", "uid": "posit-team-overview", - "version": 16, + "version": 17, "weekStart": "" } \ No newline at end of file From e7710006e7958a4ae95bbe8557545a371f51d679 Mon Sep 17 00:00:00 
2001 From: Tim Margheim Date: Wed, 11 Mar 2026 15:10:38 -0600 Subject: [PATCH 29/32] docs(grafana): add comprehensive dashboard management guide Added README.md to grafana_dashboards/ directory with complete documentation for creating, editing, and deploying Grafana dashboards. Documentation includes: Dashboard Deployment: - How dashboards are deployed via ConfigMaps - Automatic Grafana provisioning process - Important note: version field is not used for version control Creating New Dashboards: - Step-by-step UI workflow - JSON export process - JSON cleanup guidelines (removing ID, formatting) Editing Existing Dashboards: - UI editing workflow - JSON update process - Commit and deployment steps Best Practices: - Template variable usage (cluster_name, site_name) - Panel naming conventions - Query best practices (max vs sum, avoiding @ end()) - Panel layout guidelines - Units configuration (preventing auto-conversion) Testing: - JSON syntax validation - Deployment testing workflow - Common issues and verification steps Troubleshooting: - Dashboard not updating after deployment - Variables not populating - Panels showing "N/A" - JSON validation errors This provides a complete reference for anyone working with PTD Grafana dashboards, from first-time contributors to experienced developers. Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/README.md | 326 ++++++++++++++++++ 1 file changed, 326 insertions(+) create mode 100644 python-pulumi/src/ptd/grafana_dashboards/README.md diff --git a/python-pulumi/src/ptd/grafana_dashboards/README.md b/python-pulumi/src/ptd/grafana_dashboards/README.md new file mode 100644 index 0000000..d1376b9 --- /dev/null +++ b/python-pulumi/src/ptd/grafana_dashboards/README.md @@ -0,0 +1,326 @@ +# Grafana Dashboard Management + +This directory contains JSON definitions for Grafana dashboards deployed with Posit Team Dedicated (PTD). 
+ +## Dashboard Deployment + +Dashboards are deployed as Kubernetes ConfigMaps and automatically loaded into Grafana. The deployment process: + +1. JSON files in this directory are read by `pulumi_resources/aws_workload_helm.py` or `pulumi_resources/azure_workload_helm.py` +2. Each JSON file becomes a ConfigMap in the `grafana` namespace +3. Grafana's dashboard provisioning watches these ConfigMaps and loads dashboards automatically +4. Changes to JSON files trigger ConfigMap updates, which Grafana detects and reloads + +**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap. Grafana ignores this field during provisioning. Version numbers are informational only. + +## Creating a New Dashboard + +### 1. Create Through Grafana UI + +1. Access Grafana in your PTD deployment: + ```bash + ptd proxy + # Open browser to Grafana URL + ``` + +2. Click **"+ Create"** → **"Dashboard"** + +3. Add panels, configure queries, set up variables, etc. + +4. Click **"Save dashboard"** (disk icon in top-right) + +5. Name your dashboard and click **"Save"** + +### 2. Export the JSON + +After saving your dashboard: + +1. Click the **settings gear icon** (⚙️) in the top-right corner + +2. Click **"JSON Model"** in the left sidebar + +3. Click **"Copy to Clipboard"** or manually select and copy all JSON + +4. Create a new file in this directory: + ```bash + cd python-pulumi/src/ptd/grafana_dashboards/ + # Paste the JSON into a new file + vim my-new-dashboard.json + ``` + +### 3. Clean Up the JSON + +Before committing, clean up Grafana-generated metadata: + +```bash +# Remove the internal ID (Grafana generates this) +jq 'del(.id)' my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json + +# Optional: Set version to 1 (informational only) +jq '.version = 1' my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json + +# Format with consistent indentation +jq '.' 
my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json +``` + +**What to remove:** +- `"id"` field at the root level (Grafana auto-generates IDs) +- `"uid"` field if you want Grafana to generate a new unique identifier +- Any datasource UIDs that are environment-specific (use `"uid": "mimir"` for Prometheus) + +**What to keep:** +- Panel IDs (these are stable and used for referencing) +- Grid positions (`gridPos`) +- All queries and configurations +- Template variables +- Version number (informational, not functional) + +## Editing an Existing Dashboard + +### 1. Edit Through Grafana UI + +1. Open the dashboard in Grafana + +2. Click **"Dashboard settings"** (⚙️) in top-right + +3. Make your changes (add/remove panels, modify queries, etc.) + +4. Click **"Save dashboard"** + +### 2. Update the JSON File + +1. Export the JSON (see "Export the JSON" above) + +2. **Important:** Replace the *entire* JSON file with the new export + ```bash + # Copy the JSON from Grafana + # Paste into the existing file, replacing all content + vim posit-team-overview.json + ``` + +3. Clean up the JSON (see "Clean Up the JSON" above) + +4. Verify the changes: + ```bash + # Check JSON syntax + jq '.' posit-team-overview.json > /dev/null + + # See what changed + git diff posit-team-overview.json + ``` + +### 3. 
Commit and Deploy + +```bash +git add posit-team-overview.json +git commit -m "feat(grafana): add new panel to overview dashboard" + +# Deploy to test the changes +ptd ensure +``` + +## Dashboard Best Practices + +### Use Template Variables + +Always use template variables for cluster and site filtering: + +```json +{ + "templating": { + "list": [ + { + "name": "cluster_name", + "query": "label_values(up, cluster)", + "type": "query" + }, + { + "name": "site_name", + "query": "label_values(up{cluster=~\"$cluster_name\"}, ptd_site)", + "type": "query" + } + ] + } +} +``` + +Reference variables in queries: +```promql +metric_name{cluster=~"$cluster_name", ptd_site=~"$site_name"} +``` + +### Panel Naming Conventions + +- Use descriptive, concise titles +- Include time window in title if relevant: "Requests/min (5m)", "CPU usage (1h avg)" +- Avoid redundant prefixes (panel is already in a section/row) + +### Query Best Practices + +**Avoid double-counting with max/sum:** +```promql +# ✓ Good - use max() for metrics reported identically by all pods +max by (cluster, ptd_site) (ppm_license_days_left{cluster=~"$cluster_name", ptd_site=~"$site_name"}) + +# ✗ Bad - sum() will multiply by number of pods +sum by (cluster, ptd_site) (ppm_license_days_left{cluster=~"$cluster_name", ptd_site=~"$site_name"}) +``` + +**Avoid `@ end()` in gauges:** +```promql +# ✓ Good - evaluate at current dashboard time +increase(metric[24h]) / (24 * 60) + +# ✗ Bad - locks to end of time range, breaks historical playback +increase(metric[24h] @ end()) / (24 * 60) +``` + +**Use appropriate functions:** +- `increase()` for cumulative counters over a time window (returns total change) +- `rate()` for per-second rates (multiply by 60 for per-minute) +- `irate()` for instant rate (sensitive to scrape intervals) + +### Panel Layout Guidelines + +- Dashboard width is 24 grid units +- Use rows (`type: "row"`) to organize related panels +- Standard panel heights: 2-4 (stats), 4-6 (gauges), 5-8 
(timeseries) +- Align panels on a consistent grid (avoid overlaps) + +### Units Configuration + +Use appropriate units to prevent auto-conversion: + +```json +{ + "fieldConfig": { + "defaults": { + "unit": "decgbytes" // ✓ Shows GB without converting to TB + } + } +} +``` + +Common units: +- `"none"` - Plain number (use with custom suffix) +- `"percent"` - Percentage (0-100) +- `"decbytes"`, `"decgbytes"` - Decimal bytes/GB +- `"s"`, `"ms"` - Time durations (auto-converts, use "none" + suffix to prevent) +- `"dateTimeAsLocal"` - Unix timestamp as local datetime + +To prevent auto-conversion: +```json +{ + "unit": "none", + "custom": { + "suffix": " days" // Shows "43 days" instead of "6.14 weeks" + } +} +``` + +## Testing Your Changes + +### 1. Validate JSON Syntax + +```bash +jq '.' my-dashboard.json > /dev/null +``` + +### 2. Deploy to a Test Environment + +```bash +# Deploy the updated dashboard +ptd ensure --dry-run # Preview changes +ptd ensure + +# Access Grafana +ptd proxy +``` + +### 3. Verify Dashboard Loads + +1. Open Grafana in your browser +2. Navigate to your dashboard +3. Check that all panels load without errors +4. Test variable selectors (cluster, site) +5. Verify queries return data +6. Test time range selector + +### 4. Check for Common Issues + +- **"No data"**: Check if metrics exist in Prometheus (`ptd proxy` → Prometheus UI) +- **"Template variables failed to init"**: Check variable queries are valid +- **Panels overlap**: Review `gridPos` coordinates +- **Missing panels**: Check panel IDs don't conflict with existing IDs + +## Dashboard Files in This Directory + +| File | Description | +|------|-------------| +| `posit-team-overview.json` | Main operational dashboard with Workbench, Connect, and Package Manager metrics | + +## Troubleshooting + +### Dashboard doesn't update after deployment + +1. 
Check ConfigMap was created/updated: + ```bash + kubectl get configmap -n grafana + kubectl describe configmap grafana-dashboard-posit-team-overview -n grafana + ``` + +2. Check Grafana logs: + ```bash + kubectl logs -n grafana deployment/grafana -f + ``` + +3. Force Grafana to reload: + ```bash + kubectl rollout restart deployment/grafana -n grafana + ``` + +### Dashboard variables not populating + +- Verify variable queries use correct metric names +- Check if Prometheus has the required metrics: + ```bash + # From Prometheus UI + label_values(up, cluster) + ``` +- Ensure datasource UID is correct (`"uid": "mimir"`) + +### Panels showing "N/A" or "No data" + +- Verify metric exists in Prometheus +- Check label selectors match your data (cluster, ptd_site, job, etc.) +- Verify time range is appropriate for the data +- Check if aggregation functions are correct (max vs sum) + +### JSON validation errors + +```bash +# Check syntax +jq '.' dashboard.json + +# Common issues: +# - Trailing commas (not allowed in JSON) +# - Missing closing braces/brackets +# - Unescaped quotes in strings +``` + +## Contributing + +When adding or modifying dashboards: + +1. **Test thoroughly** in a development environment +2. **Document** any new variables or unusual configurations +3. **Use conventional commits**: `feat(grafana):`, `fix(grafana):`, etc. +4. **Review diffs** carefully - dashboard JSON changes can be large +5. 
**Avoid reformatting** existing dashboards without functional changes + +## Resources + +- [Grafana Dashboard JSON Model](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/view-dashboard-json-model/) +- [Grafana Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/) +- [PromQL Basics](https://prometheus.io/docs/prometheus/latest/querying/basics/) +- [Grafana Panel Editor](https://grafana.com/docs/grafana/latest/panels-visualizations/) From 9bc595c7c61ef5dfa993b6e7766f9fa7490c0569 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 15:16:31 -0600 Subject: [PATCH 30/32] fix(docs): correct dashboard deployment documentation inaccuracies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed three inaccuracies in the Grafana dashboard README: 1. Corrected file reference (High severity): - Changed: pulumi_resources/aws_workload_helm.py - Fixed: pulumi_resources/aws_eks_cluster.py - Rationale: Dashboards are loaded by the _create_dashboard_configmaps method in aws_eks_cluster.py (line 2556), not by aws_workload_helm.py 2. Added Azure limitation note (High severity): - Added prominent warning at top of Dashboard Deployment section - Documented that ConfigMap provisioning only works for AWS - Added Azure manual import workflow (Grafana UI → Import) - Rationale: Azure's _define_grafana in azure_workload_helm.py (line 602) does not configure the Grafana sidecar for dashboard watching, unlike AWS, which enables it at aws_eks_cluster.py:2110-2114. All deployment workflows in the README apply only to AWS. 3. Fixed UID field guidance (Medium severity): - Removed incorrect advice: "remove uid field to generate new identifier" - Added correct info: "uid automatically set to match filename" - Rationale: Code at aws_eks_cluster.py:2586 enforces uid = filename for idempotency. Auto-generated UIDs are not allowed.
These fixes ensure the documentation accurately reflects the actual implementation and prevents confusion for users working with Azure deployments or understanding how dashboard UIDs work. Co-Authored-By: Claude Sonnet 4.5 --- .../src/ptd/grafana_dashboards/README.md | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/README.md b/python-pulumi/src/ptd/grafana_dashboards/README.md index d1376b9..d51aab0 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/README.md +++ b/python-pulumi/src/ptd/grafana_dashboards/README.md @@ -4,14 +4,30 @@ This directory contains JSON definitions for Grafana dashboards deployed with Po ## Dashboard Deployment +**⚠️ Important: Dashboard provisioning via ConfigMaps is currently only supported for AWS workloads. Azure workloads require manual dashboard import through the Grafana UI.** + +### AWS Dashboard Deployment + Dashboards are deployed as Kubernetes ConfigMaps and automatically loaded into Grafana. The deployment process: -1. JSON files in this directory are read by `pulumi_resources/aws_workload_helm.py` or `pulumi_resources/azure_workload_helm.py` +1. JSON files in this directory are read by `pulumi_resources/aws_eks_cluster.py` (method `_create_dashboard_configmaps`) 2. Each JSON file becomes a ConfigMap in the `grafana` namespace -3. Grafana's dashboard provisioning watches these ConfigMaps and loads dashboards automatically +3. Grafana's dashboard provisioning sidecar watches these ConfigMaps and loads dashboards automatically 4. Changes to JSON files trigger ConfigMap updates, which Grafana detects and reloads +5. The dashboard `uid` is automatically set to match the filename (without `.json` extension) for idempotency + +### Azure Dashboard Deployment + +Azure deployments do not currently support automatic dashboard provisioning. 
To use dashboards on Azure: -**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap. Grafana ignores this field during provisioning. Version numbers are informational only. +1. Export the JSON from this directory +2. Access Grafana in your Azure deployment: `ptd proxy ` +3. Navigate to **Dashboards** → **Import** +4. Paste the JSON and click **Import** + +**Note:** The rest of this documentation assumes AWS deployment unless otherwise noted. + +**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap (AWS) or manual import (Azure). Grafana ignores this field during provisioning. Version numbers are informational only. ## Creating a New Dashboard @@ -65,9 +81,11 @@ jq '.' my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json **What to remove:** - `"id"` field at the root level (Grafana auto-generates IDs) -- `"uid"` field if you want Grafana to generate a new unique identifier - Any datasource UIDs that are environment-specific (use `"uid": "mimir"` for Prometheus) +**What gets automatically set (AWS only):** +- `"uid"` field - Will be automatically set to match the filename (without `.json` extension) for idempotency + **What to keep:** - Panel IDs (these are stable and used for referencing) - Grid positions (`gridPos`) From 9446605f9ddbd768ea88499d727e52132ce655af Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 15:38:01 -0600 Subject: [PATCH 31/32] Simplify documentation --- .../src/ptd/grafana_dashboards/README.md | 335 +----------------- 1 file changed, 3 insertions(+), 332 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/README.md b/python-pulumi/src/ptd/grafana_dashboards/README.md index d51aab0..62e8f5d 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/README.md +++ b/python-pulumi/src/ptd/grafana_dashboards/README.md @@ -2,11 +2,7 @@ This directory contains JSON 
definitions for Grafana dashboards deployed with Posit Team Dedicated (PTD). -## Dashboard Deployment - -**⚠️ Important: Dashboard provisioning via ConfigMaps is currently only supported for AWS workloads. Azure workloads require manual dashboard import through the Grafana UI.** - -### AWS Dashboard Deployment +**Important:** Dashboard provisioning via ConfigMaps is currently only supported for AWS workloads. Dashboards are deployed as Kubernetes ConfigMaps and automatically loaded into Grafana. The deployment process: @@ -14,331 +10,6 @@ Dashboards are deployed as Kubernetes ConfigMaps and automatically loaded into G 2. Each JSON file becomes a ConfigMap in the `grafana` namespace 3. Grafana's dashboard provisioning sidecar watches these ConfigMaps and loads dashboards automatically 4. Changes to JSON files trigger ConfigMap updates, which Grafana detects and reloads -5. The dashboard `uid` is automatically set to match the filename (without `.json` extension) for idempotency - -### Azure Dashboard Deployment - -Azure deployments do not currently support automatic dashboard provisioning. To use dashboards on Azure: - -1. Export the JSON from this directory -2. Access Grafana in your Azure deployment: `ptd proxy ` -3. Navigate to **Dashboards** → **Import** -4. Paste the JSON and click **Import** - -**Note:** The rest of this documentation assumes AWS deployment unless otherwise noted. - -**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap (AWS) or manual import (Azure). Grafana ignores this field during provisioning. Version numbers are informational only. - -## Creating a New Dashboard - -### 1. Create Through Grafana UI - -1. Access Grafana in your PTD deployment: - ```bash - ptd proxy - # Open browser to Grafana URL - ``` - -2. Click **"+ Create"** → **"Dashboard"** - -3. Add panels, configure queries, set up variables, etc. - -4. Click **"Save dashboard"** (disk icon in top-right) - -5. 
Name your dashboard and click **"Save"** - -### 2. Export the JSON - -After saving your dashboard: - -1. Click the **settings gear icon** (⚙️) in the top-right corner - -2. Click **"JSON Model"** in the left sidebar - -3. Click **"Copy to Clipboard"** or manually select and copy all JSON - -4. Create a new file in this directory: - ```bash - cd python-pulumi/src/ptd/grafana_dashboards/ - # Paste the JSON into a new file - vim my-new-dashboard.json - ``` - -### 3. Clean Up the JSON - -Before committing, clean up Grafana-generated metadata: - -```bash -# Remove the internal ID (Grafana generates this) -jq 'del(.id)' my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json - -# Optional: Set version to 1 (informational only) -jq '.version = 1' my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json - -# Format with consistent indentation -jq '.' my-new-dashboard.json > tmp.json && mv tmp.json my-new-dashboard.json -``` - -**What to remove:** -- `"id"` field at the root level (Grafana auto-generates IDs) -- Any datasource UIDs that are environment-specific (use `"uid": "mimir"` for Prometheus) - -**What gets automatically set (AWS only):** -- `"uid"` field - Will be automatically set to match the filename (without `.json` extension) for idempotency - -**What to keep:** -- Panel IDs (these are stable and used for referencing) -- Grid positions (`gridPos`) -- All queries and configurations -- Template variables -- Version number (informational, not functional) - -## Editing an Existing Dashboard - -### 1. Edit Through Grafana UI - -1. Open the dashboard in Grafana - -2. Click **"Dashboard settings"** (⚙️) in top-right - -3. Make your changes (add/remove panels, modify queries, etc.) - -4. Click **"Save dashboard"** - -### 2. Update the JSON File - -1. Export the JSON (see "Export the JSON" above) - -2. 
**Important:** Replace the *entire* JSON file with the new export - ```bash - # Copy the JSON from Grafana - # Paste into the existing file, replacing all content - vim posit-team-overview.json - ``` - -3. Clean up the JSON (see "Clean Up the JSON" above) - -4. Verify the changes: - ```bash - # Check JSON syntax - jq '.' posit-team-overview.json > /dev/null - - # See what changed - git diff posit-team-overview.json - ``` - -### 3. Commit and Deploy - -```bash -git add posit-team-overview.json -git commit -m "feat(grafana): add new panel to overview dashboard" - -# Deploy to test the changes -ptd ensure -``` - -## Dashboard Best Practices - -### Use Template Variables - -Always use template variables for cluster and site filtering: - -```json -{ - "templating": { - "list": [ - { - "name": "cluster_name", - "query": "label_values(up, cluster)", - "type": "query" - }, - { - "name": "site_name", - "query": "label_values(up{cluster=~\"$cluster_name\"}, ptd_site)", - "type": "query" - } - ] - } -} -``` - -Reference variables in queries: -```promql -metric_name{cluster=~"$cluster_name", ptd_site=~"$site_name"} -``` - -### Panel Naming Conventions - -- Use descriptive, concise titles -- Include time window in title if relevant: "Requests/min (5m)", "CPU usage (1h avg)" -- Avoid redundant prefixes (panel is already in a section/row) - -### Query Best Practices - -**Avoid double-counting with max/sum:** -```promql -# ✓ Good - use max() for metrics reported identically by all pods -max by (cluster, ptd_site) (ppm_license_days_left{cluster=~"$cluster_name", ptd_site=~"$site_name"}) - -# ✗ Bad - sum() will multiply by number of pods -sum by (cluster, ptd_site) (ppm_license_days_left{cluster=~"$cluster_name", ptd_site=~"$site_name"}) -``` - -**Avoid `@ end()` in gauges:** -```promql -# ✓ Good - evaluate at current dashboard time -increase(metric[24h]) / (24 * 60) - -# ✗ Bad - locks to end of time range, breaks historical playback -increase(metric[24h] @ end()) / (24 * 60) -``` - 
-**Use appropriate functions:** -- `increase()` for cumulative counters over a time window (returns total change) -- `rate()` for per-second rates (multiply by 60 for per-minute) -- `irate()` for instant rate (sensitive to scrape intervals) - -### Panel Layout Guidelines - -- Dashboard width is 24 grid units -- Use rows (`type: "row"`) to organize related panels -- Standard panel heights: 2-4 (stats), 4-6 (gauges), 5-8 (timeseries) -- Align panels on a consistent grid (avoid overlaps) - -### Units Configuration - -Use appropriate units to prevent auto-conversion: - -```json -{ - "fieldConfig": { - "defaults": { - "unit": "decgbytes" // ✓ Shows GB without converting to TB - } - } -} -``` - -Common units: -- `"none"` - Plain number (use with custom suffix) -- `"percent"` - Percentage (0-100) -- `"decbytes"`, `"decgbytes"` - Decimal bytes/GB -- `"s"`, `"ms"` - Time durations (auto-converts, use "none" + suffix to prevent) -- `"dateTimeAsLocal"` - Unix timestamp as local datetime - -To prevent auto-conversion: -```json -{ - "unit": "none", - "custom": { - "suffix": " days" // Shows "43 days" instead of "6.14 weeks" - } -} -``` - -## Testing Your Changes - -### 1. Validate JSON Syntax - -```bash -jq '.' my-dashboard.json > /dev/null -``` - -### 2. Deploy to a Test Environment - -```bash -# Deploy the updated dashboard -ptd ensure --dry-run # Preview changes -ptd ensure - -# Access Grafana -ptd proxy -``` - -### 3. Verify Dashboard Loads - -1. Open Grafana in your browser -2. Navigate to your dashboard -3. Check that all panels load without errors -4. Test variable selectors (cluster, site) -5. Verify queries return data -6. Test time range selector - -### 4. 
Check for Common Issues - -- **"No data"**: Check if metrics exist in Prometheus (`ptd proxy` → Prometheus UI) -- **"Template variables failed to init"**: Check variable queries are valid -- **Panels overlap**: Review `gridPos` coordinates -- **Missing panels**: Check panel IDs don't conflict with existing IDs - -## Dashboard Files in This Directory - -| File | Description | -|------|-------------| -| `posit-team-overview.json` | Main operational dashboard with Workbench, Connect, and Package Manager metrics | - -## Troubleshooting - -### Dashboard doesn't update after deployment - -1. Check ConfigMap was created/updated: - ```bash - kubectl get configmap -n grafana - kubectl describe configmap grafana-dashboard-posit-team-overview -n grafana - ``` - -2. Check Grafana logs: - ```bash - kubectl logs -n grafana deployment/grafana -f - ``` - -3. Force Grafana to reload: - ```bash - kubectl rollout restart deployment/grafana -n grafana - ``` - -### Dashboard variables not populating - -- Verify variable queries use correct metric names -- Check if Prometheus has the required metrics: - ```bash - # From Prometheus UI - label_values(up, cluster) - ``` -- Ensure datasource UID is correct (`"uid": "mimir"`) - -### Panels showing "N/A" or "No data" - -- Verify metric exists in Prometheus -- Check label selectors match your data (cluster, ptd_site, job, etc.) -- Verify time range is appropriate for the data -- Check if aggregation functions are correct (max vs sum) - -### JSON validation errors - -```bash -# Check syntax -jq '.' dashboard.json - -# Common issues: -# - Trailing commas (not allowed in JSON) -# - Missing closing braces/brackets -# - Unescaped quotes in strings -``` - -## Contributing - -When adding or modifying dashboards: - -1. **Test thoroughly** in a development environment -2. **Document** any new variables or unusual configurations -3. **Use conventional commits**: `feat(grafana):`, `fix(grafana):`, etc. -4. 
**Review diffs** carefully - dashboard JSON changes can be large -5. **Avoid reformatting** existing dashboards without functional changes - -## Resources +5. The dashboard `uid` is automatically set to match a sanitized version of the filename (without `.json` extension) for idempotency -- [Grafana Dashboard JSON Model](https://grafana.com/docs/grafana/latest/dashboards/build-dashboards/view-dashboard-json-model/) -- [Grafana Provisioning](https://grafana.com/docs/grafana/latest/administration/provisioning/) -- [PromQL Basics](https://prometheus.io/docs/prometheus/latest/querying/basics/) -- [Grafana Panel Editor](https://grafana.com/docs/grafana/latest/panels-visualizations/) +**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap (AWS). \ No newline at end of file From 336272599f5c72eb8a2b6a35f4dbbedb30f0e392 Mon Sep 17 00:00:00 2001 From: Tim Margheim Date: Wed, 11 Mar 2026 15:40:40 -0600 Subject: [PATCH 32/32] fix(docs): correct dashboard UID description and add trailing newline Fixed two issues in the simplified Grafana dashboard README: 1. Removed incorrect "sanitized version" claim (Medium severity): - Changed: "uid is set to match a sanitized version of the filename" - Fixed: "uid is set to match the filename" - Rationale: Code at aws_eks_cluster.py:2586 sets dashboard_json["uid"] = dashboard_name, where dashboard_name is the raw filename stem (line 2571). The sanitize_k8s_name() function (line 2574) is only applied to k8s_safe_name for the ConfigMap name, not the dashboard UID. 2. 
Added missing trailing newline (Low severity): - File ended without a newline, violating POSIX text file standards - Can cause issues with some tools and git diffs - Added newline at end of file Co-Authored-By: Claude Sonnet 4.5 --- python-pulumi/src/ptd/grafana_dashboards/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-pulumi/src/ptd/grafana_dashboards/README.md b/python-pulumi/src/ptd/grafana_dashboards/README.md index 62e8f5d..ffec2c2 100644 --- a/python-pulumi/src/ptd/grafana_dashboards/README.md +++ b/python-pulumi/src/ptd/grafana_dashboards/README.md @@ -10,6 +10,6 @@ Dashboards are deployed as Kubernetes ConfigMaps and automatically loaded into G 2. Each JSON file becomes a ConfigMap in the `grafana` namespace 3. Grafana's dashboard provisioning sidecar watches these ConfigMaps and loads dashboards automatically 4. Changes to JSON files trigger ConfigMap updates, which Grafana detects and reloads -5. The dashboard `uid` is automatically set to match a sanitized version of the filename (without `.json` extension) for idempotency +5. The dashboard `uid` is automatically set to match the filename (without `.json` extension) for idempotency -**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap (AWS). \ No newline at end of file +**Important:** The `version` field in dashboard JSON is **not used** for version control since we're deploying via ConfigMap (AWS).