Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
138 commits
Select commit Hold shift + click to select a range
cdc0c92
provision Azure Managed Grafana workspace
Oct 8, 2025
fb7997d
put variables in the right position
Oct 8, 2025
e13fb60
remove conditional statement
Oct 9, 2025
03b5d4f
update windows to use 1es-windows-2022
Oct 9, 2025
4a5ed6d
remove bicep installation task
Oct 9, 2025
ec7b956
remove parameters that are not needed
Oct 9, 2025
28e0eda
changed parameters file format for bicep
Oct 9, 2025
a5805d6
changed parameters file format for bicep
Oct 9, 2025
5424b0c
remove role assignment from bicep
Oct 9, 2025
e0004fc
remove role assignment from bicep
Oct 9, 2025
0ef9267
changed parameters file format for bicep
Oct 9, 2025
0382325
changed parameters file format for bicep
Oct 9, 2025
e8a9e30
add task to install amg extension
Oct 9, 2025
aa6018d
remove allow-preview-versions flag
Oct 9, 2025
ac429bd
assign grafana admin role to .net eng services
Oct 9, 2025
5683982
assign grafana admin role to .net eng services
Oct 9, 2025
340831d
assign grafana admin role to .net eng services
Oct 9, 2025
c76bcc4
remove grafana admin role assignment
Oct 9, 2025
eed1aed
add release job type
Oct 9, 2025
90cdefc
remove release job type
Oct 9, 2025
f533797
provision azure managed grafana workspace
Oct 13, 2025
a074e78
fix bicep file path
Oct 13, 2025
7fc2c34
add provision grafana stage to the deployment
Oct 13, 2025
2273352
Merge branch 'main' into haruna/managed-grafana-new
haruna99 Oct 13, 2025
b5ea9ca
add deploy azure managed grafana script
Oct 14, 2025
ccc335f
Merge updates for Azure Managed Grafana provisioning pipeline
Oct 14, 2025
6ff2dba
remove test pipeline
Oct 14, 2025
8106d3b
delete deploy-grafana.ps1 file
Oct 15, 2025
7e71347
add grafana bicep validation task to the pr jobs
Oct 15, 2025
2c39d25
add grafana bicep validation to the build stage
Oct 15, 2025
4d2041c
remove unused GrafanaKeyVault parameter
Oct 15, 2025
758a85c
remove unused GrafanaVariableGroup parameter
Oct 15, 2025
71c3d73
add skuName variable
Oct 15, 2025
cf44564
remove unused SkipGrafanaProvisioning parameter
Oct 15, 2025
0e9664d
reduce maximum attempts to 5 for grafana deployment
Oct 15, 2025
4d387fe
remove output variables since there is no downstream usage
Oct 15, 2025
ed34f93
make the dotnet eng services group the grafana admin
Oct 15, 2025
85b7227
test grafana
Oct 15, 2025
669da0d
test grafana
Oct 15, 2025
9b2a2e5
test grafana
Oct 15, 2025
bfa2882
remove group grafana admin assignment
Oct 16, 2025
bc29c7b
add user assigned managed identity
Oct 16, 2025
0351d84
add user assigned managed identity
Oct 16, 2025
363a9af
add key vault for grafana
Oct 16, 2025
f3e8036
change resource group name
Oct 16, 2025
6fae23b
change resource group validation script
Oct 16, 2025
170a6c2
change service connection
Oct 16, 2025
8a6a3a4
change service connection
Oct 16, 2025
a13a12d
change service connection to use nethelix sc
Oct 16, 2025
330b56f
grant the managed identity permissions to the keyvault
Oct 16, 2025
b5306a4
give the .net eng services group grafana admin permissions
Oct 17, 2025
d99edad
change the service connection to Dotnet Engineering services
Oct 17, 2025
48cdf59
change grafana keyvault name
Oct 17, 2025
c10a8d8
add application gateway
Nov 4, 2025
a216a49
add application gateway
Nov 4, 2025
9ac0129
rectify image used for app gateway
Nov 4, 2025
2b36a31
rectify image used for app gateway
Nov 4, 2025
b988c49
remove unused service connection id and rectify subscription
Nov 4, 2025
ac504aa
Enable HTTPS on Application Gateway
Nov 6, 2025
5445e58
Enable HTTPS on Application Gateway
Nov 6, 2025
b106927
remove incorrect operator
Nov 6, 2025
17c1ba7
remove incorrect operator
Nov 6, 2025
acc362b
remove incorrect operator
Nov 6, 2025
11618a7
fix script path
Nov 6, 2025
916faf5
grant the grafana MI Key Vault Certificates Officer role
Nov 6, 2025
c8eec92
grant pipeline service principal Key Vault Certificates Officer role
Nov 6, 2025
a71a2cc
Grant Application Gateway Access to Key Vault
Nov 6, 2025
6d98a29
Fix 502 error: Accept 401 status from Grafana health probe
Nov 6, 2025
c1f94c3
publish grafana dashboard
Nov 7, 2025
59540e6
add token creation to publish grafana stage
Nov 9, 2025
0a79b46
grant service principal grafana admin role
Nov 9, 2025
7d95e70
grant service principal key vault officer role
Nov 9, 2025
0e2b751
Grant pipeline SP Key Vault Secrets Officer role in Grafana provision…
Nov 9, 2025
25be23e
Add Key Vault permission verification and retry logic for RBAC propag…
Nov 10, 2025
ab8601e
Pass Azure Pipelines credentials to MSBuild SDK for Key Vault authent…
Nov 10, 2025
d40d857
add service connection ID
Nov 11, 2025
ca72eb5
add service connection client ID
Nov 11, 2025
4b97270
remove undefined ServiceConnectionId
Nov 11, 2025
7f3fda8
import secrets from dotnet-grafana-secrets.yaml
Nov 11, 2025
9f6d58d
remove dnceng-amg-int-kv.yaml file
Nov 11, 2025
81f4ed5
remove notification alerts and synchronize secrets
Nov 11, 2025
381b936
fix key vault access propagation
Nov 11, 2025
c617b3a
fix keyvault secret access
Nov 11, 2025
6f68e64
grant MI monitoring reader permission to subscriptions
Nov 12, 2025
5ce1be5
grant MI monitoring reader permission to subscriptions in the pipeline
Nov 12, 2025
5d05dbe
fix grafana notification contact points
Nov 13, 2025
b30cf22
azure managed grafana alert rules
Nov 18, 2025
69a2709
add quota alerts
Nov 19, 2025
d09e4e6
import secret to staging and prod KV
Nov 19, 2025
7b74036
remove unused output variables
Nov 19, 2025
0fdc086
remove unused output variables in azure managed grafana bicep
Nov 19, 2025
e5048af
remove EnableCustomDomain variable from deploy-managed-grafana.yml
Nov 19, 2025
edab753
remove unnecessary comments
Nov 20, 2025
b73f5f6
remove unnecessary comments
Nov 20, 2025
e24c9df
remove unnecessary comments
Nov 20, 2025
a0fd967
change token lifespan to 30 days
Nov 20, 2025
3dd87e9
remove unnecessary comments
Nov 20, 2025
2b65635
remove unused contact point
Nov 20, 2025
d5582a7
fix duplication of contact points
Nov 20, 2025
070fbeb
include dashboard for homepage
Nov 20, 2025
3f10420
remove plugin version
Nov 21, 2025
0517896
remove app gateway logic
Nov 21, 2025
98f709a
Migrate alert rules to Azure Managed Grafana unified alerting format
Nov 22, 2025
f091797
grant grafana MI access to engineeringdata
Nov 23, 2025
e4a35f5
grant grafana MI access to engineeringdata
Nov 23, 2025
9ad1f73
grant grafana MI access to engineeringdata
Nov 23, 2025
ddcce3e
grant grafana MI access to engineeringdata
Nov 23, 2025
6e3def1
remove grafana MI access to engineeringdata
Nov 23, 2025
e9d06c9
fix data source for dashboard
Nov 24, 2025
88c34a1
update grafana dashboards
Nov 25, 2025
5606316
set homepage preference
Nov 25, 2025
c223f45
fix dashboard alert annotations
Nov 27, 2025
af36144
add grafana keyvault manifest file
Nov 27, 2025
83d0b66
add grafana annotation settings for infinity datasource
Nov 27, 2025
2ca58ac
show inactive alerts
Dec 1, 2025
7d0a9cd
set alert rule timeframe
Dec 3, 2025
fd13b74
fix alerting rules folder directory
Jan 6, 2026
f25346e
refactor grafana publishing to use only one stage
Jan 6, 2026
3e02792
refactor grafana publishing to use only one stage
Jan 6, 2026
1b3005e
add managed grafana to the dotnet-dnceng-ci pipeline
Jan 7, 2026
44bc88a
add managed grafana to the dotnet-dnceng-ci pipeline
Jan 7, 2026
98aa83b
remove self hosted grafana dashboard publishing
Jan 7, 2026
c087a93
fix grafana dashboard publishing error
Jan 7, 2026
dd901b2
fix error when adding AMG to dotnet-dnceng-ci pipeline
Jan 7, 2026
6eaa430
remove test pipeline
Jan 7, 2026
89c3cf8
fix deploy-managed-grafana.yml filepath
Jan 7, 2026
382350c
include serviceConnectionName variable
Jan 7, 2026
5e64cdb
Allow anonymous access to alert webhook endpoint for Grafana
Jan 9, 2026
840b1e3
remove AllowAnonymous from alertHookController
Jan 13, 2026
2064f8a
Merge remote-tracking branch 'origin/main' into haruna/managed-grafan…
Jan 13, 2026
75f2091
delete azure-pipelines-managed-grafana pipeline
Jan 13, 2026
f406766
remove unnecessary comment
Jan 13, 2026
eff6b13
Merge branch 'main' into haruna/managed-grafana-new
haruna99 Jan 13, 2026
93896b0
fix service connection naming
Jan 14, 2026
cc08ecf
Merge branch 'haruna/managed-grafana-new' of https://github.com/dotne…
Jan 14, 2026
554a3e2
remove Validation of the Grafana Bicep Template from the PR stage
Jan 14, 2026
b42bc65
add azure managed grafana api key to secret manager
Jan 15, 2026
708dd92
remove unused files
Jan 23, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .vault-config/dnceng-amg-int-kv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Secret definitions stored in the dnceng-amg-int-kv Azure Key Vault.
storageLocation:
  type: azure-key-vault
  parameters:
    name: dnceng-amg-int-kv
    subscription: a4fc5514-21a9-4296-bfaf-5c7ee7fa35d1

secrets:
  # Token used to call the DotNet Status website API
  dotnet-build-bot-dotnet-eng-status-token:
    type: text
    parameters:
      description: API token from https://dotneteng-status-staging.azurewebsites.net/ - Generated using dotnet-build-bot account

  # Authorization header consumed by the Deployment Annotations datasource
  dotneteng-status-auth-header:
    type: text
    parameters:
      description: "Bearer token for status API - Format: Bearer <dotnet-build-bot-dotnet-eng-status-token>"

  # Teams webhook URL that alert notifications are sent to
  fr-bot-notifications-teams-notification-url:
    type: text
    parameters:
      description: Teams Incoming Webhook URL - Do not rotate
24 changes: 24 additions & 0 deletions .vault-config/dnceng-amg-prod-kv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Secret definitions stored in the dnceng-amg-prod-kv Azure Key Vault.
storageLocation:
  type: azure-key-vault
  parameters:
    name: dnceng-amg-prod-kv
    subscription: a4fc5514-21a9-4296-bfaf-5c7ee7fa35d1

secrets:
  # Token used to call the DotNet Status website API
  dotnet-build-bot-dotnet-eng-status-token:
    type: text
    parameters:
      description: API token from https://dotneteng-status.azurewebsites.net/ - Generated using dotnet-build-bot account

  # Authorization header consumed by the Deployment Annotations datasource
  dotneteng-status-auth-header:
    type: text
    parameters:
      description: "Bearer token for status API - Format: Bearer <dotnet-build-bot-dotnet-eng-status-token>"

  # Teams webhook URL that alert notifications are sent to
  fr-bot-notifications-teams-notification-url:
    type: text
    parameters:
      description: Teams Incoming Webhook URL - Do not rotate
2 changes: 1 addition & 1 deletion azure-pipelines-pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,4 @@ stages:
dotnet run --project src/SecretManager/Microsoft.DncEng.SecretManager -- validate-all -b src @manifestArgs
displayName: Verify Secret Usages

- template: /eng/test.yaml
- template: /eng/test.yaml
40 changes: 33 additions & 7 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ variables:
- name: _DotNetArtifactsCategory
value: .NETCore
- group: SDL_Settings
- name: ServiceConnectionName
value: 'Dotnet Engineering services'

trigger:
batch: true
Expand Down Expand Up @@ -195,6 +197,24 @@ extends:
contents: '*'
targetFolder: $(Build.ArtifactStagingDirectory)\eng

# Build-time check: fails the step when the Grafana Bicep template is missing
# or when `az bicep build` reports a non-zero exit code.
- task: AzureCLI@2
  displayName: 'Validate Grafana Bicep Template'
  inputs:
    azureSubscription: '$(ServiceConnectionName)'
    scriptLocation: 'inlineScript'
    scriptType: 'ps'
    inlineScript: |
      $templatePath = "eng/deployment/azure-managed-grafana.bicep"

      Write-Host "Validating Grafana Bicep template..."

      # Guard: the template has to exist before it can be compiled.
      if (-not (Test-Path $templatePath)) {
        throw "Bicep template not found: azure-managed-grafana.bicep"
      }

      # Compile the template; a non-zero exit code is treated as a validation failure.
      az bicep build --file $templatePath
      if ($LASTEXITCODE -ne 0) {
        throw "Bicep template validation failed"
      }

      Write-Host "SUCCESS: Bicep template validation successful"

- template: /eng/common/templates-official/post-build/post-build.yml@self
parameters:
enableSymbolValidation: false
Expand Down Expand Up @@ -225,20 +245,26 @@ extends:
PublishProfile: Int
ServiceConnectionName: NetHelixStaging
StatusVariableGroup: DotNetStatus Staging
GrafanaHost: https://dotnet-eng-grafana-staging.westus2.cloudapp.azure.com
GrafanaKeyVault: dotnet-grafana-staging
GrafanaVariableGroup: Dotnet-Grafana-Staging
ServiceConnectionClientId: 57f299da-15de-4117-b8f6-7c10451926f0
ServiceConnectionId: 7829de7e-fb4e-4118-8370-475d6bc61905
AMGServiceConnectionName: 'Dotnet Engineering services'
AMGServiceConnectionId: dd8c2cfc-b9c9-452c-a168-ccd4240ada55
AMGServiceConnectionClientId: fc1eb341-aea4-4a11-8f80-d14b8775b2ba
AMGDeploymentEnvironment: Staging
AMGGrafanaWorkspaceName: dnceng-grafana-staging
AMGGrafanaKeyVault: dnceng-amg-int-kv
${{ else }}:
DeploymentEnvironment: Production
DotNetStatusAppName: dotneteng-status
DotNetStatusEndpoint: .NET Engineering Deployment Notification - Production
PublishProfile: Prod
ServiceConnectionName: NetHelix
StatusVariableGroup: DotNetStatus Production
GrafanaHost: https://dotnet-eng-grafana.westus2.cloudapp.azure.com
GrafanaKeyVault: dotnet-grafana
GrafanaVariableGroup: Dotnet-Grafana-Production
ServiceConnectionClientId: fc1eb341-aea4-4a11-8f80-d14b8775b2ba
ServiceConnectionId: 4a511f6f-b538-48e6-a389-207e430634d1
ServiceConnectionId: 4a511f6f-b538-48e6-a389-207e430634d1
AMGServiceConnectionName: 'Dotnet Engineering services'
AMGServiceConnectionId: dd8c2cfc-b9c9-452c-a168-ccd4240ada55
AMGServiceConnectionClientId: fc1eb341-aea4-4a11-8f80-d14b8775b2ba
AMGDeploymentEnvironment: Production
AMGGrafanaWorkspaceName: dnceng-grafana
AMGGrafanaKeyVault: dnceng-amg-prod-kv
208 changes: 208 additions & 0 deletions eng/deploy-managed-grafana.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# Inputs of the managed Grafana deployment template. All are required strings.
parameters:
  # Service connection used as azureSubscription by every AzureCLI@2 task
  # and forwarded to the provisioning template.
  - name: AMGServiceConnectionName
    type: string
  # Forwarded to the dashboard publish build as the ClientId property.
  - name: AMGServiceConnectionClientId
    type: string
  # Forwarded to the dashboard publish build as the ServiceConnectionId property.
  - name: AMGServiceConnectionId
    type: string
  # Environment label passed to provisioning, token setup, and dashboard publishing.
  - name: AMGDeploymentEnvironment
    type: string
  # Name of the Azure Managed Grafana workspace.
  - name: AMGGrafanaWorkspaceName
    type: string
  # Key Vault name passed to provisioning and token setup; the publish step
  # reads the Grafana API token from it.
  - name: AMGGrafanaKeyVault
    type: string

# One stage that provisions the Grafana workspace (via template), prepares an API token,
# and publishes dashboards. It runs after the predeploy and approval stages.
stages:
- stage: DeployGrafana
displayName: 'Deploy Grafana Infrastructure and Dashboards'
# NOTE(review): this stage-level pool requests image 1es-windows-2019 from
# NetCore1ESPool-Internal-NoMSI, while the SetupToken and PublishDashboards jobs request
# 1es-windows-2022 from NetCore1ESPool-Internal. Confirm the difference is intentional.
pool:
name: NetCore1ESPool-Internal-NoMSI
demands: ImageOverride -equals 1es-windows-2019
dependsOn:
- predeploy
- approval
jobs:
# Provisioning jobs come from a shared template; resource group and location are fixed here.
# Presumably this template defines the ProvisionGrafana job referenced below — verify in
# /eng/provision-grafana.yaml.
- template: /eng/provision-grafana.yaml@self
parameters:
DeploymentEnvironment: ${{ parameters.AMGDeploymentEnvironment }}
ServiceConnectionName: ${{ parameters.AMGServiceConnectionName }}
GrafanaResourceGroup: 'monitoring-managed'
GrafanaWorkspaceName: ${{ parameters.AMGGrafanaWorkspaceName }}
GrafanaLocation: 'westus2'
GrafanaKeyVault: ${{ parameters.AMGGrafanaKeyVault }}
# Grants the pipeline identity Grafana Admin and creates/validates the Grafana API token.
- job: SetupToken
dependsOn: ProvisionGrafana
displayName: 'Setup Grafana API Token'
variables:
# Runtime expression: reads the GrafanaEndpoint output variable of the ExportGrafanaInfo
# step in the ProvisionGrafana job.
# NOTE(review): no step visible in this job references $(GrafanaEndpoint) directly;
# confirm eng/setup-grafana-api-token.ps1 consumes it before keeping it.
GrafanaEndpoint: $[ dependencies.ProvisionGrafana.outputs['ExportGrafanaInfo.GrafanaEndpoint'] ]
Copy link
Member

@garath garath Jan 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why use this method with ExportGrafanaInfo instead of parameter variables (like e.g. parameters.AMGGrafanaKeyVault)?

# Agent pool requested for the steps that follow.
pool:
name: NetCore1ESPool-Internal
demands: ImageOverride -equals 1es-windows-2022
steps:
# Ensures the identity behind the service connection holds the "Grafana Admin" role
# on the workspace (inline PowerShell Core script run through the Azure CLI task).
- task: AzureCLI@2
displayName: 'Grant Pipeline Service Principal Grafana Admin Role'
inputs:
azureSubscription: ${{ parameters.AMGServiceConnectionName }}
scriptType: 'pscore'
scriptLocation: 'inlineScript'
inlineScript: |
Write-Host "Granting pipeline service principal Grafana Admin role..."

# Workspace name comes from the template parameter; the resource group name is hard-coded
# and matches the GrafanaResourceGroup value passed to the provisioning template.
$workspaceName = "${{ parameters.AMGGrafanaWorkspaceName }}"
$rgName = "monitoring-managed"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider making this a parameter as well. It can have this as the default value. Then all the tweakable variables will be up at the top in the same area.


# Resolve the identity the Azure CLI is signed in as. `az account show` reports it under
# user.name; for a service principal login that value is the application (client) ID,
# not the directory object ID. `--assignee` accepts either form, so it is used as-is.
$spAssignee = az account show --query "user.name" --output tsv
if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($spAssignee)) {
    throw "Unable to determine the signed-in pipeline service principal"
}
Write-Host "Service Principal ID: $spAssignee"

# Resolve the Grafana workspace resource ID; it is the scope of the role assignment.
# Fail here with a clear message rather than passing an empty --scope to the calls below.
$grafanaId = az grafana show --name $workspaceName --resource-group $rgName --query "id" --output tsv
if ($LASTEXITCODE -ne 0 -or [string]::IsNullOrWhiteSpace($grafanaId)) {
    throw "Unable to resolve Grafana workspace '$workspaceName' in resource group '$rgName'"
}
Write-Host "Grafana Workspace: $workspaceName"
Write-Host "Grafana ID: $grafanaId"

# Check if role assignment already exists so reruns do not attempt a duplicate assignment
$existingAssignment = az role assignment list `
    --assignee $spAssignee `
    --scope $grafanaId `
    --role "Grafana Admin" `
    --query "[0].id" `
    --output tsv

if ($existingAssignment) {
    Write-Host "✓ Pipeline service principal already has Grafana Admin role"
} else {
    Write-Host "Granting Grafana Admin role..."
    az role assignment create `
        --role "Grafana Admin" `
        --assignee $spAssignee `
        --scope $grafanaId `
        --output none

    # throw fails the task immediately, matching the Bicep validation script's error style.
    if ($LASTEXITCODE -ne 0) {
        throw "Failed to grant Grafana Admin role"
    }

    Write-Host "✓ Pipeline service principal granted Grafana Admin role"
    # A new assignment is not effective immediately; pause before later steps rely on it.
    Write-Host "⏱ Waiting 15 seconds for role assignment to propagate..."
    Start-Sleep -Seconds 15
}

# Runs eng/setup-grafana-api-token.ps1 under the AMG service connection, passing it the
# target environment and the Key Vault name.
- task: AzureCLI@2
  displayName: 'Create or Validate Grafana API Token'
  inputs:
    azureSubscription: ${{ parameters.AMGServiceConnectionName }}
    scriptLocation: 'scriptPath'
    scriptPath: 'eng/setup-grafana-api-token.ps1'
    scriptType: 'pscore'
    # Folded scalar: the two lines below are joined into one argument string.
    arguments: >-
      -Environment "${{ parameters.AMGDeploymentEnvironment }}"
      -KeyVaultName "${{ parameters.AMGGrafanaKeyVault }}"

# Publishes the Grafana dashboards to the Azure Managed Grafana workspace.
# Steps: install the .NET SDK from global.json, build the Monitoring SDK, then read the
# Grafana API token from Key Vault and run the PublishGrafana MSBuild target.
- job: PublishDashboards
  displayName: 'Publish Dashboards to Azure Managed Grafana'
  dependsOn:
  - ProvisionGrafana
  - SetupToken
  pool:
    name: NetCore1ESPool-Internal
    demands: ImageOverride -equals 1es-windows-2022
  variables:
    # Runtime expression: reads the GrafanaEndpoint output variable of the ExportGrafanaInfo
    # step in the ProvisionGrafana job.
    GrafanaEndpoint: $[ dependencies.ProvisionGrafana.outputs['ExportGrafanaInfo.GrafanaEndpoint'] ]
    # NOTE(review): this maps the predefined System.AccessToken variable onto itself;
    # $(System.AccessToken) is used directly in the script below. Confirm this entry is needed.
    System.AccessToken: $(System.AccessToken)
  steps:
  - task: UseDotNet@2
    displayName: 'Install Correct .NET Version'
    inputs:
      useGlobalJson: true

  - script: dotnet publish --configuration Release $(Build.SourcesDirectory)\src\Monitoring\Sdk\Microsoft.DotNet.Monitoring.Sdk.csproj -f net8.0
    displayName: 'Build Monitoring SDK'

  - task: AzureCLI@2
    displayName: 'Publish Grafana Dashboards'
    inputs:
      azureSubscription: ${{ parameters.AMGServiceConnectionName }}
      scriptType: 'pscore'
      scriptLocation: 'inlineScript'
      addSpnToEnvironment: true
      inlineScript: |
        Write-Host "=========================================="
        Write-Host "Publishing Dashboards to Azure Managed Grafana"
        Write-Host "=========================================="
        Write-Host "Grafana Endpoint: $(GrafanaEndpoint)"
        Write-Host "Environment: ${{ parameters.AMGDeploymentEnvironment }}"
        Write-Host ""

        # Get the API token from Key Vault with retry logic for RBAC propagation
        $tokenSecretName = "grafana-admin-api-key"
        $keyVaultName = "${{ parameters.AMGGrafanaKeyVault }}"
        Write-Host "Retrieving API token from Key Vault..."

        $apiToken = $null
        $maxRetries = 5
        $waitSeconds = 60
        # There is no sleep after the final attempt, so the real wait is (attempts - 1) intervals.
        $totalWaitSeconds = ($maxRetries - 1) * $waitSeconds

        for ($attempt = 1; $attempt -le $maxRetries; $attempt++) {
          # Capture stdout only. Merging stderr (2>&1) would let Azure CLI warnings or error
          # text end up inside the token value; unredirected stderr still shows in the log.
          $secretValue = az keyvault secret show --vault-name $keyVaultName --name $tokenSecretName --query "value" --output tsv

          if ($LASTEXITCODE -eq 0 -and -not [string]::IsNullOrWhiteSpace($secretValue)) {
            $apiToken = "$secretValue".Trim()
            # Register the value with the agent so it is masked in all later log output;
            # it is passed on the dotnet command line below.
            Write-Host "##vso[task.setsecret]$apiToken"
            Write-Host "✓ API token retrieved successfully from Key Vault"
            break
          }

          if ($attempt -lt $maxRetries) {
            Write-Host "⏱ Waiting for Key Vault access (attempt $attempt/$maxRetries, waiting $waitSeconds seconds)..."
            Start-Sleep -Seconds $waitSeconds
          }
        }

        if (-not $apiToken) {
          # AzureCLI@2 defaults powerShellErrorActionPreference to 'stop' (not overridden here),
          # so a Write-Error would terminate the script before the remaining diagnostics print.
          # Report the error through a logging command, print the details, then exit.
          Write-Host "##vso[task.logissue type=error]Unable to retrieve API token after $maxRetries attempts ($totalWaitSeconds seconds total)"
          Write-Host "Secret name: $tokenSecretName"
          Write-Host "Key Vault: $keyVaultName"
          Write-Host ""
          Write-Host "Possible causes:"
          Write-Host "1. RBAC permissions haven't propagated yet (can take 5-10 minutes)"
          Write-Host "2. The SetupToken job failed to create the token"
          Write-Host "3. The pipeline service principal doesn't have Key Vault Secrets Officer role"
          Write-Host ""
          exit 1
        }

        Write-Host ""
        Write-Host "Publishing dashboards using MSBuild SDK..."
        Write-Host ""

        # Publish using the same MSBuild SDK as self-hosted Grafana.
        # The token argument is quoted as a whole so it always reaches MSBuild as one argument.
        dotnet build $(Build.SourcesDirectory)\src\Monitoring\Monitoring.ArcadeServices\Monitoring.ArcadeServices.proj `
          --configuration Release `
          -t:PublishGrafana `
          "-p:GrafanaAccessToken=$apiToken" `
          -p:GrafanaHost="$(GrafanaEndpoint)" `
          -p:GrafanaKeyVaultName="${{ parameters.AMGGrafanaKeyVault }}" `
          -p:GrafanaEnvironment="${{ parameters.AMGDeploymentEnvironment }}" `
          -p:ParametersFile=parameters.json `
          -p:ClientId="${{ parameters.AMGServiceConnectionClientId }}" `
          -p:ServiceConnectionId="${{ parameters.AMGServiceConnectionId }}" `
          -p:SystemAccessToken="$(System.AccessToken)" `
          -v:normal

        if ($LASTEXITCODE -ne 0) {
          Write-Host "##vso[task.logissue type=error]Failed to publish dashboards to Grafana"
          exit 1
        }

        Write-Host ""
        Write-Host "=========================================="
        Write-Host "✓ SUCCESS! Dashboards Published"
        Write-Host "=========================================="
        Write-Host ""
        Write-Host "View your dashboards at:"
        Write-Host "$(GrafanaEndpoint)/dashboards"
        Write-Host ""

Loading