Skip to content

run-auto-reset-parent-or-child-lab-machine #3231

run-auto-reset-parent-or-child-lab-machine

run-auto-reset-parent-or-child-lab-machine #3231

# Resets lab VMs to their "LATEST" checkpoint, either for every machine in the
# lab matrix or for one machine selected by hand.
name: auto-reset-parent-or-child-lab-machine

on:
  # Periodic trigger; takes the "reset all active machines" path.
  schedule:
    - cron: "0 */6 * * *" # Runs once every 6 hours

  # Manual trigger. With reset-all-active left false, a single machine is
  # reset: parent-or-child is used as the self-hosted runner label and
  # vm-name as the VM to restore.
  workflow_dispatch:
    inputs:
      parent-or-child:
        description: "Lab jobs to observe"
        required: false
        type: string
      vm-name:
        description: "The name of the VM to reset"
        default: "netperf-windows-2022-client"
        required: false
        type: string
      reset-all-active:
        description: "Resets all active lab machines"
        default: false
        required: false
        type: boolean

  # Trigger raised by another workflow; the sender's id is read from
  # client_payload.workflow_id by the sanity-check job.
  repository_dispatch:
    types:
      - run-auto-reset-parent-or-child-lab-machine
jobs:
  # Gate for the reset jobs. Uses the GitHub REST API to list the in-progress
  # workflow runs of microsoft/netperf.
  #   * schedule / workflow_dispatch: fail if any workflow other than this one
  #     is currently running.
  #   * repository_dispatch: wait (bounded by the step timeout) until the only
  #     runs left are this workflow and the run that dispatched it.
  sanity-check:
    name: Sanity check for ${{ github.event.client_payload.workflow_id || 'dispatch' }}
    runs-on: windows-latest
    env:
      # Passed through the environment instead of being expanded into the
      # script text, so the value is never parsed as PowerShell source.
      API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - name: Check for active jobs
        if: ${{ github.event_name != 'repository_dispatch' }}
        shell: pwsh
        run: |
          $headers = @{
            "Accept" = "application/vnd.github+json"
            "Authorization" = "Bearer $env:API_TOKEN"
            "X-GitHub-Api-Version" = "2022-11-28"
          }
          # per_page=100 is the API maximum; the default page size (30) could hide active runs.
          $url = "https://api.github.com/repos/microsoft/netperf/actions/runs?status=in_progress&per_page=100"
          Write-Debug "GET $url"
          $activeRuns = ((Invoke-WebRequest -Uri $url -Method GET -Headers $headers).Content | ConvertFrom-Json).workflow_runs
          foreach ($run in $activeRuns) {
            # Runs of this workflow (including the current one) are not treated as conflicts.
            if ($run.name -ne "auto-reset-parent-or-child-lab-machine") {
              Write-Host "Found active netperf job: $($run.name)"
              exit 1
            }
          }

      # The step name embeds the dispatcher's id and is kept byte-for-byte.
      # NOTE(review): presumably the dispatching workflow looks this name up - confirm before renaming.
      - name: Check_Dispatched_ID;${{ github.event.client_payload.workflow_id }}
        if: ${{ github.event_name == 'repository_dispatch' }}
        timeout-minutes: 45
        shell: pwsh
        env:
          # client_payload is supplied by the sender of the dispatch event; keep it out of the script text.
          DISPATCHER_WORKFLOW_ID: ${{ github.event.client_payload.workflow_id }}
        run: |
          $headers = @{
            "Accept" = "application/vnd.github+json"
            "Authorization" = "Bearer $env:API_TOKEN"
            "X-GitHub-Api-Version" = "2022-11-28"
          }
          # per_page=100 is the API maximum; the default page size (30) could hide active runs.
          $url = "https://api.github.com/repos/microsoft/netperf/actions/runs?status=in_progress&per_page=100"
          Write-Debug "GET $url"
          $workflow_id_that_dispatched_this_workflow = $env:DISPATCHER_WORKFLOW_ID
          # Poll once a minute until no conflicting run is left; the 45-minute step timeout bounds the wait.
          while ($true) {
            $stop = $true
            $activeRuns = ((Invoke-WebRequest -Uri $url -Method GET -Headers $headers).Content | ConvertFrom-Json).workflow_runs
            foreach ($run in $activeRuns) {
              # A conflict is any run that is neither this workflow nor the run that dispatched it.
              if ($run.name -ne "auto-reset-parent-or-child-lab-machine" -and $run.id -ne $workflow_id_that_dispatched_this_workflow) {
                Write-Host "Waiting for active netperf job (that didn't trigger this workflow run): $($run.name), id: $($run.id) to finish first."
                Start-Sleep -Seconds 60
                $stop = $false
                break
              }
            }
            if ($stop) {
              Write-Host "No active jobs (besides this one and the workflow that triggered it) found. Enqueuing reset."
              break
            }
          }

  # Restores every VM in the matrix to its "LATEST" checkpoint. Each matrix
  # entry pairs a self-hosted runner label with the VM to reset on that runner.
  do-reset-all-active:
    name: Reset all active lab machines
    if: ${{ inputs.reset-all-active == true || github.event_name == 'schedule' || github.event_name == 'repository_dispatch' }}
    timeout-minutes: 8
    needs: sanity-check
    strategy:
      # One machine failing to reset must not cancel the remaining resets.
      fail-fast: false
      matrix:
        vec:
          - { parent-or-child: "parent=rr1-netperf-26\\localadminuser", vm-name: "netperf-windows-2022-client" }
          - { parent-or-child: "child=rr1-netperf-25\\localadminuser", vm-name: "netperf-windows-2022-server" }
          - { parent-or-child: "child=rr1-netperf-05\\localadminuser", vm-name: "netperf" }
          - { parent-or-child: "parent=rr1-netperf-10\\localadminuser", vm-name: "netperf" }
          - { parent-or-child: "child=rr1-netperf-01\\localadminuser", vm-name: "netperf-windows-2022-server" }
          - { parent-or-child: "parent=rr1-netperf-02\\localadminuser", vm-name: "netperf-windows-2022-client" }
          - { parent-or-child: "child=rr1-netperf-03\\localadminuser", vm-name: "netperf-windows-2022-server" }
          - { parent-or-child: "parent=rr1-netperf-04\\localadminuser", vm-name: "netperf-windows-2022-client" }
          - { parent-or-child: "child=rr1-netperf-06\\localadminuser", vm-name: "netperf-windows-2022-server" }
          - { parent-or-child: "parent=rr1-netperf-07\\localadminuser", vm-name: "netperf-windows-2022-client" }
          - { parent-or-child: "child=rr1-netperf-08\\localadminuser", vm-name: "netperf-windows-2022-server" }
          - { parent-or-child: "parent=rr1-netperf-09\\localadminuser", vm-name: "netperf-windows-2022-client" }
          # - { parent-or-child: "rr1-netperf-15", vm-name: "kernel-bvt-runner" }
          - { parent-or-child: "child=rr1-netperf-11\\localadminuser", vm-name: "netperf-ubuntu-server" }
          - { parent-or-child: "parent=rr1-netperf-12\\localadminuser", vm-name: "netperf-ubuntu-client" }
    runs-on:
      - self-hosted
      - ${{ matrix.vec.parent-or-child }}
    env:
      # Read by the steps as $env:VM_NAME so the name is never expanded into script text.
      VM_NAME: ${{ matrix.vec.vm-name }}
    steps:
      - name: RESET STATE (parent or child)
        run: |
          # TODO: Eventually, for WS 2025, we want to CRUD the VMs instead of simply resetting their checkpoints here.
          $vmName = $env:VM_NAME
          $checkPointName = "LATEST"
          Restore-VMSnapshot -VMName $vmName -Name $checkPointName -Confirm:$false
      - name: Start VM, wait for online status, alert observer.
        run: |
          $vmName = $env:VM_NAME
          if ($vmName.Contains("linux") -or $vmName.Contains("ubuntu")) {
            # VMs whose name contains "linux"/"ubuntu" are restarted without waiting for an IP address.
            Restart-VM -Name $vmName -Force
          } else {
            Start-VM -Name $vmName
            # Poll until the VM's network adapter reports an address; the job timeout bounds this wait.
            while (-not (Get-VMNetworkAdapter -VMName $vmName).IPAddresses) {
              Write-Host "Waiting for VM to be online..."
              Start-Sleep -Seconds 5
            }
            Start-Sleep 10
          }

  # Restores one VM, chosen through the workflow_dispatch inputs, to its
  # "LATEST" checkpoint. inputs.parent-or-child selects the self-hosted runner
  # (by label) and inputs.vm-name selects the VM on it.
  do-reset-manual:
    name: Reset parent or child Machine
    if: ${{ inputs.reset-all-active == false && github.event_name != 'schedule' && github.event_name != 'repository_dispatch' }}
    needs: sanity-check
    timeout-minutes: 8
    runs-on:
      - self-hosted
      - ${{ inputs.parent-or-child }}
    env:
      # User-supplied input: read as $env:VM_NAME so it is never expanded into script text.
      VM_NAME: ${{ inputs.vm-name }}
    steps:
      - name: RESET STATE (parent or child)
        run: |
          # TODO: Eventually, for WS 2025, we want to CRUD the VMs instead of simply resetting their checkpoints here.
          $vmName = $env:VM_NAME
          $checkPointName = "LATEST"
          Restore-VMSnapshot -VMName $vmName -Name $checkPointName -Confirm:$false
      - name: Start VM, wait for online status, alert observer.
        run: |
          $vmName = $env:VM_NAME
          if ($vmName.Contains("linux") -or $vmName.Contains("ubuntu")) {
            # VMs whose name contains "linux"/"ubuntu" are restarted without waiting for an IP address.
            Restart-VM -Name $vmName -Force
          } else {
            Start-VM -Name $vmName
            # Poll until the VM's network adapter reports an address; the job timeout bounds this wait.
            while (-not (Get-VMNetworkAdapter -VMName $vmName).IPAddresses) {
              Write-Host "Waiting for VM to be online..."
              Start-Sleep -Seconds 5
            }
            Start-Sleep 10
          }