run-auto-reset-parent-or-child-lab-machine #3231
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Restores lab VMs to their "LATEST" checkpoint and starts them again. | |
| # Triggers: a 6-hourly schedule, a manual dispatch, or a repository_dispatch event. | |
| name: auto-reset-parent-or-child-lab-machine | |
| on: | |
|   schedule: | |
|     - cron: "0 */6 * * *" # Runs once every 6 hours | |
|   workflow_dispatch: | |
|     inputs: | |
|       # Used verbatim as the second runs-on label of the manual reset job, | |
|       # so it has to match a self-hosted runner label exactly. | |
|       parent-or-child: | |
|         description: "Self-hosted runner label of the lab host to reset, e.g. parent=rr1-netperf-26\\localadminuser" | |
|         required: false | |
|         type: string | |
|       # Name of the VM whose checkpoint is restored by the manual reset job. | |
|       vm-name: | |
|         description: "The name of the VM to reset" | |
|         default: "netperf-windows-2022-client" | |
|         required: false | |
|         type: string | |
|       # true  -> the matrix job resets every machine listed in the workflow. | |
|       # false -> only the single runner / VM named by the two inputs above is reset. | |
|       reset-all-active: | |
|         description: "Resets all active lab machines" | |
|         default: false | |
|         required: false | |
|         type: boolean | |
|   # Lets another workflow request a reset; the sanity-check job reads | |
|   # client_payload.workflow_id from this event. | |
|   repository_dispatch: | |
|     types: [run-auto-reset-parent-or-child-lab-machine] | |
| jobs: | |
|   # Gate job that both reset jobs depend on (via "needs"). It queries the GitHub API | |
|   # for in-progress workflow runs of microsoft/netperf: | |
|   #   - schedule / workflow_dispatch: fail immediately if any other workflow is running. | |
|   #   - repository_dispatch: poll until only this workflow and the dispatching run remain. | |
|   sanity-check: | |
|     name: Sanity check for ${{ github.event.client_payload.workflow_id || 'dispatch' }} | |
|     runs-on: windows-latest | |
|     steps: | |
|       - name: Check for active jobs | |
|         if: ${{ github.event_name != 'repository_dispatch' }} | |
|         env: | |
|           # Passed through the environment instead of being expanded into the script text. | |
|           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
|         run: | | |
|           $headers = @{ | |
|             "Accept" = "application/vnd.github+json" | |
|             "Authorization" = "Bearer $env:GH_TOKEN" | |
|             "X-GitHub-Api-Version" = "2022-11-28" | |
|           } | |
|           # per_page=100 is the API maximum; the default page size would hide runs beyond the first page. | |
|           $url = "https://api.github.com/repos/microsoft/netperf/actions/runs?status=in_progress&per_page=100" | |
|           Write-Debug "GET $url" | |
|           $activeRuns = ((Invoke-WebRequest -Uri $url -Method GET -Headers $headers).Content | ConvertFrom-Json).workflow_runs | |
|           foreach ($run in $activeRuns) { | |
|             # Runs of this workflow itself are ignored; anything else blocks the reset. | |
|             if ($run.name -ne "auto-reset-parent-or-child-lab-machine") { | |
|               Write-Host "Found active netperf job: $($run.name)" | |
|               exit 1 | |
|             } | |
|           } | |
|         shell: pwsh | |
|       # The step name embeds the dispatcher's id; it is kept byte-for-byte. | |
|       - name: Check_Dispatched_ID;${{ github.event.client_payload.workflow_id }} | |
|         if: ${{ github.event_name == 'repository_dispatch' }} | |
|         timeout-minutes: 45 | |
|         env: | |
|           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
|           # client_payload is caller-controlled, so it must not be expanded into the script text. | |
|           DISPATCHER_RUN_ID: ${{ github.event.client_payload.workflow_id }} | |
|         run: | | |
|           $headers = @{ | |
|             "Accept" = "application/vnd.github+json" | |
|             "Authorization" = "Bearer $env:GH_TOKEN" | |
|             "X-GitHub-Api-Version" = "2022-11-28" | |
|           } | |
|           # per_page=100 is the API maximum; the default page size would hide runs beyond the first page. | |
|           $url = "https://api.github.com/repos/microsoft/netperf/actions/runs?status=in_progress&per_page=100" | |
|           Write-Debug "GET $url" | |
|           $workflow_id_that_dispatched_this_workflow = $env:DISPATCHER_RUN_ID | |
|           # Poll once a minute; the step's timeout-minutes bounds the total wait. | |
|           while ($true) { | |
|             $stop = $true | |
|             $activeRuns = ((Invoke-WebRequest -Uri $url -Method GET -Headers $headers).Content | ConvertFrom-Json).workflow_runs | |
|             foreach ($run in $activeRuns) { | |
|               # A run blocks the reset unless it is this workflow or the run that dispatched it. | |
|               if ($run.name -ne "auto-reset-parent-or-child-lab-machine" -and $run.id -ne $workflow_id_that_dispatched_this_workflow) { | |
|                 Write-Host "Waiting for active netperf job (that didn't trigger this workflow run): $($run.name), id: $($run.id) to finish first." | |
|                 Start-Sleep -Seconds 60 | |
|                 $stop = $false | |
|                 break | |
|               } | |
|             } | |
|             if ($stop) { | |
|               Write-Host "No active jobs (besides this one and the workflow that triggered it) found. Enqueuing reset." | |
|               break | |
|             } | |
|           } | |
|         shell: pwsh | |
|   # Matrix job: one run per lab host below, each on the self-hosted runner carrying that label. | |
|   # Runs for scheduled and repository_dispatch events, or when reset-all-active is true. | |
|   do-reset-all-active: | |
|     name: Reset all active lab machines | |
|     needs: sanity-check | |
|     if: ${{ inputs.reset-all-active == true || github.event_name == 'schedule' || github.event_name == 'repository_dispatch' }} | |
|     timeout-minutes: 8 | |
|     runs-on: | |
|       - self-hosted | |
|       - ${{ matrix.vec.parent-or-child }} | |
|     strategy: | |
|       # A failure on one host must not cancel the resets of the others. | |
|       fail-fast: false | |
|       matrix: | |
|         # Each entry pairs a runner label with the VM to reset on that host. | |
|         vec: | |
|           - { parent-or-child: "parent=rr1-netperf-26\\localadminuser", vm-name: "netperf-windows-2022-client" } | |
|           - { parent-or-child: "child=rr1-netperf-25\\localadminuser", vm-name: "netperf-windows-2022-server" } | |
|           - { parent-or-child: "child=rr1-netperf-05\\localadminuser", vm-name: "netperf" } | |
|           - { parent-or-child: "parent=rr1-netperf-10\\localadminuser", vm-name: "netperf" } | |
|           - { parent-or-child: "child=rr1-netperf-01\\localadminuser", vm-name: "netperf-windows-2022-server" } | |
|           - { parent-or-child: "parent=rr1-netperf-02\\localadminuser", vm-name: "netperf-windows-2022-client" } | |
|           - { parent-or-child: "child=rr1-netperf-03\\localadminuser", vm-name: "netperf-windows-2022-server" } | |
|           - { parent-or-child: "parent=rr1-netperf-04\\localadminuser", vm-name: "netperf-windows-2022-client" } | |
|           - { parent-or-child: "child=rr1-netperf-06\\localadminuser", vm-name: "netperf-windows-2022-server" } | |
|           - { parent-or-child: "parent=rr1-netperf-07\\localadminuser", vm-name: "netperf-windows-2022-client" } | |
|           - { parent-or-child: "child=rr1-netperf-08\\localadminuser", vm-name: "netperf-windows-2022-server" } | |
|           - { parent-or-child: "parent=rr1-netperf-09\\localadminuser", vm-name: "netperf-windows-2022-client" } | |
|           # - { parent-or-child: "rr1-netperf-15", vm-name: "kernel-bvt-runner" } | |
|           - { parent-or-child: "child=rr1-netperf-11\\localadminuser", vm-name: "netperf-ubuntu-server" } | |
|           - { parent-or-child: "parent=rr1-netperf-12\\localadminuser", vm-name: "netperf-ubuntu-client" } | |
|     steps: | |
|       - name: RESET STATE (parent or child) | |
|         run: | | |
|           # TODO: For WS 2025 the VMs should eventually be created/destroyed (CRUD) rather than just rolled back to a checkpoint. | |
|           $targetVm = "${{ matrix.vec.vm-name }}" | |
|           $snapshot = "LATEST" | |
|           # -Confirm:$false suppresses the interactive confirmation prompt. | |
|           Restore-VMSnapshot -VMName $targetVm -Name $snapshot -Confirm:$false | |
|       - name: Start VM, wait for online status, alert observer. | |
|         run: | | |
|           $targetVm = "${{ matrix.vec.vm-name }}" | |
|           # Case-sensitive substring match on the VM name decides the start-up path. | |
|           $isLinuxGuest = $targetVm.Contains("linux") -or $targetVm.Contains("ubuntu") | |
|           if (-not $isLinuxGuest) { | |
|             Start-VM -Name $targetVm | |
|             # Poll every 5 seconds until the VM's network adapter reports an IP address. | |
|             while ($true) { | |
|               if ((Get-VMNetworkAdapter -VMName $targetVm).IPAddresses) { break } | |
|               Write-Host "Waiting for VM to be online..." | |
|               Start-Sleep -Seconds 5 | |
|             } | |
|             # Extra settling time once an address is visible. | |
|             Start-Sleep -Seconds 10 | |
|           } else { | |
|             # linux/ubuntu VMs are force-restarted and not waited on. | |
|             Restart-VM -Name $targetVm -Force | |
|           } | |
|   # Manual single-machine reset: runs only when the workflow was not started by | |
|   # schedule / repository_dispatch and reset-all-active is false. The runner label | |
|   # and the VM name both come from the workflow_dispatch inputs. | |
|   do-reset-manual: | |
|     name: Reset parent or child Machine | |
|     if: ${{ inputs.reset-all-active == false && github.event_name != 'schedule' && github.event_name != 'repository_dispatch' }} | |
|     needs: sanity-check | |
|     timeout-minutes: 8 | |
|     runs-on: | |
|       - self-hosted | |
|       - ${{ inputs.parent-or-child }} | |
|     env: | |
|       # User-supplied input: handed to the scripts as data through the environment | |
|       # rather than expanded into the script text, so it cannot inject PowerShell code. | |
|       VM_NAME: ${{ inputs.vm-name }} | |
|     steps: | |
|       - name: RESET STATE (parent or child) | |
|         run: | | |
|           # TODO: Eventually, for WS 2025, we want to instead CRUD the VMs, instead of simply resetting their checkpoints here. | |
|           $vmName = $env:VM_NAME | |
|           $checkPointName = "LATEST" | |
|           # -Confirm:$false suppresses the interactive confirmation prompt. | |
|           Restore-VMSnapshot -VMName $vmName -Name $checkPointName -Confirm:$false | |
|       - name: Start VM, wait for online status, alert observer. | |
|         run: | | |
|           $vmName = $env:VM_NAME | |
|           if ($vmName.Contains("linux") -or $vmName.Contains("ubuntu")) { | |
|             # linux/ubuntu VMs are force-restarted and not waited on. | |
|             Restart-VM -Name $vmName -Force | |
|           } else { | |
|             Start-VM -Name $vmName | |
|             # Poll every 5 seconds until the VM's network adapter reports an IP address; | |
|             # the job-level timeout-minutes bounds the total wait. | |
|             while (-not (Get-VMNetworkAdapter -VMName $vmName).IPAddresses) { | |
|               Write-Host "Waiting for VM to be online..." | |
|               Start-Sleep -Seconds 5 | |
|             } | |
|             # Extra settling time once an address is visible. | |
|             Start-Sleep 10 | |
|           } | |