diff --git a/.github/workflows/generate-summary.ps1 b/.github/workflows/generate-summary.ps1 index 53805dab7..0d0f4acbf 100644 --- a/.github/workflows/generate-summary.ps1 +++ b/.github/workflows/generate-summary.ps1 @@ -51,11 +51,10 @@ function Write-ThroughputRow { else { $row += " $(($Results[$i] / 1000000).ToString('F2')) |" } } - # TODO: Regression detection is heinously broken. Let's reduce the noise. - # $row += " " + $Regression.CumulativeResult + " |" - # $row += " " + $Regression.Baseline + " |" - # if ($Regression.BestResultCommit -eq "N/A") { $row += "N/A |" } - # else { $row += "[" + $Regression.BestResult + "](https://github.com/microsoft/msquic/commit/" + $Regression.BestResultCommit + ") |" } + $row += " " + $Regression.CumulativeResult + " |" + $row += " " + $Regression.Baseline + " |" + if ($Regression.BestResultCommit -eq "N/A") { $row += "N/A |" } + else { $row += "[" + $Regression.BestResult + "](https://github.com/microsoft/msquic/commit/" + $Regression.BestResultCommit + ") |" } $Script:markdown += $row } @@ -79,11 +78,10 @@ function Write-HpsRow { else { $row += " $($Results[$i]) |" } } - # TODO: Regression detection is heinously broken. Let's reduce the noise. - # $row += " " + $Regression.CumulativeResult + " |" - # $row += " " + $Regression.Baseline + " |" - # if ($Regression.BestResultCommit -eq "N/A") { $row += "N/A |" } - # else { $row += "[" + $Regression.BestResult + "](https://github.com/microsoft/msquic/commit/" + $Regression.BestResultCommit + ") |" } + $row += " " + $Regression.CumulativeResult + " |" + $row += " " + $Regression.Baseline + " |" + if ($Regression.BestResultCommit -eq "N/A") { $row += "N/A |" } + else { $row += "[" + $Regression.BestResult + "](https://github.com/microsoft/msquic/commit/" + $Regression.BestResultCommit + ") |" } $Script:markdown += $row } @@ -108,11 +106,10 @@ function Write-RpsRow { $row += " $($Results[$i+$j]) |" } - # TODO: Regression detection is heinously broken. Let's reduce the noise. 
- # $row += " " + $Regression.CumulativeResult + " |" - # $row += " " + $Regression.Baseline + " |" - # if ($Regression.BestResultCommit -eq "N/A") { $row += "N/A |" } - # else { $row += "[" + $Regression.BestResult + "](https://github.com/microsoft/msquic/commit/" + $Regression.BestResultCommit + ") |" } + $row += " " + $Regression.CumulativeResult + " |" + $row += " " + $Regression.Baseline + " |" + if ($Regression.BestResultCommit -eq "N/A") { $row += "N/A |" } + else { $row += "[" + $Regression.BestResult + "](https://github.com/microsoft/msquic/commit/" + $Regression.BestResultCommit + ") |" } $Script:markdown += $row } @@ -131,8 +128,8 @@ $hasRegression = $false # Write the Upload table. $markdown = @" # Upload Throughput (Gbps) -| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Result 1 | Result 2 | Result 3 | -| --------- | --- | -- | ------- | ---- | --- | -- | --------- | -------- | -------- | -------- | +| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Result 1 | Result 2 | Result 3 | Avg | Noise | Best Ever | +| --------- | --- | -- | ------- | ---- | --- | -- | --------- | -------- | -------- | -------- | --- | ----- | --------- | "@ foreach ($file in $files) { Write-Host "Upload Tput: Processing $file..." @@ -180,8 +177,8 @@ foreach ($file in $files) { $markdown += @" `n # Download Throughput (Gbps) -| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Result 1 | Result 2 | Result 3 | -| --------- | --- | -- | ------- | ---- | --- | -- | --------- | -------- | -------- | -------- | +| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Result 1 | Result 2 | Result 3 | Avg | Noise | Best Ever | +| --------- | --- | -- | ------- | ---- | --- | -- | --------- | -------- | -------- | -------- | --- | ----- | --------- | "@ foreach ($file in $files) { Write-Host "Download Tput: Processing $file..." 
@@ -228,8 +225,8 @@ foreach ($file in $files) { $markdown += @" `n # Handshakes Per Second (HPS) -| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Result 1 | Result 2 | Result 3 | -| --------- | --- | -- | ------- | ---- | --- | -- | --------- | -------- | -------- | -------- | +| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Result 1 | Result 2 | Result 3 | Avg | Noise | Best Ever | +| --------- | --- | -- | ------- | ---- | --- | -- | --------- | -------- | -------- | -------- | --- | ----- | --------- | "@ foreach ($file in $files) { Write-Host "HPS: Processing $file..." @@ -275,8 +272,8 @@ foreach ($file in $files) { $markdown += @" `n # Request Per Second (RPS) and Latency (µs) -| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Min | P50 | P90 | P99 | P99.9 | P99.99 | P99.999 | P99.9999 | RPS | -| --------- | --- | -- | ------- | ---- | --- | -- | --------- | --- | --- | --- | --- | ----- | ------ | ------- | -------- | --- | +| Pass/Fail | Env | OS | Version | Arch | TLS | IO | Transport | Min | P50 | P90 | P99 | P99.9 | P99.99 | P99.999 | P99.9999 | RPS | Avg | Noise | Best Ever | +| --------- | --- | -- | ------- | ---- | --- | -- | --------- | --- | --- | --- | --- | ----- | ------ | ------- | -------- | --- | --- | ----- | --------- | "@ foreach ($file in $files) { # TODO: Right now, we are not using a watermark based method for regression detection of latency percentile values because we don't know how to determine a "Best Ever" distribution. 
diff --git a/.github/workflows/quic.yml b/.github/workflows/quic.yml index e5395e570..f6df4d8ec 100644 --- a/.github/workflows/quic.yml +++ b/.github/workflows/quic.yml @@ -477,7 +477,9 @@ jobs: repository: microsoft/netperf ref: sqlite - run: ls - - run: python regression.py + - name: Fetch regression.py from main branch + run: wget https://raw.githubusercontent.com/microsoft/netperf/main/pipeline/regression.py -O regression.py + - run: python regression.py --featureint 2 - name: Git commit if: ${{ (github.event_name == 'repository_dispatch' && github.event.client_payload.pr == '') || inputs.commit }} run: 'git config user.name "QUIC Dev[bot]" && git config user.email "quicdev@microsoft.com" && git add *.json && git commit -m "Update regression metrics" && git push' diff --git a/pipeline/regression.py b/pipeline/regression.py index 4c63c4b78..2a1b19eac 100644 --- a/pipeline/regression.py +++ b/pipeline/regression.py @@ -5,6 +5,48 @@ import glob import os +# Mapping from DB test IDs (old-format and new scenario-* format) to consumer keys. +# The consumer (secnetperf.ps1 / secnetperf-helpers.psm1) builds keys as "$scenario-$transport", +# e.g. "upload-quic", "download-tcp". The DB uses different naming conventions. 
+DB_TO_CONSUMER_KEY = {
+    # Old format → consumer
+    "tput-up-quic": "upload-quic",
+    "tput-up-tcp": "upload-tcp",
+    "tput-down-quic": "download-quic",
+    "tput-down-tcp": "download-tcp",
+    "hps-conns-100-quic": "hps-quic",
+    "hps-conns-100-tcp": "hps-tcp",
+    "rps-up-512-down-4000-quic": "rps-quic",
+    "rps-up-512-down-4000-tcp": "rps-tcp",
+    "max-rps-up-512-down-4000-quic": "rps-quic",
+    "max-rps-up-512-down-4000-tcp": "rps-tcp",
+    # New scenario-* format → consumer
+    "scenario-upload-quic": "upload-quic",
+    "scenario-upload-tcp": "upload-tcp",
+    "scenario-download-quic": "download-quic",
+    "scenario-download-tcp": "download-tcp",
+    "scenario-hps-quic": "hps-quic",
+    "scenario-hps-tcp": "hps-tcp",
+    "scenario-rps-quic": "rps-quic",
+    "scenario-rps-tcp": "rps-tcp",
+    "scenario-rps-multi-quic": "rps-multi-quic",
+    "scenario-rps-multi-tcp": "rps-multi-tcp",
+    "scenario-latency-quic": "latency-quic",
+    "scenario-latency-tcp": "latency-tcp",
+}
+
+
+def remap_to_consumer_keys(data):
+    """Return a new dict keyed by consumer-format IDs (unmapped IDs pass through); on collisions scenario-* env data wins."""
+    result = {}
+    for db_key, env_data in sorted(data.items(), key=lambda kv: kv[0].startswith("scenario-")):
+        consumer_key = DB_TO_CONSUMER_KEY.get(db_key, db_key)
+        if consumer_key not in result:
+            result[consumer_key] = {}
+        # Merge: the stable sort above visits scenario-* keys last, so they overwrite legacy data for the same env
+        result[consumer_key].update(env_data)
+    return result
+
 # Create the parser
 parser = argparse.ArgumentParser(description="Process a feature integer.")
 
@@ -307,6 +349,10 @@ def sliding_window(): regression_file[testid][f"{os_name}-{arch}-{context}-{io}-{tls}"] = compute_baseline(data, testid) watermark_regression_file[testid][f"{os_name}-{arch}-{context}-{io}-{tls}"] = compute_baseline_watermark(data, testid) + # Remap DB test IDs to consumer-format keys before saving + regression_file = remap_to_consumer_keys(regression_file) + watermark_regression_file = remap_to_consumer_keys(watermark_regression_file) + # Save results to a json file. 
with open('regression.json', 'w') as f: json.dump(regression_file, f, indent=4) @@ -322,4 +368,4 @@ def sliding_window(): elif args.featureint == 2: sliding_window() else: - print("Method not supported.") \ No newline at end of file + print("Method not supported.")