From e30b8ef891af47b2f11a9275a8daeab47757d1f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Feb 2026 13:07:18 +0000 Subject: [PATCH 01/12] build: Bump NGC PyTorch to 26.01 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- docker/.ngc_version.dev | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/.ngc_version.dev b/docker/.ngc_version.dev index 8e8108b9a9a..3efd88dbe34 100644 --- a/docker/.ngc_version.dev +++ b/docker/.ngc_version.dev @@ -1 +1 @@ -nvcr.io/nvidia/pytorch:25.11-py3 \ No newline at end of file +nvcr.io/nvidia/pytorch:26.01-py3 \ No newline at end of file From 6352ea9c1c85b5989d3ae967e813e97dbc20fbae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Feb 2026 13:08:09 +0000 Subject: [PATCH 02/12] gitlab bump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/01.build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab/stages/01.build.yml b/.gitlab/stages/01.build.yml index 61521295a93..498041e5646 100644 --- a/.gitlab/stages/01.build.yml +++ b/.gitlab/stages/01.build.yml @@ -64,12 +64,12 @@ test:pre_build_image: - IMAGE: CI_MCORE_DEV_IMAGE FILE: Dockerfile.ci.dev IMAGE_TYPE: dev - BASE_IMAGE: nvcr.io/nvidia/pytorch:25.11-py3 + BASE_IMAGE: nvcr.io/nvidia/pytorch:26.01-py3 PLATFORM: amd64 - IMAGE: CI_MCORE_DEV_IMAGE FILE: Dockerfile.ci.dev IMAGE_TYPE: dev - BASE_IMAGE: nvcr.io/nvidia/pytorch:25.11-py3 + BASE_IMAGE: nvcr.io/nvidia/pytorch:26.01-py3 PLATFORM: arm64 - IMAGE: UTILITY_IMAGE FILE: Dockerfile.linting From d394d5b7840ae3497eec9284efa55ab90f37d02c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Feb 2026 13:42:53 +0000 Subject: [PATCH 03/12] omit weird new file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 9b75fcf3596..2be68f5e7b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -234,6 +234,7 @@ omit = [ "/tmp/*", "/workspace/tests/*", "/usr/local/lib/python3.12/dist-packages/*", + "/opt/megatron-lm/_remote_module_non_scriptable", ] parallel = true sigterm = false From 57fcd86d679315c19763bd789a175e7190b09f1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Feb 2026 15:00:20 +0000 Subject: [PATCH 04/12] test_dcp_checkpoint_save_and_load MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py b/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py index cbca505b405..ea58411333f 100644 --- a/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py +++ b/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py @@ -395,6 +395,7 @@ def test_fully_shard( # Required to reset the parallelism environment. destroy_device_mesh(device_mesh) + @pytest.mark.flaky_in_dev @pytest.mark.skipif( version.parse(torch.__version__) < version.parse('2.4.0'), reason="Requires DTensor and DeviceMesh support in (approximately) PyTorch 2.4.0 or later. Should not be run on 2.2.0a0+81ea7a4 (LTS).", From cf32f2fcaf68a8c72d87a34007bc1eb38d918ca1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 18 Feb 2026 17:58:54 +0000 Subject: [PATCH 05/12] update golden values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 164 +-- .../golden_values_dev_dgx_h100.json | 288 ++--- .../golden_values_dev_dgx_gb200.json | 760 ++++++------- .../golden_values_dev_dgx_h100.json | 572 +++++----- .../golden_values_dev_dgx_h100.json | 964 ++++++++--------- .../golden_values_dev_dgx_h100.json | 566 +++++----- .../golden_values_dev_dgx_h100.json | 966 ++++++++--------- .../golden_values_dev_dgx_gb200.json | 470 ++++----- .../golden_values_dev_dgx_h100.json | 480 ++++----- .../golden_values_dev_dgx_h100.json | 968 ++++++++--------- .../golden_values_dev_dgx_h100.json | 284 ++--- .../golden_values_dev_dgx_h100.json | 480 ++++----- .../golden_values_dev_dgx_h100.json | 480 ++++----- .../golden_values_dev_dgx_h100.json | 480 ++++----- .../golden_values_dev_dgx_h100.json | 480 ++++----- .../golden_values_dev_dgx_h100.json | 964 ++++++++--------- .../golden_values_dev_dgx_h100.json | 934 ++++++++-------- .../golden_values_dev_dgx_h100.json | 934 ++++++++-------- .../golden_values_dev_dgx_h100.json | 458 ++++---- .../golden_values_dev_dgx_h100.json | 458 ++++---- .../golden_values_dev_dgx_h100.json | 494 ++++----- .../golden_values_dev_dgx_h100.json | 996 +++++++++--------- .../golden_values_dev_dgx_gb200.json | 958 ++++++++--------- .../golden_values_dev_dgx_h100.json | 600 +++++------ .../golden_values_dev_dgx_h100.json | 398 +++---- .../golden_values_dev_dgx_gb200.json | 790 +++++++------- .../golden_values_dev_dgx_h100.json | 984 ++++++++--------- .../golden_values_dev_dgx_h100.json | 278 ++--- .../golden_values_dev_dgx_h100.json | 496 ++++----- 29 files changed, 9072 insertions(+), 9072 deletions(-) diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json index 2ed3bf0784f..1719b6e9578 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_h100.json @@ -6,29 +6,29 @@ "values": { "1": 12.58569, "2": 12.58406, - "3": 12.58486, - "4": 12.58642, - "5": 12.58279, - "6": 12.57912, - "7": 12.56177, - "8": 12.52304, - "9": 12.4966, - "10": 12.4826, - "11": 12.31462, - "12": 12.272, - "13": 12.20924, - "14": 12.20094, + "3": 12.58488, + "4": 12.58632, + "5": 12.5828, + "6": 12.57908, + "7": 12.56192, + "8": 12.52305, + "9": 12.49668, + "10": 12.48245, + "11": 12.31444, + "12": 12.27191, + "13": 12.20938, + "14": 12.20089, "15": 11.79651, - "16": 11.78035, - "17": 11.74188, - "18": 11.71656, - "19": 11.59074, - "20": 11.47672, - "21": 11.23784, - "22": 11.3586, - "23": 11.25768, + "16": 11.78043, + "17": 11.74179, + "18": 11.71657, + "19": 11.59068, + "20": 11.47674, + "21": 11.2379, + "22": 11.35857, + "23": 11.25769, "24": 11.14081, - "25": 10.97989 + "25": 10.97993 } }, "num-zeros": { @@ -36,31 +36,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 521035392.0, - "2": 521662624.0, - "3": 520932992.0, - "4": 521225120.0, - "5": 520993600.0, - "6": 521369824.0, - "7": 521417344.0, - "8": 521054784.0, - "9": 521458592.0, - "10": 521175520.0, - "11": 522277376.0, - "12": 521435904.0, - "13": 521472640.0, - "14": 522442496.0, - "15": 521589568.0, - "16": 521414080.0, - "17": 521025696.0, - "18": 521279168.0, - "19": 521154400.0, - "20": 521132352.0, - "21": 522909696.0, - "22": 521591904.0, - "23": 521353504.0, - "24": 521426496.0, - "25": 523547008.0 + "1": 521034848.0, + "2": 521662912.0, + "3": 520932320.0, + "4": 521225216.0, + "5": 520992768.0, + "6": 521369920.0, + "7": 521417152.0, + "8": 521055744.0, + "9": 521459008.0, + "10": 521175872.0, + "11": 522276992.0, + "12": 521435104.0, + "13": 521472960.0, + "14": 522443232.0, + "15": 521589504.0, + "16": 521413760.0, + "17": 521026112.0, + "18": 521279232.0, + "19": 521152640.0, + "20": 521132288.0, + "21": 522908864.0, + "22": 521591872.0, + "23": 521353024.0, + "24": 521427040.0, + "25": 523546112.0 } }, "mem-allocated-bytes": { @@ -101,30 +101,30 @@ "step_interval": 1, "values": { "1": 52730814464.0, - "2": 60518424576.0, - "3": 60518424576.0, - "4": 60518424576.0, - "5": 60518424576.0, - "6": 60518424576.0, - "7": 60518424576.0, - "8": 60518424576.0, - "9": 60518424576.0, - "10": 60518424576.0, - "11": 60518424576.0, - "12": 60518424576.0, - "13": 60518424576.0, - "14": 60518424576.0, - "15": 60518424576.0, - "16": 60518424576.0, - "17": 60518424576.0, - "18": 60518424576.0, - "19": 60518424576.0, - "20": 60518424576.0, - "21": 60518424576.0, - "22": 60518424576.0, - "23": 60518424576.0, - "24": 60518424576.0, - "25": 60518424576.0 + "2": 60518313984.0, + "3": 60518313984.0, + "4": 60518313984.0, + "5": 60518313984.0, + "6": 60518313984.0, + "7": 60518313984.0, + "8": 60518313984.0, + "9": 60518313984.0, + "10": 60518313984.0, + "11": 60518313984.0, + "12": 60518313984.0, + "13": 60518313984.0, + "14": 60518313984.0, + "15": 60518313984.0, + "16": 60518313984.0, + "17": 60518313984.0, + "18": 60518313984.0, + "19": 60518313984.0, + "20": 60518313984.0, + "21": 60518313984.0, + "22": 60518313984.0, + "23": 60518313984.0, + "24": 60518313984.0, + "25": 60518313984.0 } }, "iteration-time": { @@ -133,29 +133,29 @@ "step_interval": 1, "values": { "1": "nan", - "2": 11.06832, + "2": 9.40588, "3": "nan", - "4": 1.16152, + "4": 1.14216, "5": "nan", - "6": 1.15069, + "6": 1.13426, "7": "nan", - "8": 1.15402, + "8": 1.13417, "9": "nan", - "10": 1.15412, + "10": 1.13556, "11": "nan", - "12": 1.15321, + "12": 1.13935, "13": "nan", - "14": 1.15624, + "14": 1.13873, "15": "nan", - "16": 1.1571, + "16": 1.13957, "17": "nan", - "18": 1.15577, + "18": 1.14035, "19": "nan", - "20": 1.15939, + "20": 1.13973, "21": "nan", - "22": 1.15675, + "22": 1.13936, "23": "nan", - "24": 1.15533, + "24": 1.13705, "25": "nan" } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json index 036b53dabb1..3fc7359ad8a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.89074, "2": 10.89234, - "3": 10.89032, - "4": 10.89221, - "5": 10.89416, - "6": 10.90226, - "7": 10.8884, - "8": 10.90211, - "9": 10.90202, - "10": 10.88512, + "3": 10.8903, + "4": 10.8922, + "5": 10.89414, + "6": 10.90232, + "7": 10.88841, + "8": 10.9021, + "9": 10.90201, + "10": 10.88511, "11": 10.87636, - "12": 10.89499, - "13": 10.89837, - "14": 10.89182, - "15": 10.85125, - "16": 10.8534, - "17": 10.82862, - "18": 10.83653, - "19": 10.82847, - "20": 10.74583, - "21": 10.73117, - "22": 10.61256, - "23": 10.72616, - "24": 10.62932, - "25": 10.59394, - "26": 10.63357, - "27": 10.63137, - "28": 10.58201, - "29": 10.58671, + "12": 10.895, + "13": 10.89838, + "14": 10.89179, + "15": 10.85121, + "16": 10.85339, + "17": 10.82863, + "18": 10.83657, + "19": 10.82846, + "20": 10.74587, + "21": 10.73112, + "22": 10.6126, + "23": 10.72614, + "24": 10.62933, + "25": 10.59397, + "26": 10.63359, + "27": 10.63131, + "28": 10.58202, + "29": 10.58668, "30": 10.40936, - "31": 10.15873, - "32": 10.48319, - "33": 10.46977, - "34": 10.23978, - "35": 10.28144, - "36": 10.23894, - "37": 10.35198, - "38": 10.20565, - "39": 10.40496, - "40": 10.09271, + "31": 10.15875, + "32": 10.48318, + "33": 10.46978, + "34": 10.23975, + "35": 10.28146, + "36": 10.23893, + "37": 10.35197, + "38": 10.20568, + "39": 10.40494, + "40": 10.09274, "41": 10.16148, - "42": 10.2231, - "43": 9.84152, - "44": 9.97329, - "45": 9.84544, - "46": 9.82102, - "47": 10.14261, - "48": 9.86553, + "42": 10.22306, + "43": 9.84153, + "44": 9.97326, + "45": 9.84547, + "46": 9.82101, + "47": 10.14262, + "48": 9.86552, "49": 9.54033, - "50": 9.9169 + "50": 9.91688 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1544.0, - "2": 1729.0, - "3": 1672.0, - "4": 1807.0, - "5": 1942.0, - "6": 1736.0, - "7": 1956.0, - "8": 1716.0, - "9": 2011.0, - "10": 1385.0, - "11": 1864.0, - "12": 1767.0, - "13": 2019.0, - "14": 1787.0, - "15": 1828.0, - "16": 1908.0, - "17": 1718.0, - "18": 1602.0, - "19": 1785.0, - "20": 1679.0, - "21": 1917.0, - "22": 1712.0, - "23": 2034.0, - "24": 1752.0, - "25": 1645.0, - "26": 1820.0, - "27": 1915.0, - "28": 1996.0, - "29": 2051.0, - "30": 1890.0, - "31": 1577.0, - "32": 1886.0, - "33": 2116.0, - "34": 1912.0, - "35": 2037.0, - "36": 1924.0, - "37": 2462.0, - "38": 2241.0, - "39": 2321.0, - "40": 2221.0, - "41": 2345.0, - "42": 2386.0, - "43": 2027.0, - "44": 2211.0, - "45": 2096.0, - "46": 2285.0, - "47": 2536.0, - "48": 2289.0, - "49": 2270.0, - "50": 2421.0 + "1": 1601.0, + "2": 1708.0, + "3": 1739.0, + "4": 1740.0, + "5": 1963.0, + "6": 1802.0, + "7": 1896.0, + "8": 1618.0, + "9": 1935.0, + "10": 1449.0, + "11": 1960.0, + "12": 1860.0, + "13": 1950.0, + "14": 1907.0, + "15": 1864.0, + "16": 1972.0, + "17": 1719.0, + "18": 1561.0, + "19": 1764.0, + "20": 1668.0, + "21": 1922.0, + "22": 1811.0, + "23": 2038.0, + "24": 1655.0, + "25": 1664.0, + "26": 1757.0, + "27": 1860.0, + "28": 2051.0, + "29": 1995.0, + "30": 1976.0, + "31": 1530.0, + "32": 1961.0, + "33": 2077.0, + "34": 1941.0, + "35": 1965.0, + "36": 1916.0, + "37": 2344.0, + "38": 2201.0, + "39": 2388.0, + "40": 2246.0, + "41": 2411.0, + "42": 2366.0, + "43": 2054.0, + "44": 2154.0, + "45": 2135.0, + "46": 2347.0, + "47": 2301.0, + "48": 2354.0, + "49": 2230.0, + "50": 2385.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6.80579, - "2": 0.08104, - "3": 0.07547, - "4": 0.05731, - "5": 0.06226, - "6": 0.05988, - "7": 0.06566, - "8": 0.06635, - "9": 0.06593, - "10": 0.06639, - "11": 0.06591, - "12": 0.06568, - "13": 0.06504, - "14": 0.06232, - "15": 0.06162, - "16": 0.05614, - "17": 0.06083, - "18": 0.05789, - "19": 0.05867, - "20": 0.05574, - "21": 0.06043, - "22": 0.05778, - "23": 0.06166, - "24": 0.05671, - "25": 0.05765, - "26": 0.05638, - "27": 0.05601, - "28": 0.05637, - "29": 0.05497, - "30": 0.05757, - "31": 0.05556, - "32": 0.05715, - "33": 0.05761, - "34": 0.05779, - "35": 0.05996, - "36": 0.05761, - "37": 0.06454, - "38": 0.0575, - "39": 0.05802, - "40": 0.05752, - "41": 0.05904, - "42": 0.05622, - "43": 0.0555, - "44": 0.05785, - "45": 0.0578, - "46": 0.05758, - "47": 0.05729, - "48": 0.05652, - "49": 0.05619, - "50": 0.05705 + "1": "nan", + "2": 3.36008, + "3": 0.07706, + "4": 0.05754, + "5": 0.06556, + "6": 0.05767, + "7": 0.06278, + "8": 0.05622, + "9": 0.05975, + "10": 0.05635, + "11": 0.06095, + "12": 0.05668, + "13": 0.06242, + "14": 0.05756, + "15": 0.06014, + "16": 0.05763, + "17": 0.06282, + "18": 0.05672, + "19": 0.06248, + "20": 0.05666, + "21": 0.05943, + "22": 0.05746, + "23": 0.06248, + "24": 0.05782, + "25": 0.0626, + "26": 0.05697, + "27": 0.06234, + "28": 0.05681, + "29": 0.06175, + "30": 0.0575, + "31": 0.06289, + "32": 0.05752, + "33": 0.06247, + "34": 0.05817, + "35": 0.06189, + "36": 0.05843, + "37": 0.06228, + "38": 0.05885, + "39": 0.0623, + "40": 0.05846, + "41": 0.06215, + "42": 0.05671, + "43": 0.06175, + "44": 0.05738, + "45": 0.06472, + "46": 0.05749, + "47": 0.06223, + "48": 0.05703, + "49": 0.06163, + "50": 0.05739 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json index eb5a06ac1fc..63661ce44ba 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.77447, "2": 10.78365, - "3": 10.78346, - "4": 10.74822, - "5": 10.81983, - "6": 10.82303, - "7": 10.79055, - "8": 10.77956, - "9": 10.78595, - "10": 10.74453, - "11": 10.83267, + "3": 10.78344, + "4": 10.74824, + "5": 10.81984, + "6": 10.82302, + "7": 10.79059, + "8": 10.77958, + "9": 10.78598, + "10": 10.74454, + "11": 10.83266, "12": 10.80426, - "13": 10.82082, - "14": 10.82567, - "15": 10.74206, - "16": 10.74904, - "17": 10.7252, - "18": 10.74176, - "19": 10.74412, - "20": 10.63678, - "21": 10.63055, - "22": 10.47962, + "13": 10.82087, + "14": 10.8257, + "15": 10.74203, + "16": 10.74906, + "17": 10.72516, + "18": 10.74181, + "19": 10.74413, + "20": 10.63677, + "21": 10.6305, + "22": 10.47963, "23": 10.65976, - "24": 10.52477, - "25": 10.47552, + "24": 10.52481, + "25": 10.47557, "26": 10.54117, - "27": 10.55491, + "27": 10.55492, "28": 10.52139, - "29": 10.536, - "30": 10.3053, - "31": 10.0644, - "32": 10.41569, - "33": 10.42199, - "34": 10.17393, - "35": 10.22403, - "36": 10.18498, - "37": 10.30417, - "38": 10.14995, + "29": 10.53597, + "30": 10.30531, + "31": 10.06441, + "32": 10.41574, + "33": 10.42201, + "34": 10.17392, + "35": 10.22404, + "36": 10.18502, + "37": 10.30415, + "38": 10.14997, "39": 10.37042, - "40": 10.03994, + "40": 10.03995, "41": 10.10953, - "42": 10.17937, - "43": 9.79747, - "44": 9.90812, - "45": 9.79809, - "46": 9.7966, + "42": 10.17942, + "43": 9.79746, + "44": 9.90813, + "45": 9.79805, + "46": 9.79659, "47": 10.12109, - "48": 9.82083, + "48": 9.82082, "49": 9.50495, - "50": 9.88025, - "51": 9.83614, - "52": 9.72315, - "53": 10.05318, + "50": 9.88028, + "51": 9.83616, + "52": 9.72316, + "53": 10.05321, "54": 9.93747, - "55": 9.87384, - "56": 9.60449, - "57": 9.4523, - "58": 9.8188, - "59": 9.5772, - "60": 9.48534, - "61": 9.68548, - "62": 9.97906, - "63": 9.36419, - "64": 9.76203, - "65": 8.94097, - "66": 9.69475, - "67": 9.36656, - "68": 9.77745, - "69": 9.79001, + "55": 9.87386, + "56": 9.6045, + "57": 9.45231, + "58": 9.81878, + "59": 9.57719, + "60": 9.48532, + "61": 9.68547, + "62": 9.97908, + "63": 9.36418, + "64": 9.76205, + "65": 8.94098, + "66": 9.69474, + "67": 9.36662, + "68": 9.77744, + "69": 9.79003, "70": 9.72374, "71": 9.62037, - "72": 9.57423, + "72": 9.5742, "73": 9.48575, - "74": 8.92729, - "75": 9.41651, - "76": 9.07747, - "77": 10.05444, - "78": 9.71914, + "74": 8.9273, + "75": 9.41656, + "76": 9.07746, + "77": 10.05445, + "78": 9.71916, "79": 9.37306, - "80": 9.40003, - "81": 9.47844, - "82": 9.69867, - "83": 9.31155, - "84": 9.41457, + "80": 9.40002, + "81": 9.47843, + "82": 9.69866, + "83": 9.31154, + "84": 9.41458, "85": 9.61163, - "86": 9.07418, + "86": 9.07421, "87": 9.5939, - "88": 9.74928, - "89": 9.5985, - "90": 9.82761, - "91": 9.33631, + "88": 9.74929, + "89": 9.59848, + "90": 9.82763, + "91": 9.33629, "92": 9.35805, - "93": 9.08552, - "94": 8.82786, - "95": 9.5303, - "96": 9.52663, - "97": 9.30483, - "98": 9.67007, - "99": 8.89606, - "100": 9.40702 + "93": 9.08555, + "94": 8.8279, + "95": 9.53034, + "96": 9.5266, + "97": 9.30484, + "98": 9.67005, + "99": 8.89605, + "100": 9.40698 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1531.0, - "2": 1722.0, - "3": 1589.0, - "4": 1870.0, - "5": 1992.0, - "6": 1894.0, - "7": 1954.0, - "8": 1697.0, - "9": 1855.0, - "10": 1477.0, - "11": 1889.0, - "12": 1848.0, - "13": 1973.0, - "14": 1877.0, - "15": 2015.0, - "16": 1943.0, - "17": 1772.0, - "18": 1764.0, - "19": 1782.0, - "20": 1678.0, - "21": 1906.0, - "22": 1738.0, - "23": 2057.0, - "24": 1597.0, - "25": 1567.0, - "26": 1762.0, - "27": 1932.0, - "28": 1987.0, - "29": 1936.0, - "30": 1965.0, - "31": 1554.0, - "32": 1846.0, - "33": 2148.0, - "34": 1872.0, - "35": 1985.0, - "36": 1906.0, - "37": 2245.0, - "38": 2119.0, - "39": 2495.0, - "40": 2274.0, - "41": 2236.0, - "42": 2318.0, - "43": 2068.0, - "44": 2120.0, - "45": 2265.0, - "46": 2447.0, - "47": 2584.0, - "48": 2296.0, - "49": 2252.0, - "50": 2568.0, - "51": 2650.0, - "52": 2700.0, - "53": 2863.0, - "54": 2676.0, - "55": 2390.0, - "56": 2753.0, - "57": 2430.0, - "58": 2919.0, - "59": 2831.0, - "60": 2428.0, - "61": 2932.0, - "62": 2724.0, - "63": 2579.0, - "64": 2987.0, - "65": 2506.0, - "66": 2886.0, - "67": 2871.0, - "68": 2870.0, - "69": 3001.0, - "70": 3294.0, - "71": 3043.0, - "72": 2614.0, - "73": 3054.0, - "74": 2024.0, - "75": 2507.0, - "76": 3020.0, - "77": 3253.0, - "78": 3230.0, - "79": 3210.0, - "80": 3252.0, - "81": 3614.0, - "82": 3395.0, - "83": 2919.0, - "84": 3296.0, - "85": 3320.0, - "86": 2865.0, - "87": 3931.0, - "88": 3240.0, - "89": 3428.0, - "90": 3127.0, - "91": 2815.0, - "92": 3098.0, - "93": 2796.0, - "94": 3324.0, - "95": 3428.0, - "96": 3541.0, - "97": 3216.0, - "98": 3705.0, - "99": 3184.0, - "100": 3073.0 + "1": 1587.0, + "2": 1684.0, + "3": 1639.0, + "4": 1845.0, + "5": 2013.0, + "6": 1866.0, + "7": 1966.0, + "8": 1660.0, + "9": 1910.0, + "10": 1448.0, + "11": 2024.0, + "12": 1767.0, + "13": 1863.0, + "14": 1833.0, + "15": 2020.0, + "16": 1957.0, + "17": 1799.0, + "18": 1751.0, + "19": 1745.0, + "20": 1673.0, + "21": 1901.0, + "22": 1689.0, + "23": 2062.0, + "24": 1658.0, + "25": 1594.0, + "26": 1728.0, + "27": 1913.0, + "28": 1933.0, + "29": 1983.0, + "30": 1950.0, + "31": 1616.0, + "32": 1879.0, + "33": 2091.0, + "34": 1833.0, + "35": 1993.0, + "36": 2025.0, + "37": 2385.0, + "38": 2102.0, + "39": 2431.0, + "40": 2238.0, + "41": 2359.0, + "42": 2225.0, + "43": 2138.0, + "44": 2090.0, + "45": 2178.0, + "46": 2287.0, + "47": 2655.0, + "48": 2336.0, + "49": 2214.0, + "50": 2526.0, + "51": 2690.0, + "52": 2683.0, + "53": 2975.0, + "54": 2691.0, + "55": 2449.0, + "56": 2802.0, + "57": 2291.0, + "58": 2876.0, + "59": 2795.0, + "60": 2457.0, + "61": 2992.0, + "62": 2686.0, + "63": 2532.0, + "64": 2965.0, + "65": 2585.0, + "66": 3084.0, + "67": 2858.0, + "68": 2869.0, + "69": 2976.0, + "70": 3125.0, + "71": 3013.0, + "72": 2619.0, + "73": 3083.0, + "74": 2056.0, + "75": 2613.0, + "76": 2984.0, + "77": 3289.0, + "78": 3241.0, + "79": 3185.0, + "80": 3326.0, + "81": 3485.0, + "82": 3309.0, + "83": 2835.0, + "84": 3288.0, + "85": 3314.0, + "86": 2900.0, + "87": 3890.0, + "88": 3216.0, + "89": 3337.0, + "90": 3102.0, + "91": 2785.0, + "92": 3063.0, + "93": 2919.0, + "94": 3367.0, + "95": 3351.0, + "96": 3415.0, + "97": 3221.0, + "98": 3763.0, + "99": 3123.0, + "100": 3091.0 } }, "mem-allocated-bytes": { @@ -326,105 +326,105 @@ "step_interval": 1, "values": { "1": 2644459008.0, - "2": 2741075456.0, - "3": 2741075456.0, - "4": 2741075456.0, - "5": 2741075456.0, - "6": 2741075456.0, - "7": 2741075456.0, - "8": 2741075456.0, - "9": 2741075456.0, - "10": 2741075456.0, - "11": 2741075456.0, - "12": 2741075456.0, - "13": 2741075456.0, - "14": 2741075456.0, - "15": 2741075456.0, - "16": 2741075456.0, - "17": 2741075456.0, - "18": 2741075456.0, - "19": 2741075456.0, - "20": 2741075456.0, - "21": 2741075456.0, - "22": 2741075456.0, - "23": 2741075456.0, - "24": 2741075456.0, - "25": 2741075456.0, - "26": 2741075456.0, - "27": 2741075456.0, - "28": 2741075456.0, - "29": 2741075456.0, - "30": 2741075456.0, - "31": 2741075456.0, - "32": 2741075456.0, - "33": 2741075456.0, - "34": 2741075456.0, - "35": 2741075456.0, - "36": 2741075456.0, - "37": 2741075456.0, - "38": 2741075456.0, - "39": 2741075456.0, - "40": 2741075456.0, - "41": 2741075456.0, - "42": 2741075456.0, - "43": 2741075456.0, - "44": 2741075456.0, - "45": 2741075456.0, - "46": 2741075456.0, - "47": 2741075456.0, - "48": 2741075456.0, - "49": 2741075456.0, - "50": 2741075456.0, - "51": 2741075456.0, - "52": 2741075456.0, - "53": 2741075456.0, - "54": 2741075456.0, - "55": 2741075456.0, - "56": 2741075456.0, - "57": 2741075456.0, - "58": 2741075456.0, - "59": 2741075456.0, - "60": 2741075456.0, - "61": 2741075456.0, - "62": 2741075456.0, - "63": 2741075456.0, - "64": 2741075456.0, - "65": 2741075456.0, - "66": 2741075456.0, - "67": 2741075456.0, - "68": 2741075456.0, - "69": 2741075456.0, - "70": 2741075456.0, - "71": 2741075456.0, - "72": 2741075456.0, - "73": 2741075456.0, - "74": 2741075456.0, - "75": 2741075456.0, - "76": 2741075456.0, - "77": 2741075456.0, - "78": 2741075456.0, - "79": 2741075456.0, - "80": 2741075456.0, - "81": 2741075456.0, - "82": 2741075456.0, - "83": 2741075456.0, - "84": 2741075456.0, - "85": 2741075456.0, - "86": 2741075456.0, - "87": 2741075456.0, - "88": 2741075456.0, - "89": 2741075456.0, - "90": 2741075456.0, - "91": 2741075456.0, - "92": 2741075456.0, - "93": 2741075456.0, - "94": 2741075456.0, - "95": 2741075456.0, - "96": 2741075456.0, - "97": 2741075456.0, - "98": 2741075456.0, - "99": 2741075456.0, - "100": 2741075456.0 + "2": 2741977600.0, + "3": 2741977600.0, + "4": 2741977600.0, + "5": 2741977600.0, + "6": 2741977600.0, + "7": 2741977600.0, + "8": 2741977600.0, + "9": 2741977600.0, + "10": 2741977600.0, + "11": 2741977600.0, + "12": 2741977600.0, + "13": 2741977600.0, + "14": 2741977600.0, + "15": 2741977600.0, + "16": 2741977600.0, + "17": 2741977600.0, + "18": 2741977600.0, + "19": 2741977600.0, + "20": 2741977600.0, + "21": 2741977600.0, + "22": 2741977600.0, + "23": 2741977600.0, + "24": 2741977600.0, + "25": 2741977600.0, + "26": 2741977600.0, + "27": 2741977600.0, + "28": 2741977600.0, + "29": 2741977600.0, + "30": 2741977600.0, + "31": 2741977600.0, + "32": 2741977600.0, + "33": 2741977600.0, + "34": 2741977600.0, + "35": 2741977600.0, + "36": 2741977600.0, + "37": 2741977600.0, + "38": 2741977600.0, + "39": 2741977600.0, + "40": 2741977600.0, + "41": 2741977600.0, + "42": 2741977600.0, + "43": 2741977600.0, + "44": 2741977600.0, + "45": 2741977600.0, + "46": 2741977600.0, + "47": 2741977600.0, + "48": 2741977600.0, + "49": 2741977600.0, + "50": 2741977600.0, + "51": 2741977600.0, + "52": 2741977600.0, + "53": 2741977600.0, + "54": 2741977600.0, + "55": 2741977600.0, + "56": 2741977600.0, + "57": 2741977600.0, + "58": 2741977600.0, + "59": 2741977600.0, + "60": 2741977600.0, + "61": 2741977600.0, + "62": 2741977600.0, + "63": 2741977600.0, + "64": 2741977600.0, + "65": 2741977600.0, + "66": 2741977600.0, + "67": 2741977600.0, + "68": 2741977600.0, + "69": 2741977600.0, + "70": 2741977600.0, + "71": 2741977600.0, + "72": 2741977600.0, + "73": 2741977600.0, + "74": 2741977600.0, + "75": 2741977600.0, + "76": 2741977600.0, + "77": 2741977600.0, + "78": 2741977600.0, + "79": 2741977600.0, + "80": 2741977600.0, + "81": 2741977600.0, + "82": 2741977600.0, + "83": 2741977600.0, + "84": 2741977600.0, + "85": 2741977600.0, + "86": 2741977600.0, + "87": 2741977600.0, + "88": 2741977600.0, + "89": 2741977600.0, + "90": 2741977600.0, + "91": 2741977600.0, + "92": 2741977600.0, + "93": 2741977600.0, + "94": 2741977600.0, + "95": 2741977600.0, + "96": 2741977600.0, + "97": 2741977600.0, + "98": 2741977600.0, + "99": 2741977600.0, + "100": 2741977600.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.38769, - "3": 0.09884, - "4": 0.08514, - "5": 0.08435, - "6": 0.08412, - "7": 0.08558, - "8": 0.08425, - "9": 0.08436, - "10": 0.08457, - "11": 0.08469, - "12": 0.0848, - "13": 0.08487, - "14": 0.08571, - "15": 0.08487, - "16": 0.08529, - "17": 0.08559, - "18": 0.0898, - "19": 0.08482, - "20": 0.08509, - "21": 0.08527, - "22": 0.08597, - "23": 0.08592, - "24": 0.08654, - "25": 0.08608, - "26": 0.08574, - "27": 0.08542, - "28": 0.0856, - "29": 0.08581, - "30": 0.08539, - "31": 0.08675, - "32": 0.08679, - "33": 0.08699, - "34": 0.08599, - "35": 0.08568, - "36": 0.08528, - "37": 0.08527, - "38": 0.08526, - "39": 0.08614, - "40": 0.08507, - "41": 0.08552, - "42": 0.08586, - "43": 0.08568, - "44": 0.0866, - "45": 0.08692, - "46": 0.08614, - "47": 0.0859, - "48": 0.0863, - "49": 0.08723, - "50": 0.08703, - "51": 0.09195, - "52": 0.0775, - "53": 0.07822, - "54": 0.07813, - "55": 0.0784, - "56": 0.07871, - "57": 0.07816, - "58": 0.07787, - "59": 0.07958, - "60": 0.07893, - "61": 0.07873, - "62": 0.07887, - "63": 0.07945, - "64": 0.07879, - "65": 0.08059, - "66": 0.08041, - "67": 0.08127, - "68": 0.07996, - "69": 0.0799, - "70": 0.07821, - "71": 0.07712, - "72": 0.07745, - "73": 0.07774, - "74": 0.07859, - "75": 0.07741, - "76": 0.07753, - "77": 0.07725, - "78": 0.07676, - "79": 0.07838, - "80": 0.07786, - "81": 0.07743, - "82": 0.07732, - "83": 0.0773, - "84": 0.07664, - "85": 0.07753, - "86": 0.07826, - "87": 0.07764, - "88": 0.07681, - "89": 0.07911, - "90": 0.07799, - "91": 0.07796, - "92": 0.07853, - "93": 0.07736, - "94": 0.07777, - "95": 0.07791, - "96": 0.07723, - "97": 0.07753, - "98": 0.07789, - "99": 0.07782, - "100": 0.07733 + "2": 3.91, + "3": 0.09176, + "4": 0.07867, + "5": 0.07784, + "6": 0.07723, + "7": 0.07698, + "8": 0.0758, + "9": 0.07589, + "10": 0.07634, + "11": 0.07626, + "12": 0.0768, + "13": 0.07681, + "14": 0.07682, + "15": 0.0759, + "16": 0.07596, + "17": 0.07651, + "18": 0.07603, + "19": 0.07618, + "20": 0.07636, + "21": 0.07747, + "22": 0.07846, + "23": 0.07695, + "24": 0.0769, + "25": 0.07901, + "26": 0.07721, + "27": 0.07758, + "28": 0.07805, + "29": 0.07566, + "30": 0.07566, + "31": 0.07601, + "32": 0.07665, + "33": 0.07669, + "34": 0.07738, + "35": 0.07656, + "36": 0.07717, + "37": 0.07666, + "38": 0.07714, + "39": 0.07666, + "40": 0.07677, + "41": 0.07635, + "42": 0.07656, + "43": 0.07591, + "44": 0.07577, + "45": 0.07726, + "46": 0.07728, + "47": 0.07659, + "48": 0.07695, + "49": 0.07762, + "50": 0.07653, + "51": 0.09458, + "52": 0.07903, + "53": 0.07694, + "54": 0.0755, + "55": 0.07647, + "56": 0.07565, + "57": 0.07705, + "58": 0.07632, + "59": 0.07796, + "60": 0.07569, + "61": 0.07613, + "62": 0.07503, + "63": 0.0764, + "64": 0.07603, + "65": 0.07611, + "66": 0.07585, + "67": 0.07858, + "68": 0.07626, + "69": 0.07694, + "70": 0.07697, + "71": 0.07593, + "72": 0.07697, + "73": 0.07657, + "74": 0.07722, + "75": 0.07641, + "76": 0.07669, + "77": 0.07681, + "78": 0.07631, + "79": 0.07651, + "80": 0.07632, + "81": 0.07737, + "82": 0.07659, + "83": 0.07741, + "84": 0.0764, + "85": 0.07666, + "86": 0.07693, + "87": 0.07693, + "88": 0.07656, + "89": 0.07649, + "90": 0.07761, + "91": 0.07685, + "92": 0.07639, + "93": 0.07647, + "94": 0.07726, + "95": 0.07663, + "96": 0.07721, + "97": 0.0767, + "98": 0.07736, + "99": 0.07719, + "100": 0.07713 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index 131bcbe928e..63e2f03839f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.89631, "2": 10.89416, - "3": 10.88786, - "4": 10.8914, - "5": 10.89154, - "6": 10.90001, - "7": 10.89184, + "3": 10.88785, + "4": 10.89141, + "5": 10.89153, + "6": 10.90002, + "7": 10.89187, "8": 10.89886, - "9": 10.90208, - "10": 10.88361, - "11": 10.87816, + "9": 10.90211, + "10": 10.88366, + "11": 10.87818, "12": 10.89332, - "13": 10.89816, - "14": 10.89241, - "15": 10.84798, - "16": 10.854, + "13": 10.89817, + "14": 10.89242, + "15": 10.84801, + "16": 10.85399, "17": 10.83093, - "18": 10.83991, - "19": 10.82802, - "20": 10.74822, - "21": 10.73494, - "22": 10.61719, - "23": 10.72621, - "24": 10.63177, - "25": 10.5931, - "26": 10.63365, - "27": 10.63304, - "28": 10.58259, - "29": 10.58595, - "30": 10.41201, - "31": 10.15907, - "32": 10.48362, - "33": 10.46704, + "18": 10.83988, + "19": 10.82804, + "20": 10.74824, + "21": 10.73491, + "22": 10.61715, + "23": 10.72617, + "24": 10.63178, + "25": 10.59314, + "26": 10.63368, + "27": 10.63299, + "28": 10.5826, + "29": 10.58594, + "30": 10.41207, + "31": 10.15904, + "32": 10.48361, + "33": 10.46707, "34": 10.23815, - "35": 10.28193, - "36": 10.24052, - "37": 10.36227, - "38": 10.20306, - "39": 10.40456, - "40": 10.09271, - "41": 10.15831, - "42": 10.21934, + "35": 10.28191, + "36": 10.24054, + "37": 10.36221, + "38": 10.2031, + "39": 10.40457, + "40": 10.0927, + "41": 10.15833, + "42": 10.21932, "43": 9.8436, - "44": 9.97299, - "45": 9.84189, - "46": 9.82017, + "44": 9.97302, + "45": 9.84192, + "46": 9.82018, "47": 10.14968, - "48": 9.86021, - "49": 9.54238, - "50": 9.91347, - "51": 9.85447, - "52": 9.73936, - "53": 10.07426, - "54": 9.96915, - "55": 9.88574, + "48": 9.86019, + "49": 9.54235, + "50": 9.91348, + "51": 9.85448, + "52": 9.73931, + "53": 10.0743, + "54": 9.96913, + "55": 9.8857, "56": 9.62437, - "57": 9.4823, - "58": 9.83483, - "59": 9.58732, - "60": 9.50245, - "61": 9.69343, + "57": 9.48228, + "58": 9.83485, + "59": 9.5873, + "60": 9.50243, + "61": 9.69341, "62": 9.98806, "63": 9.39103, - "64": 9.78021, + "64": 9.78025, "65": 8.94515, - "66": 9.70494, - "67": 9.37251, - "68": 9.78329, - "69": 9.79058, - "70": 9.74454, - "71": 9.62301, - "72": 9.58458, - "73": 9.50513, - "74": 8.94312, - "75": 9.42524, - "76": 9.07601, - "77": 10.06353, - "78": 9.72308, - "79": 9.37502, - "80": 9.40453, - "81": 9.47794, - "82": 9.69667, - "83": 9.3072, - "84": 9.41526, + "66": 9.70491, + "67": 9.37249, + "68": 9.78331, + "69": 9.7906, + "70": 9.74449, + "71": 9.62299, + "72": 9.58459, + "73": 9.5051, + "74": 8.94308, + "75": 9.42526, + "76": 9.07602, + "77": 10.06351, + "78": 9.72309, + "79": 9.37504, + "80": 9.40451, + "81": 9.47792, + "82": 9.69668, + "83": 9.30716, + "84": 9.41528, "85": 9.61293, - "86": 9.07195, - "87": 9.5884, - "88": 9.74762, - "89": 9.59982, - "90": 9.81672, - "91": 9.3379, - "92": 9.35605, - "93": 9.07425, + "86": 9.07193, + "87": 9.58838, + "88": 9.74763, + "89": 9.59984, + "90": 9.8167, + "91": 9.33789, + "92": 9.35602, + "93": 9.07424, "94": 8.8351, - "95": 9.5184, - "96": 9.52391, - "97": 9.30923, - "98": 9.66743, - "99": 8.88419, - "100": 9.39924 + "95": 9.51839, + "96": 9.52393, + "97": 9.30921, + "98": 9.66745, + "99": 8.88417, + "100": 9.39922 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1483.0, - "2": 1650.0, - "3": 1681.0, - "4": 1767.0, - "5": 1903.0, - "6": 1952.0, - "7": 1967.0, - "8": 1651.0, - "9": 1886.0, - "10": 1427.0, - "11": 1897.0, - "12": 1855.0, - "13": 1941.0, - "14": 1749.0, - "15": 1901.0, - "16": 1813.0, - "17": 1710.0, - "18": 1707.0, - "19": 1819.0, - "20": 1639.0, - "21": 1880.0, - "22": 1769.0, - "23": 2016.0, - "24": 1692.0, - "25": 1672.0, - "26": 1778.0, - "27": 1861.0, - "28": 1964.0, - "29": 2021.0, - "30": 1938.0, - "31": 1645.0, - "32": 1864.0, - "33": 2150.0, - "34": 1828.0, - "35": 1982.0, - "36": 1864.0, - "37": 2355.0, - "38": 2358.0, - "39": 2385.0, - "40": 2407.0, - "41": 2501.0, - "42": 2435.0, - "43": 2033.0, - "44": 2089.0, - "45": 2210.0, - "46": 2351.0, - "47": 2502.0, - "48": 2444.0, - "49": 2302.0, - "50": 2492.0, - "51": 2598.0, - "52": 2547.0, - "53": 2957.0, - "54": 2750.0, - "55": 2372.0, - "56": 2569.0, - "57": 2395.0, - "58": 2901.0, - "59": 2741.0, - "60": 2430.0, - "61": 2868.0, - "62": 2651.0, - "63": 2507.0, - "64": 3014.0, - "65": 2683.0, - "66": 2935.0, - "67": 2783.0, - "68": 2725.0, - "69": 2788.0, - "70": 3152.0, - "71": 3026.0, - "72": 2415.0, - "73": 3122.0, - "74": 1967.0, - "75": 2581.0, - "76": 3010.0, - "77": 3294.0, - "78": 3166.0, - "79": 3150.0, - "80": 3246.0, - "81": 3566.0, - "82": 3285.0, - "83": 2817.0, - "84": 3269.0, - "85": 3425.0, - "86": 2819.0, - "87": 3577.0, - "88": 3004.0, - "89": 3323.0, - "90": 3023.0, - "91": 2661.0, - "92": 3066.0, - "93": 2691.0, - "94": 3305.0, - "95": 3403.0, - "96": 3377.0, - "97": 3242.0, - "98": 3697.0, - "99": 3112.0, - "100": 3199.0 + "1": 1524.0, + "2": 1653.0, + "3": 1732.0, + "4": 1794.0, + "5": 1835.0, + "6": 1904.0, + "7": 1919.0, + "8": 1747.0, + "9": 1860.0, + "10": 1363.0, + "11": 1886.0, + "12": 1808.0, + "13": 1956.0, + "14": 1754.0, + "15": 1833.0, + "16": 1855.0, + "17": 1780.0, + "18": 1729.0, + "19": 1777.0, + "20": 1697.0, + "21": 1884.0, + "22": 1765.0, + "23": 2080.0, + "24": 1675.0, + "25": 1705.0, + "26": 1767.0, + "27": 1858.0, + "28": 2041.0, + "29": 1983.0, + "30": 1959.0, + "31": 1555.0, + "32": 1953.0, + "33": 2118.0, + "34": 1862.0, + "35": 1973.0, + "36": 1880.0, + "37": 2313.0, + "38": 2319.0, + "39": 2419.0, + "40": 2393.0, + "41": 2485.0, + "42": 2393.0, + "43": 2025.0, + "44": 2129.0, + "45": 2131.0, + "46": 2281.0, + "47": 2440.0, + "48": 2405.0, + "49": 2336.0, + "50": 2472.0, + "51": 2591.0, + "52": 2526.0, + "53": 2940.0, + "54": 2660.0, + "55": 2391.0, + "56": 2665.0, + "57": 2437.0, + "58": 2987.0, + "59": 2660.0, + "60": 2398.0, + "61": 2806.0, + "62": 2734.0, + "63": 2411.0, + "64": 2967.0, + "65": 2627.0, + "66": 2869.0, + "67": 2810.0, + "68": 2718.0, + "69": 2774.0, + "70": 3156.0, + "71": 3066.0, + "72": 2566.0, + "73": 3156.0, + "74": 1966.0, + "75": 2690.0, + "76": 2965.0, + "77": 3234.0, + "78": 3138.0, + "79": 2977.0, + "80": 3317.0, + "81": 3629.0, + "82": 3248.0, + "83": 2756.0, + "84": 3268.0, + "85": 3380.0, + "86": 2755.0, + "87": 3685.0, + "88": 3079.0, + "89": 3284.0, + "90": 3041.0, + "91": 2674.0, + "92": 3078.0, + "93": 2682.0, + "94": 3367.0, + "95": 3456.0, + "96": 3380.0, + "97": 3163.0, + "98": 3686.0, + "99": 3131.0, + "100": 3129.0 } }, "mem-allocated-bytes": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 6.85919, - "2": 0.0831, - "3": 0.08065, - "4": 0.05861, - "5": 0.04976, - "6": 0.05045, - "7": 0.04972, - "8": 0.04911, - "9": 0.04965, - "10": 0.04942, - "11": 0.04916, - "12": 0.04915, - "13": 0.04939, - "14": 0.04993, - "15": 0.04987, - "16": 0.04906, - "17": 0.05015, - "18": 0.04924, - "19": 0.05168, - "20": 0.04963, - "21": 0.05051, - "22": 0.04948, - "23": 0.05006, - "24": 0.04939, - "25": 0.05019, - "26": 0.04951, - "27": 0.05048, - "28": 0.04917, - "29": 0.05015, - "30": 0.04921, - "31": 0.04969, - "32": 0.04894, - "33": 0.04941, - "34": 0.04938, - "35": 0.04927, - "36": 0.04942, - "37": 0.04944, - "38": 0.04973, - "39": 0.04957, - "40": 0.05016, - "41": 0.04968, - "42": 0.05042, - "43": 0.0523, - "44": 0.04956, - "45": 0.04948, - "46": 0.05093, - "47": 0.0493, - "48": 0.0498, - "49": 0.05177, - "50": 0.05032, - "51": 0.05749, - "52": 0.05013, - "53": 0.0512, - "54": 0.04935, - "55": 0.04891, - "56": 0.04976, - "57": 0.04984, - "58": 0.04964, - "59": 0.05274, - "60": 0.04962, - "61": 0.05096, - "62": 0.04934, - "63": 0.04971, - "64": 0.0503, - "65": 0.05028, - "66": 0.04991, - "67": 0.04926, - "68": 0.04848, - "69": 0.0493, - "70": 0.04943, - "71": 0.04943, - "72": 0.04852, - "73": 0.04928, - "74": 0.04895, - "75": 0.04995, - "76": 0.04877, - "77": 0.0492, - "78": 0.04886, - "79": 0.04938, - "80": 0.04894, - "81": 0.04892, - "82": 0.05016, - "83": 0.04964, - "84": 0.04956, - "85": 0.04881, - "86": 0.04999, - "87": 0.04908, - "88": 0.04838, - "89": 0.04957, - "90": 0.04882, - "91": 0.04993, - "92": 0.05004, - "93": 0.05003, - "94": 0.04961, - "95": 0.05132, - "96": 0.05071, - "97": 0.04952, - "98": 0.04851, - "99": 0.05027, - "100": 0.04988 + "1": "nan", + "2": 4.10968, + "3": 0.0647, + "4": 0.05011, + "5": 0.04999, + "6": 0.05, + "7": 0.04977, + "8": 0.04985, + "9": 0.05058, + "10": 0.04994, + "11": 0.05101, + "12": 0.05004, + "13": 0.05386, + "14": 0.0495, + "15": 0.05196, + "16": 0.04977, + "17": 0.05163, + "18": 0.04987, + "19": 0.052, + "20": 0.05008, + "21": 0.05173, + "22": 0.04974, + "23": 0.05131, + "24": 0.04947, + "25": 0.05107, + "26": 0.05122, + "27": 0.05254, + "28": 0.04977, + "29": 0.05091, + "30": 0.04997, + "31": 0.05132, + "32": 0.04976, + "33": 0.05152, + "34": 0.04986, + "35": 0.05126, + "36": 0.05009, + "37": 0.05096, + "38": 0.04998, + "39": 0.05212, + "40": 0.0502, + "41": 0.05106, + "42": 0.04979, + "43": 0.05269, + "44": 0.05006, + "45": 0.0516, + "46": 0.05082, + "47": 0.05109, + "48": 0.04999, + "49": 0.05119, + "50": 0.05038, + "51": 0.05535, + "52": 0.05046, + "53": 0.05138, + "54": 0.05027, + "55": 0.05075, + "56": 0.04981, + "57": 0.05159, + "58": 0.05058, + "59": 0.05217, + "60": 0.05149, + "61": 0.04997, + "62": 0.05194, + "63": 0.04978, + "64": 0.05149, + "65": 0.05046, + "66": 0.05122, + "67": 0.05044, + "68": 0.05143, + "69": 0.05001, + "70": 0.05119, + "71": 0.04992, + "72": 0.05165, + "73": 0.04982, + "74": 0.05177, + "75": 0.05017, + "76": 0.05181, + "77": 0.04976, + "78": 0.05205, + "79": 0.04988, + "80": 0.05179, + "81": 0.04998, + "82": 0.05171, + "83": 0.04955, + "84": 0.05129, + "85": 0.04997, + "86": 0.05196, + "87": 0.04994, + "88": 0.05222, + "89": 0.04959, + "90": 0.05149, + "91": 0.05016, + "92": 0.04991, + "93": 0.05106, + "94": 0.05058, + "95": 0.05081, + "96": 0.05036, + "97": 0.05139, + "98": 0.05245, + "99": 0.05188, + "100": 0.05016 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json index 05b11c3c8ee..0f5dd5066ab 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute/golden_values_dev_dgx_h100.json @@ -6,103 +6,103 @@ "values": { "1": 10.89631, "2": 10.89416, - "3": 10.88786, - "4": 10.8914, - "5": 10.89154, - "6": 10.90001, - "7": 10.89184, + "3": 10.88785, + "4": 10.89141, + "5": 10.89153, + "6": 10.90002, + "7": 10.89187, "8": 10.89886, - "9": 10.90208, - "10": 10.88361, - "11": 10.87817, - "12": 10.89334, - "13": 10.89814, - "14": 10.89242, - "15": 10.84803, - "16": 10.85398, - "17": 10.83097, - "18": 10.83991, + "9": 10.90211, + "10": 10.88366, + "11": 10.87818, + "12": 10.89332, + "13": 10.89815, + "14": 10.89244, + "15": 10.84802, + "16": 10.85399, + "17": 10.83096, + "18": 10.83988, "19": 10.82801, - "20": 10.74824, - "21": 10.73496, - "22": 10.61719, - "23": 10.72621, - "24": 10.63178, - "25": 10.59309, - "26": 10.63369, - "27": 10.63304, - "28": 10.58264, + "20": 10.74826, + "21": 10.73495, + "22": 10.61721, + "23": 10.7262, + "24": 10.6318, + "25": 10.59315, + "26": 10.63366, + "27": 10.63302, + "28": 10.58261, "29": 10.58594, "30": 10.41204, - "31": 10.15899, - "32": 10.48366, + "31": 10.15907, + "32": 10.48364, "33": 10.46706, - "34": 10.23811, - "35": 10.28189, - "36": 10.24056, - "37": 10.36219, - "38": 10.20309, + "34": 10.23815, + "35": 10.28192, + "36": 10.24053, + "37": 10.36222, + "38": 10.20308, "39": 10.40454, "40": 10.09271, - "41": 10.15835, - "42": 10.21933, - "43": 9.84358, - "44": 9.97303, - "45": 9.84194, - "46": 9.82017, - "47": 10.14969, - "48": 9.86023, - "49": 9.54235, - "50": 9.91343, - "51": 9.8545, - "52": 9.7393, - "53": 10.07426, - "54": 9.96913, - "55": 9.88574, - "56": 9.62438, - "57": 9.48229, - "58": 9.83484, + "41": 10.15834, + "42": 10.21931, + "43": 9.84356, + "44": 9.97301, + "45": 9.84195, + "46": 9.82013, + "47": 10.14966, + "48": 9.86021, + "49": 9.54237, + "50": 9.91349, + "51": 9.85446, + "52": 9.73932, + "53": 10.07424, + "54": 9.96915, + "55": 9.8857, + "56": 9.62439, + "57": 9.48231, + "58": 9.83487, "59": 9.58731, - "60": 9.50243, - "61": 9.6934, - "62": 9.988, - "63": 9.39105, - "64": 9.78022, - "65": 8.94516, + "60": 9.50244, + "61": 9.69338, + "62": 9.98802, + "63": 9.39104, + "64": 9.78023, + "65": 8.94515, "66": 9.70492, "67": 9.37249, - "68": 9.78328, - "69": 9.79057, - "70": 9.74451, - "71": 9.62298, - "72": 9.58457, - "73": 9.50511, - "74": 8.94308, - "75": 9.42524, - "76": 9.07602, - "77": 10.06352, - "78": 9.72307, - "79": 9.37497, - "80": 9.40454, - "81": 9.4779, - "82": 9.69669, + "68": 9.78334, + "69": 9.79056, + "70": 9.74448, + "71": 9.62302, + "72": 9.58456, + "73": 9.50509, + "74": 8.94304, + "75": 9.42523, + "76": 9.07599, + "77": 10.06351, + "78": 9.72308, + "79": 9.37501, + "80": 9.40453, + "81": 9.47789, + "82": 9.69668, "83": 9.30714, - "84": 9.41525, - "85": 9.61295, - "86": 9.07198, - "87": 9.58834, + "84": 9.41527, + "85": 9.61294, + "86": 9.07193, + "87": 9.58839, "88": 9.7476, - "89": 9.59984, + "89": 9.59981, "90": 9.81672, - "91": 9.33791, - "92": 9.35608, + "91": 9.33789, + "92": 9.35604, "93": 9.07423, - "94": 8.83511, - "95": 9.51841, - "96": 9.52391, - "97": 9.30922, - "98": 9.66746, - "99": 8.88421, + "94": 8.83512, + "95": 9.51842, + "96": 9.52392, + "97": 9.30921, + "98": 9.66748, + "99": 8.88417, "100": 9.39923 } }, @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1483.0, - "2": 1650.0, - "3": 1681.0, - "4": 1767.0, - "5": 1903.0, - "6": 1952.0, - "7": 1967.0, - "8": 1651.0, - "9": 1886.0, - "10": 1427.0, - "11": 1939.0, - "12": 1778.0, - "13": 1964.0, - "14": 1762.0, - "15": 1980.0, - "16": 1923.0, - "17": 1817.0, - "18": 1783.0, - "19": 1750.0, - "20": 1588.0, - "21": 1855.0, - "22": 1641.0, - "23": 2098.0, - "24": 1679.0, - "25": 1649.0, - "26": 1806.0, - "27": 1834.0, - "28": 2042.0, - "29": 2033.0, - "30": 1984.0, - "31": 1518.0, - "32": 1954.0, - "33": 2068.0, - "34": 1900.0, - "35": 1921.0, - "36": 1965.0, - "37": 2321.0, - "38": 2340.0, - "39": 2344.0, - "40": 2367.0, - "41": 2457.0, - "42": 2367.0, - "43": 2020.0, - "44": 2135.0, - "45": 2184.0, - "46": 2310.0, - "47": 2463.0, - "48": 2450.0, - "49": 2259.0, - "50": 2444.0, - "51": 2543.0, - "52": 2613.0, - "53": 2945.0, - "54": 2713.0, - "55": 2503.0, - "56": 2692.0, - "57": 2338.0, - "58": 2961.0, - "59": 2620.0, - "60": 2367.0, - "61": 2909.0, - "62": 2728.0, - "63": 2399.0, - "64": 2909.0, - "65": 2605.0, - "66": 2983.0, - "67": 2793.0, - "68": 2663.0, - "69": 2833.0, - "70": 3135.0, - "71": 2997.0, - "72": 2464.0, - "73": 3088.0, - "74": 1970.0, - "75": 2556.0, - "76": 3064.0, - "77": 3231.0, - "78": 3097.0, - "79": 3035.0, - "80": 3301.0, - "81": 3599.0, - "82": 3215.0, - "83": 2757.0, - "84": 3130.0, - "85": 3380.0, - "86": 2742.0, - "87": 3723.0, - "88": 3066.0, - "89": 3264.0, - "90": 3198.0, - "91": 2718.0, - "92": 3070.0, - "93": 2624.0, - "94": 3301.0, - "95": 3431.0, - "96": 3358.0, - "97": 3142.0, - "98": 3704.0, - "99": 3107.0, - "100": 3089.0 + "1": 1524.0, + "2": 1653.0, + "3": 1732.0, + "4": 1794.0, + "5": 1835.0, + "6": 1904.0, + "7": 1919.0, + "8": 1747.0, + "9": 1860.0, + "10": 1363.0, + "11": 1886.0, + "12": 1814.0, + "13": 2010.0, + "14": 1805.0, + "15": 1895.0, + "16": 1925.0, + "17": 1797.0, + "18": 1687.0, + "19": 1794.0, + "20": 1640.0, + "21": 1870.0, + "22": 1691.0, + "23": 2048.0, + "24": 1689.0, + "25": 1674.0, + "26": 1851.0, + "27": 1918.0, + "28": 2004.0, + "29": 2002.0, + "30": 1974.0, + "31": 1552.0, + "32": 1919.0, + "33": 2072.0, + "34": 1877.0, + "35": 2014.0, + "36": 1917.0, + "37": 2380.0, + "38": 2217.0, + "39": 2340.0, + "40": 2334.0, + "41": 2432.0, + "42": 2292.0, + "43": 2056.0, + "44": 2175.0, + "45": 2177.0, + "46": 2317.0, + "47": 2577.0, + "48": 2421.0, + "49": 2154.0, + "50": 2463.0, + "51": 2625.0, + "52": 2529.0, + "53": 2937.0, + "54": 2770.0, + "55": 2449.0, + "56": 2668.0, + "57": 2348.0, + "58": 3012.0, + "59": 2766.0, + "60": 2323.0, + "61": 2868.0, + "62": 2665.0, + "63": 2375.0, + "64": 2910.0, + "65": 2634.0, + "66": 2945.0, + "67": 2770.0, + "68": 2758.0, + "69": 2839.0, + "70": 3090.0, + "71": 2928.0, + "72": 2473.0, + "73": 2991.0, + "74": 2012.0, + "75": 2569.0, + "76": 2953.0, + "77": 3333.0, + "78": 3200.0, + "79": 3040.0, + "80": 3305.0, + "81": 3551.0, + "82": 3269.0, + "83": 2808.0, + "84": 3261.0, + "85": 3369.0, + "86": 2708.0, + "87": 3551.0, + "88": 3135.0, + "89": 3206.0, + "90": 3087.0, + "91": 2653.0, + "92": 3131.0, + "93": 2676.0, + "94": 3310.0, + "95": 3393.0, + "96": 3418.0, + "97": 3229.0, + "98": 3725.0, + "99": 3049.0, + "100": 3170.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1261849088.0, - "2": 1261849088.0, - "3": 1261849088.0, - "4": 1261849088.0, - "5": 1261849088.0, - "6": 1261849088.0, - "7": 1261849088.0, - "8": 1261849088.0, - "9": 1261849088.0, - "10": 1261849088.0, - "11": 1261849088.0, - "12": 1261849088.0, - "13": 1261849088.0, - "14": 1261849088.0, - "15": 1261849088.0, - "16": 1261849088.0, - "17": 1261849088.0, - "18": 1261849088.0, - "19": 1261849088.0, - "20": 1261849088.0, - "21": 1261849088.0, - "22": 1261849088.0, - "23": 1261849088.0, - "24": 1261849088.0, - "25": 1261849088.0, - "26": 1261849088.0, - "27": 1261849088.0, - "28": 1261849088.0, - "29": 1261849088.0, - "30": 1261849088.0, - "31": 1261849088.0, - "32": 1261849088.0, - "33": 1261849088.0, - "34": 1261849088.0, - "35": 1261849088.0, - "36": 1261849088.0, - "37": 1261849088.0, - "38": 1261849088.0, - "39": 1261849088.0, - "40": 1261849088.0, - "41": 1261849088.0, - "42": 1261849088.0, - "43": 1261849088.0, - "44": 1261849088.0, - "45": 1261849088.0, - "46": 1261849088.0, - "47": 1261849088.0, - "48": 1261849088.0, - "49": 1261849088.0, - "50": 1261849088.0, - "51": 1261849088.0, - "52": 1261849088.0, - "53": 1261849088.0, - "54": 1261849088.0, - "55": 1261849088.0, - "56": 1261849088.0, - "57": 1261849088.0, - "58": 1261849088.0, - "59": 1261849088.0, - "60": 1261849088.0, - "61": 1261849088.0, - "62": 1261849088.0, - "63": 1261849088.0, - "64": 1261849088.0, - "65": 1261849088.0, - "66": 1261849088.0, - "67": 1261849088.0, - "68": 1261849088.0, - "69": 1261849088.0, - "70": 1261849088.0, - "71": 1261849088.0, - "72": 1261849088.0, - "73": 1261849088.0, - "74": 1261849088.0, - "75": 1261849088.0, - "76": 1261849088.0, - "77": 1261849088.0, - "78": 1261849088.0, - "79": 1261849088.0, - "80": 1261849088.0, - "81": 1261849088.0, - "82": 1261849088.0, - "83": 1261849088.0, - "84": 1261849088.0, - "85": 1261849088.0, - "86": 1261849088.0, - "87": 1261849088.0, - "88": 1261849088.0, - "89": 1261849088.0, - "90": 1261849088.0, - "91": 1261849088.0, - "92": 1261849088.0, - "93": 1261849088.0, - "94": 1261849088.0, - "95": 1261849088.0, - "96": 1261849088.0, - "97": 1261849088.0, - "98": 1261849088.0, - "99": 1261849088.0, - "100": 1261849088.0 + "1": 1259751936.0, + "2": 1259751936.0, + "3": 1259751936.0, + "4": 1259751936.0, + "5": 1259751936.0, + "6": 1259751936.0, + "7": 1259751936.0, + "8": 1259751936.0, + "9": 1259751936.0, + "10": 1259751936.0, + "11": 1259751936.0, + "12": 1259751936.0, + "13": 1259751936.0, + "14": 1259751936.0, + "15": 1259751936.0, + "16": 1259751936.0, + "17": 1259751936.0, + "18": 1259751936.0, + "19": 1259751936.0, + "20": 1259751936.0, + "21": 1259751936.0, + "22": 1259751936.0, + "23": 1259751936.0, + "24": 1259751936.0, + "25": 1259751936.0, + "26": 1259751936.0, + "27": 1259751936.0, + "28": 1259751936.0, + "29": 1259751936.0, + "30": 1259751936.0, + "31": 1259751936.0, + "32": 1259751936.0, + "33": 1259751936.0, + "34": 1259751936.0, + "35": 1259751936.0, + "36": 1259751936.0, + "37": 1259751936.0, + "38": 1259751936.0, + "39": 1259751936.0, + "40": 1259751936.0, + "41": 1259751936.0, + "42": 1259751936.0, + "43": 1259751936.0, + "44": 1259751936.0, + "45": 1259751936.0, + "46": 1259751936.0, + "47": 1259751936.0, + "48": 1259751936.0, + "49": 1259751936.0, + "50": 1259751936.0, + "51": 1259751936.0, + "52": 1259751936.0, + "53": 1259751936.0, + "54": 1259751936.0, + "55": 1259751936.0, + "56": 1259751936.0, + "57": 1259751936.0, + "58": 1259751936.0, + "59": 1259751936.0, + "60": 1259751936.0, + "61": 1259751936.0, + "62": 1259751936.0, + "63": 1259751936.0, + "64": 1259751936.0, + "65": 1259751936.0, + "66": 1259751936.0, + "67": 1259751936.0, + "68": 1259751936.0, + "69": 1259751936.0, + "70": 1259751936.0, + "71": 1259751936.0, + "72": 1259751936.0, + "73": 1259751936.0, + "74": 1259751936.0, + "75": 1259751936.0, + "76": 1259751936.0, + "77": 1259751936.0, + "78": 1259751936.0, + "79": 1259751936.0, + "80": 1259751936.0, + "81": 1259751936.0, + "82": 1259751936.0, + "83": 1259751936.0, + "84": 1259751936.0, + "85": 1259751936.0, + "86": 1259751936.0, + "87": 1259751936.0, + "88": 1259751936.0, + "89": 1259751936.0, + "90": 1259751936.0, + "91": 1259751936.0, + "92": 1259751936.0, + "93": 1259751936.0, + "94": 1259751936.0, + "95": 1259751936.0, + "96": 1259751936.0, + "97": 1259751936.0, + "98": 1259751936.0, + "99": 1259751936.0, + "100": 1259751936.0 } }, "mem-max-allocated-bytes": { @@ -326,105 +326,105 @@ "step_interval": 1, "values": { "1": 2013853696.0, - "2": 2563431424.0, - "3": 2563431424.0, - "4": 2563431424.0, - "5": 2563431424.0, - "6": 2563431424.0, - "7": 2563431424.0, - "8": 2563431424.0, - "9": 2563431424.0, - "10": 2563431424.0, - "11": 2563431424.0, - "12": 2563431424.0, - "13": 2563431424.0, - "14": 2563431424.0, - "15": 2563431424.0, - "16": 2563431424.0, - "17": 2563431424.0, - "18": 2563431424.0, - "19": 2563431424.0, - "20": 2563431424.0, - "21": 2563431424.0, - "22": 2563431424.0, - "23": 2563431424.0, - "24": 2563431424.0, - "25": 2563431424.0, - "26": 2563431424.0, - "27": 2563431424.0, - "28": 2563431424.0, - "29": 2563431424.0, - "30": 2563431424.0, - "31": 2563431424.0, - "32": 2563431424.0, - "33": 2563431424.0, - "34": 2563431424.0, - "35": 2563431424.0, - "36": 2563431424.0, - "37": 2563431424.0, - "38": 2563431424.0, - "39": 2563431424.0, - "40": 2563431424.0, - "41": 2563431424.0, - "42": 2563431424.0, - "43": 2563431424.0, - "44": 2563431424.0, - "45": 2563431424.0, - "46": 2563431424.0, - "47": 2563431424.0, - "48": 2563431424.0, - "49": 2563431424.0, - "50": 2563431424.0, - "51": 2563431424.0, - "52": 2563431424.0, - "53": 2563431424.0, - "54": 2563431424.0, - "55": 2563431424.0, - "56": 2563431424.0, - "57": 2563431424.0, - "58": 2563431424.0, - "59": 2563431424.0, - "60": 2563431424.0, - "61": 2563431424.0, - "62": 2563431424.0, - "63": 2563431424.0, - "64": 2563431424.0, - "65": 2563431424.0, - "66": 2563431424.0, - "67": 2563431424.0, - "68": 2563431424.0, - "69": 2563431424.0, - "70": 2563431424.0, - "71": 2563431424.0, - "72": 2563431424.0, - "73": 2563431424.0, - "74": 2563431424.0, - "75": 2563431424.0, - "76": 2563431424.0, - "77": 2563431424.0, - "78": 2563431424.0, - "79": 2563431424.0, - "80": 2563431424.0, - "81": 2563431424.0, - "82": 2563431424.0, - "83": 2563431424.0, - "84": 2563431424.0, - "85": 2563431424.0, - "86": 2563431424.0, - "87": 2563431424.0, - "88": 2563431424.0, - "89": 2563431424.0, - "90": 2563431424.0, - "91": 2563431424.0, - "92": 2563431424.0, - "93": 2563431424.0, - "94": 2563431424.0, - "95": 2563431424.0, - "96": 2563431424.0, - "97": 2563431424.0, - "98": 2563431424.0, - "99": 2563431424.0, - "100": 2563431424.0 + "2": 2561334272.0, + "3": 2561334272.0, + "4": 2561334272.0, + "5": 2561334272.0, + "6": 2561334272.0, + "7": 2561334272.0, + "8": 2561334272.0, + "9": 2561334272.0, + "10": 2561334272.0, + "11": 2561334272.0, + "12": 2561334272.0, + "13": 2561334272.0, + "14": 2561334272.0, + "15": 2561334272.0, + "16": 2561334272.0, + "17": 2561334272.0, + "18": 2561334272.0, + "19": 2561334272.0, + "20": 2561334272.0, + "21": 2561334272.0, + "22": 2561334272.0, + "23": 2561334272.0, + "24": 2561334272.0, + "25": 2561334272.0, + "26": 2561334272.0, + "27": 2561334272.0, + "28": 2561334272.0, + "29": 2561334272.0, + "30": 2561334272.0, + "31": 2561334272.0, + "32": 2561334272.0, + "33": 2561334272.0, + "34": 2561334272.0, + "35": 2561334272.0, + "36": 2561334272.0, + "37": 2561334272.0, + "38": 2561334272.0, + "39": 2561334272.0, + "40": 2561334272.0, + "41": 2561334272.0, + "42": 2561334272.0, + "43": 2561334272.0, + "44": 2561334272.0, + "45": 2561334272.0, + "46": 2561334272.0, + "47": 2561334272.0, + "48": 2561334272.0, + "49": 2561334272.0, + "50": 2561334272.0, + "51": 2561334272.0, + "52": 2561334272.0, + "53": 2561334272.0, + "54": 2561334272.0, + "55": 2561334272.0, + "56": 2561334272.0, + "57": 2561334272.0, + "58": 2561334272.0, + "59": 2561334272.0, + "60": 2561334272.0, + "61": 2561334272.0, + "62": 2561334272.0, + "63": 2561334272.0, + "64": 2561334272.0, + "65": 2561334272.0, + "66": 2561334272.0, + "67": 2561334272.0, + "68": 2561334272.0, + "69": 2561334272.0, + "70": 2561334272.0, + "71": 2561334272.0, + "72": 2561334272.0, + "73": 2561334272.0, + "74": 2561334272.0, + "75": 2561334272.0, + "76": 2561334272.0, + "77": 2561334272.0, + "78": 2561334272.0, + "79": 2561334272.0, + "80": 2561334272.0, + "81": 2561334272.0, + "82": 2561334272.0, + "83": 2561334272.0, + "84": 2561334272.0, + "85": 2561334272.0, + "86": 2561334272.0, + "87": 2561334272.0, + "88": 2561334272.0, + "89": 2561334272.0, + "90": 2561334272.0, + "91": 2561334272.0, + "92": 2561334272.0, + "93": 2561334272.0, + "94": 2561334272.0, + "95": 2561334272.0, + "96": 2561334272.0, + "97": 2561334272.0, + "98": 2561334272.0, + "99": 2561334272.0, + "100": 2561334272.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 4.95057, - "2": 0.11272, - "3": 0.10006, - "4": 0.0754, - "5": 0.07446, - "6": 0.07499, - "7": 0.07451, - "8": 0.07507, - "9": 0.07406, - "10": 0.07462, - "11": 0.07387, - "12": 0.07421, - "13": 0.07426, - "14": 0.075, - "15": 0.07429, - "16": 0.07394, - "17": 0.07476, - "18": 0.07498, - "19": 0.07455, - "20": 0.07456, - "21": 0.07463, - "22": 0.07473, - "23": 0.07475, - "24": 0.0743, - "25": 0.07447, - "26": 0.07414, - "27": 0.07438, - "28": 0.07665, - "29": 0.07618, - "30": 0.07525, - "31": 0.07718, - "32": 0.07452, - "33": 0.07632, - "34": 0.07594, - "35": 0.0752, - "36": 0.07788, - "37": 0.07472, - "38": 0.07514, - "39": 0.07557, - "40": 0.07528, - "41": 0.07668, - "42": 0.07829, - "43": 0.07561, - "44": 0.07525, - "45": 0.07522, - "46": 0.08858, - "47": 0.09212, - "48": 0.07649, - "49": 0.07761, - "50": 0.07534, - "51": 0.0797, - "52": 0.07601, - "53": 0.07588, - "54": 0.07564, - "55": 0.07643, - "56": 0.07613, - "57": 0.07562, - "58": 0.07558, - "59": 0.07588, - "60": 0.07563, - "61": 0.07585, - "62": 0.07578, - "63": 0.07559, - "64": 0.07502, - "65": 0.07586, - "66": 0.07503, - "67": 0.0755, - "68": 0.07448, - "69": 0.07531, - "70": 0.07481, - "71": 0.07524, - "72": 0.07712, - "73": 0.07539, - "74": 0.07566, - "75": 0.07497, - "76": 0.07458, - "77": 0.07476, - "78": 0.07547, - "79": 0.07542, - "80": 0.07549, - "81": 0.07589, - "82": 0.07548, - "83": 0.07513, - "84": 0.07494, - "85": 0.07468, - "86": 0.07522, - "87": 0.07487, - "88": 0.07533, - "89": 0.07545, - "90": 0.07496, - "91": 0.07533, - "92": 0.07435, - "93": 0.07549, - "94": 0.07465, - "95": 0.07523, - "96": 0.07531, - "97": 0.07697, - "98": 0.0768, - "99": 0.07605, - "100": 0.07588 + "1": "nan", + "2": 2.86901, + "3": 0.0951, + "4": 0.07664, + "5": 0.07727, + "6": 0.07707, + "7": 0.07716, + "8": 0.07667, + "9": 0.07695, + "10": 0.0768, + "11": 0.07613, + "12": 0.07687, + "13": 0.07608, + "14": 0.07715, + "15": 0.07689, + "16": 0.07617, + "17": 0.07691, + "18": 0.07603, + "19": 0.07642, + "20": 0.07617, + "21": 0.0765, + "22": 0.07638, + "23": 0.07673, + "24": 0.07615, + "25": 0.07629, + "26": 0.07589, + "27": 0.07678, + "28": 0.07588, + "29": 0.07579, + "30": 0.07618, + "31": 0.07686, + "32": 0.0755, + "33": 0.07624, + "34": 0.07587, + "35": 0.07656, + "36": 0.07558, + "37": 0.07674, + "38": 0.07619, + "39": 0.07638, + "40": 0.07629, + "41": 0.07916, + "42": 0.07708, + "43": 0.07736, + "44": 0.07658, + "45": 0.07694, + "46": 0.07639, + "47": 0.0787, + "48": 0.0777, + "49": 0.07677, + "50": 0.07678, + "51": 0.08142, + "52": 0.07793, + "53": 0.07688, + "54": 0.0771, + "55": 0.07682, + "56": 0.07717, + "57": 0.07712, + "58": 0.07637, + "59": 0.07821, + "60": 0.07756, + "61": 0.07793, + "62": 0.07875, + "63": 0.07625, + "64": 0.0775, + "65": 0.07689, + "66": 0.07666, + "67": 0.07699, + "68": 0.0763, + "69": 0.0766, + "70": 0.07706, + "71": 0.07676, + "72": 0.07816, + "73": 0.07645, + "74": 0.07713, + "75": 0.07675, + "76": 0.07795, + "77": 0.07688, + "78": 0.07657, + "79": 0.07815, + "80": 0.07702, + "81": 0.07682, + "82": 0.07647, + "83": 0.07736, + "84": 0.07681, + "85": 0.07718, + "86": 0.07666, + "87": 0.07732, + "88": 0.07639, + "89": 0.07691, + "90": 0.07624, + "91": 0.07703, + "92": 0.0762, + "93": 0.0767, + "94": 0.07746, + "95": 0.07723, + "96": 0.07629, + "97": 0.07689, + "98": 0.07615, + "99": 0.0774, + "100": 0.07615 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json index 1f743e8c2e8..bfff8b2dafd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.85787, "2": 10.87336, - "3": 10.86821, - "4": 10.87255, - "5": 10.87398, - "6": 10.89631, - "7": 10.86379, - "8": 10.87834, - "9": 10.87399, + "3": 10.86822, + "4": 10.87256, + "5": 10.87397, + "6": 10.89632, + "7": 10.86383, + "8": 10.87835, + "9": 10.87398, "10": 10.83714, - "11": 10.86988, - "12": 10.85947, + "11": 10.86989, + "12": 10.85948, "13": 10.87777, - "14": 10.87924, + "14": 10.87922, "15": 10.81888, - "16": 10.83058, - "17": 10.78684, - "18": 10.80146, - "19": 10.79775, - "20": 10.71155, - "21": 10.6865, - "22": 10.55277, - "23": 10.7014, - "24": 10.58527, + "16": 10.83062, + "17": 10.78686, + "18": 10.80152, + "19": 10.79779, + "20": 10.7116, + "21": 10.68645, + "22": 10.5528, + "23": 10.70142, + "24": 10.58526, "25": 10.52658, - "26": 10.58299, - "27": 10.59487, - "28": 10.54787, - "29": 10.55928, - "30": 10.32818, - "31": 10.08272, + "26": 10.58297, + "27": 10.59488, + "28": 10.54788, + "29": 10.55927, + "30": 10.32815, + "31": 10.08271, "32": 10.44699, - "33": 10.42755, - "34": 10.17932, + "33": 10.42754, + "34": 10.17927, "35": 10.24095, - "36": 10.18094, - "37": 10.32809, - "38": 10.16727, + "36": 10.18089, + "37": 10.32812, + "38": 10.16729, "39": 10.37344, - "40": 10.05079, - "41": 10.10728, - "42": 10.17799, - "43": 9.77846, - "44": 9.91207, - "45": 9.77392, - "46": 9.75431, - "47": 10.09497, - "48": 9.79523, - "49": 9.46391, - "50": 9.8673, - "51": 9.80381, - "52": 9.68202, + "40": 10.05077, + "41": 10.10731, + "42": 10.17806, + "43": 9.77848, + "44": 9.91208, + "45": 9.77394, + "46": 9.7543, + "47": 10.09494, + "48": 9.79522, + "49": 9.4639, + "50": 9.86732, + "51": 9.80376, + "52": 9.68203, "53": 10.02345, - "54": 9.91634, - "55": 9.82456, - "56": 9.56974, - "57": 9.42672, - "58": 9.78081, - "59": 9.53243, - "60": 9.44593, - "61": 9.64254, - "62": 9.94293, - "63": 9.31764, - "64": 9.72548, + "54": 9.91631, + "55": 9.82458, + "56": 9.56976, + "57": 9.42674, + "58": 9.78082, + "59": 9.53247, + "60": 9.44592, + "61": 9.64255, + "62": 9.94291, + "63": 9.31767, + "64": 9.7255, "65": 8.88739, "66": 9.65691, "67": 9.31749, - "68": 9.73495, - "69": 9.74866, - "70": 9.69625, - "71": 9.57689, - "72": 9.52422, - "73": 9.45595, + "68": 9.73494, + "69": 9.74868, + "70": 9.69627, + "71": 9.57684, + "72": 9.52424, + "73": 9.45598, "74": 8.88269, - "75": 9.37584, - "76": 9.01136, - "77": 10.02287, - "78": 9.67963, - "79": 9.33172, - "80": 9.35826, + "75": 9.37587, + "76": 9.01137, + "77": 10.0229, + "78": 9.67961, + "79": 9.33169, + "80": 9.35831, "81": 9.43394, - "82": 9.65054, - "83": 9.25503, - "84": 9.3714, - "85": 9.5623, + "82": 9.65057, + "83": 9.25502, + "84": 9.37136, + "85": 9.56232, "86": 9.03489, - "87": 9.54614, - "88": 9.69785, - "89": 9.54656, - "90": 9.77624, - "91": 9.2884, - "92": 9.30662, - "93": 9.02647, + "87": 9.54613, + "88": 9.69784, + "89": 9.54653, + "90": 9.77622, + "91": 9.28841, + "92": 9.30664, + "93": 9.02649, "94": 8.78837, - "95": 9.48027, - "96": 9.47974, + "95": 9.48026, + "96": 9.47969, "97": 9.25611, - "98": 9.61949, - "99": 8.83824, - "100": 9.35135 + "98": 9.6195, + "99": 8.83827, + "100": 9.35136 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1858.0, - "2": 1854.0, - "3": 1803.0, - "4": 1955.0, - "5": 2000.0, - "6": 2036.0, - "7": 1932.0, - "8": 1791.0, - "9": 1935.0, - "10": 1654.0, - "11": 2080.0, - "12": 1881.0, - "13": 1977.0, - "14": 2080.0, - "15": 1957.0, - "16": 1910.0, - "17": 1974.0, - "18": 1896.0, - "19": 1955.0, - "20": 1816.0, - "21": 1906.0, - "22": 1972.0, - "23": 2062.0, - "24": 1897.0, - "25": 1830.0, - "26": 1788.0, - "27": 1849.0, - "28": 2008.0, - "29": 2128.0, - "30": 1969.0, - "31": 1630.0, - "32": 2057.0, - "33": 2171.0, - "34": 1947.0, - "35": 2097.0, - "36": 1972.0, - "37": 2348.0, - "38": 2186.0, - "39": 2378.0, - "40": 2181.0, - "41": 2326.0, - "42": 2334.0, - "43": 2219.0, - "44": 2234.0, - "45": 2231.0, - "46": 2229.0, - "47": 2449.0, - "48": 2439.0, - "49": 2159.0, - "50": 2290.0, - "51": 2514.0, - "52": 2513.0, - "53": 2894.0, - "54": 2656.0, - "55": 2348.0, - "56": 2506.0, - "57": 2501.0, - "58": 2770.0, - "59": 2681.0, - "60": 2434.0, - "61": 2776.0, - "62": 2596.0, - "63": 2617.0, - "64": 3012.0, - "65": 2657.0, - "66": 2947.0, - "67": 3089.0, - "68": 2818.0, - "69": 2909.0, - "70": 3025.0, - "71": 2924.0, - "72": 2702.0, - "73": 2947.0, - "74": 2306.0, - "75": 2791.0, - "76": 3093.0, - "77": 3107.0, - "78": 3134.0, - "79": 3205.0, - "80": 3123.0, - "81": 3290.0, - "82": 3172.0, - "83": 2719.0, - "84": 3328.0, - "85": 3255.0, - "86": 2546.0, - "87": 3472.0, - "88": 3068.0, - "89": 2953.0, - "90": 3300.0, - "91": 3154.0, - "92": 3061.0, - "93": 2889.0, - "94": 3535.0, - "95": 3078.0, - "96": 3181.0, - "97": 3135.0, - "98": 3569.0, - "99": 3319.0, - "100": 3223.0 + "1": 1848.0, + "2": 1849.0, + "3": 1746.0, + "4": 1950.0, + "5": 2031.0, + "6": 1975.0, + "7": 1943.0, + "8": 1883.0, + "9": 1996.0, + "10": 1630.0, + "11": 2060.0, + "12": 1912.0, + "13": 2031.0, + "14": 1956.0, + "15": 1992.0, + "16": 1950.0, + "17": 1903.0, + "18": 1924.0, + "19": 1905.0, + "20": 1757.0, + "21": 1983.0, + "22": 1985.0, + "23": 2111.0, + "24": 1849.0, + "25": 1871.0, + "26": 1789.0, + "27": 1887.0, + "28": 1973.0, + "29": 2061.0, + "30": 2091.0, + "31": 1643.0, + "32": 2165.0, + "33": 2209.0, + "34": 2005.0, + "35": 2027.0, + "36": 2063.0, + "37": 2374.0, + "38": 2253.0, + "39": 2367.0, + "40": 2182.0, + "41": 2373.0, + "42": 2272.0, + "43": 2154.0, + "44": 2274.0, + "45": 2085.0, + "46": 2228.0, + "47": 2346.0, + "48": 2391.0, + "49": 2150.0, + "50": 2221.0, + "51": 2454.0, + "52": 2520.0, + "53": 2859.0, + "54": 2631.0, + "55": 2425.0, + "56": 2453.0, + "57": 2570.0, + "58": 2639.0, + "59": 2694.0, + "60": 2549.0, + "61": 2746.0, + "62": 2590.0, + "63": 2543.0, + "64": 3028.0, + "65": 2591.0, + "66": 2852.0, + "67": 3044.0, + "68": 2822.0, + "69": 2829.0, + "70": 2980.0, + "71": 2878.0, + "72": 2674.0, + "73": 2920.0, + "74": 2280.0, + "75": 2702.0, + "76": 3061.0, + "77": 3096.0, + "78": 3149.0, + "79": 3172.0, + "80": 3033.0, + "81": 3366.0, + "82": 3265.0, + "83": 2807.0, + "84": 3281.0, + "85": 3266.0, + "86": 2661.0, + "87": 3453.0, + "88": 3202.0, + "89": 3009.0, + "90": 3259.0, + "91": 3051.0, + "92": 3160.0, + "93": 2954.0, + "94": 3471.0, + "95": 3123.0, + "96": 3225.0, + "97": 3116.0, + "98": 3551.0, + "99": 3291.0, + "100": 3140.0 } }, "mem-allocated-bytes": { @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2236675072.0, + "1": 2237723648.0, "2": 2596141056.0, "3": 2596141056.0, "4": 2596141056.0, @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 7.66848, - "2": 0.11896, - "3": 0.09977, - "4": 0.07967, - "5": 0.07964, - "6": 0.07997, - "7": 0.08012, - "8": 0.07951, - "9": 0.08093, - "10": 0.07978, - "11": 0.07959, - "12": 0.0801, - "13": 0.08014, - "14": 0.08001, - "15": 0.08005, - "16": 0.0803, - "17": 0.0801, - "18": 0.07861, - "19": 0.07885, - "20": 0.07921, - "21": 0.07891, - "22": 0.07852, - "23": 0.07915, - "24": 0.07938, - "25": 0.08, - "26": 0.0813, - "27": 0.07978, - "28": 0.07899, - "29": 0.0798, - "30": 0.08028, - "31": 0.07891, - "32": 0.07911, - "33": 0.07848, - "34": 0.07925, - "35": 0.07821, - "36": 0.07899, - "37": 0.07887, - "38": 0.07866, - "39": 0.07853, - "40": 0.08169, - "41": 0.07849, - "42": 0.07836, - "43": 0.0786, - "44": 0.07878, - "45": 0.07828, - "46": 0.07805, - "47": 0.07784, - "48": 0.07807, - "49": 0.0787, - "50": 0.0789, - "51": 0.09689, - "52": 0.08417, - "53": 0.08482, - "54": 0.08198, - "55": 0.07942, - "56": 0.07871, - "57": 0.07976, - "58": 0.07956, - "59": 0.08, - "60": 0.0792, - "61": 0.07836, - "62": 0.07989, - "63": 0.0809, - "64": 0.08148, - "65": 0.08043, - "66": 0.07986, - "67": 0.08023, - "68": 0.07899, - "69": 0.07929, - "70": 0.08168, - "71": 0.08127, - "72": 0.0786, - "73": 0.07921, - "74": 0.07909, - "75": 0.0791, - "76": 0.07958, - "77": 0.07852, - "78": 0.07999, - "79": 0.07999, - "80": 0.08194, - "81": 0.07923, - "82": 0.07928, - "83": 0.07876, - "84": 0.07871, - "85": 0.08021, - "86": 0.07922, - "87": 0.07979, - "88": 0.0797, - "89": 0.08029, - "90": 0.15516, - "91": 0.11731, - "92": 0.11011, - "93": 0.14646, - "94": 0.08003, - "95": 0.08107, - "96": 0.07984, - "97": 0.07889, - "98": 0.07881, - "99": 0.07894, - "100": 0.07813 + "1": "nan", + "2": 4.9552, + "3": 0.10033, + "4": 0.08196, + "5": 0.08147, + "6": 0.08085, + "7": 0.08113, + "8": 0.08219, + "9": 0.08231, + "10": 0.08108, + "11": 0.08057, + "12": 0.08093, + "13": 0.08127, + "14": 0.08128, + "15": 0.08213, + "16": 0.08119, + "17": 0.08122, + "18": 0.08074, + "19": 0.08212, + "20": 0.08339, + "21": 0.08221, + "22": 0.08215, + "23": 0.0811, + "24": 0.08084, + "25": 0.08167, + "26": 0.08129, + "27": 0.08149, + "28": 0.08067, + "29": 0.08054, + "30": 0.08096, + "31": 0.08069, + "32": 0.08014, + "33": 0.07984, + "34": 0.07984, + "35": 0.0805, + "36": 0.08039, + "37": 0.07994, + "38": 0.08113, + "39": 0.08006, + "40": 0.07969, + "41": 0.08039, + "42": 0.08012, + "43": 0.08077, + "44": 0.08006, + "45": 0.08062, + "46": 0.08095, + "47": 0.0803, + "48": 0.08011, + "49": 0.08053, + "50": 0.08008, + "51": 0.09167, + "52": 0.08906, + "53": 0.08856, + "54": 0.08817, + "55": 0.08179, + "56": 0.08141, + "57": 0.08275, + "58": 0.08331, + "59": 0.08156, + "60": 0.08245, + "61": 0.08401, + "62": 0.08406, + "63": 0.08119, + "64": 0.08192, + "65": 0.08124, + "66": 0.08077, + "67": 0.08064, + "68": 0.08048, + "69": 0.08077, + "70": 0.0805, + "71": 0.08053, + "72": 0.08112, + "73": 0.08447, + "74": 0.08094, + "75": 0.0807, + "76": 0.08106, + "77": 0.08073, + "78": 0.0819, + "79": 0.0826, + "80": 0.08236, + "81": 0.08103, + "82": 0.08046, + "83": 0.081, + "84": 0.08119, + "85": 0.08089, + "86": 0.08193, + "87": 0.08152, + "88": 0.08194, + "89": 0.0812, + "90": 0.08063, + "91": 0.08092, + "92": 0.08068, + "93": 0.08076, + "94": 0.08091, + "95": 0.08087, + "96": 0.08077, + "97": 0.0807, + "98": 0.08029, + "99": 0.08152, + "100": 0.08047 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json index 42889e09b26..0f203186fb7 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.76985, "2": 10.81791, - "3": 10.784, - "4": 10.788, - "5": 10.81927, - "6": 10.84306, - "7": 10.83464, - "8": 10.8066, - "9": 10.83359, - "10": 10.73562, + "3": 10.78402, + "4": 10.78796, + "5": 10.81924, + "6": 10.84305, + "7": 10.83461, + "8": 10.80657, + "9": 10.83362, + "10": 10.73563, "11": 10.86814, - "12": 10.85075, - "13": 10.84505, - "14": 10.87136, - "15": 10.8218, - "16": 10.80433, + "12": 10.85077, + "13": 10.84503, + "14": 10.87135, + "15": 10.82179, + "16": 10.80434, "17": 10.76124, - "18": 10.80363, - "19": 10.80599, - "20": 10.74747, - "21": 10.7254, - "22": 10.60597, - "23": 10.74387, - "24": 10.65549, + "18": 10.80358, + "19": 10.80593, + "20": 10.74748, + "21": 10.72537, + "22": 10.60596, + "23": 10.74384, + "24": 10.65548, "25": 10.58002, - "26": 10.64496, - "27": 10.67191, - "28": 10.66903, - "29": 10.66652, - "30": 10.46947, + "26": 10.64493, + "27": 10.67189, + "28": 10.66906, + "29": 10.6666, + "30": 10.46943, "31": 10.26264, - "32": 10.56932, - "33": 10.54232, + "32": 10.56935, + "33": 10.54231, "34": 10.36113, - "35": 10.39558, - "36": 10.36866, + "35": 10.39552, + "36": 10.36868, "37": 10.47523, - "38": 10.33715, - "39": 10.49947, - "40": 10.23019, - "41": 10.30905, - "42": 10.33124, - "43": 9.99091, + "38": 10.33713, + "39": 10.49939, + "40": 10.23017, + "41": 10.30906, + "42": 10.33123, + "43": 9.99093, "44": 10.09605, - "45": 10.00787, - "46": 9.96718, - "47": 10.27077, - "48": 10.01043, - "49": 9.73437, + "45": 10.00785, + "46": 9.96712, + "47": 10.27069, + "48": 10.0104, + "49": 9.73436, "50": 10.04737, "51": 10.00084, - "52": 9.89672, - "53": 10.19876, - "54": 10.09066, - "55": 10.00567, - "56": 9.77199, - "57": 9.64533, - "58": 9.98587, - "59": 9.72608, - "60": 9.6777, - "61": 9.8157, - "62": 10.092, - "63": 9.54758, - "64": 9.90438, - "65": 9.09492, - "66": 9.84068, + "52": 9.89675, + "53": 10.1988, + "54": 10.09063, + "55": 10.00569, + "56": 9.77206, + "57": 9.6453, + "58": 9.98586, + "59": 9.72612, + "60": 9.67771, + "61": 9.81568, + "62": 10.09202, + "63": 9.54764, + "64": 9.90442, + "65": 9.09488, + "66": 9.84066, "67": 9.48471, - "68": 9.88996, - "69": 9.87691, - "70": 9.85294, - "71": 9.73278, - "72": 9.72558, - "73": 9.63706, - "74": 9.12334, - "75": 9.55335, - "76": 9.21765, - "77": 10.15202, - "78": 9.81465, - "79": 9.47558, - "80": 9.52073, - "81": 9.5872, - "82": 9.79125, - "83": 9.44848, - "84": 9.49585, - "85": 9.72189, + "68": 9.88998, + "69": 9.87694, + "70": 9.85293, + "71": 9.73276, + "72": 9.72559, + "73": 9.63702, + "74": 9.12336, + "75": 9.55337, + "76": 9.21763, + "77": 10.15204, + "78": 9.81466, + "79": 9.47559, + "80": 9.52071, + "81": 9.58719, + "82": 9.79124, + "83": 9.4485, + "84": 9.49586, + "85": 9.7219, "86": 9.18037, "87": 9.66127, "88": 9.84359, - "89": 9.71651, - "90": 9.88102, + "89": 9.7165, + "90": 9.88101, "91": 9.48434, - "92": 9.4705, - "93": 9.20911, - "94": 8.95382, - "95": 9.60554, + "92": 9.47046, + "93": 9.20912, + "94": 8.95381, + "95": 9.60555, "96": 9.63976, - "97": 9.38762, - "98": 9.7573, - "99": 9.0159, - "100": 9.49925 + "97": 9.38764, + "98": 9.75733, + "99": 9.01588, + "100": 9.49924 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2680.0, - "2": 2615.0, - "3": 2642.0, - "4": 2479.0, - "5": 2971.0, - "6": 2822.0, - "7": 2833.0, - "8": 2508.0, - "9": 2922.0, - "10": 2508.0, - "11": 2917.0, - "12": 2817.0, - "13": 2935.0, - "14": 2969.0, - "15": 2679.0, - "16": 2976.0, - "17": 2609.0, - "18": 2868.0, - "19": 2790.0, - "20": 2461.0, - "21": 2636.0, - "22": 2356.0, - "23": 2798.0, - "24": 2613.0, - "25": 2640.0, - "26": 2701.0, - "27": 2761.0, - "28": 2801.0, - "29": 2971.0, - "30": 2590.0, - "31": 2307.0, - "32": 2751.0, - "33": 2881.0, - "34": 2352.0, - "35": 2480.0, - "36": 2443.0, - "37": 2748.0, - "38": 2692.0, - "39": 2709.0, - "40": 2570.0, - "41": 2752.0, - "42": 2689.0, - "43": 2381.0, - "44": 2483.0, - "45": 2397.0, - "46": 2281.0, - "47": 2684.0, - "48": 2330.0, - "49": 2293.0, - "50": 2740.0, - "51": 2575.0, - "52": 2621.0, - "53": 2891.0, - "54": 2655.0, - "55": 2559.0, - "56": 2566.0, - "57": 2471.0, - "58": 2767.0, - "59": 2529.0, - "60": 2289.0, - "61": 2642.0, - "62": 2820.0, - "63": 2654.0, - "64": 3020.0, - "65": 2687.0, - "66": 2884.0, - "67": 2666.0, - "68": 2720.0, - "69": 2738.0, - "70": 3004.0, - "71": 2816.0, - "72": 2537.0, - "73": 2826.0, - "74": 2192.0, - "75": 2647.0, - "76": 3048.0, - "77": 3019.0, - "78": 3134.0, - "79": 3092.0, - "80": 3054.0, - "81": 3298.0, - "82": 3350.0, - "83": 2597.0, - "84": 3436.0, - "85": 3350.0, - "86": 2993.0, - "87": 3509.0, - "88": 3403.0, - "89": 3490.0, - "90": 3368.0, - "91": 2461.0, - "92": 2803.0, - "93": 2933.0, - "94": 2888.0, - "95": 3138.0, - "96": 3047.0, - "97": 3016.0, - "98": 3382.0, - "99": 2995.0, - "100": 2490.0 + "1": 2805.0, + "2": 2569.0, + "3": 2669.0, + "4": 2452.0, + "5": 2817.0, + "6": 2939.0, + "7": 2784.0, + "8": 2559.0, + "9": 2824.0, + "10": 2575.0, + "11": 2911.0, + "12": 2689.0, + "13": 2988.0, + "14": 2921.0, + "15": 2693.0, + "16": 3037.0, + "17": 2638.0, + "18": 2916.0, + "19": 2863.0, + "20": 2451.0, + "21": 2687.0, + "22": 2462.0, + "23": 2777.0, + "24": 2644.0, + "25": 2496.0, + "26": 2722.0, + "27": 2758.0, + "28": 2832.0, + "29": 3013.0, + "30": 2558.0, + "31": 2303.0, + "32": 2665.0, + "33": 2780.0, + "34": 2366.0, + "35": 2532.0, + "36": 2537.0, + "37": 2777.0, + "38": 2690.0, + "39": 2843.0, + "40": 2605.0, + "41": 2697.0, + "42": 2686.0, + "43": 2317.0, + "44": 2484.0, + "45": 2297.0, + "46": 2343.0, + "47": 2613.0, + "48": 2414.0, + "49": 2381.0, + "50": 2751.0, + "51": 2584.0, + "52": 2575.0, + "53": 2857.0, + "54": 2756.0, + "55": 2531.0, + "56": 2544.0, + "57": 2484.0, + "58": 2815.0, + "59": 2564.0, + "60": 2263.0, + "61": 2645.0, + "62": 2824.0, + "63": 2589.0, + "64": 3010.0, + "65": 2664.0, + "66": 2928.0, + "67": 2609.0, + "68": 2747.0, + "69": 2832.0, + "70": 3003.0, + "71": 2741.0, + "72": 2483.0, + "73": 2809.0, + "74": 2131.0, + "75": 2673.0, + "76": 3018.0, + "77": 3023.0, + "78": 3182.0, + "79": 3251.0, + "80": 3075.0, + "81": 3372.0, + "82": 3352.0, + "83": 2607.0, + "84": 3299.0, + "85": 3175.0, + "86": 3061.0, + "87": 3470.0, + "88": 3331.0, + "89": 3464.0, + "90": 3438.0, + "91": 2435.0, + "92": 2979.0, + "93": 2983.0, + "94": 2953.0, + "95": 3119.0, + "96": 2879.0, + "97": 3018.0, + "98": 3305.0, + "99": 2999.0, + "100": 2710.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 744815104.0, - "2": 744815104.0, - "3": 744815104.0, - "4": 744815104.0, - "5": 744815104.0, - "6": 744815104.0, - "7": 744815104.0, - "8": 744815104.0, - "9": 744815104.0, - "10": 744815104.0, - "11": 744815104.0, - "12": 744815104.0, - "13": 744815104.0, - "14": 744815104.0, - "15": 744815104.0, - "16": 744815104.0, - "17": 744815104.0, - "18": 744815104.0, - "19": 744815104.0, - "20": 744815104.0, - "21": 744815104.0, - "22": 744815104.0, - "23": 744815104.0, - "24": 744815104.0, - "25": 744815104.0, - "26": 744815104.0, - "27": 744815104.0, - "28": 744815104.0, - "29": 744815104.0, - "30": 744815104.0, - "31": 744815104.0, - "32": 744815104.0, - "33": 744815104.0, - "34": 744815104.0, - "35": 744815104.0, - "36": 744815104.0, - "37": 744815104.0, - "38": 744815104.0, - "39": 744815104.0, - "40": 744815104.0, - "41": 744815104.0, - "42": 744815104.0, - "43": 744815104.0, - "44": 744815104.0, - "45": 744815104.0, - "46": 744815104.0, - "47": 744815104.0, - "48": 744815104.0, - "49": 744815104.0, - "50": 744815104.0, - "51": 744815104.0, - "52": 744815104.0, - "53": 744815104.0, - "54": 744815104.0, - "55": 744815104.0, - "56": 744815104.0, - "57": 744815104.0, - "58": 744815104.0, - "59": 744815104.0, - "60": 744815104.0, - "61": 744815104.0, - "62": 744815104.0, - "63": 744815104.0, - "64": 744815104.0, - "65": 744815104.0, - "66": 744815104.0, - "67": 744815104.0, - "68": 744815104.0, - "69": 744815104.0, - "70": 744815104.0, - "71": 744815104.0, - "72": 744815104.0, - "73": 744815104.0, - "74": 744815104.0, - "75": 744815104.0, - "76": 744815104.0, - "77": 744815104.0, - "78": 744815104.0, - "79": 744815104.0, - "80": 744815104.0, - "81": 744815104.0, - "82": 744815104.0, - "83": 744815104.0, - "84": 744815104.0, - "85": 744815104.0, - "86": 744815104.0, - "87": 744815104.0, - "88": 744815104.0, - "89": 744815104.0, - "90": 744815104.0, - "91": 744815104.0, - "92": 744815104.0, - "93": 744815104.0, - "94": 744815104.0, - "95": 744815104.0, - "96": 744815104.0, - "97": 744815104.0, - "98": 744815104.0, - "99": 744815104.0, - "100": 744815104.0 + "1": 745732608.0, + "2": 745732608.0, + "3": 745732608.0, + "4": 745732608.0, + "5": 745732608.0, + "6": 745732608.0, + "7": 745732608.0, + "8": 745732608.0, + "9": 745732608.0, + "10": 745732608.0, + "11": 745732608.0, + "12": 745732608.0, + "13": 745732608.0, + "14": 745732608.0, + "15": 745732608.0, + "16": 745732608.0, + "17": 745732608.0, + "18": 745732608.0, + "19": 745732608.0, + "20": 745732608.0, + "21": 745732608.0, + "22": 745732608.0, + "23": 745732608.0, + "24": 745732608.0, + "25": 745732608.0, + "26": 745732608.0, + "27": 745732608.0, + "28": 745732608.0, + "29": 745732608.0, + "30": 745732608.0, + "31": 745732608.0, + "32": 745732608.0, + "33": 745732608.0, + "34": 745732608.0, + "35": 745732608.0, + "36": 745732608.0, + "37": 745732608.0, + "38": 745732608.0, + "39": 745732608.0, + "40": 745732608.0, + "41": 745732608.0, + "42": 745732608.0, + "43": 745732608.0, + "44": 745732608.0, + "45": 745732608.0, + "46": 745732608.0, + "47": 745732608.0, + "48": 745732608.0, + "49": 745732608.0, + "50": 745732608.0, + "51": 745732608.0, + "52": 745732608.0, + "53": 745732608.0, + "54": 745732608.0, + "55": 745732608.0, + "56": 745732608.0, + "57": 745732608.0, + "58": 745732608.0, + "59": 745732608.0, + "60": 745732608.0, + "61": 745732608.0, + "62": 745732608.0, + "63": 745732608.0, + "64": 745732608.0, + "65": 745732608.0, + "66": 745732608.0, + "67": 745732608.0, + "68": 745732608.0, + "69": 745732608.0, + "70": 745732608.0, + "71": 745732608.0, + "72": 745732608.0, + "73": 745732608.0, + "74": 745732608.0, + "75": 745732608.0, + "76": 745732608.0, + "77": 745732608.0, + "78": 745732608.0, + "79": 745732608.0, + "80": 745732608.0, + "81": 745732608.0, + "82": 745732608.0, + "83": 745732608.0, + "84": 745732608.0, + "85": 745732608.0, + "86": 745732608.0, + "87": 745732608.0, + "88": 745732608.0, + "89": 745732608.0, + "90": 745732608.0, + "91": 745732608.0, + "92": 745732608.0, + "93": 745732608.0, + "94": 745732608.0, + "95": 745732608.0, + "96": 745732608.0, + "97": 745732608.0, + "98": 745732608.0, + "99": 745732608.0, + "100": 745732608.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1928907776.0, - "2": 2210305536.0, - "3": 2210305536.0, - "4": 2210305536.0, - "5": 2210305536.0, - "6": 2210305536.0, - "7": 2210305536.0, - "8": 2210305536.0, - "9": 2210305536.0, - "10": 2210305536.0, - "11": 2210305536.0, - "12": 2210305536.0, - "13": 2210305536.0, - "14": 2210305536.0, - "15": 2210305536.0, - "16": 2210305536.0, - "17": 2210305536.0, - "18": 2210305536.0, - "19": 2210305536.0, - "20": 2210305536.0, - "21": 2210305536.0, - "22": 2210305536.0, - "23": 2210305536.0, - "24": 2210305536.0, - "25": 2210305536.0, - "26": 2210305536.0, - "27": 2210305536.0, - "28": 2210305536.0, - "29": 2210305536.0, - "30": 2210305536.0, - "31": 2210305536.0, - "32": 2210305536.0, - "33": 2210305536.0, - "34": 2210305536.0, - "35": 2210305536.0, - "36": 2210305536.0, - "37": 2210305536.0, - "38": 2210305536.0, - "39": 2210305536.0, - "40": 2210305536.0, - "41": 2210305536.0, - "42": 2210305536.0, - "43": 2210305536.0, - "44": 2210305536.0, - "45": 2210305536.0, - "46": 2210305536.0, - "47": 2210305536.0, - "48": 2210305536.0, - "49": 2210305536.0, - "50": 2210305536.0, - "51": 2210305536.0, - "52": 2210305536.0, - "53": 2210305536.0, - "54": 2210305536.0, - "55": 2210305536.0, - "56": 2210305536.0, - "57": 2210305536.0, - "58": 2210305536.0, - "59": 2210305536.0, - "60": 2210305536.0, - "61": 2210305536.0, - "62": 2210305536.0, - "63": 2210305536.0, - "64": 2210305536.0, - "65": 2210305536.0, - "66": 2210305536.0, - "67": 2210305536.0, - "68": 2210305536.0, - "69": 2210305536.0, - "70": 2210305536.0, - "71": 2210305536.0, - "72": 2210305536.0, - "73": 2210305536.0, - "74": 2210305536.0, - "75": 2210305536.0, - "76": 2210305536.0, - "77": 2210305536.0, - "78": 2210305536.0, - "79": 2210305536.0, - "80": 2210305536.0, - "81": 2210305536.0, - "82": 2210305536.0, - "83": 2210305536.0, - "84": 2210305536.0, - "85": 2210305536.0, - "86": 2210305536.0, - "87": 2210305536.0, - "88": 2210305536.0, - "89": 2210305536.0, - "90": 2210305536.0, - "91": 2210305536.0, - "92": 2210305536.0, - "93": 2210305536.0, - "94": 2210305536.0, - "95": 2210305536.0, - "96": 2210305536.0, - "97": 2210305536.0, - "98": 2210305536.0, - "99": 2210305536.0, - "100": 2210305536.0 + "1": 1927859200.0, + "2": 2211485184.0, + "3": 2211485184.0, + "4": 2211485184.0, + "5": 2211485184.0, + "6": 2211485184.0, + "7": 2211485184.0, + "8": 2211485184.0, + "9": 2211485184.0, + "10": 2211485184.0, + "11": 2211485184.0, + "12": 2211485184.0, + "13": 2211485184.0, + "14": 2211485184.0, + "15": 2211485184.0, + "16": 2211485184.0, + "17": 2211485184.0, + "18": 2211485184.0, + "19": 2211485184.0, + "20": 2211485184.0, + "21": 2211485184.0, + "22": 2211485184.0, + "23": 2211485184.0, + "24": 2211485184.0, + "25": 2211485184.0, + "26": 2211485184.0, + "27": 2211485184.0, + "28": 2211485184.0, + "29": 2211485184.0, + "30": 2211485184.0, + "31": 2211485184.0, + "32": 2211485184.0, + "33": 2211485184.0, + "34": 2211485184.0, + "35": 2211485184.0, + "36": 2211485184.0, + "37": 2211485184.0, + "38": 2211485184.0, + "39": 2211485184.0, + "40": 2211485184.0, + "41": 2211485184.0, + "42": 2211485184.0, + "43": 2211485184.0, + "44": 2211485184.0, + "45": 2211485184.0, + "46": 2211485184.0, + "47": 2211485184.0, + "48": 2211485184.0, + "49": 2211485184.0, + "50": 2211485184.0, + "51": 2211485184.0, + "52": 2211485184.0, + "53": 2211485184.0, + "54": 2211485184.0, + "55": 2211485184.0, + "56": 2211485184.0, + "57": 2211485184.0, + "58": 2211485184.0, + "59": 2211485184.0, + "60": 2211485184.0, + "61": 2211485184.0, + "62": 2211485184.0, + "63": 2211485184.0, + "64": 2211485184.0, + "65": 2211485184.0, + "66": 2211485184.0, + "67": 2211485184.0, + "68": 2211485184.0, + "69": 2211485184.0, + "70": 2211485184.0, + "71": 2211485184.0, + "72": 2211485184.0, + "73": 2211485184.0, + "74": 2211485184.0, + "75": 2211485184.0, + "76": 2211485184.0, + "77": 2211485184.0, + "78": 2211485184.0, + "79": 2211485184.0, + "80": 2211485184.0, + "81": 2211485184.0, + "82": 2211485184.0, + "83": 2211485184.0, + "84": 2211485184.0, + "85": 2211485184.0, + "86": 2211485184.0, + "87": 2211485184.0, + "88": 2211485184.0, + "89": 2211485184.0, + "90": 2211485184.0, + "91": 2211485184.0, + "92": 2211485184.0, + "93": 2211485184.0, + "94": 2211485184.0, + "95": 2211485184.0, + "96": 2211485184.0, + "97": 2211485184.0, + "98": 2211485184.0, + "99": 2211485184.0, + "100": 2211485184.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 38.50475, - "2": 0.14031, - "3": 0.11652, - "4": 0.09549, - "5": 0.09354, - "6": 0.09569, - "7": 0.09409, - "8": 0.09473, - "9": 0.09388, - "10": 0.09459, - "11": 0.09596, - "12": 0.09466, - "13": 0.09509, - "14": 0.09586, - "15": 0.09314, - "16": 0.09368, - "17": 0.09468, - "18": 0.09494, - "19": 0.09289, - "20": 0.09427, - "21": 0.09599, - "22": 0.09701, - "23": 0.09665, - "24": 0.09712, - "25": 0.09542, - "26": 0.09515, - "27": 0.09642, - "28": 0.09519, - "29": 0.09691, - "30": 0.09651, - "31": 0.09742, - "32": 0.09503, - "33": 0.09471, - "34": 0.09424, - "35": 0.09574, - "36": 0.09438, - "37": 0.09509, - "38": 0.09428, - "39": 0.09484, - "40": 0.09459, - "41": 0.0951, - "42": 0.09671, - "43": 0.09633, - "44": 0.09511, - "45": 0.09592, - "46": 0.09579, - "47": 0.09614, - "48": 0.09464, - "49": 0.0958, - "50": 0.09782, - "51": 0.10564, - "52": 0.09373, - "53": 0.09475, - "54": 0.09323, - "55": 0.09237, - "56": 0.09293, - "57": 0.09228, - "58": 0.0948, - "59": 0.09906, - "60": 0.10026, - "61": 0.09961, - "62": 0.09923, - "63": 0.09889, - "64": 0.09888, - "65": 0.09925, - "66": 0.1, - "67": 0.09782, - "68": 0.09891, - "69": 0.09132, - "70": 0.09102, - "71": 0.091, - "72": 0.09368, - "73": 0.09219, - "74": 0.09374, - "75": 0.09232, - "76": 0.09428, - "77": 0.09256, - "78": 0.09623, - "79": 0.09624, - "80": 0.09622, - "81": 0.09668, - "82": 0.09651, - "83": 0.10042, - "84": 0.09998, - "85": 0.10102, - "86": 0.09975, - "87": 0.09955, - "88": 0.10135, - "89": 0.10038, - "90": 0.09933, - "91": 0.10071, - "92": 0.09992, - "93": 0.10054, - "94": 0.09927, - "95": 0.0998, - "96": 0.101, - "97": 0.09268, - "98": 0.09188, - "99": 0.09185, - "100": 0.09107 + "1": "nan", + "2": 6.67128, + "3": 0.11715, + "4": 0.10264, + "5": 0.10169, + "6": 0.10189, + "7": 0.10092, + "8": 0.09811, + "9": 0.09543, + "10": 0.09613, + "11": 0.09543, + "12": 0.09592, + "13": 0.09622, + "14": 0.09609, + "15": 0.09586, + "16": 0.09588, + "17": 0.09644, + "18": 0.09588, + "19": 0.09609, + "20": 0.09593, + "21": 0.09624, + "22": 0.09621, + "23": 0.09548, + "24": 0.09544, + "25": 0.09527, + "26": 0.09554, + "27": 0.09553, + "28": 0.09493, + "29": 0.09537, + "30": 0.09579, + "31": 0.09786, + "32": 0.09613, + "33": 0.09742, + "34": 0.09746, + "35": 0.09803, + "36": 0.09784, + "37": 0.09789, + "38": 0.09536, + "39": 0.09891, + "40": 0.0955, + "41": 0.09548, + "42": 0.09562, + "43": 0.09684, + "44": 0.09596, + "45": 0.09549, + "46": 0.09634, + "47": 0.09586, + "48": 0.09609, + "49": 0.09592, + "50": 0.09588, + "51": 0.10923, + "52": 0.09944, + "53": 0.09884, + "54": 0.09808, + "55": 0.09806, + "56": 0.09755, + "57": 0.09795, + "58": 0.09523, + "59": 0.0943, + "60": 0.0944, + "61": 0.09522, + "62": 0.09461, + "63": 0.09584, + "64": 0.09662, + "65": 0.10008, + "66": 0.09542, + "67": 0.0954, + "68": 0.09384, + "69": 0.09458, + "70": 0.09508, + "71": 0.09433, + "72": 0.0945, + "73": 0.09505, + "74": 0.09509, + "75": 0.09513, + "76": 0.09549, + "77": 0.1005, + "78": 0.1006, + "79": 0.10067, + "80": 0.10033, + "81": 0.10038, + "82": 0.10023, + "83": 0.09673, + "84": 0.0947, + "85": 0.09569, + "86": 0.09555, + "87": 0.09437, + "88": 0.09428, + "89": 0.09456, + "90": 0.09449, + "91": 0.09443, + "92": 0.09552, + "93": 0.09456, + "94": 0.09497, + "95": 0.09414, + "96": 0.09502, + "97": 0.09534, + "98": 0.09476, + "99": 0.09577, + "100": 0.09533 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json index 10988c85257..3029be3a708 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_gb200.json @@ -8,52 +8,52 @@ "2": 10.83322, "3": 10.82737, "4": 10.79588, - "5": 10.85708, - "6": 10.86392, - "7": 10.8269, - "8": 10.82588, - "9": 10.83699, - "10": 10.79719, - "11": 10.87851, - "12": 10.85797, - "13": 10.85368, - "14": 10.87548, - "15": 10.79177, - "16": 10.80301, + "5": 10.85705, + "6": 10.8639, + "7": 10.82692, + "8": 10.82592, + "9": 10.83704, + "10": 10.7972, + "11": 10.87853, + "12": 10.85795, + "13": 10.85374, + "14": 10.8755, + "15": 10.79182, + "16": 10.80298, "17": 10.7745, - "18": 10.80399, + "18": 10.80403, "19": 10.79365, - "20": 10.69588, + "20": 10.69587, "21": 10.6855, - "22": 10.53152, - "23": 10.70658, - "24": 10.57319, + "22": 10.5315, + "23": 10.7066, + "24": 10.57324, "25": 10.51545, - "26": 10.59076, + "26": 10.59072, "27": 10.60738, - "28": 10.57025, - "29": 10.58904, - "30": 10.34674, + "28": 10.57026, + "29": 10.58903, + "30": 10.34678, "31": 10.07736, - "32": 10.46317, - "33": 10.45705, + "32": 10.46319, + "33": 10.45702, "34": 10.19923, - "35": 10.25593, - "36": 10.21246, - "37": 10.34689, - "38": 10.18008, - "39": 10.40796, - "40": 10.07602, - "41": 10.12935, - "42": 10.21132, - "43": 9.81692, + "35": 10.25594, + "36": 10.2125, + "37": 10.3469, + "38": 10.1801, + "39": 10.40797, + "40": 10.07599, + "41": 10.12931, + "42": 10.21136, + "43": 9.81697, "44": 9.94027, - "45": 9.817, - "46": 9.80608, + "45": 9.81697, + "46": 9.80603, "47": 10.12473, - "48": 9.84047, - "49": 9.50975, - "50": 9.88932 + "48": 9.84051, + "49": 9.50971, + "50": 9.88935 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1691.0, - "2": 1553.0, - "3": 1673.0, - "4": 1760.0, - "5": 1852.0, - "6": 1861.0, - "7": 1852.0, - "8": 1755.0, - "9": 1952.0, - "10": 1427.0, - "11": 1857.0, - "12": 1820.0, - "13": 1948.0, - "14": 1828.0, - "15": 1913.0, - "16": 1881.0, - "17": 1770.0, - "18": 1683.0, - "19": 1784.0, - "20": 1714.0, - "21": 1969.0, - "22": 1701.0, - "23": 1972.0, - "24": 1545.0, - "25": 1537.0, - "26": 1650.0, - "27": 1770.0, - "28": 1889.0, - "29": 1946.0, - "30": 2031.0, - "31": 1511.0, - "32": 1848.0, - "33": 2009.0, - "34": 1749.0, - "35": 1978.0, - "36": 1926.0, - "37": 2358.0, - "38": 2036.0, - "39": 2202.0, - "40": 2015.0, - "41": 2184.0, - "42": 2304.0, - "43": 2079.0, - "44": 2042.0, - "45": 2082.0, - "46": 2206.0, - "47": 2417.0, - "48": 2284.0, - "49": 2231.0, - "50": 2430.0 + "1": 1692.0, + "2": 1562.0, + "3": 1659.0, + "4": 1661.0, + "5": 1890.0, + "6": 1885.0, + "7": 1867.0, + "8": 1651.0, + "9": 1897.0, + "10": 1425.0, + "11": 1904.0, + "12": 1768.0, + "13": 1970.0, + "14": 1771.0, + "15": 1880.0, + "16": 1857.0, + "17": 1803.0, + "18": 1721.0, + "19": 1761.0, + "20": 1752.0, + "21": 1981.0, + "22": 1699.0, + "23": 2007.0, + "24": 1696.0, + "25": 1607.0, + "26": 1733.0, + "27": 1771.0, + "28": 1882.0, + "29": 1867.0, + "30": 1994.0, + "31": 1541.0, + "32": 1906.0, + "33": 2052.0, + "34": 1850.0, + "35": 1995.0, + "36": 1956.0, + "37": 2351.0, + "38": 2181.0, + "39": 2298.0, + "40": 2103.0, + "41": 2115.0, + "42": 2326.0, + "43": 1958.0, + "44": 2145.0, + "45": 2066.0, + "46": 2223.0, + "47": 2478.0, + "48": 2352.0, + "49": 2254.0, + "50": 2356.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 552193536.0, - "2": 552193536.0, - "3": 553242112.0, - "4": 553242112.0, - "5": 552193536.0, - "6": 553242112.0, - "7": 553242112.0, - "8": 553242112.0, - "9": 553242112.0, - "10": 553242112.0, - "11": 553242112.0, - "12": 552193536.0, - "13": 552193536.0, - "14": 552193536.0, - "15": 552193536.0, - "16": 553242112.0, - "17": 553242112.0, - "18": 552193536.0, - "19": 553242112.0, - "20": 553242112.0, - "21": 553242112.0, - "22": 552193536.0, - "23": 553242112.0, - "24": 553242112.0, - "25": 553242112.0, - "26": 553242112.0, - "27": 553242112.0, - "28": 553242112.0, - "29": 553242112.0, - "30": 553242112.0, - "31": 552193536.0, - "32": 552193536.0, - "33": 553242112.0, - "34": 553242112.0, - "35": 552193536.0, - "36": 553242112.0, - "37": 552193536.0, - "38": 552193536.0, - "39": 552193536.0, - "40": 552193536.0, - "41": 552193536.0, - "42": 552193536.0, - "43": 552193536.0, - "44": 552193536.0, - "45": 552193536.0, - "46": 552193536.0, - "47": 552193536.0, - "48": 552193536.0, - "49": 552193536.0, - "50": 553242112.0 + "1": 551137792.0, + "2": 551137792.0, + "3": 551137792.0, + "4": 551137792.0, + "5": 551137792.0, + "6": 551137792.0, + "7": 551137792.0, + "8": 551137792.0, + "9": 551137792.0, + "10": 551137792.0, + "11": 551137792.0, + "12": 551137792.0, + "13": 551137792.0, + "14": 551137792.0, + "15": 551137792.0, + "16": 551137792.0, + "17": 551137792.0, + "18": 551137792.0, + "19": 551137792.0, + "20": 551137792.0, + "21": 551137792.0, + "22": 551137792.0, + "23": 551137792.0, + "24": 551137792.0, + "25": 551137792.0, + "26": 551137792.0, + "27": 551137792.0, + "28": 551137792.0, + "29": 551137792.0, + "30": 551137792.0, + "31": 551137792.0, + "32": 551137792.0, + "33": 551137792.0, + "34": 551137792.0, + "35": 551137792.0, + "36": 551137792.0, + "37": 551137792.0, + "38": 551137792.0, + "39": 551137792.0, + "40": 551137792.0, + "41": 551137792.0, + "42": 551137792.0, + "43": 551137792.0, + "44": 551137792.0, + "45": 551137792.0, + "46": 551137792.0, + "47": 551137792.0, + "48": 551137792.0, + "49": 551137792.0, + "50": 551137792.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 3798208000.0, - "2": 3942086144.0, - "3": 3942086144.0, - "4": 3942086144.0, - "5": 3942086144.0, - "6": 3942086144.0, - "7": 3942086144.0, - "8": 3942086144.0, - "9": 3942086144.0, - "10": 3942086144.0, - "11": 3942086144.0, - "12": 3942086144.0, - "13": 3942086144.0, - "14": 3942086144.0, - "15": 3942086144.0, - "16": 3942086144.0, - "17": 3942086144.0, - "18": 3942086144.0, - "19": 3942086144.0, - "20": 3942086144.0, - "21": 3942086144.0, - "22": 3942086144.0, - "23": 3942086144.0, - "24": 3942086144.0, - "25": 3942086144.0, - "26": 3942086144.0, - "27": 3942086144.0, - "28": 3942086144.0, - "29": 3942086144.0, - "30": 3942086144.0, - "31": 3942086144.0, - "32": 3942086144.0, - "33": 3942086144.0, - "34": 3942086144.0, - "35": 3942086144.0, - "36": 3942086144.0, - "37": 3942086144.0, - "38": 3942086144.0, - "39": 3942086144.0, - "40": 3942086144.0, - "41": 3942086144.0, - "42": 3942086144.0, - "43": 3942086144.0, - "44": 3942086144.0, - "45": 3942086144.0, - "46": 3942086144.0, - "47": 3942086144.0, - "48": 3942086144.0, - "49": 3942086144.0, - "50": 3942086144.0 + "2": 3940900352.0, + "3": 3940900352.0, + "4": 3940900352.0, + "5": 3940900352.0, + "6": 3940900352.0, + "7": 3940900352.0, + "8": 3940900352.0, + "9": 3940900352.0, + "10": 3940900352.0, + "11": 3940900352.0, + "12": 3940900352.0, + "13": 3940900352.0, + "14": 3940900352.0, + "15": 3940900352.0, + "16": 3940900352.0, + "17": 3940900352.0, + "18": 3940900352.0, + "19": 3940900352.0, + "20": 3940900352.0, + "21": 3940900352.0, + "22": 3940900352.0, + "23": 3940900352.0, + "24": 3940900352.0, + "25": 3940900352.0, + "26": 3940900352.0, + "27": 3940900352.0, + "28": 3940900352.0, + "29": 3940900352.0, + "30": 3940900352.0, + "31": 3940900352.0, + "32": 3940900352.0, + "33": 3940900352.0, + "34": 3940900352.0, + "35": 3940900352.0, + "36": 3940900352.0, + "37": 3940900352.0, + "38": 3940900352.0, + "39": 3940900352.0, + "40": 3940900352.0, + "41": 3940900352.0, + "42": 3940900352.0, + "43": 3940900352.0, + "44": 3940900352.0, + "45": 3940900352.0, + "46": 3940900352.0, + "47": 3940900352.0, + "48": 3940900352.0, + "49": 3940900352.0, + "50": 3940900352.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.84171, - "3": 0.13294, - "4": 0.11994, - "5": 0.11682, - "6": 0.11799, - "7": 0.12021, - "8": 0.11949, - "9": 0.1195, - "10": 0.12086, - "11": 0.21563, - "12": 0.12013, - "13": 0.1204, - "14": 0.1188, - "15": 0.1192, - "16": 0.11917, - "17": 0.11999, - "18": 0.12006, - "19": 0.11965, - "20": 0.12016, - "21": 0.21525, - "22": 0.11978, - "23": 0.12009, - "24": 0.12004, - "25": 0.12129, - "26": 0.12041, - "27": 0.12075, - "28": 0.12015, - "29": 0.1204, - "30": 0.12048, - "31": 0.21709, - "32": 0.12108, - "33": 0.11972, - "34": 0.12, - "35": 0.11969, - "36": 0.11944, - "37": 0.11946, - "38": 0.12056, - "39": 0.12045, - "40": 0.12052, - "41": 0.21777, - "42": 0.12063, - "43": 0.12165, - "44": 0.1204, - "45": 0.12036, - "46": 0.12154, - "47": 0.12043, - "48": 0.12145, - "49": 0.12079, - "50": 0.12035 + "2": 10.17412, + "3": 0.14904, + "4": 0.12759, + "5": 0.1274, + "6": 0.12621, + "7": 0.12496, + "8": 0.44001, + "9": 0.12775, + "10": 0.12927, + "11": 0.23731, + "12": 0.12987, + "13": 0.12779, + "14": 0.12929, + "15": 0.12998, + "16": 0.12594, + "17": 0.12925, + "18": 0.12899, + "19": 0.1293, + "20": 0.1284, + "21": 0.22948, + "22": 0.13102, + "23": 0.12998, + "24": 0.13382, + "25": 0.13352, + "26": 0.13255, + "27": 0.13108, + "28": 0.13178, + "29": 0.13341, + "30": 0.13286, + "31": 0.24189, + "32": 0.12944, + "33": 0.1295, + "34": 0.12994, + "35": 0.12844, + "36": 0.12999, + "37": 0.13026, + "38": 0.13104, + "39": 0.13047, + "40": 0.13073, + "41": 0.23481, + "42": 0.1338, + "43": 0.1343, + "44": 0.1326, + "45": 0.13438, + "46": 0.13215, + "47": 0.13101, + "48": 0.1292, + "49": 0.13175, + "50": 0.13038 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json index ea2f72181ea..732b7f7f1de 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer/golden_values_dev_dgx_h100.json @@ -7,53 +7,53 @@ "1": 10.84523, "2": 10.85412, "3": 10.85365, - "4": 10.83867, - "5": 10.87428, - "6": 10.89334, + "4": 10.8387, + "5": 10.87429, + "6": 10.89335, "7": 10.8541, - "8": 10.86235, + "8": 10.86234, "9": 10.86352, - "10": 10.82859, - "11": 10.88772, - "12": 10.87148, - "13": 10.87938, - "14": 10.89123, - "15": 10.81927, - "16": 10.83063, + "10": 10.82861, + "11": 10.88774, + "12": 10.87154, + "13": 10.8794, + "14": 10.89126, + "15": 10.81919, + "16": 10.83062, "17": 10.79878, - "18": 10.81771, - "19": 10.81957, + "18": 10.81768, + "19": 10.81959, "20": 10.72749, "21": 10.70552, - "22": 10.56396, - "23": 10.72823, + "22": 10.56397, + "23": 10.72819, "24": 10.60839, - "25": 10.55198, - "26": 10.60868, - "27": 10.62879, - "28": 10.58271, - "29": 10.59982, - "30": 10.36511, - "31": 10.12096, - "32": 10.47628, + "25": 10.55196, + "26": 10.6087, + "27": 10.62882, + "28": 10.58272, + "29": 10.59986, + "30": 10.36507, + "31": 10.12097, + "32": 10.47625, "33": 10.46906, - "34": 10.22326, - "35": 10.27848, - "36": 10.22883, - "37": 10.35947, - "38": 10.19331, - "39": 10.41586, - "40": 10.09773, - "41": 10.15718, - "42": 10.22441, - "43": 9.83281, - "44": 9.96935, - "45": 9.84205, - "46": 9.83017, - "47": 10.15602, - "48": 9.85503, - "49": 9.54049, - "50": 9.91258 + "34": 10.22325, + "35": 10.27849, + "36": 10.22879, + "37": 10.35946, + "38": 10.19333, + "39": 10.41588, + "40": 10.09771, + "41": 10.15713, + "42": 10.22445, + "43": 9.83283, + "44": 9.96941, + "45": 9.84201, + "46": 9.83021, + "47": 10.15605, + "48": 9.85509, + "49": 9.54052, + "50": 9.91255 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1725.0, - "2": 1664.0, - "3": 1710.0, - "4": 1712.0, - "5": 1834.0, - "6": 1743.0, - "7": 1803.0, - "8": 1737.0, - "9": 1779.0, - "10": 1459.0, - "11": 1898.0, - "12": 1661.0, - "13": 1860.0, - "14": 1764.0, - "15": 1886.0, - "16": 1916.0, - "17": 1773.0, - "18": 1702.0, - "19": 1742.0, - "20": 1649.0, - "21": 1899.0, - "22": 1631.0, - "23": 1960.0, - "24": 1570.0, - "25": 1647.0, - "26": 1649.0, - "27": 1811.0, - "28": 1930.0, - "29": 1910.0, - "30": 1964.0, - "31": 1536.0, - "32": 1873.0, - "33": 2191.0, - "34": 1838.0, - "35": 2017.0, - "36": 1916.0, - "37": 2345.0, - "38": 2247.0, - "39": 2374.0, - "40": 2207.0, - "41": 2246.0, - "42": 2291.0, - "43": 2027.0, - "44": 2147.0, - "45": 2164.0, - "46": 2300.0, - "47": 2418.0, - "48": 2467.0, - "49": 2255.0, - "50": 2224.0 + "1": 1716.0, + "2": 1716.0, + "3": 1713.0, + "4": 1771.0, + "5": 1870.0, + "6": 1787.0, + "7": 1797.0, + "8": 1662.0, + "9": 1857.0, + "10": 1418.0, + "11": 1931.0, + "12": 1675.0, + "13": 1893.0, + "14": 1787.0, + "15": 1973.0, + "16": 1896.0, + "17": 1851.0, + "18": 1766.0, + "19": 1792.0, + "20": 1589.0, + "21": 1815.0, + "22": 1678.0, + "23": 1939.0, + "24": 1611.0, + "25": 1600.0, + "26": 1760.0, + "27": 1842.0, + "28": 1948.0, + "29": 1973.0, + "30": 2003.0, + "31": 1557.0, + "32": 1832.0, + "33": 2126.0, + "34": 1890.0, + "35": 2066.0, + "36": 1915.0, + "37": 2356.0, + "38": 2219.0, + "39": 2394.0, + "40": 2140.0, + "41": 2207.0, + "42": 2136.0, + "43": 1991.0, + "44": 2123.0, + "45": 2102.0, + "46": 2379.0, + "47": 2497.0, + "48": 2457.0, + "49": 2239.0, + "50": 2251.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 553245184.0, - "2": 553245184.0, - "3": 553245184.0, - "4": 553245184.0, - "5": 553245184.0, - "6": 553245184.0, - "7": 553245184.0, - "8": 553245184.0, - "9": 553245184.0, - "10": 553245184.0, - "11": 553245184.0, - "12": 553245184.0, - "13": 553245184.0, - "14": 553245184.0, - "15": 553245184.0, - "16": 553245184.0, - "17": 553245184.0, - "18": 553245184.0, - "19": 553245184.0, - "20": 553245184.0, - "21": 553245184.0, - "22": 553245184.0, - "23": 553245184.0, - "24": 553245184.0, - "25": 553245184.0, - "26": 553245184.0, - "27": 553245184.0, - "28": 553245184.0, - "29": 553245184.0, - "30": 553245184.0, - "31": 553245184.0, - "32": 553245184.0, - "33": 553245184.0, - "34": 553245184.0, - "35": 553245184.0, - "36": 553245184.0, - "37": 553245184.0, - "38": 553245184.0, - "39": 553245184.0, - "40": 553245184.0, - "41": 553245184.0, - "42": 553245184.0, - "43": 553245184.0, - "44": 553245184.0, - "45": 553245184.0, - "46": 553245184.0, - "47": 553245184.0, - "48": 553245184.0, - "49": 553245184.0, - "50": 553245184.0 + "1": 552193536.0, + "2": 552193536.0, + "3": 552193536.0, + "4": 552193536.0, + "5": 552193536.0, + "6": 552193536.0, + "7": 552193536.0, + "8": 552193536.0, + "9": 552193536.0, + "10": 552193536.0, + "11": 552193536.0, + "12": 552193536.0, + "13": 552193536.0, + "14": 552193536.0, + "15": 552193536.0, + "16": 552193536.0, + "17": 552193536.0, + "18": 552193536.0, + "19": 552193536.0, + "20": 552193536.0, + "21": 552193536.0, + "22": 552193536.0, + "23": 552193536.0, + "24": 552193536.0, + "25": 552193536.0, + "26": 552193536.0, + "27": 552193536.0, + "28": 552193536.0, + "29": 552193536.0, + "30": 552193536.0, + "31": 552193536.0, + "32": 552193536.0, + "33": 552193536.0, + "34": 552193536.0, + "35": 552193536.0, + "36": 552193536.0, + "37": 552193536.0, + "38": 552193536.0, + "39": 552193536.0, + "40": 552193536.0, + "41": 552193536.0, + "42": 552193536.0, + "43": 552193536.0, + "44": 552193536.0, + "45": 552193536.0, + "46": 552193536.0, + "47": 552193536.0, + "48": 552193536.0, + "49": 552193536.0, + "50": 552193536.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3798208000.0, - "2": 3943137792.0, - "3": 3943137792.0, - "4": 3943137792.0, - "5": 3943137792.0, - "6": 3943137792.0, - "7": 3943137792.0, - "8": 3943137792.0, - "9": 3943137792.0, - "10": 3943137792.0, - "11": 3943137792.0, - "12": 3943137792.0, - "13": 3943137792.0, - "14": 3943137792.0, - "15": 3943137792.0, - "16": 3943137792.0, - "17": 3943137792.0, - "18": 3943137792.0, - "19": 3943137792.0, - "20": 3943137792.0, - "21": 3943137792.0, - "22": 3943137792.0, - "23": 3943137792.0, - "24": 3943137792.0, - "25": 3943137792.0, - "26": 3943137792.0, - "27": 3943137792.0, - "28": 3943137792.0, - "29": 3943137792.0, - "30": 3943137792.0, - "31": 3943137792.0, - "32": 3943137792.0, - "33": 3943137792.0, - "34": 3943137792.0, - "35": 3943137792.0, - "36": 3943137792.0, - "37": 3943137792.0, - "38": 3943137792.0, - "39": 3943137792.0, - "40": 3943137792.0, - "41": 3943137792.0, - "42": 3943137792.0, - "43": 3943137792.0, - "44": 3943137792.0, - "45": 3943137792.0, - "46": 3943137792.0, - "47": 3943137792.0, - "48": 3943137792.0, - "49": 3943137792.0, - "50": 3943137792.0 + "1": 3799125504.0, + "2": 3941955072.0, + "3": 3941955072.0, + "4": 3941955072.0, + "5": 3941955072.0, + "6": 3941955072.0, + "7": 3941955072.0, + "8": 3941955072.0, + "9": 3941955072.0, + "10": 3941955072.0, + "11": 3941955072.0, + "12": 3941955072.0, + "13": 3941955072.0, + "14": 3941955072.0, + "15": 3941955072.0, + "16": 3941955072.0, + "17": 3941955072.0, + "18": 3941955072.0, + "19": 3941955072.0, + "20": 3941955072.0, + "21": 3941955072.0, + "22": 3941955072.0, + "23": 3941955072.0, + "24": 3941955072.0, + "25": 3941955072.0, + "26": 3941955072.0, + "27": 3941955072.0, + "28": 3941955072.0, + "29": 3941955072.0, + "30": 3941955072.0, + "31": 3941955072.0, + "32": 3941955072.0, + "33": 3941955072.0, + "34": 3941955072.0, + "35": 3941955072.0, + "36": 3941955072.0, + "37": 3941955072.0, + "38": 3941955072.0, + "39": 3941955072.0, + "40": 3941955072.0, + "41": 3941955072.0, + "42": 3941955072.0, + "43": 3941955072.0, + "44": 3941955072.0, + "45": 3941955072.0, + "46": 3941955072.0, + "47": 3941955072.0, + "48": 3941955072.0, + "49": 3941955072.0, + "50": 3941955072.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.97343, - "2": 0.13214, - "3": 0.11635, - "4": 0.09459, - "5": 0.0948, - "6": 0.09321, - "7": 0.09394, - "8": 0.09525, - "9": 0.09364, - "10": 0.09321, - "11": 0.22069, - "12": 0.09263, - "13": 0.09317, - "14": 0.09315, - "15": 0.09254, - "16": 0.09554, - "17": 0.09332, - "18": 0.09352, - "19": 0.09438, - "20": 0.09298, - "21": 0.22042, - "22": 0.09282, - "23": 0.09311, - "24": 0.09404, - "25": 0.09412, - "26": 0.09311, - "27": 0.09293, - "28": 0.09243, - "29": 0.09294, - "30": 0.09541, - "31": 0.22042, - "32": 0.09422, - "33": 0.09281, - "34": 0.09264, - "35": 0.09337, - "36": 0.09247, - "37": 0.09252, - "38": 0.09352, - "39": 0.09297, - "40": 0.09265, - "41": 0.22109, - "42": 0.09577, - "43": 0.09321, - "44": 0.0937, - "45": 0.09442, - "46": 0.09283, - "47": 0.09255, - "48": 0.09325, - "49": 0.09296, - "50": 0.09323 + "1": "nan", + "2": 8.82316, + "3": 0.11598, + "4": 0.09224, + "5": 0.09137, + "6": 0.09137, + "7": 0.09193, + "8": 0.09275, + "9": 0.09168, + "10": 0.09114, + "11": 0.21733, + "12": 0.09151, + "13": 0.09081, + "14": 0.09911, + "15": 0.10239, + "16": 0.09986, + "17": 0.09258, + "18": 0.09121, + "19": 0.09129, + "20": 0.09092, + "21": 0.21644, + "22": 0.09099, + "23": 0.09097, + "24": 0.09123, + "25": 0.0916, + "26": 0.09084, + "27": 0.09187, + "28": 0.09127, + "29": 0.09167, + "30": 0.09155, + "31": 0.21721, + "32": 0.09166, + "33": 0.09144, + "34": 0.0919, + "35": 0.09076, + "36": 0.09136, + "37": 0.09169, + "38": 0.09204, + "39": 0.09238, + "40": 0.09176, + "41": 0.21641, + "42": 0.09152, + "43": 0.09202, + "44": 0.09101, + "45": 0.09119, + "46": 0.09194, + "47": 0.09123, + "48": 0.09155, + "49": 0.09109, + "50": 0.09125 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json index 82352c11781..893627e9928 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_h100.json @@ -7,103 +7,103 @@ "1": 10.84517, "2": 10.85349, "3": 10.8539, - "4": 10.83825, - "5": 10.87427, - "6": 10.89307, - "7": 10.85454, - "8": 10.8626, - "9": 10.86464, - "10": 10.82906, - "11": 10.88792, - "12": 10.87099, - "13": 10.87921, - "14": 10.89078, - "15": 10.81976, - "16": 10.83158, - "17": 10.79868, - "18": 10.81672, - "19": 10.81919, - "20": 10.72701, - "21": 10.70594, - "22": 10.56367, - "23": 10.72804, - "24": 10.60832, - "25": 10.5522, - "26": 10.60853, + "4": 10.83828, + "5": 10.87424, + "6": 10.89306, + "7": 10.85457, + "8": 10.86259, + "9": 10.86462, + "10": 10.82907, + "11": 10.8879, + "12": 10.87098, + "13": 10.87919, + "14": 10.8908, + "15": 10.81975, + "16": 10.83162, + "17": 10.79865, + "18": 10.81669, + "19": 10.81918, + "20": 10.727, + "21": 10.70593, + "22": 10.56362, + "23": 10.72805, + "24": 10.60829, + "25": 10.55217, + "26": 10.60848, "27": 10.62847, - "28": 10.58306, - "29": 10.60011, - "30": 10.36616, - "31": 10.12043, + "28": 10.58307, + "29": 10.60015, + "30": 10.36612, + "31": 10.12045, "32": 10.47685, - "33": 10.46868, - "34": 10.22316, - "35": 10.27781, - "36": 10.22892, - "37": 10.35949, - "38": 10.19369, - "39": 10.41549, - "40": 10.09758, - "41": 10.1573, + "33": 10.46875, + "34": 10.22315, + "35": 10.27784, + "36": 10.22891, + "37": 10.35947, + "38": 10.1937, + "39": 10.41555, + "40": 10.09761, + "41": 10.15736, "42": 10.22398, - "43": 9.83289, - "44": 9.96912, - "45": 9.84191, - "46": 9.83041, - "47": 10.15626, - "48": 9.85486, - "49": 9.54086, - "50": 9.91248, + "43": 9.83285, + "44": 9.96914, + "45": 9.84192, + "46": 9.83042, + "47": 10.15631, + "48": 9.85485, + "49": 9.54087, + "50": 9.91251, "51": 9.85868, "52": 9.74284, - "53": 10.06645, - "54": 9.95167, - "55": 9.88096, - "56": 9.62626, - "57": 9.47768, - "58": 9.83346, - "59": 9.58526, - "60": 9.50125, - "61": 9.69182, + "53": 10.06641, + "54": 9.9517, + "55": 9.88095, + "56": 9.62629, + "57": 9.47765, + "58": 9.83351, + "59": 9.58521, + "60": 9.50123, + "61": 9.69184, "62": 9.98853, - "63": 9.38476, - "64": 9.7803, + "63": 9.38478, + "64": 9.78032, "65": 8.94762, - "66": 9.70856, - "67": 9.36852, + "66": 9.70859, + "67": 9.36851, "68": 9.78439, - "69": 9.79406, - "70": 9.74241, - "71": 9.61808, - "72": 9.58428, - "73": 9.5035, + "69": 9.79409, + "70": 9.7424, + "71": 9.61806, + "72": 9.58426, + "73": 9.50352, "74": 8.94221, - "75": 9.42529, - "76": 9.07408, + "75": 9.42533, + "76": 9.07406, "77": 10.06351, - "78": 9.7208, - "79": 9.37294, - "80": 9.40396, - "81": 9.48168, - "82": 9.69778, + "78": 9.72081, + "79": 9.37293, + "80": 9.40391, + "81": 9.48166, + "82": 9.69775, "83": 9.30714, - "84": 9.41712, - "85": 9.61407, - "86": 9.07615, - "87": 9.59094, - "88": 9.74641, - "89": 9.59993, + "84": 9.41711, + "85": 9.61405, + "86": 9.07616, + "87": 9.59093, + "88": 9.74637, + "89": 9.5999, "90": 9.8142, - "91": 9.33773, + "91": 9.33769, "92": 9.35373, - "93": 9.07395, - "94": 8.83173, - "95": 9.51734, - "96": 9.52415, - "97": 9.30995, - "98": 9.66805, - "99": 8.88588, - "100": 9.39538 + "93": 9.07397, + "94": 8.83167, + "95": 9.51732, + "96": 9.52416, + "97": 9.30996, + "98": 9.66808, + "99": 8.88589, + "100": 9.3954 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1655.0, - "2": 1697.0, - "3": 1724.0, - "4": 1720.0, - "5": 1803.0, - "6": 1772.0, - "7": 1811.0, - "8": 1766.0, - "9": 1750.0, - "10": 1413.0, - "11": 1861.0, - "12": 1650.0, - "13": 1895.0, - "14": 1662.0, - "15": 1951.0, - "16": 1998.0, - "17": 1798.0, - "18": 1687.0, - "19": 1856.0, - "20": 1561.0, - "21": 1882.0, - "22": 1652.0, - "23": 2075.0, - "24": 1606.0, - "25": 1665.0, - "26": 1686.0, - "27": 1839.0, - "28": 2053.0, - "29": 1907.0, - "30": 1893.0, - "31": 1581.0, - "32": 1791.0, - "33": 2149.0, - "34": 1872.0, - "35": 2010.0, - "36": 1799.0, - "37": 2311.0, - "38": 2221.0, - "39": 2261.0, - "40": 2188.0, - "41": 2204.0, - "42": 2300.0, - "43": 2001.0, - "44": 2119.0, - "45": 2126.0, - "46": 2374.0, - "47": 2468.0, - "48": 2405.0, - "49": 2247.0, - "50": 2250.0, - "51": 2607.0, - "52": 2618.0, - "53": 2828.0, - "54": 2730.0, - "55": 2351.0, - "56": 2753.0, - "57": 2323.0, - "58": 2809.0, - "59": 2721.0, - "60": 2440.0, - "61": 2875.0, - "62": 2726.0, - "63": 2444.0, - "64": 3001.0, - "65": 2602.0, - "66": 2981.0, - "67": 2676.0, - "68": 2623.0, - "69": 2802.0, - "70": 3234.0, - "71": 2902.0, - "72": 2337.0, - "73": 2856.0, - "74": 1903.0, - "75": 2388.0, - "76": 3118.0, - "77": 3108.0, - "78": 3122.0, - "79": 2994.0, - "80": 3186.0, - "81": 3470.0, - "82": 3164.0, - "83": 2726.0, - "84": 3214.0, - "85": 3262.0, - "86": 2602.0, - "87": 3658.0, - "88": 2906.0, - "89": 3054.0, - "90": 3018.0, - "91": 2690.0, - "92": 3106.0, - "93": 2701.0, - "94": 3263.0, - "95": 3426.0, - "96": 3405.0, - "97": 3087.0, - "98": 3510.0, - "99": 3148.0, - "100": 3204.0 + "1": 1675.0, + "2": 1642.0, + "3": 1630.0, + "4": 1741.0, + "5": 1842.0, + "6": 1858.0, + "7": 1843.0, + "8": 1692.0, + "9": 1851.0, + "10": 1499.0, + "11": 1935.0, + "12": 1645.0, + "13": 1938.0, + "14": 1752.0, + "15": 1847.0, + "16": 1933.0, + "17": 1748.0, + "18": 1652.0, + "19": 1798.0, + "20": 1552.0, + "21": 1823.0, + "22": 1683.0, + "23": 1961.0, + "24": 1616.0, + "25": 1516.0, + "26": 1826.0, + "27": 1824.0, + "28": 1971.0, + "29": 1904.0, + "30": 1951.0, + "31": 1568.0, + "32": 1841.0, + "33": 2040.0, + "34": 1795.0, + "35": 1941.0, + "36": 1902.0, + "37": 2378.0, + "38": 2211.0, + "39": 2376.0, + "40": 2269.0, + "41": 2086.0, + "42": 2171.0, + "43": 2026.0, + "44": 2148.0, + "45": 2138.0, + "46": 2407.0, + "47": 2548.0, + "48": 2473.0, + "49": 2307.0, + "50": 2200.0, + "51": 2646.0, + "52": 2597.0, + "53": 2875.0, + "54": 2667.0, + "55": 2366.0, + "56": 2633.0, + "57": 2419.0, + "58": 2913.0, + "59": 2719.0, + "60": 2304.0, + "61": 2878.0, + "62": 2604.0, + "63": 2310.0, + "64": 3014.0, + "65": 2662.0, + "66": 2935.0, + "67": 2777.0, + "68": 2567.0, + "69": 2720.0, + "70": 3182.0, + "71": 2927.0, + "72": 2354.0, + "73": 2920.0, + "74": 1876.0, + "75": 2549.0, + "76": 3014.0, + "77": 3257.0, + "78": 3140.0, + "79": 3023.0, + "80": 3187.0, + "81": 3516.0, + "82": 3107.0, + "83": 2717.0, + "84": 3120.0, + "85": 3310.0, + "86": 2725.0, + "87": 3711.0, + "88": 2945.0, + "89": 3220.0, + "90": 3030.0, + "91": 2669.0, + "92": 3050.0, + "93": 2599.0, + "94": 3259.0, + "95": 3202.0, + "96": 3444.0, + "97": 3002.0, + "98": 3582.0, + "99": 3110.0, + "100": 3038.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 551278080.0, - "2": 551278080.0, - "3": 551278080.0, - "4": 551278080.0, - "5": 551278080.0, - "6": 551278080.0, - "7": 551278080.0, - "8": 551278080.0, - "9": 551278080.0, - "10": 551278080.0, - "11": 551278080.0, - "12": 551278080.0, - "13": 551278080.0, - "14": 551278080.0, - "15": 551278080.0, - "16": 551278080.0, - "17": 551278080.0, - "18": 551278080.0, - "19": 551278080.0, - "20": 551278080.0, - "21": 551278080.0, - "22": 551278080.0, - "23": 551278080.0, - "24": 551278080.0, - "25": 551278080.0, - "26": 551278080.0, - "27": 551278080.0, - "28": 551278080.0, - "29": 551278080.0, - "30": 551278080.0, - "31": 551278080.0, - "32": 551278080.0, - "33": 551278080.0, - "34": 551278080.0, - "35": 551278080.0, - "36": 551278080.0, - "37": 551278080.0, - "38": 551278080.0, - "39": 551278080.0, - "40": 551278080.0, - "41": 551278080.0, - "42": 551278080.0, - "43": 551278080.0, - "44": 551278080.0, - "45": 551278080.0, - "46": 551278080.0, - "47": 551278080.0, - "48": 551278080.0, - "49": 551278080.0, - "50": 551278080.0, - "51": 551278080.0, - "52": 551278080.0, - "53": 551278080.0, - "54": 551278080.0, - "55": 551278080.0, - "56": 551278080.0, - "57": 551278080.0, - "58": 551278080.0, - "59": 551278080.0, - "60": 551278080.0, - "61": 551278080.0, - "62": 551278080.0, - "63": 551278080.0, - "64": 551278080.0, - "65": 551278080.0, - "66": 551278080.0, - "67": 551278080.0, - "68": 551278080.0, - "69": 551278080.0, - "70": 551278080.0, - "71": 551278080.0, - "72": 551278080.0, - "73": 551278080.0, - "74": 551278080.0, - "75": 551278080.0, - "76": 551278080.0, - "77": 551278080.0, - "78": 551278080.0, - "79": 551278080.0, - "80": 551278080.0, - "81": 551278080.0, - "82": 551278080.0, - "83": 551278080.0, - "84": 551278080.0, - "85": 551278080.0, - "86": 551278080.0, - "87": 551278080.0, - "88": 551278080.0, - "89": 551278080.0, - "90": 551278080.0, - "91": 551278080.0, - "92": 551278080.0, - "93": 551278080.0, - "94": 551278080.0, - "95": 551278080.0, - "96": 551278080.0, - "97": 551278080.0, - "98": 551278080.0, - "99": 551278080.0, - "100": 551278080.0 + "1": 551148032.0, + "2": 551148032.0, + "3": 551148032.0, + "4": 551148032.0, + "5": 551148032.0, + "6": 551148032.0, + "7": 551148032.0, + "8": 551148032.0, + "9": 551148032.0, + "10": 551148032.0, + "11": 551148032.0, + "12": 551148032.0, + "13": 551148032.0, + "14": 551148032.0, + "15": 551148032.0, + "16": 551148032.0, + "17": 551148032.0, + "18": 551148032.0, + "19": 551148032.0, + "20": 551148032.0, + "21": 551148032.0, + "22": 551148032.0, + "23": 551148032.0, + "24": 551148032.0, + "25": 551148032.0, + "26": 551148032.0, + "27": 551148032.0, + "28": 551148032.0, + "29": 551148032.0, + "30": 551148032.0, + "31": 551148032.0, + "32": 551148032.0, + "33": 551148032.0, + "34": 551148032.0, + "35": 551148032.0, + "36": 551148032.0, + "37": 551148032.0, + "38": 551148032.0, + "39": 551148032.0, + "40": 551148032.0, + "41": 551148032.0, + "42": 551148032.0, + "43": 551148032.0, + "44": 551148032.0, + "45": 551148032.0, + "46": 551148032.0, + "47": 551148032.0, + "48": 551148032.0, + "49": 551148032.0, + "50": 551148032.0, + "51": 551148032.0, + "52": 551148032.0, + "53": 551148032.0, + "54": 551148032.0, + "55": 551148032.0, + "56": 551148032.0, + "57": 551148032.0, + "58": 551148032.0, + "59": 551148032.0, + "60": 551148032.0, + "61": 551148032.0, + "62": 551148032.0, + "63": 551148032.0, + "64": 551148032.0, + "65": 551148032.0, + "66": 551148032.0, + "67": 551148032.0, + "68": 551148032.0, + "69": 551148032.0, + "70": 551148032.0, + "71": 551148032.0, + "72": 551148032.0, + "73": 551148032.0, + "74": 551148032.0, + "75": 551148032.0, + "76": 551148032.0, + "77": 551148032.0, + "78": 551148032.0, + "79": 551148032.0, + "80": 551148032.0, + "81": 551148032.0, + "82": 551148032.0, + "83": 551148032.0, + "84": 551148032.0, + "85": 551148032.0, + "86": 551148032.0, + "87": 551148032.0, + "88": 551148032.0, + "89": 551148032.0, + "90": 551148032.0, + "91": 551148032.0, + "92": 551148032.0, + "93": 551148032.0, + "94": 551148032.0, + "95": 551148032.0, + "96": 551148032.0, + "97": 551148032.0, + "98": 551148032.0, + "99": 551148032.0, + "100": 551148032.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2289441792.0, - "2": 2432405504.0, - "3": 2432405504.0, - "4": 2432405504.0, - "5": 2432405504.0, - "6": 2432405504.0, - "7": 2432405504.0, - "8": 2432405504.0, - "9": 2432405504.0, - "10": 2432405504.0, - "11": 2432405504.0, - "12": 2432405504.0, - "13": 2432405504.0, - "14": 2432405504.0, - "15": 2432405504.0, - "16": 2432405504.0, - "17": 2432405504.0, - "18": 2432405504.0, - "19": 2432405504.0, - "20": 2432405504.0, - "21": 2432405504.0, - "22": 2432405504.0, - "23": 2432405504.0, - "24": 2432405504.0, - "25": 2432405504.0, - "26": 2432405504.0, - "27": 2432405504.0, - "28": 2432405504.0, - "29": 2432405504.0, - "30": 2432405504.0, - "31": 2432405504.0, - "32": 2432405504.0, - "33": 2432405504.0, - "34": 2432405504.0, - "35": 2432405504.0, - "36": 2432405504.0, - "37": 2432405504.0, - "38": 2432405504.0, - "39": 2432405504.0, - "40": 2432405504.0, - "41": 2432405504.0, - "42": 2432405504.0, - "43": 2432405504.0, - "44": 2432405504.0, - "45": 2432405504.0, - "46": 2432405504.0, - "47": 2432405504.0, - "48": 2432405504.0, - "49": 2432405504.0, - "50": 2432405504.0, - "51": 2432405504.0, - "52": 2432405504.0, - "53": 2432405504.0, - "54": 2432405504.0, - "55": 2432405504.0, - "56": 2432405504.0, - "57": 2432405504.0, - "58": 2432405504.0, - "59": 2432405504.0, - "60": 2432405504.0, - "61": 2432405504.0, - "62": 2432405504.0, - "63": 2432405504.0, - "64": 2432405504.0, - "65": 2432405504.0, - "66": 2432405504.0, - "67": 2432405504.0, - "68": 2432405504.0, - "69": 2432405504.0, - "70": 2432405504.0, - "71": 2432405504.0, - "72": 2432405504.0, - "73": 2432405504.0, - "74": 2432405504.0, - "75": 2432405504.0, - "76": 2432405504.0, - "77": 2432405504.0, - "78": 2432405504.0, - "79": 2432405504.0, - "80": 2432405504.0, - "81": 2432405504.0, - "82": 2432405504.0, - "83": 2432405504.0, - "84": 2432405504.0, - "85": 2432405504.0, - "86": 2432405504.0, - "87": 2432405504.0, - "88": 2432405504.0, - "89": 2432405504.0, - "90": 2432405504.0, - "91": 2432405504.0, - "92": 2432405504.0, - "93": 2432405504.0, - "94": 2432405504.0, - "95": 2432405504.0, - "96": 2432405504.0, - "97": 2432405504.0, - "98": 2432405504.0, - "99": 2432405504.0, - "100": 2432405504.0 + "1": 2290489856.0, + "2": 2431226880.0, + "3": 2431226880.0, + "4": 2431226880.0, + "5": 2431226880.0, + "6": 2431226880.0, + "7": 2431226880.0, + "8": 2431226880.0, + "9": 2431226880.0, + "10": 2431226880.0, + "11": 2431226880.0, + "12": 2431226880.0, + "13": 2431226880.0, + "14": 2431226880.0, + "15": 2431226880.0, + "16": 2431226880.0, + "17": 2431226880.0, + "18": 2431226880.0, + "19": 2431226880.0, + "20": 2431226880.0, + "21": 2431226880.0, + "22": 2431226880.0, + "23": 2431226880.0, + "24": 2431226880.0, + "25": 2431226880.0, + "26": 2431226880.0, + "27": 2431226880.0, + "28": 2431226880.0, + "29": 2431226880.0, + "30": 2431226880.0, + "31": 2431226880.0, + "32": 2431226880.0, + "33": 2431226880.0, + "34": 2431226880.0, + "35": 2431226880.0, + "36": 2431226880.0, + "37": 2431226880.0, + "38": 2431226880.0, + "39": 2431226880.0, + "40": 2431226880.0, + "41": 2431226880.0, + "42": 2431226880.0, + "43": 2431226880.0, + "44": 2431226880.0, + "45": 2431226880.0, + "46": 2431226880.0, + "47": 2431226880.0, + "48": 2431226880.0, + "49": 2431226880.0, + "50": 2431226880.0, + "51": 2431226880.0, + "52": 2431226880.0, + "53": 2431226880.0, + "54": 2431226880.0, + "55": 2431226880.0, + "56": 2431226880.0, + "57": 2431226880.0, + "58": 2431226880.0, + "59": 2431226880.0, + "60": 2431226880.0, + "61": 2431226880.0, + "62": 2431226880.0, + "63": 2431226880.0, + "64": 2431226880.0, + "65": 2431226880.0, + "66": 2431226880.0, + "67": 2431226880.0, + "68": 2431226880.0, + "69": 2431226880.0, + "70": 2431226880.0, + "71": 2431226880.0, + "72": 2431226880.0, + "73": 2431226880.0, + "74": 2431226880.0, + "75": 2431226880.0, + "76": 2431226880.0, + "77": 2431226880.0, + "78": 2431226880.0, + "79": 2431226880.0, + "80": 2431226880.0, + "81": 2431226880.0, + "82": 2431226880.0, + "83": 2431226880.0, + "84": 2431226880.0, + "85": 2431226880.0, + "86": 2431226880.0, + "87": 2431226880.0, + "88": 2431226880.0, + "89": 2431226880.0, + "90": 2431226880.0, + "91": 2431226880.0, + "92": 2431226880.0, + "93": 2431226880.0, + "94": 2431226880.0, + "95": 2431226880.0, + "96": 2431226880.0, + "97": 2431226880.0, + "98": 2431226880.0, + "99": 2431226880.0, + "100": 2431226880.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 12.06542, - "2": 0.1206, - "3": 0.10179, - "4": 0.08257, - "5": 0.08196, - "6": 0.08184, - "7": 0.08247, - "8": 0.08147, - "9": 0.08127, - "10": 0.08228, - "11": 0.0839, - "12": 0.08236, - "13": 0.08232, - "14": 0.08218, - "15": 0.08336, - "16": 0.08213, - "17": 0.08296, - "18": 0.0816, - "19": 0.08269, - "20": 0.08138, - "21": 0.08303, - "22": 0.08243, - "23": 0.08357, - "24": 0.08151, - "25": 0.08392, - "26": 0.08247, - "27": 0.08229, - "28": 0.08279, - "29": 0.08232, - "30": 0.0824, - "31": 0.08146, - "32": 0.08912, - "33": 0.08386, - "34": 0.08198, - "35": 0.08188, - "36": 0.08394, - "37": 0.08154, - "38": 0.08111, - "39": 0.08175, - "40": 0.08143, - "41": 0.08312, - "42": 0.08219, - "43": 0.08218, - "44": 0.08316, - "45": 0.08162, - "46": 0.08265, - "47": 0.08169, - "48": 0.08346, - "49": 0.08176, - "50": 0.08213, - "51": 0.09096, - "52": 0.08501, - "53": 0.08473, - "54": 0.08165, - "55": 0.08129, - "56": 0.08244, - "57": 0.08158, - "58": 0.08104, - "59": 0.08185, - "60": 0.0834, - "61": 0.08139, - "62": 0.08134, - "63": 0.086, - "64": 0.08155, - "65": 0.08326, - "66": 0.08135, - "67": 0.08434, - "68": 0.0817, - "69": 0.08297, - "70": 0.08039, - "71": 0.0801, - "72": 0.07962, - "73": 0.07979, - "74": 0.08099, - "75": 0.08004, - "76": 0.07961, - "77": 0.07959, - "78": 0.08021, - "79": 0.08102, - "80": 0.07949, - "81": 0.08018, - "82": 0.08014, - "83": 0.07929, - "84": 0.07992, - "85": 0.07982, - "86": 0.08024, - "87": 0.08054, - "88": 0.08161, - "89": 0.08084, - "90": 0.08079, - "91": 0.08239, - "92": 0.08091, - "93": 0.07966, - "94": 0.08301, - "95": 0.08124, - "96": 0.08066, - "97": 0.08098, - "98": 0.08072, - "99": 0.08164, - "100": 0.08106 + "1": "nan", + "2": 8.77903, + "3": 0.10226, + "4": 0.08218, + "5": 0.0819, + "6": 0.08181, + "7": 0.08261, + "8": 0.0815, + "9": 0.08169, + "10": 0.08243, + "11": 0.08177, + "12": 0.08191, + "13": 0.08197, + "14": 0.08156, + "15": 0.08114, + "16": 0.0823, + "17": 0.08203, + "18": 0.0814, + "19": 0.08116, + "20": 0.08176, + "21": 0.08132, + "22": 0.08258, + "23": 0.08196, + "24": 0.08164, + "25": 0.08195, + "26": 0.08114, + "27": 0.08113, + "28": 0.08172, + "29": 0.08134, + "30": 0.08167, + "31": 0.08218, + "32": 0.08164, + "33": 0.08138, + "34": 0.08172, + "35": 0.08219, + "36": 0.08135, + "37": 0.08112, + "38": 0.08118, + "39": 0.08136, + "40": 0.08149, + "41": 0.08471, + "42": 0.08147, + "43": 0.08214, + "44": 0.08163, + "45": 0.08138, + "46": 0.08201, + "47": 0.08144, + "48": 0.08213, + "49": 0.08173, + "50": 0.08162, + "51": 0.08967, + "52": 0.08349, + "53": 0.08376, + "54": 0.09311, + "55": 0.09282, + "56": 0.09332, + "57": 0.08764, + "58": 0.08491, + "59": 0.08342, + "60": 0.08357, + "61": 0.0833, + "62": 0.08335, + "63": 0.08368, + "64": 0.08326, + "65": 0.08442, + "66": 0.08378, + "67": 0.08389, + "68": 0.0834, + "69": 0.08327, + "70": 0.08313, + "71": 0.08315, + "72": 0.08373, + "73": 0.08388, + "74": 0.08388, + "75": 0.08364, + "76": 0.0835, + "77": 0.08395, + "78": 0.08363, + "79": 0.08399, + "80": 0.08424, + "81": 0.08369, + "82": 0.08323, + "83": 0.08316, + "84": 0.08389, + "85": 0.08392, + "86": 0.08404, + "87": 0.08329, + "88": 0.08316, + "89": 0.08385, + "90": 0.08352, + "91": 0.08359, + "92": 0.08296, + "93": 0.0834, + "94": 0.08468, + "95": 0.08379, + "96": 0.08367, + "97": 0.08366, + "98": 0.08365, + "99": 0.08457, + "100": 0.08384 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json index 399a2c50a8d..3535401ed62 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.83936, "2": 10.8442, - "3": 10.86813, + "3": 10.86812, "4": 10.86022, - "5": 10.87939, - "6": 10.85969, - "7": 10.86386, - "8": 10.8444, - "9": 10.88995, - "10": 10.8926, - "11": 10.89136, - "12": 10.85312, - "13": 10.87319, - "14": 10.83805, - "15": 10.83088, - "16": 10.82011, + "5": 10.87941, + "6": 10.8597, + "7": 10.86387, + "8": 10.84442, + "9": 10.88994, + "10": 10.89264, + "11": 10.89135, + "12": 10.85314, + "13": 10.87321, + "14": 10.83804, + "15": 10.8309, + "16": 10.8201, "17": 10.79138, - "18": 10.81055, + "18": 10.81058, "19": 10.77977, - "20": 10.6635, - "21": 10.69765, - "22": 10.67421, - "23": 10.77344, - "24": 10.63919, + "20": 10.66346, + "21": 10.69764, + "22": 10.67417, + "23": 10.77343, + "24": 10.6392, "25": 10.50497, "26": 10.61911, - "27": 10.56921, - "28": 10.46859, - "29": 10.41119, - "30": 10.42916, + "27": 10.5692, + "28": 10.46856, + "29": 10.41117, + "30": 10.4292, "31": 10.52553, - "32": 10.34942, - "33": 10.2967, - "34": 10.46909, - "35": 9.99632, - "36": 10.13945, - "37": 10.0434, - "38": 10.4139, - "39": 9.80941, - "40": 10.12495, - "41": 10.14883, - "42": 10.04042, - "43": 10.22142, - "44": 10.07348, - "45": 9.71369, - "46": 10.00449, - "47": 9.94758, - "48": 9.68856, - "49": 9.93637, - "50": 9.96042 + "32": 10.34948, + "33": 10.29667, + "34": 10.46911, + "35": 9.99634, + "36": 10.13944, + "37": 10.04339, + "38": 10.41386, + "39": 9.8094, + "40": 10.12496, + "41": 10.14884, + "42": 10.04044, + "43": 10.22139, + "44": 10.07347, + "45": 9.71374, + "46": 10.00445, + "47": 9.94761, + "48": 9.68855, + "49": 9.93636, + "50": 9.96047 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1026.0, - "2": 1184.0, - "3": 1226.0, - "4": 1248.0, - "5": 1259.0, - "6": 1421.0, - "7": 1182.0, - "8": 1036.0, - "9": 1293.0, - "10": 1319.0, - "11": 1212.0, - "12": 1373.0, - "13": 1327.0, - "14": 1121.0, - "15": 1217.0, - "16": 1163.0, - "17": 1246.0, - "18": 1280.0, - "19": 1128.0, - "20": 1019.0, - "21": 1147.0, - "22": 1156.0, - "23": 1341.0, - "24": 1312.0, - "25": 1066.0, - "26": 1138.0, - "27": 1270.0, - "28": 1260.0, - "29": 1292.0, - "30": 1532.0, - "31": 1477.0, - "32": 1460.0, - "33": 1537.0, - "34": 1513.0, - "35": 1235.0, - "36": 1316.0, - "37": 1466.0, - "38": 1564.0, - "39": 1380.0, - "40": 1513.0, - "41": 1633.0, - "42": 1509.0, - "43": 1731.0, - "44": 1636.0, - "45": 1501.0, - "46": 1884.0, - "47": 1567.0, - "48": 1631.0, - "49": 1825.0, - "50": 1639.0 + "1": 1053.0, + "2": 1196.0, + "3": 1254.0, + "4": 1272.0, + "5": 1190.0, + "6": 1319.0, + "7": 1245.0, + "8": 1005.0, + "9": 1304.0, + "10": 1291.0, + "11": 1237.0, + "12": 1331.0, + "13": 1333.0, + "14": 1174.0, + "15": 1221.0, + "16": 1118.0, + "17": 1206.0, + "18": 1292.0, + "19": 1060.0, + "20": 1020.0, + "21": 1174.0, + "22": 1177.0, + "23": 1372.0, + "24": 1256.0, + "25": 1047.0, + "26": 1093.0, + "27": 1228.0, + "28": 1268.0, + "29": 1266.0, + "30": 1528.0, + "31": 1493.0, + "32": 1456.0, + "33": 1486.0, + "34": 1445.0, + "35": 1202.0, + "36": 1350.0, + "37": 1539.0, + "38": 1555.0, + "39": 1417.0, + "40": 1434.0, + "41": 1669.0, + "42": 1625.0, + "43": 1774.0, + "44": 1749.0, + "45": 1507.0, + "46": 1911.0, + "47": 1661.0, + "48": 1590.0, + "49": 1672.0, + "50": 1717.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.04337, - "2": 0.16822, - "3": 0.13237, - "4": 0.10427, - "5": 0.10319, - "6": 0.10424, - "7": 0.10225, - "8": 0.10398, - "9": 0.10251, - "10": 0.10246, - "11": 0.10345, - "12": 0.103, - "13": 0.10547, - "14": 0.10352, - "15": 0.10359, - "16": 0.1027, - "17": 0.10378, - "18": 0.10313, - "19": 0.10368, - "20": 0.10223, - "21": 0.10211, - "22": 0.1031, - "23": 0.10247, - "24": 0.1027, - "25": 0.10174, - "26": 0.10084, - "27": 0.10138, - "28": 0.10076, - "29": 0.10064, - "30": 0.10061, - "31": 0.10034, - "32": 0.10099, - "33": 0.10117, - "34": 0.10033, - "35": 0.10174, - "36": 0.10259, - "37": 0.1046, - "38": 0.10281, - "39": 0.10332, - "40": 0.10619, - "41": 0.10943, - "42": 0.10864, - "43": 0.10388, - "44": 0.10366, - "45": 0.10485, - "46": 0.10446, - "47": 0.10301, - "48": 0.10412, - "49": 0.10182, - "50": 0.10428 + "1": "nan", + "2": 7.56402, + "3": 0.12899, + "4": 0.1107, + "5": 0.10865, + "6": 0.11009, + "7": 0.10857, + "8": 0.10976, + "9": 0.10842, + "10": 0.10996, + "11": 0.10803, + "12": 0.10974, + "13": 0.1082, + "14": 0.11112, + "15": 0.10788, + "16": 0.10967, + "17": 0.10833, + "18": 0.11052, + "19": 0.10843, + "20": 0.11212, + "21": 0.11044, + "22": 0.11026, + "23": 0.10831, + "24": 0.1101, + "25": 0.11067, + "26": 0.10952, + "27": 0.10788, + "28": 0.11052, + "29": 0.10825, + "30": 0.1095, + "31": 0.10926, + "32": 0.11001, + "33": 0.10964, + "34": 0.10984, + "35": 0.1087, + "36": 0.11024, + "37": 0.10815, + "38": 0.11069, + "39": 0.11778, + "40": 0.10959, + "41": 0.12504, + "42": 0.11012, + "43": 0.10679, + "44": 0.10609, + "45": 0.10883, + "46": 0.10845, + "47": 0.10775, + "48": 0.11123, + "49": 0.1087, + "50": 0.10841 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json index 5b1ee17f8f6..eaa28ef7c26 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2/golden_values_dev_dgx_h100.json @@ -7,52 +7,52 @@ "1": 10.86539, "2": 10.85871, "3": 10.86282, - "4": 10.84007, - "5": 10.87856, - "6": 10.88852, - "7": 10.86536, - "8": 10.86015, - "9": 10.85991, - "10": 10.82982, - "11": 10.88947, - "12": 10.87511, - "13": 10.87422, - "14": 10.89675, - "15": 10.82056, - "16": 10.82497, - "17": 10.78983, - "18": 10.81029, - "19": 10.80528, + "4": 10.84009, + "5": 10.87854, + "6": 10.88854, + "7": 10.86535, + "8": 10.86013, + "9": 10.85988, + "10": 10.82977, + "11": 10.88952, + "12": 10.87506, + "13": 10.87423, + "14": 10.89676, + "15": 10.82052, + "16": 10.82502, + "17": 10.78984, + "18": 10.8103, + "19": 10.80532, "20": 10.70396, - "21": 10.6699, - "22": 10.50641, - "23": 10.69006, + "21": 10.66986, + "22": 10.50644, + "23": 10.69003, "24": 10.56312, - "25": 10.49418, + "25": 10.49419, "26": 10.56627, - "27": 10.58023, - "28": 10.51571, - "29": 10.55296, - "30": 10.30551, - "31": 10.02244, - "32": 10.40618, - "33": 10.39874, - "34": 10.1377, - "35": 10.20184, + "27": 10.58022, + "28": 10.51573, + "29": 10.55297, + "30": 10.30548, + "31": 10.02247, + "32": 10.40615, + "33": 10.39876, + "34": 10.13774, + "35": 10.20185, "36": 10.1605, - "37": 10.28975, - "38": 10.11483, - "39": 10.361, - "40": 10.01905, + "37": 10.28973, + "38": 10.11478, + "39": 10.36102, + "40": 10.01906, "41": 10.07292, - "42": 10.14697, + "42": 10.14696, "43": 9.74684, - "44": 9.87763, - "45": 9.74962, - "46": 9.73382, - "47": 10.07536, - "48": 9.78071, - "49": 9.44783, + "44": 9.87761, + "45": 9.74965, + "46": 9.73383, + "47": 10.07538, + "48": 9.78069, + "49": 9.44785, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 572.0, - "2": 656.0, - "3": 649.0, - "4": 631.0, - "5": 658.0, - "6": 636.0, - "7": 636.0, - "8": 542.0, - "9": 653.0, - "10": 551.0, - "11": 681.0, - "12": 642.0, - "13": 624.0, - "14": 658.0, - "15": 682.0, - "16": 659.0, - "17": 620.0, - "18": 603.0, - "19": 634.0, - "20": 639.0, - "21": 634.0, - "22": 602.0, - "23": 731.0, - "24": 620.0, - "25": 611.0, - "26": 626.0, - "27": 683.0, - "28": 668.0, - "29": 713.0, - "30": 712.0, - "31": 616.0, - "32": 786.0, - "33": 800.0, - "34": 702.0, - "35": 684.0, - "36": 664.0, - "37": 831.0, - "38": 802.0, - "39": 919.0, - "40": 802.0, - "41": 791.0, - "42": 840.0, - "43": 718.0, - "44": 756.0, - "45": 765.0, - "46": 809.0, - "47": 839.0, - "48": 827.0, - "49": 935.0, - "50": 839.0 + "1": 630.0, + "2": 682.0, + "3": 633.0, + "4": 606.0, + "5": 669.0, + "6": 643.0, + "7": 643.0, + "8": 590.0, + "9": 630.0, + "10": 562.0, + "11": 685.0, + "12": 599.0, + "13": 663.0, + "14": 616.0, + "15": 674.0, + "16": 636.0, + "17": 642.0, + "18": 607.0, + "19": 633.0, + "20": 592.0, + "21": 728.0, + "22": 641.0, + "23": 767.0, + "24": 617.0, + "25": 591.0, + "26": 621.0, + "27": 649.0, + "28": 720.0, + "29": 742.0, + "30": 771.0, + "31": 544.0, + "32": 740.0, + "33": 789.0, + "34": 704.0, + "35": 742.0, + "36": 736.0, + "37": 821.0, + "38": 768.0, + "39": 889.0, + "40": 814.0, + "41": 830.0, + "42": 787.0, + "43": 717.0, + "44": 816.0, + "45": 769.0, + "46": 810.0, + "47": 863.0, + "48": 848.0, + "49": 894.0, + "50": 810.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 511214080.0, + "24": 511214080.0, + "25": 511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 757802496.0, - "2": 935777792.0, - "3": 938397696.0, - "4": 938397696.0, - "5": 938397696.0, - "6": 938397696.0, - "7": 938397696.0, - "8": 938397696.0, - "9": 938397696.0, - "10": 938398208.0, - "11": 938398208.0, - "12": 938398208.0, - "13": 938398208.0, - "14": 938398720.0, - "15": 938398720.0, - "16": 938398720.0, - "17": 938398720.0, - "18": 938398720.0, - "19": 938398720.0, - "20": 938398720.0, - "21": 938398720.0, - "22": 938398720.0, - "23": 938398720.0, - "24": 938398720.0, - "25": 938399232.0, - "26": 938399232.0, - "27": 938399232.0, - "28": 938399232.0, - "29": 938399232.0, - "30": 938399232.0, - "31": 938399232.0, - "32": 938399232.0, - "33": 938399232.0, - "34": 938399232.0, - "35": 938399232.0, - "36": 938399232.0, - "37": 938399232.0, - "38": 938399232.0, - "39": 938399232.0, - "40": 938399232.0, - "41": 938399232.0, - "42": 938399232.0, - "43": 938399232.0, - "44": 938399232.0, - "45": 938399232.0, - "46": 938399232.0, - "47": 938399232.0, - "48": 938399232.0, - "49": 938399232.0, - "50": 938399232.0 + "1": 759896576.0, + "2": 934729216.0, + "3": 935777792.0, + "4": 935777792.0, + "5": 935777792.0, + "6": 935777792.0, + "7": 935777792.0, + "8": 935777792.0, + "9": 935777792.0, + "10": 935777792.0, + "11": 935777792.0, + "12": 935777792.0, + "13": 935777792.0, + "14": 935777792.0, + "15": 935777792.0, + "16": 935777792.0, + "17": 935777792.0, + "18": 935777792.0, + "19": 935777792.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 35.36663, - "2": 0.35208, - "3": 0.32012, - "4": 0.29736, - "5": 0.30009, - "6": 0.29722, - "7": 0.29604, - "8": 0.29598, - "9": 0.30123, - "10": 0.29278, - "11": 0.29195, - "12": 0.30003, - "13": 0.2957, - "14": 0.2935, - "15": 0.29372, - "16": 0.2984, - "17": 0.29013, - "18": 0.29041, - "19": 0.2934, - "20": 0.29454, - "21": 0.2936, - "22": 0.29663, - "23": 0.29453, - "24": 0.29404, - "25": 0.2912, - "26": 0.29009, - "27": 0.29448, - "28": 0.29043, - "29": 0.29359, - "30": 0.29413, - "31": 0.29317, - "32": 0.29247, - "33": 0.29418, - "34": 0.2938, - "35": 0.29207, - "36": 0.31485, - "37": 0.29543, - "38": 0.29402, - "39": 0.29262, - "40": 0.2957, - "41": 0.29348, - "42": 0.29242, - "43": 0.29117, - "44": 0.2927, - "45": 0.29263, - "46": 0.29024, - "47": 0.29404, - "48": 0.28901, - "49": 0.28844, - "50": 0.29053 + "1": "nan", + "2": 8.67318, + "3": 0.31282, + "4": 0.2961, + "5": 0.29314, + "6": 0.29227, + "7": 0.29313, + "8": 0.29274, + "9": 0.29117, + "10": 0.29411, + "11": 0.29406, + "12": 0.29474, + "13": 0.2909, + "14": 0.29404, + "15": 0.29297, + "16": 0.29224, + "17": 0.29251, + "18": 0.2928, + "19": 0.29136, + "20": 0.29278, + "21": 0.29115, + "22": 0.29239, + "23": 0.29204, + "24": 0.29189, + "25": 0.29107, + "26": 0.29209, + "27": 0.29212, + "28": 0.2922, + "29": 0.29266, + "30": 0.29249, + "31": 0.29112, + "32": 0.29204, + "33": 0.2903, + "34": 0.29046, + "35": 0.29216, + "36": 0.28992, + "37": 0.29292, + "38": 0.29192, + "39": 0.29231, + "40": 0.29044, + "41": 0.29017, + "42": 0.29127, + "43": 0.29043, + "44": 0.29193, + "45": 0.29241, + "46": 0.29181, + "47": 0.29196, + "48": 0.29308, + "49": 0.29264, + "50": 0.29024 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json index f5628621ad5..e8497a14105 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss/golden_values_dev_dgx_h100.json @@ -7,52 +7,52 @@ "1": 10.86539, "2": 10.85871, "3": 10.86282, - "4": 10.84007, - "5": 10.87856, - "6": 10.88852, - "7": 10.86536, - "8": 10.86015, - "9": 10.85991, - "10": 10.82982, - "11": 10.88947, - "12": 10.87511, - "13": 10.87422, - "14": 10.89675, - "15": 10.82056, - "16": 10.82497, - "17": 10.78983, - "18": 10.81029, - "19": 10.80528, + "4": 10.84009, + "5": 10.87854, + "6": 10.88854, + "7": 10.86535, + "8": 10.86013, + "9": 10.85988, + "10": 10.82977, + "11": 10.88952, + "12": 10.87506, + "13": 10.87423, + "14": 10.89676, + "15": 10.82052, + "16": 10.82502, + "17": 10.78984, + "18": 10.8103, + "19": 10.80532, "20": 10.70396, - "21": 10.6699, - "22": 10.50641, - "23": 10.69006, + "21": 10.66986, + "22": 10.50644, + "23": 10.69003, "24": 10.56312, - "25": 10.49418, + "25": 10.49419, "26": 10.56627, - "27": 10.58023, - "28": 10.51571, - "29": 10.55296, - "30": 10.30551, - "31": 10.02244, - "32": 10.40618, - "33": 10.39874, - "34": 10.1377, - "35": 10.20184, + "27": 10.58022, + "28": 10.51573, + "29": 10.55297, + "30": 10.30548, + "31": 10.02247, + "32": 10.40615, + "33": 10.39876, + "34": 10.13774, + "35": 10.20185, "36": 10.1605, - "37": 10.28975, - "38": 10.11483, - "39": 10.361, - "40": 10.01905, + "37": 10.28973, + "38": 10.11478, + "39": 10.36102, + "40": 10.01906, "41": 10.07292, - "42": 10.14697, + "42": 10.14696, "43": 9.74684, - "44": 9.87763, - "45": 9.74962, - "46": 9.73382, - "47": 10.07536, - "48": 9.78071, - "49": 9.44783, + "44": 9.87761, + "45": 9.74965, + "46": 9.73383, + "47": 10.07538, + "48": 9.78069, + "49": 9.44785, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 572.0, - "2": 656.0, - "3": 649.0, - "4": 631.0, - "5": 658.0, - "6": 636.0, - "7": 636.0, - "8": 542.0, - "9": 653.0, - "10": 551.0, - "11": 681.0, - "12": 642.0, - "13": 624.0, - "14": 658.0, - "15": 682.0, - "16": 659.0, - "17": 620.0, - "18": 603.0, - "19": 634.0, - "20": 639.0, - "21": 634.0, - "22": 602.0, - "23": 731.0, - "24": 620.0, - "25": 611.0, - "26": 626.0, - "27": 683.0, - "28": 668.0, - "29": 713.0, - "30": 712.0, - "31": 616.0, - "32": 786.0, - "33": 800.0, - "34": 702.0, - "35": 684.0, - "36": 664.0, - "37": 831.0, - "38": 802.0, - "39": 919.0, - "40": 802.0, - "41": 791.0, - "42": 840.0, - "43": 718.0, - "44": 756.0, - "45": 765.0, - "46": 809.0, - "47": 839.0, - "48": 827.0, - "49": 935.0, - "50": 839.0 + "1": 630.0, + "2": 682.0, + "3": 633.0, + "4": 606.0, + "5": 669.0, + "6": 643.0, + "7": 643.0, + "8": 590.0, + "9": 630.0, + "10": 562.0, + "11": 685.0, + "12": 599.0, + "13": 663.0, + "14": 616.0, + "15": 674.0, + "16": 636.0, + "17": 642.0, + "18": 607.0, + "19": 633.0, + "20": 592.0, + "21": 728.0, + "22": 641.0, + "23": 767.0, + "24": 617.0, + "25": 591.0, + "26": 621.0, + "27": 649.0, + "28": 720.0, + "29": 742.0, + "30": 771.0, + "31": 544.0, + "32": 740.0, + "33": 789.0, + "34": 704.0, + "35": 742.0, + "36": 736.0, + "37": 821.0, + "38": 768.0, + "39": 889.0, + "40": 814.0, + "41": 830.0, + "42": 787.0, + "43": 717.0, + "44": 816.0, + "45": 769.0, + "46": 810.0, + "47": 863.0, + "48": 848.0, + "49": 894.0, + "50": 810.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 511214080.0, + "24": 511214080.0, + "25": 511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 755704320.0, - "2": 938398720.0, - "3": 938398720.0, - "4": 938398720.0, - "5": 938398720.0, - "6": 938399232.0, - "7": 938399232.0, - "8": 938399232.0, - "9": 938399232.0, - "10": 938399232.0, - "11": 938399232.0, - "12": 938399232.0, - "13": 938399232.0, - "14": 938399232.0, - "15": 938399232.0, - "16": 938399232.0, - "17": 938399232.0, - "18": 938399232.0, - "19": 938399232.0, - "20": 938399232.0, - "21": 938399232.0, - "22": 938399232.0, - "23": 938399232.0, - "24": 938399232.0, - "25": 938399232.0, - "26": 938399232.0, - "27": 938399232.0, - "28": 938399232.0, - "29": 938399232.0, - "30": 938399232.0, - "31": 938399232.0, - "32": 938399232.0, - "33": 938399232.0, - "34": 938399232.0, - "35": 938399232.0, - "36": 938399232.0, - "37": 938399232.0, - "38": 938399232.0, - "39": 938399232.0, - "40": 938399232.0, - "41": 938399232.0, - "42": 938399232.0, - "43": 938399232.0, - "44": 938399232.0, - "45": 938399232.0, - "46": 938399232.0, - "47": 938399232.0, - "48": 938399232.0, - "49": 938399232.0, - "50": 938399232.0 + "1": 757799936.0, + "2": 935777280.0, + "3": 935777280.0, + "4": 935777280.0, + "5": 935777280.0, + "6": 935777280.0, + "7": 935777280.0, + "8": 935777280.0, + "9": 935777280.0, + "10": 935777280.0, + "11": 935777280.0, + "12": 935777280.0, + "13": 935777280.0, + "14": 935777280.0, + "15": 935777280.0, + "16": 935777280.0, + "17": 935777280.0, + "18": 935777280.0, + "19": 935777280.0, + "20": 935777280.0, + "21": 935777280.0, + "22": 935777280.0, + "23": 935777280.0, + "24": 935777280.0, + "25": 935777280.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 35.29813, - "2": 0.37906, - "3": 0.30948, - "4": 0.2886, - "5": 0.28858, - "6": 0.29461, - "7": 0.28328, - "8": 0.28783, - "9": 0.28448, - "10": 0.28698, - "11": 0.28404, - "12": 0.28717, - "13": 0.2828, - "14": 0.2846, - "15": 0.28648, - "16": 0.28793, - "17": 0.28473, - "18": 0.28326, - "19": 0.28524, - "20": 0.29094, - "21": 0.29401, - "22": 0.28944, - "23": 0.28693, - "24": 0.29508, - "25": 0.28683, - "26": 0.28507, - "27": 0.2849, - "28": 0.28658, - "29": 0.28518, - "30": 0.28539, - "31": 0.2829, - "32": 0.28482, - "33": 0.28454, - "34": 0.28634, - "35": 0.28739, - "36": 0.28563, - "37": 0.28401, - "38": 0.28251, - "39": 0.28156, - "40": 0.28197, - "41": 0.28236, - "42": 0.27995, - "43": 0.28293, - "44": 0.28018, - "45": 0.28419, - "46": 0.28512, - "47": 0.2818, - "48": 0.28099, - "49": 0.2831, - "50": 0.28153 + "1": "nan", + "2": 10.37053, + "3": 0.30798, + "4": 0.29057, + "5": 0.29242, + "6": 0.29043, + "7": 0.28986, + "8": 0.29102, + "9": 0.28927, + "10": 0.29009, + "11": 0.2889, + "12": 0.2887, + "13": 0.28995, + "14": 0.29044, + "15": 0.28952, + "16": 0.29094, + "17": 0.29042, + "18": 0.28929, + "19": 0.29186, + "20": 0.29067, + "21": 0.28897, + "22": 0.28967, + "23": 0.29105, + "24": 0.29026, + "25": 0.29028, + "26": 0.28971, + "27": 0.29198, + "28": 0.29059, + "29": 0.28754, + "30": 0.29224, + "31": 0.29028, + "32": 0.28933, + "33": 0.28982, + "34": 0.28839, + "35": 0.29022, + "36": 0.28949, + "37": 0.29283, + "38": 0.29171, + "39": 0.29089, + "40": 0.29251, + "41": 0.2922, + "42": 0.28894, + "43": 0.29147, + "44": 0.28939, + "45": 0.2897, + "46": 0.29036, + "47": 0.29096, + "48": 0.29013, + "49": 0.2907, + "50": 0.29146 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json index a470bf65873..95a3aa7d78e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last/golden_values_dev_dgx_h100.json @@ -7,52 +7,52 @@ "1": 10.86539, "2": 10.85871, "3": 10.86282, - "4": 10.84007, - "5": 10.87856, - "6": 10.88852, - "7": 10.86536, - "8": 10.86015, - "9": 10.85991, - "10": 10.82982, - "11": 10.88947, - "12": 10.87511, - "13": 10.87422, - "14": 10.89675, - "15": 10.82056, - "16": 10.82497, - "17": 10.78983, - "18": 10.81029, - "19": 10.80528, + "4": 10.84009, + "5": 10.87854, + "6": 10.88854, + "7": 10.86535, + "8": 10.86013, + "9": 10.85988, + "10": 10.82977, + "11": 10.88952, + "12": 10.87506, + "13": 10.87423, + "14": 10.89676, + "15": 10.82052, + "16": 10.82502, + "17": 10.78984, + "18": 10.8103, + "19": 10.80532, "20": 10.70396, - "21": 10.6699, - "22": 10.50641, - "23": 10.69006, + "21": 10.66986, + "22": 10.50644, + "23": 10.69003, "24": 10.56312, - "25": 10.49418, + "25": 10.49419, "26": 10.56627, - "27": 10.58023, - "28": 10.51571, - "29": 10.55296, - "30": 10.30551, - "31": 10.02244, - "32": 10.40618, - "33": 10.39874, - "34": 10.1377, - "35": 10.20184, + "27": 10.58022, + "28": 10.51573, + "29": 10.55297, + "30": 10.30548, + "31": 10.02247, + "32": 10.40615, + "33": 10.39876, + "34": 10.13774, + "35": 10.20185, "36": 10.1605, - "37": 10.28975, - "38": 10.11483, - "39": 10.361, - "40": 10.01905, + "37": 10.28973, + "38": 10.11478, + "39": 10.36102, + "40": 10.01906, "41": 10.07292, - "42": 10.14697, + "42": 10.14696, "43": 9.74684, - "44": 9.87763, - "45": 9.74962, - "46": 9.73382, - "47": 10.07536, - "48": 9.78071, - "49": 9.44783, + "44": 9.87761, + "45": 9.74965, + "46": 9.73383, + "47": 10.07538, + "48": 9.78069, + "49": 9.44785, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 572.0, - "2": 656.0, - "3": 649.0, - "4": 631.0, - "5": 658.0, - "6": 636.0, - "7": 636.0, - "8": 542.0, - "9": 653.0, - "10": 551.0, - "11": 681.0, - "12": 642.0, - "13": 624.0, - "14": 658.0, - "15": 682.0, - "16": 659.0, - "17": 620.0, - "18": 603.0, - "19": 634.0, - "20": 639.0, - "21": 634.0, - "22": 602.0, - "23": 731.0, - "24": 620.0, - "25": 611.0, - "26": 626.0, - "27": 683.0, - "28": 668.0, - "29": 713.0, - "30": 712.0, - "31": 616.0, - "32": 786.0, - "33": 800.0, - "34": 702.0, - "35": 684.0, - "36": 664.0, - "37": 831.0, - "38": 802.0, - "39": 919.0, - "40": 802.0, - "41": 791.0, - "42": 840.0, - "43": 718.0, - "44": 756.0, - "45": 765.0, - "46": 809.0, - "47": 839.0, - "48": 827.0, - "49": 935.0, - "50": 839.0 + "1": 630.0, + "2": 682.0, + "3": 633.0, + "4": 606.0, + "5": 669.0, + "6": 643.0, + "7": 643.0, + "8": 590.0, + "9": 630.0, + "10": 562.0, + "11": 685.0, + "12": 599.0, + "13": 663.0, + "14": 616.0, + "15": 674.0, + "16": 636.0, + "17": 642.0, + "18": 607.0, + "19": 633.0, + "20": 592.0, + "21": 728.0, + "22": 641.0, + "23": 767.0, + "24": 617.0, + "25": 591.0, + "26": 621.0, + "27": 649.0, + "28": 720.0, + "29": 742.0, + "30": 771.0, + "31": 544.0, + "32": 740.0, + "33": 789.0, + "34": 704.0, + "35": 742.0, + "36": 736.0, + "37": 821.0, + "38": 768.0, + "39": 889.0, + "40": 814.0, + "41": 830.0, + "42": 787.0, + "43": 717.0, + "44": 816.0, + "45": 769.0, + "46": 810.0, + "47": 863.0, + "48": 848.0, + "49": 894.0, + "50": 810.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 510689792.0, - "2": 510689792.0, - "3": 510689792.0, - "4": 510689792.0, - "5": 510689792.0, - "6": 510689792.0, - "7": 510689792.0, - "8": 510689792.0, - "9": 510689792.0, - "10": 510689792.0, - "11": 510689792.0, - "12": 510689792.0, - "13": 510689792.0, - "14": 510689792.0, - "15": 510689792.0, - "16": 510689792.0, - "17": 510689792.0, - "18": 510689792.0, - "19": 510689792.0, - "20": 510689792.0, - "21": 510689792.0, - "22": 510689792.0, - "23": 510689792.0, - "24": 510689792.0, - "25": 510689792.0, - "26": 510689792.0, - "27": 510689792.0, - "28": 510689792.0, - "29": 510689792.0, - "30": 510689792.0, - "31": 510689792.0, - "32": 510689792.0, - "33": 510689792.0, - "34": 510689792.0, - "35": 510689792.0, - "36": 510689792.0, - "37": 510689792.0, - "38": 510689792.0, - "39": 510689792.0, - "40": 510689792.0, - "41": 510689792.0, - "42": 510689792.0, - "43": 510689792.0, - "44": 510689792.0, - "45": 510689792.0, - "46": 510689792.0, - "47": 510689792.0, - "48": 510689792.0, - "49": 510689792.0, - "50": 510689792.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 511214080.0, + "24": 511214080.0, + "25": 511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 756752896.0, - "2": 938398720.0, - "3": 938398720.0, - "4": 938398720.0, - "5": 938398720.0, - "6": 938398720.0, - "7": 938398720.0, - "8": 938398720.0, - "9": 938398720.0, - "10": 938398720.0, - "11": 938398720.0, - "12": 938398720.0, - "13": 938398720.0, - "14": 938398720.0, - "15": 938398720.0, - "16": 938399232.0, - "17": 938399232.0, - "18": 938399232.0, - "19": 938399232.0, - "20": 938399232.0, - "21": 938399232.0, - "22": 938399232.0, - "23": 938399232.0, - "24": 938399232.0, - "25": 938399232.0, - "26": 938399232.0, - "27": 938399232.0, - "28": 938399232.0, - "29": 938399232.0, - "30": 938399232.0, - "31": 938399232.0, - "32": 938399232.0, - "33": 938399232.0, - "34": 938399232.0, - "35": 938399232.0, - "36": 938399232.0, - "37": 938399232.0, - "38": 938399232.0, - "39": 938399232.0, - "40": 938399232.0, - "41": 938399232.0, - "42": 938399232.0, - "43": 938399232.0, - "44": 938399232.0, - "45": 938399232.0, - "46": 938399232.0, - "47": 938399232.0, - "48": 938399232.0, - "49": 938399232.0, - "50": 938399232.0 + "1": 756753920.0, + "2": 934729216.0, + "3": 935777280.0, + "4": 935777280.0, + "5": 935777280.0, + "6": 935777280.0, + "7": 935777280.0, + "8": 935777280.0, + "9": 935777280.0, + "10": 935777280.0, + "11": 935777280.0, + "12": 935777280.0, + "13": 935777792.0, + "14": 935777792.0, + "15": 935777792.0, + "16": 935777792.0, + "17": 935777792.0, + "18": 935777792.0, + "19": 935777792.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 37.38041, - "2": 0.33426, - "3": 0.30575, - "4": 0.2855, - "5": 0.28459, - "6": 0.28581, - "7": 0.28653, - "8": 0.28649, - "9": 0.28246, - "10": 0.28538, - "11": 0.28516, - "12": 0.28331, - "13": 0.28799, - "14": 0.28438, - "15": 0.28361, - "16": 0.28315, - "17": 0.2837, - "18": 0.28279, - "19": 0.28916, - "20": 0.28613, - "21": 0.2849, - "22": 0.2837, - "23": 0.2861, - "24": 0.28551, - "25": 0.28665, - "26": 0.28308, - "27": 0.28626, - "28": 0.28139, - "29": 0.28479, - "30": 0.28557, - "31": 0.28342, - "32": 0.28058, - "33": 0.2824, - "34": 0.28129, - "35": 0.28377, - "36": 0.28273, - "37": 0.28699, - "38": 0.28388, - "39": 0.28427, - "40": 0.28442, - "41": 0.28373, - "42": 0.28177, - "43": 0.28258, - "44": 0.28237, - "45": 0.2815, - "46": 0.28503, - "47": 0.2817, - "48": 0.28433, - "49": 0.28819, - "50": 0.28371 + "1": "nan", + "2": 10.50661, + "3": 0.31687, + "4": 0.28819, + "5": 0.28906, + "6": 0.28653, + "7": 0.28677, + "8": 0.28692, + "9": 0.28706, + "10": 0.28507, + "11": 0.28571, + "12": 0.28481, + "13": 0.28851, + "14": 0.28777, + "15": 0.28632, + "16": 0.28504, + "17": 0.28694, + "18": 0.28711, + "19": 0.29178, + "20": 0.28664, + "21": 0.2888, + "22": 0.2856, + "23": 0.28568, + "24": 0.28763, + "25": 0.28782, + "26": 0.28662, + "27": 0.28959, + "28": 0.2881, + "29": 0.28977, + "30": 0.28727, + "31": 0.28683, + "32": 0.29036, + "33": 0.28965, + "34": 0.28788, + "35": 0.28738, + "36": 0.28582, + "37": 0.28582, + "38": 0.28479, + "39": 0.28607, + "40": 0.2872, + "41": 0.28534, + "42": 0.28706, + "43": 0.28596, + "44": 0.28512, + "45": 0.28693, + "46": 0.28709, + "47": 0.28762, + "48": 0.28663, + "49": 0.28815, + "50": 0.28624 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json index f78c3deb59d..77286bef5b8 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last/golden_values_dev_dgx_h100.json @@ -7,52 +7,52 @@ "1": 10.86539, "2": 10.85871, "3": 10.86282, - "4": 10.84007, - "5": 10.87856, - "6": 10.88852, - "7": 10.86536, - "8": 10.86015, - "9": 10.85991, - "10": 10.82982, - "11": 10.88947, - "12": 10.87511, - "13": 10.87422, - "14": 10.89675, - "15": 10.82056, - "16": 10.82497, - "17": 10.78983, - "18": 10.81029, - "19": 10.80528, + "4": 10.84009, + "5": 10.87854, + "6": 10.88854, + "7": 10.86535, + "8": 10.86013, + "9": 10.85988, + "10": 10.82977, + "11": 10.88952, + "12": 10.87506, + "13": 10.87423, + "14": 10.89676, + "15": 10.82052, + "16": 10.82502, + "17": 10.78984, + "18": 10.8103, + "19": 10.80532, "20": 10.70396, - "21": 10.6699, - "22": 10.50641, - "23": 10.69006, + "21": 10.66986, + "22": 10.50644, + "23": 10.69003, "24": 10.56312, - "25": 10.49418, + "25": 10.49419, "26": 10.56627, - "27": 10.58023, - "28": 10.51571, - "29": 10.55296, - "30": 10.30551, - "31": 10.02244, - "32": 10.40618, - "33": 10.39874, - "34": 10.1377, - "35": 10.20184, + "27": 10.58022, + "28": 10.51573, + "29": 10.55297, + "30": 10.30548, + "31": 10.02247, + "32": 10.40615, + "33": 10.39876, + "34": 10.13774, + "35": 10.20185, "36": 10.1605, - "37": 10.28975, - "38": 10.11483, - "39": 10.361, - "40": 10.01905, + "37": 10.28973, + "38": 10.11478, + "39": 10.36102, + "40": 10.01906, "41": 10.07292, - "42": 10.14697, + "42": 10.14696, "43": 9.74684, - "44": 9.87763, - "45": 9.74962, - "46": 9.73382, - "47": 10.07536, - "48": 9.78071, - "49": 9.44783, + "44": 9.87761, + "45": 9.74965, + "46": 9.73383, + "47": 10.07538, + "48": 9.78069, + "49": 9.44785, "50": 9.8399 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 572.0, - "2": 656.0, - "3": 649.0, - "4": 631.0, - "5": 658.0, - "6": 636.0, - "7": 636.0, - "8": 542.0, - "9": 653.0, - "10": 551.0, - "11": 681.0, - "12": 642.0, - "13": 624.0, - "14": 658.0, - "15": 682.0, - "16": 659.0, - "17": 620.0, - "18": 603.0, - "19": 634.0, - "20": 639.0, - "21": 634.0, - "22": 602.0, - "23": 731.0, - "24": 620.0, - "25": 611.0, - "26": 626.0, - "27": 683.0, - "28": 668.0, - "29": 713.0, - "30": 712.0, - "31": 616.0, - "32": 786.0, - "33": 800.0, - "34": 702.0, - "35": 684.0, - "36": 664.0, - "37": 831.0, - "38": 802.0, - "39": 919.0, - "40": 802.0, - "41": 791.0, - "42": 840.0, - "43": 718.0, - "44": 756.0, - "45": 765.0, - "46": 809.0, - "47": 839.0, - "48": 827.0, - "49": 935.0, - "50": 839.0 + "1": 630.0, + "2": 682.0, + "3": 633.0, + "4": 606.0, + "5": 669.0, + "6": 643.0, + "7": 643.0, + "8": 590.0, + "9": 630.0, + "10": 562.0, + "11": 685.0, + "12": 599.0, + "13": 663.0, + "14": 616.0, + "15": 674.0, + "16": 636.0, + "17": 642.0, + "18": 607.0, + "19": 633.0, + "20": 592.0, + "21": 728.0, + "22": 641.0, + "23": 767.0, + "24": 617.0, + "25": 591.0, + "26": 621.0, + "27": 649.0, + "28": 720.0, + "29": 742.0, + "30": 771.0, + "31": 544.0, + "32": 740.0, + "33": 789.0, + "34": 704.0, + "35": 742.0, + "36": 736.0, + "37": 821.0, + "38": 768.0, + "39": 889.0, + "40": 814.0, + "41": 830.0, + "42": 787.0, + "43": 717.0, + "44": 816.0, + "45": 769.0, + "46": 810.0, + "47": 863.0, + "48": 848.0, + "49": 894.0, + "50": 810.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 512786944.0, - "2": 512786944.0, - "3": 512786944.0, - "4": 512786944.0, - "5": 512786944.0, - "6": 512786944.0, - "7": 512786944.0, - "8": 512786944.0, - "9": 512786944.0, - "10": 512786944.0, - "11": 512786944.0, - "12": 512786944.0, - "13": 512786944.0, - "14": 512786944.0, - "15": 512786944.0, - "16": 512786944.0, - "17": 512786944.0, - "18": 512786944.0, - "19": 512786944.0, - "20": 512786944.0, - "21": 512786944.0, - "22": 512786944.0, - "23": 512786944.0, - "24": 512786944.0, - "25": 512786944.0, - "26": 512786944.0, - "27": 512786944.0, - "28": 512786944.0, - "29": 512786944.0, - "30": 512786944.0, - "31": 512786944.0, - "32": 512786944.0, - "33": 512786944.0, - "34": 512786944.0, - "35": 512786944.0, - "36": 512786944.0, - "37": 512786944.0, - "38": 512786944.0, - "39": 512786944.0, - "40": 512786944.0, - "41": 512786944.0, - "42": 512786944.0, - "43": 512786944.0, - "44": 512786944.0, - "45": 512786944.0, - "46": 512786944.0, - "47": 512786944.0, - "48": 512786944.0, - "49": 512786944.0, - "50": 512786944.0 + "1": 511214080.0, + "2": 511214080.0, + "3": 511214080.0, + "4": 511214080.0, + "5": 511214080.0, + "6": 511214080.0, + "7": 511214080.0, + "8": 511214080.0, + "9": 511214080.0, + "10": 511214080.0, + "11": 511214080.0, + "12": 511214080.0, + "13": 511214080.0, + "14": 511214080.0, + "15": 511214080.0, + "16": 511214080.0, + "17": 511214080.0, + "18": 511214080.0, + "19": 511214080.0, + "20": 511214080.0, + "21": 511214080.0, + "22": 511214080.0, + "23": 511214080.0, + "24": 511214080.0, + "25": 511214080.0, + "26": 511214080.0, + "27": 511214080.0, + "28": 511214080.0, + "29": 511214080.0, + "30": 511214080.0, + "31": 511214080.0, + "32": 511214080.0, + "33": 511214080.0, + "34": 511214080.0, + "35": 511214080.0, + "36": 511214080.0, + "37": 511214080.0, + "38": 511214080.0, + "39": 511214080.0, + "40": 511214080.0, + "41": 511214080.0, + "42": 511214080.0, + "43": 511214080.0, + "44": 511214080.0, + "45": 511214080.0, + "46": 511214080.0, + "47": 511214080.0, + "48": 511214080.0, + "49": 511214080.0, + "50": 511214080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 758851072.0, - "2": 937350656.0, - "3": 937350656.0, - "4": 937350656.0, - "5": 937350656.0, - "6": 937350656.0, - "7": 937350656.0, - "8": 937350656.0, - "9": 937350656.0, - "10": 937350656.0, - "11": 937350656.0, - "12": 937350656.0, - "13": 937350656.0, - "14": 937350656.0, - "15": 937350656.0, - "16": 937350656.0, - "17": 937350656.0, - "18": 937350656.0, - "19": 937350656.0, - "20": 937350656.0, - "21": 937350656.0, - "22": 937350656.0, - "23": 937350656.0, - "24": 937350656.0, - "25": 937350656.0, - "26": 937350656.0, - "27": 937350656.0, - "28": 937350656.0, - "29": 937350656.0, - "30": 937350656.0, - "31": 937350656.0, - "32": 937350656.0, - "33": 937350656.0, - "34": 937350656.0, - "35": 937350656.0, - "36": 937350656.0, - "37": 937350656.0, - "38": 937350656.0, - "39": 937350656.0, - "40": 937350656.0, - "41": 937350656.0, - "42": 937350656.0, - "43": 937350656.0, - "44": 937350656.0, - "45": 937350656.0, - "46": 937350656.0, - "47": 937350656.0, - "48": 937350656.0, - "49": 937350656.0, - "50": 937350656.0 + "1": 756753920.0, + "2": 935776768.0, + "3": 935777280.0, + "4": 935777280.0, + "5": 935777280.0, + "6": 935777280.0, + "7": 935777280.0, + "8": 935777792.0, + "9": 935777792.0, + "10": 935777792.0, + "11": 935777792.0, + "12": 935777792.0, + "13": 935777792.0, + "14": 935777792.0, + "15": 935777792.0, + "16": 935777792.0, + "17": 935777792.0, + "18": 935777792.0, + "19": 935777792.0, + "20": 935777792.0, + "21": 935777792.0, + "22": 935777792.0, + "23": 935777792.0, + "24": 935777792.0, + "25": 935777792.0, + "26": 935777792.0, + "27": 935777792.0, + "28": 935777792.0, + "29": 935777792.0, + "30": 935777792.0, + "31": 935777792.0, + "32": 935777792.0, + "33": 935777792.0, + "34": 935777792.0, + "35": 935777792.0, + "36": 935777792.0, + "37": 935777792.0, + "38": 935777792.0, + "39": 935777792.0, + "40": 935777792.0, + "41": 935777792.0, + "42": 935777792.0, + "43": 935777792.0, + "44": 935777792.0, + "45": 935777792.0, + "46": 935777792.0, + "47": 935777792.0, + "48": 935777792.0, + "49": 935777792.0, + "50": 935777792.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 35.82214, - "2": 0.4147, - "3": 0.32319, - "4": 0.30032, - "5": 0.30017, - "6": 0.29443, - "7": 0.29684, - "8": 0.29654, - "9": 0.29369, - "10": 0.29144, - "11": 0.29461, - "12": 0.29494, - "13": 0.2989, - "14": 0.30075, - "15": 0.30668, - "16": 0.29656, - "17": 0.29426, - "18": 0.29342, - "19": 0.29461, - "20": 0.29689, - "21": 0.29944, - "22": 0.29592, - "23": 0.29544, - "24": 0.29391, - "25": 0.29356, - "26": 0.29086, - "27": 0.29138, - "28": 0.29613, - "29": 0.29464, - "30": 0.29623, - "31": 0.29357, - "32": 0.2941, - "33": 0.29995, - "34": 0.29721, - "35": 0.29459, - "36": 0.29391, - "37": 0.29408, - "38": 0.29673, - "39": 0.2977, - "40": 0.29439, - "41": 0.29458, - "42": 0.29561, - "43": 0.29392, - "44": 0.3078, - "45": 0.29321, - "46": 0.28828, - "47": 0.28745, - "48": 0.30287, - "49": 0.28551, - "50": 0.28747 + "1": "nan", + "2": 9.60636, + "3": 0.30907, + "4": 0.29314, + "5": 0.29242, + "6": 0.29143, + "7": 0.29357, + "8": 0.30142, + "9": 0.29028, + "10": 0.29092, + "11": 0.29102, + "12": 0.2905, + "13": 0.29086, + "14": 0.29207, + "15": 0.28922, + "16": 0.29176, + "17": 0.28919, + "18": 0.29146, + "19": 0.29052, + "20": 0.29227, + "21": 0.28872, + "22": 0.29, + "23": 0.29255, + "24": 0.29075, + "25": 0.29024, + "26": 0.29028, + "27": 0.29519, + "28": 0.29186, + "29": 0.29203, + "30": 0.29012, + "31": 0.29246, + "32": 0.29064, + "33": 0.29112, + "34": 0.28949, + "35": 0.29014, + "36": 0.2908, + "37": 0.29072, + "38": 0.29108, + "39": 0.29008, + "40": 0.29165, + "41": 0.29027, + "42": 0.2907, + "43": 0.29083, + "44": 0.29162, + "45": 0.29265, + "46": 0.29117, + "47": 0.29097, + "48": 0.291, + "49": 0.29094, + "50": 0.29054 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json index feb49a01aad..6c88449234e 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_h100.json @@ -6,103 +6,103 @@ "values": { "1": 10.84445, "2": 10.84755, - "3": 10.84905, - "4": 10.844, - "5": 10.88133, - "6": 10.88069, - "7": 10.86435, + "3": 10.84906, + "4": 10.84402, + "5": 10.88134, + "6": 10.88068, + "7": 10.86436, "8": 10.85483, "9": 10.85577, - "10": 10.81851, - "11": 10.88835, + "10": 10.81854, + "11": 10.88836, "12": 10.86318, - "13": 10.86739, - "14": 10.88397, + "13": 10.8674, + "14": 10.88403, "15": 10.82443, - "16": 10.82905, - "17": 10.7953, - "18": 10.81529, - "19": 10.80121, - "20": 10.71826, - "21": 10.69956, - "22": 10.56756, - "23": 10.7171, - "24": 10.60451, - "25": 10.55018, - "26": 10.60859, - "27": 10.62013, - "28": 10.57541, - "29": 10.59599, - "30": 10.38364, - "31": 10.15409, + "16": 10.82903, + "17": 10.79527, + "18": 10.81531, + "19": 10.8012, + "20": 10.71823, + "21": 10.6996, + "22": 10.5676, + "23": 10.71711, + "24": 10.60453, + "25": 10.55012, + "26": 10.6086, + "27": 10.62015, + "28": 10.5754, + "29": 10.596, + "30": 10.38366, + "31": 10.15413, "32": 10.48036, - "33": 10.47379, - "34": 10.23693, + "33": 10.47378, + "34": 10.23689, "35": 10.28857, "36": 10.24862, - "37": 10.35357, - "38": 10.20827, - "39": 10.41871, - "40": 10.11266, - "41": 10.16079, - "42": 10.23304, - "43": 9.86146, + "37": 10.35356, + "38": 10.20824, + "39": 10.4187, + "40": 10.11269, + "41": 10.1608, + "42": 10.23306, + "43": 9.86149, "44": 9.97719, - "45": 9.8651, - "46": 9.8486, - "47": 10.16607, - "48": 9.87126, + "45": 9.86508, + "46": 9.84858, + "47": 10.16606, + "48": 9.87125, "49": 9.56738, - "50": 9.92137, - "51": 9.86682, - "52": 9.7694, + "50": 9.92136, + "51": 9.8668, + "52": 9.76943, "53": 10.07839, - "54": 9.96992, - "55": 9.89678, - "56": 9.64417, - "57": 9.49737, - "58": 9.84853, - "59": 9.59973, + "54": 9.96994, + "55": 9.89681, + "56": 9.64416, + "57": 9.49733, + "58": 9.84854, + "59": 9.59976, "60": 9.5062, - "61": 9.71028, - "62": 9.99079, - "63": 9.38989, - "64": 9.78616, + "61": 9.71031, + "62": 9.9908, + "63": 9.38991, + "64": 9.78614, "65": 8.95963, - "66": 9.70879, - "67": 9.3791, - "68": 9.79602, - "69": 9.80692, - "70": 9.74781, + "66": 9.70878, + "67": 9.37911, + "68": 9.796, + "69": 9.80693, + "70": 9.74782, "71": 9.61777, - "72": 9.59105, - "73": 9.50417, - "74": 8.94629, - "75": 9.42953, - "76": 9.08443, - "77": 10.06697, - "78": 9.73245, - "79": 9.38132, - "80": 9.41079, - "81": 9.48315, - "82": 9.70491, - "83": 9.30719, - "84": 9.42254, - "85": 9.61799, - "86": 9.07625, - "87": 9.59384, + "72": 9.59106, + "73": 9.50416, + "74": 8.9463, + "75": 9.42952, + "76": 9.0844, + "77": 10.06699, + "78": 9.73242, + "79": 9.38135, + "80": 9.41082, + "81": 9.48318, + "82": 9.70493, + "83": 9.30716, + "84": 9.4225, + "85": 9.61796, + "86": 9.07629, + "87": 9.59387, "88": 9.75414, - "89": 9.60107, - "90": 9.8203, - "91": 9.34086, - "92": 9.35733, - "93": 9.07939, - "94": 8.83611, - "95": 9.52231, - "96": 9.53388, - "97": 9.31636, + "89": 9.60105, + "90": 9.82028, + "91": 9.34084, + "92": 9.35735, + "93": 9.07938, + "94": 8.83609, + "95": 9.52229, + "96": 9.53389, + "97": 9.31635, "98": 9.68001, - "99": 8.89242, + "99": 8.89243, "100": 9.3998 } }, @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1814.0, - "2": 1725.0, - "3": 1782.0, - "4": 1955.0, - "5": 1930.0, - "6": 1875.0, - "7": 1951.0, - "8": 1800.0, - "9": 1914.0, - "10": 1495.0, - "11": 1987.0, - "12": 1811.0, - "13": 2030.0, - "14": 1930.0, - "15": 1948.0, - "16": 1933.0, - "17": 1892.0, - "18": 1781.0, - "19": 1985.0, - "20": 1812.0, - "21": 2115.0, - "22": 1885.0, - "23": 2120.0, - "24": 1814.0, - "25": 1705.0, - "26": 1815.0, - "27": 1870.0, - "28": 2162.0, - "29": 2104.0, - "30": 2061.0, - "31": 1666.0, - "32": 2010.0, - "33": 2157.0, - "34": 1918.0, - "35": 2000.0, - "36": 1966.0, - "37": 2421.0, - "38": 2318.0, - "39": 2488.0, - "40": 2213.0, - "41": 2361.0, - "42": 2330.0, - "43": 2092.0, - "44": 2184.0, - "45": 2237.0, - "46": 2311.0, - "47": 2645.0, - "48": 2374.0, - "49": 2345.0, - "50": 2357.0, - "51": 2627.0, - "52": 2530.0, - "53": 2856.0, - "54": 2776.0, - "55": 2346.0, - "56": 2679.0, - "57": 2410.0, - "58": 2990.0, - "59": 2835.0, - "60": 2502.0, - "61": 2984.0, - "62": 2692.0, - "63": 2463.0, - "64": 3009.0, - "65": 2587.0, - "66": 3126.0, - "67": 2793.0, - "68": 2665.0, - "69": 2776.0, - "70": 3135.0, - "71": 3151.0, - "72": 2424.0, - "73": 2926.0, - "74": 1921.0, - "75": 2347.0, - "76": 3026.0, - "77": 3283.0, - "78": 3224.0, - "79": 3165.0, - "80": 3311.0, - "81": 3792.0, - "82": 3279.0, - "83": 2867.0, - "84": 3381.0, - "85": 3415.0, - "86": 2962.0, - "87": 3822.0, - "88": 3311.0, - "89": 3392.0, - "90": 3184.0, - "91": 2795.0, - "92": 3121.0, - "93": 2731.0, - "94": 3503.0, - "95": 3473.0, - "96": 3465.0, - "97": 3299.0, - "98": 3663.0, - "99": 3394.0, - "100": 3235.0 + "1": 1746.0, + "2": 1752.0, + "3": 1730.0, + "4": 1936.0, + "5": 1952.0, + "6": 1959.0, + "7": 1964.0, + "8": 1759.0, + "9": 1846.0, + "10": 1493.0, + "11": 1949.0, + "12": 1823.0, + "13": 2059.0, + "14": 1965.0, + "15": 1967.0, + "16": 1873.0, + "17": 1939.0, + "18": 1808.0, + "19": 1890.0, + "20": 1704.0, + "21": 1963.0, + "22": 1868.0, + "23": 2239.0, + "24": 1864.0, + "25": 1756.0, + "26": 1791.0, + "27": 1874.0, + "28": 2152.0, + "29": 2187.0, + "30": 1983.0, + "31": 1673.0, + "32": 2043.0, + "33": 2236.0, + "34": 2025.0, + "35": 2016.0, + "36": 2018.0, + "37": 2406.0, + "38": 2369.0, + "39": 2407.0, + "40": 2313.0, + "41": 2229.0, + "42": 2471.0, + "43": 2076.0, + "44": 2192.0, + "45": 2109.0, + "46": 2357.0, + "47": 2472.0, + "48": 2410.0, + "49": 2324.0, + "50": 2338.0, + "51": 2664.0, + "52": 2695.0, + "53": 2828.0, + "54": 2729.0, + "55": 2390.0, + "56": 2703.0, + "57": 2454.0, + "58": 2858.0, + "59": 2763.0, + "60": 2418.0, + "61": 3151.0, + "62": 2659.0, + "63": 2486.0, + "64": 3018.0, + "65": 2711.0, + "66": 3127.0, + "67": 2856.0, + "68": 2709.0, + "69": 2917.0, + "70": 3161.0, + "71": 3258.0, + "72": 2435.0, + "73": 2855.0, + "74": 1864.0, + "75": 2430.0, + "76": 2998.0, + "77": 3268.0, + "78": 3112.0, + "79": 3050.0, + "80": 3356.0, + "81": 3795.0, + "82": 3265.0, + "83": 2783.0, + "84": 3382.0, + "85": 3450.0, + "86": 2842.0, + "87": 4024.0, + "88": 3201.0, + "89": 3473.0, + "90": 3097.0, + "91": 2699.0, + "92": 3110.0, + "93": 2684.0, + "94": 3451.0, + "95": 3543.0, + "96": 3477.0, + "97": 3247.0, + "98": 3783.0, + "99": 3360.0, + "100": 3239.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 700103168.0, - "2": 700103168.0, - "3": 700103168.0, - "4": 700103168.0, - "5": 700103168.0, - "6": 700103168.0, - "7": 700103168.0, - "8": 700103168.0, - "9": 700103168.0, - "10": 700103168.0, - "11": 700103168.0, - "12": 700103168.0, - "13": 700103168.0, - "14": 700103168.0, - "15": 700103168.0, - "16": 700103168.0, - "17": 700103168.0, - "18": 700103168.0, - "19": 700103168.0, - "20": 700103168.0, - "21": 700103168.0, - "22": 700103168.0, - "23": 700103168.0, - "24": 700103168.0, - "25": 700103168.0, - "26": 700103168.0, - "27": 700103168.0, - "28": 700103168.0, - "29": 700103168.0, - "30": 700103168.0, - "31": 700103168.0, - "32": 700103168.0, - "33": 700103168.0, - "34": 700103168.0, - "35": 700103168.0, - "36": 700103168.0, - "37": 700103168.0, - "38": 700103168.0, - "39": 700103168.0, - "40": 700103168.0, - "41": 700103168.0, - "42": 700103168.0, - "43": 700103168.0, - "44": 700103168.0, - "45": 700103168.0, - "46": 700103168.0, - "47": 700103168.0, - "48": 700103168.0, - "49": 700103168.0, - "50": 700103168.0, - "51": 700103168.0, - "52": 700103168.0, - "53": 700103168.0, - "54": 700103168.0, - "55": 700103168.0, - "56": 700103168.0, - "57": 700103168.0, - "58": 700103168.0, - "59": 700103168.0, - "60": 700103168.0, - "61": 700103168.0, - "62": 700103168.0, - "63": 700103168.0, - "64": 700103168.0, - "65": 700103168.0, - "66": 700103168.0, - "67": 700103168.0, - "68": 700103168.0, - "69": 700103168.0, - "70": 700103168.0, - "71": 700103168.0, - "72": 700103168.0, - "73": 700103168.0, - "74": 700103168.0, - "75": 700103168.0, - "76": 700103168.0, - "77": 700103168.0, - "78": 700103168.0, - "79": 700103168.0, - "80": 700103168.0, - "81": 700103168.0, - "82": 700103168.0, - "83": 700103168.0, - "84": 700103168.0, - "85": 700103168.0, - "86": 700103168.0, - "87": 700103168.0, - "88": 700103168.0, - "89": 700103168.0, - "90": 700103168.0, - "91": 700103168.0, - "92": 700103168.0, - "93": 700103168.0, - "94": 700103168.0, - "95": 700103168.0, - "96": 700103168.0, - "97": 700103168.0, - "98": 700103168.0, - "99": 700103168.0, - "100": 700103168.0 + "1": 246999552.0, + "2": 246999552.0, + "3": 246999552.0, + "4": 246999552.0, + "5": 246999552.0, + "6": 246999552.0, + "7": 246999552.0, + "8": 246999552.0, + "9": 246999552.0, + "10": 246999552.0, + "11": 246999552.0, + "12": 246999552.0, + "13": 246999552.0, + "14": 246999552.0, + "15": 246999552.0, + "16": 246999552.0, + "17": 246999552.0, + "18": 246999552.0, + "19": 246999552.0, + "20": 246999552.0, + "21": 246999552.0, + "22": 246999552.0, + "23": 246999552.0, + "24": 246999552.0, + "25": 246999552.0, + "26": 246999552.0, + "27": 246999552.0, + "28": 246999552.0, + "29": 246999552.0, + "30": 246999552.0, + "31": 246999552.0, + "32": 246999552.0, + "33": 246999552.0, + "34": 246999552.0, + "35": 246999552.0, + "36": 246999552.0, + "37": 246999552.0, + "38": 246999552.0, + "39": 246999552.0, + "40": 246999552.0, + "41": 246999552.0, + "42": 246999552.0, + "43": 246999552.0, + "44": 246999552.0, + "45": 246999552.0, + "46": 246999552.0, + "47": 246999552.0, + "48": 246999552.0, + "49": 246999552.0, + "50": 246999552.0, + "51": 246999552.0, + "52": 246999552.0, + "53": 246999552.0, + "54": 246999552.0, + "55": 246999552.0, + "56": 246999552.0, + "57": 246999552.0, + "58": 246999552.0, + "59": 246999552.0, + "60": 246999552.0, + "61": 246999552.0, + "62": 246999552.0, + "63": 246999552.0, + "64": 246999552.0, + "65": 246999552.0, + "66": 246999552.0, + "67": 246999552.0, + "68": 246999552.0, + "69": 246999552.0, + "70": 246999552.0, + "71": 246999552.0, + "72": 246999552.0, + "73": 246999552.0, + "74": 246999552.0, + "75": 246999552.0, + "76": 246999552.0, + "77": 246999552.0, + "78": 246999552.0, + "79": 246999552.0, + "80": 246999552.0, + "81": 246999552.0, + "82": 246999552.0, + "83": 246999552.0, + "84": 246999552.0, + "85": 246999552.0, + "86": 246999552.0, + "87": 246999552.0, + "88": 246999552.0, + "89": 246999552.0, + "90": 246999552.0, + "91": 246999552.0, + "92": 246999552.0, + "93": 246999552.0, + "94": 246999552.0, + "95": 246999552.0, + "96": 246999552.0, + "97": 246999552.0, + "98": 246999552.0, + "99": 246999552.0, + "100": 246999552.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1956312576.0, - "2": 1956313600.0, - "3": 1956313600.0, - "4": 1956313600.0, - "5": 1956313600.0, - "6": 1956313600.0, - "7": 1956313600.0, - "8": 1956313600.0, - "9": 1956313600.0, - "10": 1956313600.0, - "11": 1956313600.0, - "12": 1956313600.0, - "13": 1956313600.0, - "14": 1956313600.0, - "15": 1956313600.0, - "16": 1956313600.0, - "17": 1956313600.0, - "18": 1956313600.0, - "19": 1956313600.0, - "20": 1956313600.0, - "21": 1956313600.0, - "22": 1956313600.0, - "23": 1956313600.0, - "24": 1956313600.0, - "25": 1956313600.0, - "26": 1956313600.0, - "27": 1956313600.0, - "28": 1956313600.0, - "29": 1956313600.0, - "30": 1956313600.0, - "31": 1956313600.0, - "32": 1956313600.0, - "33": 1956313600.0, - "34": 1956313600.0, - "35": 1956313600.0, - "36": 1956313600.0, - "37": 1956313600.0, - "38": 1956313600.0, - "39": 1956313600.0, - "40": 1956313600.0, - "41": 1956313600.0, - "42": 1956313600.0, - "43": 1956313600.0, - "44": 1956313600.0, - "45": 1956313600.0, - "46": 1956313600.0, - "47": 1956313600.0, - "48": 1956313600.0, - "49": 1956313600.0, - "50": 1956313600.0, - "51": 1956313600.0, - "52": 1956313600.0, - "53": 1956313600.0, - "54": 1956313600.0, - "55": 1956313600.0, - "56": 1956313600.0, - "57": 1956313600.0, - "58": 1956313600.0, - "59": 1956313600.0, - "60": 1956313600.0, - "61": 1956313600.0, - "62": 1956313600.0, - "63": 1956313600.0, - "64": 1956313600.0, - "65": 1956313600.0, - "66": 1956313600.0, - "67": 1956313600.0, - "68": 1956313600.0, - "69": 1956313600.0, - "70": 1956313600.0, - "71": 1956313600.0, - "72": 1956313600.0, - "73": 1956313600.0, - "74": 1956313600.0, - "75": 1956313600.0, - "76": 1956313600.0, - "77": 1956313600.0, - "78": 1956313600.0, - "79": 1956313600.0, - "80": 1956313600.0, - "81": 1956313600.0, - "82": 1956313600.0, - "83": 1956313600.0, - "84": 1956313600.0, - "85": 1956313600.0, - "86": 1956313600.0, - "87": 1956313600.0, - "88": 1956313600.0, - "89": 1956313600.0, - "90": 1956313600.0, - "91": 1956313600.0, - "92": 1956313600.0, - "93": 1956313600.0, - "94": 1956313600.0, - "95": 1956313600.0, - "96": 1956313600.0, - "97": 1956313600.0, - "98": 1956313600.0, - "99": 1956313600.0, - "100": 1956313600.0 + "1": 1503208960.0, + "2": 1503209984.0, + "3": 1503209984.0, + "4": 1503209984.0, + "5": 1503209984.0, + "6": 1503209984.0, + "7": 1503209984.0, + "8": 1503209984.0, + "9": 1503209984.0, + "10": 1503209984.0, + "11": 1503209984.0, + "12": 1503209984.0, + "13": 1503209984.0, + "14": 1503209984.0, + "15": 1503209984.0, + "16": 1503209984.0, + "17": 1503209984.0, + "18": 1503209984.0, + "19": 1503209984.0, + "20": 1503209984.0, + "21": 1503209984.0, + "22": 1503209984.0, + "23": 1503209984.0, + "24": 1503209984.0, + "25": 1503209984.0, + "26": 1503209984.0, + "27": 1503209984.0, + "28": 1503209984.0, + "29": 1503209984.0, + "30": 1503209984.0, + "31": 1503209984.0, + "32": 1503209984.0, + "33": 1503209984.0, + "34": 1503209984.0, + "35": 1503209984.0, + "36": 1503209984.0, + "37": 1503209984.0, + "38": 1503209984.0, + "39": 1503209984.0, + "40": 1503209984.0, + "41": 1503209984.0, + "42": 1503209984.0, + "43": 1503209984.0, + "44": 1503209984.0, + "45": 1503209984.0, + "46": 1503209984.0, + "47": 1503209984.0, + "48": 1503209984.0, + "49": 1503209984.0, + "50": 1503209984.0, + "51": 1503209984.0, + "52": 1503209984.0, + "53": 1503209984.0, + "54": 1503209984.0, + "55": 1503209984.0, + "56": 1503209984.0, + "57": 1503209984.0, + "58": 1503209984.0, + "59": 1503209984.0, + "60": 1503209984.0, + "61": 1503209984.0, + "62": 1503209984.0, + "63": 1503209984.0, + "64": 1503209984.0, + "65": 1503209984.0, + "66": 1503209984.0, + "67": 1503209984.0, + "68": 1503209984.0, + "69": 1503209984.0, + "70": 1503209984.0, + "71": 1503209984.0, + "72": 1503209984.0, + "73": 1503209984.0, + "74": 1503209984.0, + "75": 1503209984.0, + "76": 1503209984.0, + "77": 1503209984.0, + "78": 1503209984.0, + "79": 1503209984.0, + "80": 1503209984.0, + "81": 1503209984.0, + "82": 1503209984.0, + "83": 1503209984.0, + "84": 1503209984.0, + "85": 1503209984.0, + "86": 1503209984.0, + "87": 1503209984.0, + "88": 1503209984.0, + "89": 1503209984.0, + "90": 1503209984.0, + "91": 1503209984.0, + "92": 1503209984.0, + "93": 1503209984.0, + "94": 1503209984.0, + "95": 1503209984.0, + "96": 1503209984.0, + "97": 1503209984.0, + "98": 1503209984.0, + "99": 1503209984.0, + "100": 1503209984.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 4.9999, - "2": 0.17604, - "3": 0.16654, - "4": 0.15324, - "5": 0.14982, - "6": 0.15181, - "7": 0.15028, - "8": 0.15021, - "9": 0.14947, - "10": 0.15037, - "11": 0.15211, - "12": 0.15245, - "13": 0.1517, - "14": 0.15044, - "15": 0.15166, - "16": 0.14955, - "17": 0.15212, - "18": 0.15368, - "19": 0.15062, - "20": 0.15093, - "21": 0.1573, - "22": 0.15817, - "23": 0.14955, - "24": 0.14912, - "25": 0.15491, - "26": 0.14937, - "27": 0.15155, - "28": 0.15055, - "29": 0.14603, - "30": 0.14602, - "31": 0.14824, - "32": 0.14477, - "33": 0.14671, - "34": 0.14693, - "35": 0.14738, - "36": 0.14504, - "37": 0.14513, - "38": 0.14512, - "39": 0.14473, - "40": 0.14614, - "41": 0.14578, - "42": 0.14684, - "43": 0.14487, - "44": 0.14547, - "45": 0.145, - "46": 0.14486, - "47": 0.14751, - "48": 0.14552, - "49": 0.14493, - "50": 0.14395, - "51": 0.1521, - "52": 0.14666, - "53": 0.14801, - "54": 0.14826, - "55": 0.14557, - "56": 0.15142, - "57": 0.14933, - "58": 0.14555, - "59": 0.14614, - "60": 0.15938, - "61": 0.16219, - "62": 0.14894, - "63": 0.14392, - "64": 0.14433, - "65": 0.1452, - "66": 0.14488, - "67": 0.14508, - "68": 0.14493, - "69": 0.14702, - "70": 0.14432, - "71": 0.14412, - "72": 0.14561, - "73": 0.15534, - "74": 0.14715, - "75": 0.14564, - "76": 0.146, - "77": 0.14498, - "78": 0.14433, - "79": 0.14454, - "80": 0.1457, - "81": 0.14534, - "82": 0.14499, - "83": 0.14463, - "84": 0.1456, - "85": 0.14456, - "86": 0.1456, - "87": 0.14661, - "88": 0.1469, - "89": 0.14537, - "90": 0.14515, - "91": 0.14627, - "92": 0.14607, - "93": 0.14633, - "94": 0.14863, - "95": 0.14553, - "96": 0.14487, - "97": 0.14462, - "98": 0.14685, - "99": 0.14551, - "100": 0.14614 + "1": "nan", + "2": 4.54155, + "3": 0.16659, + "4": 0.15214, + "5": 0.1523, + "6": 0.15232, + "7": 0.15204, + "8": 0.15238, + "9": 0.1527, + "10": 0.15438, + "11": 0.15152, + "12": 0.15264, + "13": 0.15088, + "14": 0.15236, + "15": 0.15215, + "16": 0.15166, + "17": 0.15105, + "18": 0.1514, + "19": 0.15124, + "20": 0.1526, + "21": 0.15347, + "22": 0.15181, + "23": 0.15217, + "24": 0.15173, + "25": 0.15128, + "26": 0.15215, + "27": 0.15211, + "28": 0.15158, + "29": 0.15282, + "30": 0.15102, + "31": 0.1515, + "32": 0.15116, + "33": 0.15163, + "34": 0.15354, + "35": 0.15179, + "36": 0.15361, + "37": 0.15138, + "38": 0.15228, + "39": 0.15137, + "40": 0.15129, + "41": 0.15315, + "42": 0.15255, + "43": 0.15412, + "44": 0.15214, + "45": 0.15199, + "46": 0.15185, + "47": 0.15241, + "48": 0.15173, + "49": 0.15098, + "50": 0.15125, + "51": 0.15707, + "52": 0.15103, + "53": 0.15143, + "54": 0.15084, + "55": 0.15417, + "56": 0.15144, + "57": 0.15093, + "58": 0.15183, + "59": 0.15198, + "60": 0.14997, + "61": 0.15028, + "62": 0.15096, + "63": 0.15001, + "64": 0.15046, + "65": 0.15073, + "66": 0.15169, + "67": 0.15096, + "68": 0.1504, + "69": 0.15095, + "70": 0.15098, + "71": 0.15063, + "72": 0.14992, + "73": 0.14999, + "74": 0.15057, + "75": 0.15164, + "76": 0.14989, + "77": 0.15018, + "78": 0.15156, + "79": 0.15068, + "80": 0.1524, + "81": 0.15036, + "82": 0.15245, + "83": 0.15134, + "84": 0.1497, + "85": 0.15165, + "86": 0.15055, + "87": 0.15095, + "88": 0.15056, + "89": 0.15068, + "90": 0.15006, + "91": 0.14974, + "92": 0.15176, + "93": 0.14999, + "94": 0.15424, + "95": 0.15052, + "96": 0.15022, + "97": 0.14987, + "98": 0.15036, + "99": 0.28489, + "100": 0.26561 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json index 64a0d3b0293..0c807dccff2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.78091, "2": 10.80272, - "3": 10.8036, - "4": 10.77566, - "5": 10.83259, - "6": 10.83704, - "7": 10.79793, - "8": 10.79364, - "9": 10.808, - "10": 10.76116, - "11": 10.85297, - "12": 10.84152, - "13": 10.8247, - "14": 10.85822, - "15": 10.78238, - "16": 10.77927, - "17": 10.74878, - "18": 10.7897, - "19": 10.7749, - "20": 10.71704, - "21": 10.70811, - "22": 10.54787, - "23": 10.72978, - "24": 10.60324, - "25": 10.55979, - "26": 10.61611, - "27": 10.6446, - "28": 10.62463, - "29": 10.63492, - "30": 10.42362, - "31": 10.16499, - "32": 10.51313, - "33": 10.5094, - "34": 10.2668, - "35": 10.32318, - "36": 10.28865, - "37": 10.41114, - "38": 10.26426, - "39": 10.45, - "40": 10.17473, - "41": 10.20958, - "42": 10.27824, - "43": 9.91831, - "44": 10.03131, - "45": 9.91995, - "46": 9.8862, - "47": 10.19255, - "48": 9.92803, - "49": 9.61616, - "50": 9.98532, - "51": 9.90528, - "52": 9.80364, - "53": 10.12728, - "54": 10.00036, - "55": 9.9362, - "56": 9.68506, - "57": 9.55805, - "58": 9.90514, - "59": 9.63857, - "60": 9.57451, - "61": 9.76864, - "62": 10.03802, - "63": 9.44503, - "64": 9.82796, - "65": 9.00712, - "66": 9.77422, - "67": 9.41277, - "68": 9.84111, - "69": 9.82784, - "70": 9.79011, - "71": 9.66957, - "72": 9.62799, - "73": 9.5473, - "74": 9.03663, - "75": 9.49153, - "76": 9.16783, - "77": 10.10857, - "78": 9.77081, - "79": 9.4383, - "80": 9.45436, - "81": 9.52266, - "82": 9.7424, - "83": 9.37076, - "84": 9.45377, - "85": 9.65832, - "86": 9.12522, - "87": 9.62697, - "88": 9.79619, - "89": 9.66054, - "90": 9.85081, - "91": 9.39408, - "92": 9.40744, - "93": 9.13595, - "94": 8.89048, - "95": 9.563, - "96": 9.5714, - "97": 9.34318, - "98": 9.73026, - "99": 8.95002, - "100": 9.4424 + "3": 10.8033, + "4": 10.77546, + "5": 10.83234, + "6": 10.83663, + "7": 10.79792, + "8": 10.79503, + "9": 10.80759, + "10": 10.7611, + "11": 10.85386, + "12": 10.8411, + "13": 10.82469, + "14": 10.8577, + "15": 10.7827, + "16": 10.77874, + "17": 10.74936, + "18": 10.78893, + "19": 10.77602, + "20": 10.71702, + "21": 10.70735, + "22": 10.54904, + "23": 10.7305, + "24": 10.60326, + "25": 10.55864, + "26": 10.616, + "27": 10.64522, + "28": 10.62564, + "29": 10.63518, + "30": 10.42293, + "31": 10.16453, + "32": 10.5133, + "33": 10.50808, + "34": 10.26715, + "35": 10.32347, + "36": 10.2895, + "37": 10.41048, + "38": 10.26379, + "39": 10.45045, + "40": 10.17505, + "41": 10.20993, + "42": 10.27805, + "43": 9.91864, + "44": 10.03154, + "45": 9.92007, + "46": 9.88602, + "47": 10.19269, + "48": 9.92801, + "49": 9.61693, + "50": 9.98555, + "51": 9.90533, + "52": 9.80376, + "53": 10.1272, + "54": 10.00078, + "55": 9.93605, + "56": 9.685, + "57": 9.55797, + "58": 9.90491, + "59": 9.63852, + "60": 9.57468, + "61": 9.76828, + "62": 10.03804, + "63": 9.44497, + "64": 9.82812, + "65": 9.00766, + "66": 9.77473, + "67": 9.41308, + "68": 9.841, + "69": 9.8278, + "70": 9.79024, + "71": 9.66985, + "72": 9.62812, + "73": 9.54762, + "74": 9.03687, + "75": 9.49155, + "76": 9.16765, + "77": 10.10868, + "78": 9.77084, + "79": 9.43859, + "80": 9.45452, + "81": 9.5231, + "82": 9.74237, + "83": 9.36998, + "84": 9.45372, + "85": 9.65829, + "86": 9.12499, + "87": 9.6268, + "88": 9.79668, + "89": 9.66102, + "90": 9.85128, + "91": 9.39437, + "92": 9.40751, + "93": 9.13625, + "94": 8.89077, + "95": 9.56271, + "96": 9.57141, + "97": 9.34357, + "98": 9.73041, + "99": 8.95022, + "100": 9.44231 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 30994.0, - "2": 32962.0, - "3": 33026.0, - "4": 30732.0, - "5": 36042.0, - "6": 37038.0, - "7": 34481.0, - "8": 31368.0, - "9": 33980.0, - "10": 29532.0, - "11": 37852.0, - "12": 34972.0, - "13": 36994.0, - "14": 37789.0, - "15": 34058.0, - "16": 36656.0, - "17": 34700.0, - "18": 34946.0, - "19": 35228.0, - "20": 32392.0, - "21": 33247.0, - "22": 30040.0, - "23": 37891.0, - "24": 32099.0, - "25": 30921.0, - "26": 34212.0, - "27": 34975.0, - "28": 36746.0, - "29": 37759.0, - "30": 32786.0, - "31": 30423.0, - "32": 35992.0, - "33": 36915.0, - "34": 32293.0, - "35": 33654.0, - "36": 34755.0, - "37": 37859.0, - "38": 36022.0, - "39": 38343.0, - "40": 35963.0, - "41": 35882.0, - "42": 36774.0, - "43": 34186.0, - "44": 33572.0, - "45": 35574.0, - "46": 37208.0, - "47": 40154.0, - "48": 36385.0, - "49": 36259.0, - "50": 38861.0, - "51": 38061.0, - "52": 37025.0, - "53": 41802.0, - "54": 41253.0, - "55": 37654.0, - "56": 41164.0, - "57": 37682.0, - "58": 41782.0, - "59": 39444.0, - "60": 40691.0, - "61": 40876.0, - "62": 43113.0, - "63": 38389.0, - "64": 43217.0, - "65": 41689.0, - "66": 45525.0, - "67": 41717.0, - "68": 40369.0, - "69": 41287.0, - "70": 45545.0, - "71": 41651.0, - "72": 41881.0, - "73": 45139.0, - "74": 35747.0, - "75": 39155.0, - "76": 44874.0, - "77": 45442.0, - "78": 46782.0, - "79": 48776.0, - "80": 47161.0, - "81": 51277.0, - "82": 49953.0, - "83": 45334.0, - "84": 46096.0, - "85": 49238.0, - "86": 46118.0, - "87": 49880.0, - "88": 47115.0, - "89": 48583.0, - "90": 49057.0, - "91": 45950.0, - "92": 47820.0, - "93": 46437.0, - "94": 47530.0, - "95": 48000.0, - "96": 50285.0, - "97": 46225.0, - "98": 49809.0, - "99": 47890.0, - "100": 44636.0 + "1": 31107.0, + "2": 32721.0, + "3": 33140.0, + "4": 30656.0, + "5": 35720.0, + "6": 37150.0, + "7": 34162.0, + "8": 31326.0, + "9": 34055.0, + "10": 29915.0, + "11": 37687.0, + "12": 35052.0, + "13": 36883.0, + "14": 37603.0, + "15": 34636.0, + "16": 36534.0, + "17": 34837.0, + "18": 34634.0, + "19": 34980.0, + "20": 32714.0, + "21": 33033.0, + "22": 30188.0, + "23": 37697.0, + "24": 32531.0, + "25": 31086.0, + "26": 33999.0, + "27": 35087.0, + "28": 36733.0, + "29": 37775.0, + "30": 32526.0, + "31": 30006.0, + "32": 36230.0, + "33": 37581.0, + "34": 32474.0, + "35": 33761.0, + "36": 35054.0, + "37": 37709.0, + "38": 35467.0, + "39": 38485.0, + "40": 36317.0, + "41": 35573.0, + "42": 36618.0, + "43": 33964.0, + "44": 33811.0, + "45": 35679.0, + "46": 37473.0, + "47": 40062.0, + "48": 36484.0, + "49": 36090.0, + "50": 38612.0, + "51": 37922.0, + "52": 36746.0, + "53": 41649.0, + "54": 40883.0, + "55": 38207.0, + "56": 40562.0, + "57": 37865.0, + "58": 41705.0, + "59": 39636.0, + "60": 40762.0, + "61": 40787.0, + "62": 43208.0, + "63": 38391.0, + "64": 43027.0, + "65": 41885.0, + "66": 45019.0, + "67": 41505.0, + "68": 40240.0, + "69": 41214.0, + "70": 45479.0, + "71": 41127.0, + "72": 42550.0, + "73": 45534.0, + "74": 35474.0, + "75": 39167.0, + "76": 44804.0, + "77": 45501.0, + "78": 46904.0, + "79": 49375.0, + "80": 47189.0, + "81": 51001.0, + "82": 49450.0, + "83": 45576.0, + "84": 46033.0, + "85": 49703.0, + "86": 46281.0, + "87": 49659.0, + "88": 47363.0, + "89": 48948.0, + "90": 49126.0, + "91": 45749.0, + "92": 47680.0, + "93": 46596.0, + "94": 47371.0, + "95": 47482.0, + "96": 49876.0, + "97": 46788.0, + "98": 49628.0, + "99": 47400.0, + "100": 44756.0 } }, "mem-allocated-bytes": { @@ -222,102 +222,102 @@ "2": 892869632.0, "3": 892869632.0, "4": 892865536.0, - "5": 892866048.0, + "5": 892868096.0, "6": 892867584.0, - "7": 892867072.0, - "8": 892868608.0, - "9": 892866560.0, - "10": 892868608.0, - "11": 892867072.0, - "12": 892866560.0, + "7": 892868608.0, + "8": 892867584.0, + "9": 892864512.0, + "10": 892868096.0, + "11": 892866560.0, + "12": 892867072.0, "13": 892866560.0, - "14": 892869120.0, - "15": 892868608.0, - "16": 892868096.0, + "14": 892870144.0, + "15": 892869632.0, + "16": 892866048.0, "17": 892868608.0, - "18": 892870656.0, + "18": 892869632.0, "19": 892869120.0, - "20": 892867584.0, - "21": 892867584.0, + "20": 892868096.0, + "21": 892868096.0, "22": 892864512.0, - "23": 892865536.0, + "23": 892864000.0, "24": 892868096.0, - "25": 892864512.0, - "26": 892867584.0, - "27": 892868096.0, - "28": 892866560.0, - "29": 892867072.0, - "30": 892864512.0, - "31": 892863488.0, - "32": 892862976.0, - "33": 892867072.0, - "34": 892866560.0, - "35": 892866048.0, - "36": 892869632.0, - "37": 892868096.0, + "25": 892864000.0, + "26": 892866560.0, + "27": 892867584.0, + "28": 892867072.0, + "29": 892866560.0, + "30": 892865536.0, + "31": 892862976.0, + "32": 892864000.0, + "33": 892865536.0, + "34": 892866048.0, + "35": 892866560.0, + "36": 892871168.0, + "37": 892866048.0, "38": 892867584.0, "39": 892867072.0, - "40": 892868096.0, - "41": 892866560.0, - "42": 892868608.0, - "43": 892862464.0, - "44": 892864000.0, - "45": 892866048.0, - "46": 892865536.0, - "47": 892866048.0, - "48": 892862464.0, - "49": 892864512.0, - "50": 892868096.0, + "40": 892866560.0, + "41": 892865536.0, + "42": 892867072.0, + "43": 892864000.0, + "44": 892864512.0, + "45": 892867072.0, + "46": 892865024.0, + "47": 892868608.0, + "48": 892862976.0, + "49": 892865536.0, + "50": 892866048.0, "51": 892861440.0, - "52": 892859904.0, - "53": 892862464.0, + "52": 892858880.0, + "53": 892863488.0, "54": 892862464.0, "55": 892863488.0, - "56": 892866048.0, + "56": 892865536.0, "57": 892858368.0, - "58": 892860416.0, + "58": 892861440.0, "59": 892859904.0, - "60": 892860928.0, - "61": 892869632.0, - "62": 892866560.0, + "60": 892861952.0, + "61": 892870144.0, + "62": 892867072.0, "63": 892862976.0, - "64": 892864512.0, + "64": 892864000.0, "65": 892852736.0, "66": 892867072.0, - "67": 892862464.0, + "67": 892862976.0, "68": 892869632.0, - "69": 892865536.0, + "69": 892864512.0, "70": 892867584.0, - "71": 892869632.0, + "71": 892868608.0, "72": 892861440.0, - "73": 892869120.0, + "73": 892868608.0, "74": 892859392.0, - "75": 892868096.0, - "76": 892867584.0, + "75": 892868608.0, + "76": 892868608.0, "77": 892868096.0, - "78": 892864512.0, - "79": 892865536.0, - "80": 892865536.0, + "78": 892866048.0, + "79": 892865024.0, + "80": 892862976.0, "81": 892867072.0, "82": 892865024.0, - "83": 892861952.0, - "84": 892862464.0, - "85": 892862976.0, - "86": 892862464.0, - "87": 892871168.0, - "88": 892863488.0, - "89": 892865536.0, + "83": 892862464.0, + "84": 892861952.0, + "85": 892863488.0, + "86": 892862976.0, + "87": 892869632.0, + "88": 892862976.0, + "89": 892864000.0, "90": 892867072.0, - "91": 892868096.0, - "92": 892866560.0, + "91": 892869120.0, + "92": 892866048.0, "93": 892869632.0, "94": 892865536.0, - "95": 892866048.0, - "96": 892866048.0, - "97": 892864000.0, - "98": 892868608.0, + "95": 892867584.0, + "96": 892867072.0, + "97": 892863488.0, + "98": 892868096.0, "99": 892860928.0, - "100": 892862976.0 + "100": 892863488.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1918568448.0, - "2": 2157712384.0, - "3": 2157712384.0, - "4": 2157712384.0, - "5": 2159109632.0, - "6": 2159109632.0, - "7": 2159109632.0, - "8": 2159109632.0, - "9": 2159109632.0, - "10": 2159142912.0, - "11": 2159142912.0, - "12": 2159142912.0, - "13": 2159142912.0, - "14": 2159633920.0, - "15": 2159633920.0, - "16": 2159633920.0, - "17": 2159633920.0, - "18": 2159633920.0, - "19": 2159633920.0, - "20": 2159633920.0, - "21": 2159633920.0, - "22": 2159633920.0, - "23": 2159633920.0, - "24": 2159633920.0, - "25": 2159633920.0, - "26": 2159802368.0, - "27": 2159802368.0, - "28": 2159802368.0, - "29": 2159802368.0, - "30": 2159802368.0, - "31": 2159802368.0, - "32": 2159802368.0, - "33": 2159802368.0, - "34": 2159802368.0, - "35": 2159802368.0, - "36": 2159802368.0, - "37": 2159802368.0, - "38": 2159802368.0, - "39": 2159802368.0, - "40": 2159802368.0, - "41": 2159802368.0, - "42": 2159802368.0, - "43": 2159802368.0, - "44": 2159802368.0, - "45": 2159802368.0, - "46": 2159802368.0, - "47": 2159802368.0, - "48": 2159802368.0, - "49": 2159802368.0, - "50": 2159802368.0, - "51": 2159802368.0, - "52": 2159802368.0, - "53": 2159802368.0, - "54": 2159802368.0, - "55": 2159802368.0, - "56": 2159802368.0, - "57": 2159802368.0, - "58": 2159802368.0, - "59": 2159802368.0, - "60": 2159802368.0, - "61": 2159802368.0, - "62": 2159802368.0, - "63": 2159802368.0, - "64": 2159802368.0, - "65": 2159802368.0, - "66": 2159802368.0, - "67": 2159802368.0, - "68": 2159802368.0, - "69": 2159802368.0, - "70": 2159802368.0, - "71": 2159802368.0, - "72": 2159802368.0, - "73": 2160337408.0, - "74": 2160337408.0, - "75": 2160337408.0, - "76": 2160337408.0, - "77": 2160337408.0, - "78": 2160337408.0, - "79": 2160337408.0, - "80": 2160337408.0, - "81": 2160337408.0, - "82": 2160337408.0, - "83": 2160337408.0, - "84": 2161362944.0, - "85": 2161362944.0, - "86": 2161362944.0, - "87": 2161362944.0, - "88": 2161362944.0, - "89": 2161362944.0, - "90": 2161362944.0, - "91": 2161362944.0, - "92": 2161362944.0, - "93": 2161362944.0, - "94": 2161362944.0, - "95": 2162391552.0, - "96": 2162391552.0, - "97": 2162391552.0, - "98": 2162391552.0, - "99": 2162391552.0, - "100": 2162391552.0 + "1": 1918914560.0, + "2": 2158009344.0, + "3": 2158009344.0, + "4": 2158009344.0, + "5": 2158620672.0, + "6": 2158620672.0, + "7": 2158620672.0, + "8": 2158620672.0, + "9": 2158620672.0, + "10": 2159374848.0, + "11": 2159374848.0, + "12": 2159374848.0, + "13": 2159374848.0, + "14": 2159374848.0, + "15": 2159374848.0, + "16": 2159374848.0, + "17": 2160485376.0, + "18": 2160485376.0, + "19": 2160485376.0, + "20": 2160485376.0, + "21": 2160485376.0, + "22": 2160485376.0, + "23": 2160485376.0, + "24": 2160485376.0, + "25": 2160485376.0, + "26": 2160485376.0, + "27": 2160485376.0, + "28": 2160485376.0, + "29": 2160485376.0, + "30": 2160485376.0, + "31": 2160485376.0, + "32": 2160485376.0, + "33": 2160485376.0, + "34": 2160485376.0, + "35": 2160485376.0, + "36": 2160485376.0, + "37": 2160485376.0, + "38": 2160485376.0, + "39": 2160485376.0, + "40": 2160485376.0, + "41": 2160485376.0, + "42": 2160485376.0, + "43": 2160485376.0, + "44": 2160485376.0, + "45": 2160485376.0, + "46": 2160485376.0, + "47": 2160485376.0, + "48": 2160485376.0, + "49": 2160485376.0, + "50": 2160485376.0, + "51": 2160485376.0, + "52": 2160485376.0, + "53": 2160485376.0, + "54": 2160485376.0, + "55": 2160485376.0, + "56": 2160485376.0, + "57": 2160485376.0, + "58": 2160485376.0, + "59": 2160485376.0, + "60": 2160485376.0, + "61": 2160485376.0, + "62": 2160485376.0, + "63": 2160485376.0, + "64": 2160485376.0, + "65": 2160485376.0, + "66": 2160485376.0, + "67": 2160485376.0, + "68": 2160485376.0, + "69": 2160485376.0, + "70": 2160485376.0, + "71": 2160485376.0, + "72": 2160485376.0, + "73": 2160485376.0, + "74": 2160485376.0, + "75": 2160485376.0, + "76": 2160485376.0, + "77": 2160485376.0, + "78": 2160485376.0, + "79": 2160485376.0, + "80": 2160485376.0, + "81": 2160485376.0, + "82": 2160485376.0, + "83": 2160485376.0, + "84": 2161049088.0, + "85": 2161049088.0, + "86": 2161049088.0, + "87": 2161049088.0, + "88": 2161049088.0, + "89": 2161278464.0, + "90": 2161278464.0, + "91": 2161278464.0, + "92": 2161278464.0, + "93": 2161278464.0, + "94": 2161278464.0, + "95": 2161278464.0, + "96": 2161278464.0, + "97": 2161278464.0, + "98": 2161278464.0, + "99": 2161278464.0, + "100": 2161278464.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 14.93722, - "2": 0.29196, - "3": 0.25566, - "4": 0.22819, - "5": 0.21657, - "6": 0.22742, - "7": 0.23255, - "8": 0.21868, - "9": 0.23203, - "10": 0.22911, - "11": 0.22371, - "12": 0.22358, - "13": 0.21762, - "14": 0.2166, - "15": 0.2341, - "16": 0.21834, - "17": 0.21429, - "18": 0.21499, - "19": 0.2158, - "20": 0.21523, - "21": 0.21654, - "22": 0.21788, - "23": 0.21597, - "24": 0.20917, - "25": 0.2076, - "26": 0.20309, - "27": 0.20463, - "28": 0.57074, - "29": 0.20266, - "30": 0.21832, - "31": 0.23121, - "32": 0.2052, - "33": 0.20847, - "34": 0.22756, - "35": 0.21093, - "36": 0.20495, - "37": 0.20762, - "38": 0.20131, - "39": 0.1991, - "40": 0.20426, - "41": 0.20518, - "42": 0.20555, - "43": 0.21112, - "44": 0.20079, - "45": 0.21854, - "46": 0.22885, - "47": 0.20366, - "48": 0.21784, - "49": 0.23722, - "50": 0.20288, - "51": 0.23225, - "52": 0.23281, - "53": 0.20606, - "54": 0.21135, - "55": 0.21897, - "56": 0.25991, - "57": 0.22845, - "58": 0.21751, - "59": 0.21469, - "60": 0.21187, - "61": 0.20946, - "62": 0.21358, - "63": 0.21765, - "64": 0.20357, - "65": 0.27698, - "66": 0.2118, - "67": 0.25518, - "68": 0.22631, - "69": 0.21209, - "70": 0.2039, - "71": 0.22504, - "72": 0.22276, - "73": 0.25179, - "74": 0.22993, - "75": 0.21538, - "76": 0.23629, - "77": 0.20835, - "78": 0.21168, - "79": 0.21631, - "80": 0.21797, - "81": 0.20362, - "82": 0.20269, - "83": 0.21014, - "84": 0.21456, - "85": 0.20971, - "86": 0.22253, - "87": 0.20037, - "88": 0.20403, - "89": 0.21541, - "90": 0.21443, - "91": 0.23258, - "92": 0.21749, - "93": 0.22377, - "94": 0.23559, - "95": 0.21351, - "96": 0.20316, - "97": 0.21349, - "98": 0.20244, - "99": 0.21023, - "100": 0.20508 + "1": "nan", + "2": 8.24297, + "3": 0.24917, + "4": 0.22708, + "5": 0.21091, + "6": 0.20871, + "7": 0.22571, + "8": 0.20589, + "9": 0.22299, + "10": 0.20889, + "11": 0.20974, + "12": 0.2032, + "13": 0.20661, + "14": 0.20582, + "15": 0.21382, + "16": 0.20554, + "17": 0.22023, + "18": 0.20261, + "19": 0.20397, + "20": 0.20382, + "21": 0.20348, + "22": 0.20182, + "23": 0.20472, + "24": 0.26023, + "25": 0.2262, + "26": 0.20234, + "27": 0.19988, + "28": 0.20256, + "29": 0.20159, + "30": 0.20319, + "31": 0.23887, + "32": 0.20104, + "33": 0.20656, + "34": 0.22518, + "35": 0.20637, + "36": 0.21099, + "37": 0.20482, + "38": 0.21085, + "39": 0.2017, + "40": 0.20811, + "41": 0.20764, + "42": 0.21063, + "43": 0.2127, + "44": 0.20315, + "45": 0.22094, + "46": 0.22801, + "47": 0.20862, + "48": 0.22449, + "49": 0.25173, + "50": 0.20364, + "51": 0.23222, + "52": 0.22675, + "53": 0.20912, + "54": 0.21494, + "55": 0.22275, + "56": 0.26966, + "57": 0.24178, + "58": 0.21645, + "59": 0.21724, + "60": 0.21548, + "61": 0.20622, + "62": 0.20684, + "63": 0.22652, + "64": 0.21905, + "65": 0.27593, + "66": 0.21101, + "67": 0.25744, + "68": 0.22501, + "69": 0.21481, + "70": 0.20907, + "71": 0.23526, + "72": 0.21997, + "73": 0.24429, + "74": 0.22929, + "75": 0.22107, + "76": 0.23601, + "77": 0.21064, + "78": 0.21509, + "79": 0.22395, + "80": 0.22205, + "81": 0.21058, + "82": 0.20887, + "83": 0.21535, + "84": 0.21652, + "85": 0.2148, + "86": 0.23867, + "87": 0.20938, + "88": 0.21057, + "89": 0.22376, + "90": 0.21051, + "91": 0.23361, + "92": 0.21808, + "93": 0.22811, + "94": 0.23968, + "95": 0.22302, + "96": 0.21171, + "97": 0.21849, + "98": 0.21172, + "99": 0.21854, + "100": 0.21108 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json index bf57cfecddc..d4c864e5403 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.78091, "2": 10.80272, - "3": 10.8036, - "4": 10.77566, - "5": 10.83259, - "6": 10.83704, - "7": 10.79728, - "8": 10.79467, - "9": 10.80828, - "10": 10.76154, - "11": 10.85384, - "12": 10.84189, - "13": 10.82465, - "14": 10.85824, - "15": 10.78235, - "16": 10.77923, - "17": 10.7484, - "18": 10.78919, - "19": 10.77567, - "20": 10.71707, - "21": 10.70767, - "22": 10.54782, - "23": 10.72977, - "24": 10.60346, - "25": 10.55815, - "26": 10.61659, - "27": 10.6449, - "28": 10.62536, - "29": 10.6349, - "30": 10.42303, - "31": 10.16459, - "32": 10.51284, - "33": 10.50836, - "34": 10.2667, - "35": 10.32353, - "36": 10.2895, - "37": 10.41051, - "38": 10.26406, - "39": 10.44988, - "40": 10.17537, - "41": 10.20908, - "42": 10.27843, - "43": 9.91808, - "44": 10.03128, - "45": 9.92032, - "46": 9.88579, - "47": 10.19208, - "48": 9.92758, - "49": 9.61634, - "50": 9.98512, - "51": 9.90532, - "52": 9.8039, - "53": 10.12749, - "54": 10.00016, - "55": 9.93664, - "56": 9.68581, - "57": 9.55837, - "58": 9.90508, - "59": 9.63839, - "60": 9.57464, - "61": 9.76841, - "62": 10.03826, - "63": 9.44553, - "64": 9.82755, - "65": 9.00746, - "66": 9.77476, - "67": 9.41315, - "68": 9.84101, - "69": 9.8283, - "70": 9.79049, - "71": 9.66947, - "72": 9.62799, - "73": 9.54696, - "74": 9.03684, - "75": 9.49167, - "76": 9.16779, - "77": 10.1088, - "78": 9.77072, - "79": 9.43806, - "80": 9.45438, - "81": 9.5225, - "82": 9.74228, - "83": 9.36999, - "84": 9.45397, - "85": 9.65808, - "86": 9.12501, - "87": 9.62705, - "88": 9.79641, - "89": 9.66075, - "90": 9.8512, - "91": 9.39414, - "92": 9.40741, - "93": 9.13573, - "94": 8.89066, - "95": 9.56273, - "96": 9.5712, + "3": 10.8033, + "4": 10.77546, + "5": 10.83234, + "6": 10.83686, + "7": 10.79766, + "8": 10.79538, + "9": 10.80819, + "10": 10.76119, + "11": 10.85366, + "12": 10.84129, + "13": 10.82509, + "14": 10.85805, + "15": 10.78316, + "16": 10.77881, + "17": 10.74855, + "18": 10.78922, + "19": 10.77581, + "20": 10.71696, + "21": 10.70726, + "22": 10.54794, + "23": 10.72989, + "24": 10.60253, + "25": 10.55878, + "26": 10.61677, + "27": 10.64505, + "28": 10.62519, + "29": 10.63517, + "30": 10.42276, + "31": 10.16465, + "32": 10.51335, + "33": 10.50958, + "34": 10.26657, + "35": 10.32347, + "36": 10.28979, + "37": 10.4109, + "38": 10.26384, + "39": 10.45032, + "40": 10.17507, + "41": 10.20977, + "42": 10.27804, + "43": 9.91819, + "44": 10.03149, + "45": 9.92017, + "46": 9.88583, + "47": 10.19254, + "48": 9.9288, + "49": 9.61639, + "50": 9.98524, + "51": 9.90536, + "52": 9.80381, + "53": 10.12759, + "54": 10.00056, + "55": 9.93634, + "56": 9.68523, + "57": 9.55804, + "58": 9.90541, + "59": 9.63836, + "60": 9.57461, + "61": 9.76818, + "62": 10.03824, + "63": 9.44518, + "64": 9.8279, + "65": 9.00725, + "66": 9.77416, + "67": 9.41301, + "68": 9.84131, + "69": 9.82817, + "70": 9.7901, + "71": 9.66981, + "72": 9.6282, + "73": 9.5471, + "74": 9.03695, + "75": 9.49158, + "76": 9.16784, + "77": 10.1084, + "78": 9.77052, + "79": 9.43865, + "80": 9.45457, + "81": 9.52285, + "82": 9.74241, + "83": 9.37019, + "84": 9.45398, + "85": 9.65802, + "86": 9.12483, + "87": 9.62679, + "88": 9.79649, + "89": 9.66102, + "90": 9.85116, + "91": 9.39426, + "92": 9.40771, + "93": 9.1361, + "94": 8.89095, + "95": 9.56306, + "96": 9.57164, "97": 9.34355, - "98": 9.73013, - "99": 8.95039, - "100": 9.44212 + "98": 9.73064, + "99": 8.95015, + "100": 9.4422 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 30994.0, - "2": 32962.0, - "3": 33026.0, - "4": 30732.0, - "5": 36042.0, - "6": 36987.0, - "7": 34490.0, - "8": 31442.0, - "9": 33931.0, - "10": 29993.0, - "11": 37681.0, - "12": 34978.0, - "13": 36675.0, - "14": 37601.0, - "15": 34369.0, - "16": 36581.0, - "17": 34615.0, - "18": 34408.0, - "19": 35362.0, - "20": 32532.0, - "21": 33181.0, - "22": 30426.0, - "23": 37807.0, - "24": 32299.0, - "25": 30879.0, - "26": 33994.0, - "27": 34721.0, - "28": 36576.0, - "29": 37196.0, - "30": 32443.0, - "31": 30177.0, - "32": 35948.0, - "33": 37549.0, - "34": 32243.0, - "35": 33961.0, - "36": 34340.0, - "37": 37853.0, - "38": 35694.0, - "39": 38797.0, - "40": 36317.0, - "41": 35380.0, - "42": 36704.0, - "43": 34045.0, - "44": 33691.0, - "45": 35877.0, - "46": 36737.0, - "47": 40148.0, - "48": 36696.0, - "49": 36203.0, - "50": 38688.0, - "51": 37791.0, - "52": 37021.0, - "53": 41944.0, - "54": 40947.0, - "55": 37727.0, - "56": 40761.0, - "57": 37481.0, - "58": 41787.0, - "59": 39365.0, - "60": 40922.0, - "61": 41100.0, - "62": 43388.0, - "63": 38269.0, - "64": 43526.0, - "65": 41821.0, - "66": 44876.0, - "67": 42497.0, - "68": 39967.0, - "69": 41255.0, - "70": 45781.0, - "71": 42348.0, - "72": 42151.0, - "73": 45043.0, - "74": 35705.0, - "75": 39397.0, - "76": 45340.0, - "77": 45670.0, - "78": 46614.0, - "79": 49159.0, - "80": 47317.0, - "81": 51048.0, - "82": 49312.0, - "83": 45257.0, - "84": 45494.0, - "85": 49366.0, - "86": 45783.0, - "87": 50223.0, - "88": 47536.0, - "89": 48826.0, - "90": 49499.0, - "91": 45726.0, - "92": 47926.0, - "93": 46433.0, - "94": 47675.0, - "95": 47504.0, - "96": 50174.0, - "97": 46465.0, - "98": 49255.0, - "99": 48053.0, - "100": 44507.0 + "1": 31107.0, + "2": 32721.0, + "3": 33140.0, + "4": 30656.0, + "5": 35799.0, + "6": 37075.0, + "7": 34822.0, + "8": 31224.0, + "9": 34055.0, + "10": 29672.0, + "11": 38013.0, + "12": 34877.0, + "13": 36872.0, + "14": 37309.0, + "15": 33893.0, + "16": 36221.0, + "17": 34432.0, + "18": 34490.0, + "19": 35021.0, + "20": 32199.0, + "21": 33337.0, + "22": 30425.0, + "23": 37809.0, + "24": 31989.0, + "25": 31266.0, + "26": 34336.0, + "27": 35216.0, + "28": 36798.0, + "29": 37760.0, + "30": 32671.0, + "31": 30064.0, + "32": 35953.0, + "33": 37477.0, + "34": 32620.0, + "35": 33974.0, + "36": 34976.0, + "37": 38143.0, + "38": 35436.0, + "39": 38055.0, + "40": 35938.0, + "41": 35563.0, + "42": 36971.0, + "43": 34043.0, + "44": 33807.0, + "45": 36044.0, + "46": 36928.0, + "47": 39956.0, + "48": 35913.0, + "49": 36479.0, + "50": 38660.0, + "51": 38048.0, + "52": 36865.0, + "53": 41892.0, + "54": 40976.0, + "55": 37655.0, + "56": 40772.0, + "57": 37718.0, + "58": 41662.0, + "59": 39464.0, + "60": 40840.0, + "61": 41327.0, + "62": 42869.0, + "63": 38168.0, + "64": 43178.0, + "65": 41726.0, + "66": 45268.0, + "67": 41722.0, + "68": 40064.0, + "69": 41172.0, + "70": 45454.0, + "71": 41783.0, + "72": 41937.0, + "73": 45471.0, + "74": 35899.0, + "75": 39357.0, + "76": 44869.0, + "77": 45870.0, + "78": 47025.0, + "79": 48721.0, + "80": 47404.0, + "81": 51085.0, + "82": 49565.0, + "83": 45472.0, + "84": 45819.0, + "85": 49022.0, + "86": 45683.0, + "87": 49828.0, + "88": 47202.0, + "89": 49021.0, + "90": 49593.0, + "91": 45590.0, + "92": 47773.0, + "93": 46323.0, + "94": 47575.0, + "95": 47766.0, + "96": 49962.0, + "97": 46643.0, + "98": 49851.0, + "99": 47485.0, + "100": 44436.0 } }, "mem-allocated-bytes": { @@ -222,101 +222,101 @@ "2": 1254506496.0, "3": 1254506496.0, "4": 1254502400.0, - "5": 1254502912.0, - "6": 1254504448.0, - "7": 1254504960.0, - "8": 1254504960.0, + "5": 1254504960.0, + "6": 1254503936.0, + "7": 1254504448.0, + "8": 1254505984.0, "9": 1254502912.0, - "10": 1254504448.0, - "11": 1254504960.0, - "12": 1254503936.0, - "13": 1254501888.0, - "14": 1254506496.0, + "10": 1254504960.0, + "11": 1254505472.0, + "12": 1254504448.0, + "13": 1254504448.0, + "14": 1254507008.0, "15": 1254505472.0, "16": 1254504448.0, "17": 1254505472.0, - "18": 1254503424.0, - "19": 1254504960.0, - "20": 1254504448.0, + "18": 1254505472.0, + "19": 1254507008.0, + "20": 1254505984.0, "21": 1254504448.0, - "22": 1254502400.0, - "23": 1254501888.0, + "22": 1254501888.0, + "23": 1254502400.0, "24": 1254504448.0, - "25": 1254501376.0, + "25": 1254500864.0, "26": 1254503424.0, - "27": 1254502912.0, + "27": 1254504448.0, "28": 1254503936.0, - "29": 1254506496.0, + "29": 1254504448.0, "30": 1254501376.0, "31": 1254500352.0, - "32": 1254501376.0, + "32": 1254499840.0, "33": 1254503936.0, - "34": 1254503936.0, + "34": 1254503424.0, "35": 1254502912.0, - "36": 1254506496.0, - "37": 1254504448.0, - "38": 1254504960.0, - "39": 1254503936.0, - "40": 1254503936.0, - "41": 1254504448.0, - "42": 1254503936.0, - "43": 1254500864.0, + "36": 1254505984.0, + "37": 1254505472.0, + "38": 1254507520.0, + "39": 1254503424.0, + "40": 1254502912.0, + "41": 1254503936.0, + "42": 1254505472.0, + "43": 1254500352.0, "44": 1254502400.0, - "45": 1254503424.0, + "45": 1254502400.0, "46": 1254501888.0, - "47": 1254504960.0, - "48": 1254500864.0, + "47": 1254505472.0, + "48": 1254499840.0, "49": 1254501376.0, - "50": 1254503936.0, - "51": 1254497792.0, - "52": 1254497280.0, - "53": 1254498816.0, - "54": 1254499328.0, - "55": 1254501376.0, - "56": 1254502912.0, - "57": 1254494208.0, - "58": 1254499328.0, - "59": 1254496256.0, - "60": 1254497792.0, - "61": 1254505984.0, + "50": 1254502912.0, + "51": 1254498816.0, + "52": 1254496768.0, + "53": 1254499840.0, + "54": 1254500352.0, + "55": 1254499840.0, + "56": 1254504448.0, + "57": 1254494720.0, + "58": 1254497792.0, + "59": 1254495744.0, + "60": 1254498816.0, + "61": 1254503936.0, "62": 1254504960.0, - "63": 1254500352.0, - "64": 1254499840.0, + "63": 1254499840.0, + "64": 1254500864.0, "65": 1254489600.0, - "66": 1254503936.0, - "67": 1254499328.0, + "66": 1254503424.0, + "67": 1254499840.0, "68": 1254507008.0, - "69": 1254502400.0, - "70": 1254503936.0, - "71": 1254505984.0, - "72": 1254497280.0, + "69": 1254502912.0, + "70": 1254504448.0, + "71": 1254504960.0, + "72": 1254497792.0, "73": 1254505472.0, - "74": 1254496256.0, - "75": 1254505472.0, - "76": 1254504448.0, + "74": 1254496768.0, + "75": 1254503936.0, + "76": 1254505472.0, "77": 1254504960.0, - "78": 1254501376.0, + "78": 1254502400.0, "79": 1254501888.0, "80": 1254500864.0, - "81": 1254504448.0, - "82": 1254501376.0, + "81": 1254504960.0, + "82": 1254500864.0, "83": 1254498816.0, - "84": 1254498304.0, - "85": 1254500352.0, + "84": 1254499840.0, + "85": 1254499840.0, "86": 1254499840.0, - "87": 1254506496.0, - "88": 1254500352.0, - "89": 1254501888.0, - "90": 1254503936.0, - "91": 1254506496.0, - "92": 1254503936.0, - "93": 1254506496.0, - "94": 1254501376.0, - "95": 1254502912.0, + "87": 1254505984.0, + "88": 1254498816.0, + "89": 1254501376.0, + "90": 1254502912.0, + "91": 1254505984.0, + "92": 1254503424.0, + "93": 1254505472.0, + "94": 1254501888.0, + "95": 1254504448.0, "96": 1254502912.0, - "97": 1254500352.0, - "98": 1254508544.0, - "99": 1254498304.0, + "97": 1254500864.0, + "98": 1254504960.0, + "99": 1254497792.0, "100": 1254500864.0 } }, @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2038519808.0, - "2": 2520255488.0, - "3": 2520255488.0, - "4": 2520255488.0, - "5": 2520552960.0, - "6": 2520552960.0, - "7": 2520552960.0, - "8": 2520552960.0, - "9": 2520552960.0, - "10": 2520552960.0, - "11": 2520552960.0, - "12": 2520552960.0, - "13": 2520552960.0, - "14": 2520552960.0, - "15": 2520552960.0, - "16": 2520552960.0, - "17": 2520552960.0, - "18": 2520552960.0, - "19": 2520552960.0, - "20": 2520552960.0, - "21": 2520552960.0, - "22": 2520552960.0, - "23": 2520552960.0, - "24": 2520552960.0, - "25": 2520552960.0, - "26": 2520552960.0, - "27": 2520552960.0, - "28": 2520552960.0, - "29": 2520552960.0, - "30": 2520552960.0, - "31": 2520552960.0, - "32": 2520552960.0, - "33": 2521159680.0, - "34": 2521159680.0, - "35": 2521159680.0, - "36": 2521159680.0, - "37": 2521159680.0, - "38": 2521159680.0, - "39": 2521159680.0, - "40": 2521159680.0, - "41": 2521159680.0, - "42": 2521159680.0, - "43": 2521159680.0, - "44": 2521159680.0, - "45": 2521159680.0, - "46": 2521615360.0, - "47": 2521615360.0, - "48": 2521615360.0, - "49": 2521615360.0, - "50": 2521615360.0, - "51": 2521615360.0, - "52": 2521615360.0, - "53": 2521615360.0, - "54": 2521615360.0, - "55": 2521615360.0, - "56": 2521615360.0, - "57": 2521615360.0, - "58": 2521615360.0, - "59": 2521615360.0, - "60": 2521615360.0, - "61": 2521615360.0, - "62": 2521615360.0, - "63": 2521615360.0, - "64": 2521615360.0, - "65": 2521615360.0, - "66": 2521615360.0, - "67": 2521615360.0, - "68": 2521615360.0, - "69": 2521615360.0, - "70": 2521615360.0, - "71": 2521615360.0, - "72": 2521615360.0, - "73": 2521615360.0, - "74": 2521615360.0, - "75": 2521615360.0, - "76": 2521615360.0, - "77": 2521615360.0, - "78": 2521615360.0, - "79": 2521615360.0, - "80": 2521615360.0, - "81": 2521615360.0, - "82": 2521615360.0, - "83": 2521615360.0, - "84": 2521615360.0, - "85": 2521615360.0, - "86": 2521615360.0, - "87": 2521615360.0, - "88": 2521615360.0, - "89": 2521615360.0, - "90": 2521615360.0, - "91": 2521615360.0, - "92": 2521615360.0, - "93": 2521615360.0, - "94": 2521615360.0, - "95": 2523076096.0, - "96": 2523076096.0, - "97": 2523076096.0, - "98": 2523076096.0, - "99": 2523076096.0, - "100": 2523076096.0 + "1": 2039278592.0, + "2": 2519583744.0, + "3": 2519583744.0, + "4": 2519734272.0, + "5": 2521542656.0, + "6": 2521542656.0, + "7": 2521542656.0, + "8": 2521542656.0, + "9": 2521542656.0, + "10": 2521542656.0, + "11": 2521542656.0, + "12": 2521542656.0, + "13": 2521542656.0, + "14": 2521542656.0, + "15": 2521542656.0, + "16": 2521542656.0, + "17": 2521542656.0, + "18": 2521542656.0, + "19": 2521542656.0, + "20": 2521542656.0, + "21": 2521542656.0, + "22": 2521542656.0, + "23": 2521542656.0, + "24": 2521542656.0, + "25": 2521542656.0, + "26": 2521542656.0, + "27": 2521542656.0, + "28": 2521542656.0, + "29": 2521542656.0, + "30": 2521542656.0, + "31": 2521542656.0, + "32": 2521542656.0, + "33": 2521542656.0, + "34": 2521542656.0, + "35": 2521542656.0, + "36": 2521542656.0, + "37": 2521542656.0, + "38": 2521542656.0, + "39": 2521542656.0, + "40": 2521542656.0, + "41": 2521542656.0, + "42": 2521542656.0, + "43": 2521542656.0, + "44": 2521542656.0, + "45": 2521542656.0, + "46": 2522991616.0, + "47": 2522991616.0, + "48": 2522991616.0, + "49": 2522991616.0, + "50": 2522991616.0, + "51": 2522991616.0, + "52": 2522991616.0, + "53": 2522991616.0, + "54": 2522991616.0, + "55": 2522991616.0, + "56": 2522991616.0, + "57": 2522991616.0, + "58": 2522991616.0, + "59": 2522991616.0, + "60": 2522991616.0, + "61": 2522991616.0, + "62": 2522991616.0, + "63": 2522991616.0, + "64": 2522991616.0, + "65": 2522991616.0, + "66": 2522991616.0, + "67": 2522991616.0, + "68": 2522991616.0, + "69": 2522991616.0, + "70": 2522991616.0, + "71": 2522991616.0, + "72": 2522991616.0, + "73": 2522991616.0, + "74": 2522991616.0, + "75": 2522991616.0, + "76": 2522991616.0, + "77": 2522991616.0, + "78": 2522991616.0, + "79": 2522991616.0, + "80": 2522991616.0, + "81": 2522991616.0, + "82": 2522991616.0, + "83": 2522991616.0, + "84": 2522991616.0, + "85": 2522991616.0, + "86": 2522991616.0, + "87": 2522991616.0, + "88": 2522991616.0, + "89": 2522991616.0, + "90": 2522991616.0, + "91": 2522991616.0, + "92": 2522991616.0, + "93": 2522991616.0, + "94": 2522991616.0, + "95": 2523597312.0, + "96": 2523597312.0, + "97": 2523597312.0, + "98": 2523597312.0, + "99": 2523597312.0, + "100": 2523597312.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 17.78784, - "2": 0.2935, - "3": 0.25416, - "4": 0.28848, - "5": 0.27342, - "6": 0.21986, - "7": 0.22775, - "8": 0.21125, - "9": 0.22242, - "10": 0.20696, - "11": 0.21121, - "12": 0.20562, - "13": 0.20918, - "14": 0.20486, - "15": 0.22312, - "16": 0.20648, - "17": 0.21741, - "18": 0.20596, - "19": 0.20449, - "20": 0.20633, - "21": 0.20648, - "22": 0.20939, - "23": 0.20613, - "24": 0.2098, - "25": 0.21077, - "26": 0.20978, - "27": 0.20622, - "28": 0.20953, - "29": 0.2052, - "30": 0.20858, - "31": 0.23751, - "32": 0.20916, - "33": 0.21528, - "34": 0.22994, - "35": 0.20666, - "36": 0.56591, - "37": 0.2088, - "38": 0.20535, - "39": 0.20334, - "40": 0.21053, - "41": 0.20731, - "42": 0.21647, - "43": 0.21279, - "44": 0.20733, - "45": 0.22499, - "46": 0.22926, - "47": 0.21023, - "48": 0.21769, - "49": 0.24399, - "50": 0.21286, - "51": 0.238, - "52": 0.23293, - "53": 0.20987, - "54": 0.21516, - "55": 0.22388, - "56": 0.25985, - "57": 0.22604, - "58": 0.61513, - "59": 0.22219, - "60": 0.21734, - "61": 0.90688, - "62": 0.21705, - "63": 0.23992, - "64": 0.21828, - "65": 0.27683, - "66": 0.21653, - "67": 0.27213, - "68": 0.8349, - "69": 0.21293, - "70": 0.21051, - "71": 0.22862, - "72": 0.22498, - "73": 0.24298, - "74": 0.23094, - "75": 0.22956, - "76": 0.24583, - "77": 0.21646, - "78": 0.22364, - "79": 0.22898, - "80": 0.21878, - "81": 0.21415, - "82": 0.21267, - "83": 0.22485, - "84": 0.22454, - "85": 0.21746, - "86": 0.23031, - "87": 0.21423, - "88": 0.21226, - "89": 0.2196, - "90": 0.21327, - "91": 0.23392, - "92": 0.22086, - "93": 0.23306, - "94": 0.24169, - "95": 0.22202, - "96": 0.2155, - "97": 0.22184, - "98": 0.2139, - "99": 0.21705, - "100": 0.21654 + "1": "nan", + "2": 8.51904, + "3": 0.25584, + "4": 0.23199, + "5": 0.21547, + "6": 0.21649, + "7": 0.23094, + "8": 0.21057, + "9": 0.2241, + "10": 0.20897, + "11": 0.21478, + "12": 0.21044, + "13": 0.20902, + "14": 0.2089, + "15": 0.2149, + "16": 0.21454, + "17": 0.22147, + "18": 0.20804, + "19": 0.20834, + "20": 0.21151, + "21": 0.21055, + "22": 0.20874, + "23": 0.21032, + "24": 0.21033, + "25": 0.20839, + "26": 0.20718, + "27": 0.20747, + "28": 0.2057, + "29": 0.20635, + "30": 0.20714, + "31": 0.24758, + "32": 0.20807, + "33": 0.20967, + "34": 0.22818, + "35": 0.2141, + "36": 0.21109, + "37": 0.21116, + "38": 0.21698, + "39": 0.20632, + "40": 0.21527, + "41": 0.21578, + "42": 0.22039, + "43": 0.22418, + "44": 0.21237, + "45": 0.22508, + "46": 0.23293, + "47": 0.20944, + "48": 0.23637, + "49": 0.24841, + "50": 0.20949, + "51": 0.2335, + "52": 0.239, + "53": 0.21187, + "54": 0.21721, + "55": 0.22955, + "56": 0.26737, + "57": 0.23659, + "58": 0.62797, + "59": 0.22564, + "60": 0.21977, + "61": 0.21143, + "62": 0.20871, + "63": 0.23546, + "64": 0.21856, + "65": 0.27947, + "66": 0.21357, + "67": 0.26023, + "68": 0.23399, + "69": 0.21277, + "70": 0.21272, + "71": 0.23119, + "72": 0.22395, + "73": 0.24468, + "74": 0.2328, + "75": 0.2216, + "76": 0.2435, + "77": 0.2455, + "78": 0.22637, + "79": 0.22675, + "80": 0.2295, + "81": 0.21493, + "82": 0.21166, + "83": 0.21848, + "84": 0.22787, + "85": 0.22018, + "86": 0.23278, + "87": 0.21253, + "88": 0.21393, + "89": 0.22643, + "90": 0.20914, + "91": 0.23149, + "92": 0.21971, + "93": 0.2297, + "94": 0.24065, + "95": 0.22047, + "96": 0.21275, + "97": 0.22961, + "98": 0.20996, + "99": 0.22047, + "100": 0.21528 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json index 38498d3139b..46380ce788f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.80815, "2": 10.82612, - "3": 10.83032, - "4": 10.80963, - "5": 10.84127, - "6": 10.8581, - "7": 10.81967, - "8": 10.82506, - "9": 10.83749, - "10": 10.7783, - "11": 10.85781, - "12": 10.85539, - "13": 10.85233, - "14": 10.86699, - "15": 10.81253, - "16": 10.80292, - "17": 10.78098, - "18": 10.80788, - "19": 10.79276, - "20": 10.74548, - "21": 10.72785, - "22": 10.59608, - "23": 10.73999, - "24": 10.63509, - "25": 10.59832, - "26": 10.63517, - "27": 10.65744, - "28": 10.64536, - "29": 10.65122, - "30": 10.44144, - "31": 10.21465, - "32": 10.53342, - "33": 10.52518, - "34": 10.30171, - "35": 10.34871, - "36": 10.30843, - "37": 10.42353, - "38": 10.28859, - "39": 10.45514, - "40": 10.19363, - "41": 10.22791, - "42": 10.29725, - "43": 9.95871, - "44": 10.06717, - "45": 9.95955, - "46": 9.92614, - "47": 10.20607, - "48": 9.96021, - "49": 9.65854, - "50": 10.01296 + "3": 10.83021, + "4": 10.80972, + "5": 10.84221, + "6": 10.85854, + "7": 10.81917, + "8": 10.8246, + "9": 10.83781, + "10": 10.7791, + "11": 10.85744, + "12": 10.85583, + "13": 10.85189, + "14": 10.86713, + "15": 10.81236, + "16": 10.80239, + "17": 10.78179, + "18": 10.80713, + "19": 10.79207, + "20": 10.74661, + "21": 10.72807, + "22": 10.59589, + "23": 10.74, + "24": 10.63444, + "25": 10.59903, + "26": 10.63534, + "27": 10.6572, + "28": 10.64584, + "29": 10.65074, + "30": 10.44118, + "31": 10.21432, + "32": 10.53336, + "33": 10.52563, + "34": 10.30234, + "35": 10.34861, + "36": 10.30798, + "37": 10.42406, + "38": 10.28848, + "39": 10.45537, + "40": 10.19376, + "41": 10.22806, + "42": 10.29758, + "43": 9.95857, + "44": 10.06699, + "45": 9.96021, + "46": 9.92588, + "47": 10.2065, + "48": 9.96009, + "49": 9.65871, + "50": 10.01319 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 31590.0, - "2": 32940.0, - "3": 33668.0, - "4": 31186.0, - "5": 36214.0, - "6": 37169.0, - "7": 34770.0, - "8": 31862.0, - "9": 34102.0, - "10": 30394.0, - "11": 38432.0, - "12": 35039.0, - "13": 37236.0, - "14": 37668.0, - "15": 34199.0, - "16": 36659.0, - "17": 34831.0, - "18": 35011.0, - "19": 35486.0, - "20": 33221.0, - "21": 33971.0, - "22": 30501.0, - "23": 38411.0, - "24": 32764.0, - "25": 31363.0, - "26": 34624.0, - "27": 36096.0, - "28": 37021.0, - "29": 37900.0, - "30": 33066.0, - "31": 29871.0, - "32": 36113.0, - "33": 38168.0, - "34": 33074.0, - "35": 34300.0, - "36": 35363.0, - "37": 38150.0, - "38": 35798.0, - "39": 38945.0, - "40": 35780.0, - "41": 35999.0, - "42": 36611.0, - "43": 33781.0, - "44": 34207.0, - "45": 35198.0, - "46": 36779.0, - "47": 40585.0, - "48": 36434.0, - "49": 35787.0, - "50": 38996.0 + "1": 31697.0, + "2": 33160.0, + "3": 33746.0, + "4": 30996.0, + "5": 35841.0, + "6": 37590.0, + "7": 34745.0, + "8": 31631.0, + "9": 34446.0, + "10": 30001.0, + "11": 38588.0, + "12": 34898.0, + "13": 37016.0, + "14": 37438.0, + "15": 34389.0, + "16": 36499.0, + "17": 34886.0, + "18": 34911.0, + "19": 36019.0, + "20": 32918.0, + "21": 33251.0, + "22": 30923.0, + "23": 38242.0, + "24": 32736.0, + "25": 31190.0, + "26": 35013.0, + "27": 35883.0, + "28": 37233.0, + "29": 37939.0, + "30": 33116.0, + "31": 29886.0, + "32": 36334.0, + "33": 37853.0, + "34": 32967.0, + "35": 34338.0, + "36": 35362.0, + "37": 37785.0, + "38": 35581.0, + "39": 39179.0, + "40": 35557.0, + "41": 35849.0, + "42": 36813.0, + "43": 34096.0, + "44": 34154.0, + "45": 35321.0, + "46": 37096.0, + "47": 40587.0, + "48": 36162.0, + "49": 36053.0, + "50": 39230.0 } }, "mem-allocated-bytes": { @@ -121,53 +121,53 @@ "1": 1027090944.0, "2": 1027090944.0, "3": 1027091968.0, - "4": 1027091456.0, + "4": 1027090432.0, "5": 1027091968.0, "6": 1027090432.0, - "7": 1027086848.0, - "8": 1027087872.0, - "9": 1027094528.0, + "7": 1027087872.0, + "8": 1027086336.0, + "9": 1027092480.0, "10": 1027088896.0, - "11": 1027089408.0, - "12": 1027089408.0, - "13": 1027091968.0, - "14": 1027088896.0, - "15": 1027090432.0, - "16": 1027091456.0, + "11": 1027089920.0, + "12": 1027088384.0, + "13": 1027089920.0, + "14": 1027089920.0, + "15": 1027089408.0, + "16": 1027092992.0, "17": 1027089408.0, - "18": 1027093504.0, - "19": 1027091968.0, - "20": 1027095040.0, - "21": 1027088384.0, + "18": 1027094528.0, + "19": 1027091456.0, + "20": 1027093504.0, + "21": 1027088896.0, "22": 1027091456.0, - "23": 1027091968.0, - "24": 1027090944.0, - "25": 1027089408.0, + "23": 1027092480.0, + "24": 1027091456.0, + "25": 1027090432.0, "26": 1027090432.0, - "27": 1027090432.0, - "28": 1027087872.0, - "29": 1027088896.0, + "27": 1027089920.0, + "28": 1027088384.0, + "29": 1027088384.0, "30": 1027087360.0, - "31": 1027079168.0, + "31": 1027080192.0, "32": 1027082240.0, "33": 1027091456.0, "34": 1027088384.0, - "35": 1027090432.0, - "36": 1027088896.0, - "37": 1027089408.0, + "35": 1027089920.0, + "36": 1027089408.0, + "37": 1027089920.0, "38": 1027090432.0, - "39": 1027085824.0, - "40": 1027086848.0, - "41": 1027088384.0, + "39": 1027087872.0, + "40": 1027086336.0, + "41": 1027087872.0, "42": 1027091968.0, - "43": 1027084800.0, + "43": 1027084288.0, "44": 1027087872.0, "45": 1027087872.0, - "46": 1027078656.0, + "46": 1027079168.0, "47": 1027087872.0, - "48": 1027086336.0, - "49": 1027082240.0, - "50": 1027089920.0 + "48": 1027087360.0, + "49": 1027082752.0, + "50": 1027089408.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 3057868288.0, - "2": 3298335232.0, - "3": 3298335232.0, - "4": 3300084224.0, - "5": 3300084224.0, - "6": 3300084224.0, - "7": 3300084224.0, - "8": 3300084224.0, - "9": 3300084224.0, - "10": 3300122624.0, - "11": 3300122624.0, - "12": 3300122624.0, - "13": 3300122624.0, - "14": 3300122624.0, - "15": 3300122624.0, - "16": 3300122624.0, - "17": 3300122624.0, - "18": 3300122624.0, - "19": 3300376576.0, - "20": 3300416000.0, - "21": 3300416000.0, - "22": 3301032960.0, - "23": 3301998080.0, - "24": 3301998080.0, - "25": 3301998080.0, - "26": 3301998080.0, - "27": 3301998080.0, - "28": 3301998080.0, - "29": 3301998080.0, - "30": 3301998080.0, - "31": 3301998080.0, - "32": 3301998080.0, - "33": 3301998080.0, - "34": 3301998080.0, - "35": 3301998080.0, - "36": 3301998080.0, - "37": 3301998080.0, - "38": 3301998080.0, - "39": 3301998080.0, - "40": 3301998080.0, - "41": 3301998080.0, - "42": 3301998080.0, - "43": 3301998080.0, - "44": 3301998080.0, - "45": 3301998080.0, - "46": 3301998080.0, - "47": 3301998080.0, - "48": 3301998080.0, - "49": 3301998080.0, - "50": 3301998080.0 + "2": 3298402816.0, + "3": 3298442752.0, + "4": 3299160576.0, + "5": 3299160576.0, + "6": 3299434496.0, + "7": 3299434496.0, + "8": 3299434496.0, + "9": 3299434496.0, + "10": 3300508160.0, + "11": 3300508160.0, + "12": 3300508160.0, + "13": 3300508160.0, + "14": 3300508160.0, + "15": 3300508160.0, + "16": 3300508160.0, + "17": 3300508160.0, + "18": 3300508160.0, + "19": 3300508160.0, + "20": 3300508160.0, + "21": 3300508160.0, + "22": 3300905472.0, + "23": 3300905472.0, + "24": 3300905472.0, + "25": 3300905472.0, + "26": 3300905472.0, + "27": 3300905472.0, + "28": 3300905472.0, + "29": 3300905472.0, + "30": 3300905472.0, + "31": 3300905472.0, + "32": 3300905472.0, + "33": 3300905472.0, + "34": 3300905472.0, + "35": 3300905472.0, + "36": 3300905472.0, + "37": 3300905472.0, + "38": 3300905472.0, + "39": 3300905472.0, + "40": 3300905472.0, + "41": 3300905472.0, + "42": 3300905472.0, + "43": 3300905472.0, + "44": 3300905472.0, + "45": 3300905472.0, + "46": 3300905472.0, + "47": 3300905472.0, + "48": 3300905472.0, + "49": 3300905472.0, + "50": 3300905472.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 16.45405, - "2": 0.30024, - "3": 0.24416, - "4": 0.22949, - "5": 0.21642, - "6": 0.20677, - "7": 0.21591, - "8": 0.21087, - "9": 0.20973, - "10": 0.20724, - "11": 0.20594, - "12": 0.20225, - "13": 0.21091, - "14": 0.2028, - "15": 0.22641, - "16": 0.20409, - "17": 0.21141, - "18": 0.20363, - "19": 0.20701, - "20": 0.2078, - "21": 0.20171, - "22": 0.20432, - "23": 0.19941, - "24": 0.20413, - "25": 0.20204, - "26": 0.20188, - "27": 0.60524, - "28": 0.21001, - "29": 0.20338, - "30": 0.20253, - "31": 0.2399, - "32": 0.19914, - "33": 0.20122, - "34": 0.22929, - "35": 0.20106, - "36": 0.22225, - "37": 0.20411, - "38": 0.20267, - "39": 0.19726, - "40": 0.21398, - "41": 0.21317, - "42": 0.20362, - "43": 0.20696, - "44": 0.20834, - "45": 0.21563, - "46": 0.22195, - "47": 0.20394, - "48": 0.22663, - "49": 0.24701, - "50": 0.20255 + "1": "nan", + "2": 7.83079, + "3": 0.25851, + "4": 0.2417, + "5": 0.23599, + "6": 0.22253, + "7": 0.2276, + "8": 0.22193, + "9": 0.22152, + "10": 0.21893, + "11": 0.21839, + "12": 0.21679, + "13": 0.21725, + "14": 0.21669, + "15": 0.23001, + "16": 0.21982, + "17": 0.23384, + "18": 0.21656, + "19": 0.21788, + "20": 0.21669, + "21": 0.22221, + "22": 0.21704, + "23": 0.2215, + "24": 0.21629, + "25": 0.21746, + "26": 0.22186, + "27": 0.22249, + "28": 0.21578, + "29": 0.21576, + "30": 0.2148, + "31": 0.26036, + "32": 0.21704, + "33": 0.20526, + "34": 0.22713, + "35": 0.20506, + "36": 0.21238, + "37": 0.20316, + "38": 0.2071, + "39": 0.20139, + "40": 0.2271, + "41": 0.20756, + "42": 0.20565, + "43": 0.21816, + "44": 0.20535, + "45": 0.21859, + "46": 0.25466, + "47": 0.23419, + "48": 0.22918, + "49": 0.25554, + "50": 0.20246 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json index b626738d63e..00fc95f4832 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.80815, "2": 10.82612, - "3": 10.83032, - "4": 10.80963, - "5": 10.84127, - "6": 10.8581, - "7": 10.81967, - "8": 10.82506, - "9": 10.83749, - "10": 10.7783, - "11": 10.85781, - "12": 10.85539, - "13": 10.85233, - "14": 10.86699, - "15": 10.81253, - "16": 10.80292, - "17": 10.78098, - "18": 10.80788, - "19": 10.79276, - "20": 10.74548, - "21": 10.72785, - "22": 10.59608, - "23": 10.73999, - "24": 10.63509, - "25": 10.59832, - "26": 10.63517, - "27": 10.65744, - "28": 10.64536, - "29": 10.65122, - "30": 10.44144, - "31": 10.21465, - "32": 10.53342, - "33": 10.52518, - "34": 10.30171, - "35": 10.34871, - "36": 10.30843, - "37": 10.42353, - "38": 10.28859, - "39": 10.45514, - "40": 10.19363, - "41": 10.22791, - "42": 10.29725, - "43": 9.95871, - "44": 10.06717, - "45": 9.95955, - "46": 9.92614, - "47": 10.20607, - "48": 9.96021, - "49": 9.65854, - "50": 10.01296 + "3": 10.83021, + "4": 10.80972, + "5": 10.84221, + "6": 10.85854, + "7": 10.81917, + "8": 10.8246, + "9": 10.83781, + "10": 10.7791, + "11": 10.85744, + "12": 10.85583, + "13": 10.85189, + "14": 10.86713, + "15": 10.81236, + "16": 10.80239, + "17": 10.78179, + "18": 10.80713, + "19": 10.79207, + "20": 10.74661, + "21": 10.72807, + "22": 10.59589, + "23": 10.74, + "24": 10.63444, + "25": 10.59903, + "26": 10.63534, + "27": 10.6572, + "28": 10.64584, + "29": 10.65074, + "30": 10.44118, + "31": 10.21432, + "32": 10.53336, + "33": 10.52563, + "34": 10.30234, + "35": 10.34861, + "36": 10.30798, + "37": 10.42406, + "38": 10.28848, + "39": 10.45537, + "40": 10.19376, + "41": 10.22806, + "42": 10.29758, + "43": 9.95857, + "44": 10.06699, + "45": 9.96021, + "46": 9.92588, + "47": 10.2065, + "48": 9.96009, + "49": 9.65871, + "50": 10.01319 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 31590.0, - "2": 32940.0, - "3": 33668.0, - "4": 31186.0, - "5": 36214.0, - "6": 37169.0, - "7": 34770.0, - "8": 31862.0, - "9": 34102.0, - "10": 30394.0, - "11": 38432.0, - "12": 35039.0, - "13": 37236.0, - "14": 37668.0, - "15": 34199.0, - "16": 36659.0, - "17": 34831.0, - "18": 35011.0, - "19": 35486.0, - "20": 33221.0, - "21": 33971.0, - "22": 30501.0, - "23": 38411.0, - "24": 32764.0, - "25": 31363.0, - "26": 34624.0, - "27": 36096.0, - "28": 37021.0, - "29": 37900.0, - "30": 33066.0, - "31": 29871.0, - "32": 36113.0, - "33": 38168.0, - "34": 33074.0, - "35": 34300.0, - "36": 35363.0, - "37": 38150.0, - "38": 35798.0, - "39": 38945.0, - "40": 35780.0, - "41": 35999.0, - "42": 36611.0, - "43": 33781.0, - "44": 34207.0, - "45": 35198.0, - "46": 36779.0, - "47": 40585.0, - "48": 36434.0, - "49": 35787.0, - "50": 38996.0 + "1": 31697.0, + "2": 33160.0, + "3": 33746.0, + "4": 30996.0, + "5": 35841.0, + "6": 37590.0, + "7": 34745.0, + "8": 31631.0, + "9": 34446.0, + "10": 30001.0, + "11": 38588.0, + "12": 34898.0, + "13": 37016.0, + "14": 37438.0, + "15": 34389.0, + "16": 36499.0, + "17": 34886.0, + "18": 34911.0, + "19": 36019.0, + "20": 32918.0, + "21": 33251.0, + "22": 30923.0, + "23": 38242.0, + "24": 32736.0, + "25": 31190.0, + "26": 35013.0, + "27": 35883.0, + "28": 37233.0, + "29": 37939.0, + "30": 33116.0, + "31": 29886.0, + "32": 36334.0, + "33": 37853.0, + "34": 32967.0, + "35": 34338.0, + "36": 35362.0, + "37": 37785.0, + "38": 35581.0, + "39": 39179.0, + "40": 35557.0, + "41": 35849.0, + "42": 36813.0, + "43": 34096.0, + "44": 34154.0, + "45": 35321.0, + "46": 37096.0, + "47": 40587.0, + "48": 36162.0, + "49": 36053.0, + "50": 39230.0 } }, "mem-allocated-bytes": { @@ -121,53 +121,53 @@ "1": 1027090944.0, "2": 1027090944.0, "3": 1027091968.0, - "4": 1027091456.0, + "4": 1027090432.0, "5": 1027091968.0, "6": 1027090432.0, - "7": 1027086848.0, - "8": 1027087872.0, - "9": 1027094528.0, + "7": 1027087872.0, + "8": 1027086336.0, + "9": 1027092480.0, "10": 1027088896.0, - "11": 1027089408.0, - "12": 1027089408.0, - "13": 1027091968.0, - "14": 1027088896.0, - "15": 1027090432.0, - "16": 1027091456.0, + "11": 1027089920.0, + "12": 1027088384.0, + "13": 1027089920.0, + "14": 1027089920.0, + "15": 1027089408.0, + "16": 1027092992.0, "17": 1027089408.0, - "18": 1027093504.0, - "19": 1027091968.0, - "20": 1027095040.0, - "21": 1027088384.0, + "18": 1027094528.0, + "19": 1027091456.0, + "20": 1027093504.0, + "21": 1027088896.0, "22": 1027091456.0, - "23": 1027091968.0, - "24": 1027090944.0, - "25": 1027089408.0, + "23": 1027092480.0, + "24": 1027091456.0, + "25": 1027090432.0, "26": 1027090432.0, - "27": 1027090432.0, - "28": 1027087872.0, - "29": 1027088896.0, + "27": 1027089920.0, + "28": 1027088384.0, + "29": 1027088384.0, "30": 1027087360.0, - "31": 1027079168.0, + "31": 1027080192.0, "32": 1027082240.0, "33": 1027091456.0, "34": 1027088384.0, - "35": 1027090432.0, - "36": 1027088896.0, - "37": 1027089408.0, + "35": 1027089920.0, + "36": 1027089408.0, + "37": 1027089920.0, "38": 1027090432.0, - "39": 1027085824.0, - "40": 1027086848.0, - "41": 1027088384.0, + "39": 1027087872.0, + "40": 1027086336.0, + "41": 1027087872.0, "42": 1027091968.0, - "43": 1027084800.0, + "43": 1027084288.0, "44": 1027087872.0, "45": 1027087872.0, - "46": 1027078656.0, + "46": 1027079168.0, "47": 1027087872.0, - "48": 1027086336.0, - "49": 1027082240.0, - "50": 1027089920.0 + "48": 1027087360.0, + "49": 1027082752.0, + "50": 1027089408.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 3057868288.0, - "2": 3298335232.0, - "3": 3298335232.0, - "4": 3300084224.0, - "5": 3300084224.0, - "6": 3300084224.0, - "7": 3300084224.0, - "8": 3300084224.0, - "9": 3300084224.0, - "10": 3300122624.0, - "11": 3300122624.0, - "12": 3300122624.0, - "13": 3300122624.0, - "14": 3300122624.0, - "15": 3300122624.0, - "16": 3300122624.0, - "17": 3300122624.0, - "18": 3300122624.0, - "19": 3300376576.0, - "20": 3300416000.0, - "21": 3300416000.0, - "22": 3301032960.0, - "23": 3301998080.0, - "24": 3301998080.0, - "25": 3301998080.0, - "26": 3301998080.0, - "27": 3301998080.0, - "28": 3301998080.0, - "29": 3301998080.0, - "30": 3301998080.0, - "31": 3301998080.0, - "32": 3301998080.0, - "33": 3301998080.0, - "34": 3301998080.0, - "35": 3301998080.0, - "36": 3301998080.0, - "37": 3301998080.0, - "38": 3301998080.0, - "39": 3301998080.0, - "40": 3301998080.0, - "41": 3301998080.0, - "42": 3301998080.0, - "43": 3301998080.0, - "44": 3301998080.0, - "45": 3301998080.0, - "46": 3301998080.0, - "47": 3301998080.0, - "48": 3301998080.0, - "49": 3301998080.0, - "50": 3301998080.0 + "2": 3298402816.0, + "3": 3298442752.0, + "4": 3299160576.0, + "5": 3299160576.0, + "6": 3299434496.0, + "7": 3299434496.0, + "8": 3299434496.0, + "9": 3299434496.0, + "10": 3300508160.0, + "11": 3300508160.0, + "12": 3300508160.0, + "13": 3300508160.0, + "14": 3300508160.0, + "15": 3300508160.0, + "16": 3300508160.0, + "17": 3300508160.0, + "18": 3300508160.0, + "19": 3300508160.0, + "20": 3300508160.0, + "21": 3300508160.0, + "22": 3300905472.0, + "23": 3300905472.0, + "24": 3300905472.0, + "25": 3300905472.0, + "26": 3300905472.0, + "27": 3300905472.0, + "28": 3300905472.0, + "29": 3300905472.0, + "30": 3300905472.0, + "31": 3300905472.0, + "32": 3300905472.0, + "33": 3300905472.0, + "34": 3300905472.0, + "35": 3300905472.0, + "36": 3300905472.0, + "37": 3300905472.0, + "38": 3300905472.0, + "39": 3300905472.0, + "40": 3300905472.0, + "41": 3300905472.0, + "42": 3300905472.0, + "43": 3300905472.0, + "44": 3300905472.0, + "45": 3300905472.0, + "46": 3300905472.0, + "47": 3300905472.0, + "48": 3300905472.0, + "49": 3300905472.0, + "50": 3300905472.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 15.57121, - "2": 0.28312, - "3": 0.24431, - "4": 0.2266, - "5": 0.21347, - "6": 0.20803, - "7": 0.2145, - "8": 0.20409, - "9": 0.2038, - "10": 0.20378, - "11": 0.20122, - "12": 0.20047, - "13": 0.2053, - "14": 0.20008, - "15": 0.22405, - "16": 0.19642, - "17": 0.20937, - "18": 0.19918, - "19": 0.2032, - "20": 0.19792, - "21": 0.19626, - "22": 0.20047, - "23": 0.19555, - "24": 0.2, - "25": 0.23371, - "26": 0.2005, - "27": 0.59196, - "28": 0.19966, - "29": 0.20231, - "30": 0.19778, - "31": 0.23768, - "32": 0.20526, - "33": 0.20518, - "34": 0.22786, - "35": 0.20088, - "36": 0.21894, - "37": 0.20033, - "38": 0.20352, - "39": 0.19985, - "40": 0.20975, - "41": 0.2189, - "42": 0.20277, - "43": 0.20495, - "44": 0.20563, - "45": 0.21473, - "46": 0.21859, - "47": 0.2018, - "48": 0.22732, - "49": 0.2668, - "50": 0.19761 + "1": "nan", + "2": 7.28158, + "3": 0.24294, + "4": 0.23001, + "5": 0.21629, + "6": 0.2091, + "7": 0.21859, + "8": 0.2079, + "9": 0.20821, + "10": 0.2058, + "11": 0.20543, + "12": 0.20144, + "13": 0.21279, + "14": 0.20198, + "15": 0.22746, + "16": 0.20139, + "17": 0.21784, + "18": 0.20325, + "19": 0.20142, + "20": 0.20486, + "21": 0.20328, + "22": 0.20214, + "23": 0.20718, + "24": 0.20327, + "25": 0.20113, + "26": 0.20792, + "27": 0.2039, + "28": 0.2039, + "29": 0.20177, + "30": 0.20567, + "31": 0.24295, + "32": 0.2017, + "33": 0.20596, + "34": 0.22755, + "35": 0.20292, + "36": 0.21197, + "37": 0.20478, + "38": 0.20382, + "39": 0.20165, + "40": 0.22804, + "41": 0.20973, + "42": 0.20764, + "43": 0.22016, + "44": 0.20517, + "45": 0.21837, + "46": 0.22771, + "47": 0.20864, + "48": 0.22625, + "49": 0.262, + "50": 0.20365 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json index 19b393f6369..aa287eb5340 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.77518, "2": 10.78038, - "3": 10.79302, - "4": 10.74107, - "5": 10.82013, - "6": 10.82951, - "7": 10.7953, - "8": 10.78263, - "9": 10.79278, - "10": 10.7446, - "11": 10.85147, - "12": 10.82613, - "13": 10.82825, - "14": 10.85504, - "15": 10.75536, - "16": 10.75777, - "17": 10.72319, - "18": 10.76274, - "19": 10.75075, - "20": 10.66587, - "21": 10.6419, - "22": 10.47523, - "23": 10.66959, - "24": 10.54157, - "25": 10.4825, - "26": 10.55255, - "27": 10.57459, - "28": 10.55159, - "29": 10.5668, - "30": 10.31134, - "31": 10.01921, - "32": 10.42655, - "33": 10.42294, - "34": 10.14739, - "35": 10.21574, - "36": 10.15811, - "37": 10.30279, - "38": 10.14031, - "39": 10.36301, - "40": 10.02669, - "41": 10.07635, + "3": 10.79303, + "4": 10.74095, + "5": 10.82011, + "6": 10.82888, + "7": 10.79604, + "8": 10.78229, + "9": 10.79255, + "10": 10.74397, + "11": 10.85078, + "12": 10.82662, + "13": 10.82794, + "14": 10.85485, + "15": 10.75465, + "16": 10.75743, + "17": 10.72351, + "18": 10.76308, + "19": 10.75036, + "20": 10.66668, + "21": 10.64243, + "22": 10.47603, + "23": 10.66938, + "24": 10.54209, + "25": 10.483, + "26": 10.55225, + "27": 10.57485, + "28": 10.55032, + "29": 10.56648, + "30": 10.31185, + "31": 10.0192, + "32": 10.42647, + "33": 10.42317, + "34": 10.14751, + "35": 10.21565, + "36": 10.15857, + "37": 10.3022, + "38": 10.13992, + "39": 10.36262, + "40": 10.02665, + "41": 10.07636, "42": 10.16156, - "43": 9.74374, - "44": 9.88962, - "45": 9.75874, - "46": 9.73618, - "47": 10.0844, - "48": 9.78532, - "49": 9.45072, - "50": 9.85634 + "43": 9.74408, + "44": 9.88906, + "45": 9.75905, + "46": 9.73651, + "47": 10.0848, + "48": 9.78533, + "49": 9.45084, + "50": 9.85649 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 27105.0, - "2": 28791.0, - "3": 29282.0, - "4": 27583.0, - "5": 31595.0, - "6": 32831.0, - "7": 31023.0, - "8": 27107.0, - "9": 30780.0, - "10": 25505.0, - "11": 33684.0, - "12": 30235.0, - "13": 32960.0, - "14": 32880.0, - "15": 30405.0, - "16": 32455.0, - "17": 30933.0, - "18": 30623.0, - "19": 30803.0, - "20": 28593.0, - "21": 29002.0, - "22": 27030.0, - "23": 34463.0, - "24": 29154.0, - "25": 27827.0, - "26": 31119.0, - "27": 32108.0, - "28": 33412.0, - "29": 34737.0, - "30": 30465.0, - "31": 28775.0, - "32": 33115.0, - "33": 34745.0, - "34": 30785.0, - "35": 32116.0, - "36": 33968.0, - "37": 36757.0, - "38": 34150.0, - "39": 37240.0, - "40": 35353.0, - "41": 34638.0, - "42": 36703.0, - "43": 34601.0, - "44": 33783.0, - "45": 35388.0, - "46": 35484.0, - "47": 40591.0, - "48": 36671.0, - "49": 36174.0, - "50": 38231.0 + "1": 26957.0, + "2": 28911.0, + "3": 29270.0, + "4": 27454.0, + "5": 31649.0, + "6": 32808.0, + "7": 31347.0, + "8": 27271.0, + "9": 30402.0, + "10": 25594.0, + "11": 33793.0, + "12": 30427.0, + "13": 32816.0, + "14": 33234.0, + "15": 31054.0, + "16": 32434.0, + "17": 31130.0, + "18": 30598.0, + "19": 30753.0, + "20": 28748.0, + "21": 29005.0, + "22": 27314.0, + "23": 34805.0, + "24": 29016.0, + "25": 28010.0, + "26": 31012.0, + "27": 32031.0, + "28": 33369.0, + "29": 34839.0, + "30": 30432.0, + "31": 28381.0, + "32": 32932.0, + "33": 34825.0, + "34": 30371.0, + "35": 32400.0, + "36": 33694.0, + "37": 36758.0, + "38": 34363.0, + "39": 37548.0, + "40": 35197.0, + "41": 34705.0, + "42": 36625.0, + "43": 34474.0, + "44": 33817.0, + "45": 35400.0, + "46": 35703.0, + "47": 40392.0, + "48": 36646.0, + "49": 36073.0, + "50": 37629.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1561031168.0, - "2": 1562193408.0, - "3": 1561517056.0, - "4": 1560948224.0, - "5": 1562155008.0, - "6": 1563247104.0, - "7": 1562656768.0, - "8": 1562246656.0, - "9": 1561597952.0, - "10": 1564070400.0, - "11": 1562084352.0, - "12": 1559892480.0, - "13": 1562137600.0, - "14": 1561026048.0, - "15": 1561419776.0, - "16": 1562166784.0, - "17": 1560322048.0, - "18": 1561402880.0, - "19": 1564046336.0, - "20": 1562059264.0, - "21": 1560781824.0, - "22": 1561673728.0, - "23": 1562520064.0, - "24": 1561093632.0, - "25": 1561384960.0, - "26": 1562000896.0, - "27": 1561264128.0, - "28": 1561458176.0, - "29": 1561382912.0, - "30": 1562413568.0, - "31": 1560165376.0, - "32": 1561413120.0, - "33": 1562501120.0, - "34": 1562718720.0, - "35": 1563195392.0, - "36": 1561894400.0, - "37": 1560998912.0, - "38": 1563760128.0, - "39": 1561207808.0, - "40": 1562625536.0, - "41": 1561658368.0, - "42": 1561409024.0, - "43": 1559668736.0, - "44": 1561136640.0, - "45": 1560246272.0, - "46": 1562813952.0, - "47": 1561296896.0, - "48": 1561900544.0, - "49": 1562101760.0, - "50": 1563655680.0 + "1": 1560340992.0, + "2": 1559452672.0, + "3": 1559933440.0, + "4": 1559384064.0, + "5": 1559977472.0, + "6": 1559256064.0, + "7": 1560589824.0, + "8": 1559076864.0, + "9": 1559026176.0, + "10": 1560162304.0, + "11": 1559762432.0, + "12": 1560014848.0, + "13": 1559311872.0, + "14": 1559925760.0, + "15": 1559127552.0, + "16": 1559450624.0, + "17": 1559836160.0, + "18": 1559387136.0, + "19": 1559434752.0, + "20": 1560067072.0, + "21": 1560083968.0, + "22": 1559487488.0, + "23": 1560066560.0, + "24": 1560071168.0, + "25": 1559536128.0, + "26": 1559178240.0, + "27": 1560172032.0, + "28": 1559939072.0, + "29": 1559888384.0, + "30": 1559586816.0, + "31": 1559228928.0, + "32": 1559279616.0, + "33": 1559536128.0, + "34": 1561029632.0, + "35": 1560458752.0, + "36": 1559279616.0, + "37": 1559875072.0, + "38": 1560733696.0, + "39": 1559857664.0, + "40": 1559380992.0, + "41": 1559330304.0, + "42": 1559380992.0, + "43": 1559367168.0, + "44": 1559380992.0, + "45": 1559330304.0, + "46": 1559843840.0, + "47": 1560254976.0, + "48": 1559380992.0, + "49": 1560439296.0, + "50": 1559823360.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3465706496.0, - "2": 4045009920.0, - "3": 4045009920.0, - "4": 4045009920.0, - "5": 4045009920.0, - "6": 4067111936.0, - "7": 4067111936.0, - "8": 4067111936.0, - "9": 4067111936.0, - "10": 4067111936.0, - "11": 4067111936.0, - "12": 4067111936.0, - "13": 4067111936.0, - "14": 4067111936.0, - "15": 4067111936.0, - "16": 4067111936.0, - "17": 4067111936.0, - "18": 4067111936.0, - "19": 4067111936.0, - "20": 4067111936.0, - "21": 4067111936.0, - "22": 4067111936.0, - "23": 4067111936.0, - "24": 4067111936.0, - "25": 4067111936.0, - "26": 4067111936.0, - "27": 4067111936.0, - "28": 4067111936.0, - "29": 4067111936.0, - "30": 4067111936.0, - "31": 4067111936.0, - "32": 4067111936.0, - "33": 4067111936.0, - "34": 4067111936.0, - "35": 4067111936.0, - "36": 4067111936.0, - "37": 4067111936.0, - "38": 4067111936.0, - "39": 4067111936.0, - "40": 4067111936.0, - "41": 4067111936.0, - "42": 4067111936.0, - "43": 4067111936.0, - "44": 4067111936.0, - "45": 4067111936.0, - "46": 4067111936.0, - "47": 4067111936.0, - "48": 4067111936.0, - "49": 4067111936.0, - "50": 4067111936.0 + "1": 3463584768.0, + "2": 4043448832.0, + "3": 4043448832.0, + "4": 4043448832.0, + "5": 4043448832.0, + "6": 4066991616.0, + "7": 4066991616.0, + "8": 4066991616.0, + "9": 4066991616.0, + "10": 4066991616.0, + "11": 4066991616.0, + "12": 4066991616.0, + "13": 4066991616.0, + "14": 4066991616.0, + "15": 4066991616.0, + "16": 4066991616.0, + "17": 4066991616.0, + "18": 4066991616.0, + "19": 4066991616.0, + "20": 4066991616.0, + "21": 4066991616.0, + "22": 4066991616.0, + "23": 4066991616.0, + "24": 4066991616.0, + "25": 4066991616.0, + "26": 4066991616.0, + "27": 4066991616.0, + "28": 4066991616.0, + "29": 4066991616.0, + "30": 4066991616.0, + "31": 4066991616.0, + "32": 4066991616.0, + "33": 4066991616.0, + "34": 4066991616.0, + "35": 4066991616.0, + "36": 4066991616.0, + "37": 4066991616.0, + "38": 4066991616.0, + "39": 4066991616.0, + "40": 4066991616.0, + "41": 4066991616.0, + "42": 4066991616.0, + "43": 4066991616.0, + "44": 4066991616.0, + "45": 4066991616.0, + "46": 4066991616.0, + "47": 4066991616.0, + "48": 4066991616.0, + "49": 4066991616.0, + "50": 4066991616.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 25.658, - "2": 0.47954, - "3": 0.41847, - "4": 0.33258, - "5": 0.34351, - "6": 0.31011, - "7": 0.31575, - "8": 0.29238, - "9": 0.30311, - "10": 0.34916, - "11": 0.30925, - "12": 0.34341, - "13": 0.28433, - "14": 0.28892, - "15": 0.29252, - "16": 0.2927, - "17": 0.30297, - "18": 0.29339, - "19": 0.2886, - "20": 0.29686, - "21": 0.29022, - "22": 0.65703, - "23": 0.29161, - "24": 0.29821, - "25": 0.29341, - "26": 0.30856, - "27": 0.2991, - "28": 0.29279, - "29": 0.29852, - "30": 0.30839, - "31": 0.29491, - "32": 0.2896, - "33": 0.29084, - "34": 0.32605, - "35": 0.29205, - "36": 0.28559, - "37": 0.29399, - "38": 0.28264, - "39": 0.28463, - "40": 0.28019, - "41": 0.28893, - "42": 0.27586, - "43": 0.28759, - "44": 0.28318, - "45": 0.27759, - "46": 0.27363, - "47": 0.27776, - "48": 0.27855, - "49": 1.02062, - "50": 0.28168 + "1": "nan", + "2": 11.13255, + "3": 0.38871, + "4": 0.35015, + "5": 0.331, + "6": 0.31196, + "7": 0.2967, + "8": 0.2819, + "9": 0.28495, + "10": 0.27913, + "11": 0.28748, + "12": 0.27223, + "13": 0.27199, + "14": 0.27763, + "15": 0.28, + "16": 0.27326, + "17": 0.2695, + "18": 0.2655, + "19": 0.2721, + "20": 0.27449, + "21": 0.2702, + "22": 0.27811, + "23": 0.2669, + "24": 0.26869, + "25": 0.2782, + "26": 0.27222, + "27": 0.276, + "28": 0.28334, + "29": 0.27828, + "30": 0.28953, + "31": 0.27662, + "32": 0.2747, + "33": 0.27508, + "34": 0.28272, + "35": 0.27957, + "36": 0.28319, + "37": 0.2797, + "38": 0.27997, + "39": 0.27077, + "40": 0.27209, + "41": 0.27227, + "42": 0.28186, + "43": 0.28259, + "44": 0.38217, + "45": 0.27829, + "46": 0.27227, + "47": 0.26481, + "48": 0.27479, + "49": 0.26874, + "50": 0.26337 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json index bfbb1e850e1..3459a646ecd 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_h100.json @@ -6,104 +6,104 @@ "values": { "1": 10.82922, "2": 10.84163, - "3": 10.84245, - "4": 10.82, - "5": 10.85652, - "6": 10.86906, - "7": 10.83778, - "8": 10.84312, - "9": 10.84423, - "10": 10.79298, - "11": 10.86697, - "12": 10.86875, - "13": 10.86207, - "14": 10.86919, - "15": 10.8067, - "16": 10.8057, - "17": 10.77686, - "18": 10.79541, - "19": 10.78384, - "20": 10.72654, - "21": 10.69491, - "22": 10.54462, - "23": 10.6993, - "24": 10.58151, - "25": 10.53282, - "26": 10.58817, - "27": 10.601, - "28": 10.57563, - "29": 10.58022, - "30": 10.35802, - "31": 10.08769, - "32": 10.44466, - "33": 10.4477, - "34": 10.18704, - "35": 10.24483, - "36": 10.19713, - "37": 10.32294, - "38": 10.17101, - "39": 10.37026, - "40": 10.05533, - "41": 10.09491, - "42": 10.17971, - "43": 9.78263, - "44": 9.91346, - "45": 9.77951, - "46": 9.75648, - "47": 10.09647, - "48": 9.80391, - "49": 9.46649, - "50": 9.86874, - "51": 9.79428, - "52": 9.68303, - "53": 10.03314, - "54": 9.9113, - "55": 9.82995, - "56": 9.57839, - "57": 9.42377, - "58": 9.80549, - "59": 9.53292, - "60": 9.449, - "61": 9.65293, - "62": 9.95672, - "63": 9.33775, - "64": 9.74194, - "65": 8.89366, - "66": 9.67317, - "67": 9.33002, - "68": 9.76517, - "69": 9.76336, - "70": 9.71127, - "71": 9.59511, - "72": 9.54797, - "73": 9.47124, - "74": 8.89297, - "75": 9.39451, - "76": 9.04721, - "77": 10.04318, - "78": 9.70313, - "79": 9.35169, - "80": 9.38198, - "81": 9.45146, - "82": 9.67546, - "83": 9.27658, - "84": 9.39241, - "85": 9.58333, - "86": 9.04518, - "87": 9.56487, - "88": 9.72459, - "89": 9.57019, - "90": 9.79944, - "91": 9.30737, - "92": 9.3313, - "93": 9.04109, - "94": 8.80259, - "95": 9.50213, - "96": 9.5021, - "97": 9.28183, - "98": 9.64883, - "99": 8.8594, - "100": 9.37131 + "3": 10.84243, + "4": 10.81957, + "5": 10.85664, + "6": 10.86827, + "7": 10.83827, + "8": 10.8422, + "9": 10.84386, + "10": 10.79334, + "11": 10.86738, + "12": 10.86907, + "13": 10.86171, + "14": 10.86924, + "15": 10.8068, + "16": 10.80521, + "17": 10.77645, + "18": 10.79607, + "19": 10.78431, + "20": 10.72724, + "21": 10.6946, + "22": 10.54439, + "23": 10.69964, + "24": 10.58157, + "25": 10.53325, + "26": 10.58756, + "27": 10.60097, + "28": 10.5763, + "29": 10.58036, + "30": 10.3575, + "31": 10.08738, + "32": 10.44476, + "33": 10.44759, + "34": 10.18783, + "35": 10.24474, + "36": 10.19774, + "37": 10.32309, + "38": 10.17129, + "39": 10.37065, + "40": 10.05528, + "41": 10.09503, + "42": 10.17995, + "43": 9.78259, + "44": 9.91366, + "45": 9.77924, + "46": 9.75667, + "47": 10.09678, + "48": 9.80346, + "49": 9.4667, + "50": 9.86896, + "51": 9.79433, + "52": 9.68302, + "53": 10.03299, + "54": 9.91153, + "55": 9.83055, + "56": 9.57836, + "57": 9.42375, + "58": 9.80572, + "59": 9.53304, + "60": 9.44886, + "61": 9.65297, + "62": 9.95701, + "63": 9.33764, + "64": 9.74169, + "65": 8.89408, + "66": 9.67312, + "67": 9.33005, + "68": 9.76528, + "69": 9.76313, + "70": 9.7113, + "71": 9.59503, + "72": 9.54795, + "73": 9.47125, + "74": 8.89278, + "75": 9.39455, + "76": 9.04738, + "77": 10.04269, + "78": 9.70299, + "79": 9.35183, + "80": 9.38199, + "81": 9.45105, + "82": 9.67565, + "83": 9.27677, + "84": 9.39244, + "85": 9.58302, + "86": 9.04523, + "87": 9.56489, + "88": 9.72458, + "89": 9.56995, + "90": 9.79945, + "91": 9.30745, + "92": 9.3312, + "93": 9.04065, + "94": 8.80269, + "95": 9.50212, + "96": 9.50157, + "97": 9.28199, + "98": 9.64852, + "99": 8.85942, + "100": 9.37135 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 27245.0, - "2": 28958.0, - "3": 29464.0, - "4": 28046.0, - "5": 31369.0, - "6": 33287.0, - "7": 31200.0, - "8": 26921.0, - "9": 30008.0, - "10": 25870.0, - "11": 33681.0, - "12": 30344.0, - "13": 32737.0, - "14": 33315.0, - "15": 29830.0, - "16": 32475.0, - "17": 30747.0, - "18": 30381.0, - "19": 31032.0, - "20": 28243.0, - "21": 29224.0, - "22": 27340.0, - "23": 34119.0, - "24": 29049.0, - "25": 27636.0, - "26": 30662.0, - "27": 32009.0, - "28": 33355.0, - "29": 34714.0, - "30": 30387.0, - "31": 28212.0, - "32": 33411.0, - "33": 34696.0, - "34": 30053.0, - "35": 31488.0, - "36": 32943.0, - "37": 35829.0, - "38": 33740.0, - "39": 37632.0, - "40": 34779.0, - "41": 33958.0, - "42": 36396.0, - "43": 34088.0, - "44": 34090.0, - "45": 35158.0, - "46": 36174.0, - "47": 39772.0, - "48": 36516.0, - "49": 36733.0, - "50": 38234.0, - "51": 38608.0, - "52": 37030.0, - "53": 42442.0, - "54": 40944.0, - "55": 37133.0, - "56": 41001.0, - "57": 37524.0, - "58": 42317.0, - "59": 40804.0, - "60": 40450.0, - "61": 41478.0, - "62": 39766.0, - "63": 37941.0, - "64": 42197.0, - "65": 40947.0, - "66": 44094.0, - "67": 41958.0, - "68": 40060.0, - "69": 42189.0, - "70": 43436.0, - "71": 42748.0, - "72": 44280.0, - "73": 47478.0, - "74": 41456.0, - "75": 39925.0, - "76": 43490.0, - "77": 45636.0, - "78": 2141470.0, - "79": 46055.0, - "80": 51863.0, - "81": 151341.0, - "82": 49835.0, - "83": 143360.0, - "84": 2141546.0, - "85": 2145177.0, - "86": 132114.0, - "87": 2147022.0, - "88": 59899.0, - "89": 162883.0, - "90": 51330.0, - "91": 2141901.0, - "92": 44946.0, - "93": 138194.0, - "94": 2145772.0, - "95": 45247.0, - "96": 135045.0, - "97": 53170.0, - "98": 168576.0, - "99": 2141797.0, - "100": 163741.0 + "1": 27323.0, + "2": 29073.0, + "3": 29255.0, + "4": 27962.0, + "5": 31688.0, + "6": 32993.0, + "7": 31065.0, + "8": 27438.0, + "9": 30073.0, + "10": 26065.0, + "11": 33565.0, + "12": 30127.0, + "13": 32888.0, + "14": 33095.0, + "15": 30180.0, + "16": 32402.0, + "17": 30716.0, + "18": 30705.0, + "19": 30785.0, + "20": 28165.0, + "21": 28990.0, + "22": 27297.0, + "23": 33611.0, + "24": 29400.0, + "25": 27474.0, + "26": 30881.0, + "27": 31763.0, + "28": 33226.0, + "29": 34855.0, + "30": 30031.0, + "31": 27996.0, + "32": 33444.0, + "33": 34744.0, + "34": 30041.0, + "35": 31685.0, + "36": 33100.0, + "37": 35781.0, + "38": 34038.0, + "39": 37867.0, + "40": 34304.0, + "41": 34129.0, + "42": 36144.0, + "43": 34457.0, + "44": 33966.0, + "45": 35067.0, + "46": 36064.0, + "47": 39720.0, + "48": 36731.0, + "49": 37095.0, + "50": 38131.0, + "51": 38418.0, + "52": 36970.0, + "53": 42469.0, + "54": 41376.0, + "55": 37144.0, + "56": 41057.0, + "57": 37882.0, + "58": 42179.0, + "59": 40741.0, + "60": 40662.0, + "61": 41737.0, + "62": 39988.0, + "63": 37986.0, + "64": 42182.0, + "65": 41172.0, + "66": 43902.0, + "67": 42888.0, + "68": 39939.0, + "69": 42474.0, + "70": 43410.0, + "71": 42547.0, + "72": 44667.0, + "73": 47806.0, + "74": 49985.0, + "75": 39314.0, + "76": 43658.0, + "77": 56586.0, + "78": 2142059.0, + "79": 46090.0, + "80": 169105.0, + "81": 151389.0, + "82": 55551.0, + "83": 142987.0, + "84": 2141133.0, + "85": 2145286.0, + "86": 132276.0, + "87": 2147472.0, + "88": 57700.0, + "89": 162718.0, + "90": 51175.0, + "91": 2142368.0, + "92": 44885.0, + "93": 137807.0, + "94": 178019.0, + "95": 45818.0, + "96": 135267.0, + "97": 53434.0, + "98": 168439.0, + "99": 2141756.0, + "100": 162968.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1668119552.0, - "2": 1668144128.0, - "3": 1668127232.0, - "4": 1668115456.0, - "5": 1668150272.0, - "6": 1668141056.0, - "7": 1668115968.0, - "8": 1668127744.0, - "9": 1668131328.0, - "10": 1668108288.0, - "11": 1668125184.0, - "12": 1668123136.0, - "13": 1668132864.0, - "14": 1668132352.0, - "15": 1668107264.0, - "16": 1668116480.0, - "17": 1668106240.0, - "18": 1668092416.0, - "19": 1668118016.0, - "20": 1668108288.0, - "21": 1668082688.0, - "22": 1668089344.0, - "23": 1668081664.0, - "24": 1668089344.0, - "25": 1668054528.0, - "26": 1668085248.0, - "27": 1668073984.0, - "28": 1668053504.0, - "29": 1668061184.0, - "30": 1668038144.0, - "31": 1668009984.0, - "32": 1668027392.0, - "33": 1668038656.0, - "34": 1668029440.0, - "35": 1668021760.0, - "36": 1668039680.0, - "37": 1668031488.0, - "38": 1668039680.0, - "39": 1668020736.0, - "40": 1668018176.0, - "41": 1668008960.0, - "42": 1668018176.0, - "43": 1667971072.0, - "44": 1667995648.0, - "45": 1668002816.0, - "46": 1667958272.0, - "47": 1668014592.0, - "48": 1667957248.0, - "49": 1667977216.0, - "50": 1667992576.0, - "51": 1667978752.0, - "52": 1667986944.0, - "53": 1667982848.0, - "54": 1667987968.0, - "55": 1667992064.0, - "56": 1667992064.0, - "57": 1667954176.0, - "58": 1667985920.0, - "59": 1667977216.0, - "60": 1667998208.0, - "61": 1668008448.0, - "62": 1668008960.0, - "63": 1667976192.0, - "64": 1667992064.0, - "65": 1667954688.0, - "66": 1667990016.0, - "67": 1667995136.0, - "68": 1668002304.0, - "69": 1667986432.0, - "70": 1667996160.0, - "71": 1668009984.0, - "72": 1668003840.0, - "73": 1668004352.0, - "74": 1668006400.0, - "75": 1668045824.0, - "76": 1668047872.0, - "77": 1668048896.0, - "78": 1667998208.0, - "79": 1668033536.0, - "80": 1668013568.0, - "81": 1668016128.0, - "82": 1668030976.0, - "83": 1668032000.0, - "84": 1668015616.0, - "85": 1668015616.0, - "86": 1667997696.0, - "87": 1668056064.0, - "88": 1668017664.0, - "89": 1668007936.0, - "90": 1668049920.0, - "91": 1668049408.0, - "92": 1668049920.0, - "93": 1668033536.0, - "94": 1668043264.0, - "95": 1668053504.0, - "96": 1668058112.0, - "97": 1668030464.0, - "98": 1668079104.0, - "99": 1668022272.0, - "100": 1668041728.0 + "1": 787516416.0, + "2": 787540992.0, + "3": 787523584.0, + "4": 787510784.0, + "5": 787548160.0, + "6": 787537920.0, + "7": 787510784.0, + "8": 787525632.0, + "9": 787527168.0, + "10": 787504128.0, + "11": 787523584.0, + "12": 787520512.0, + "13": 787528704.0, + "14": 787529728.0, + "15": 787502592.0, + "16": 787514368.0, + "17": 787503104.0, + "18": 787488768.0, + "19": 787514880.0, + "20": 787503616.0, + "21": 787479552.0, + "22": 787486720.0, + "23": 787477504.0, + "24": 787485184.0, + "25": 787451392.0, + "26": 787481088.0, + "27": 787469824.0, + "28": 787449856.0, + "29": 787459584.0, + "30": 787435520.0, + "31": 787404288.0, + "32": 787423744.0, + "33": 787432960.0, + "34": 787426304.0, + "35": 787418624.0, + "36": 787435008.0, + "37": 787427328.0, + "38": 787436544.0, + "39": 787417088.0, + "40": 787415040.0, + "41": 787405312.0, + "42": 787414528.0, + "43": 787367424.0, + "44": 787391488.0, + "45": 787400192.0, + "46": 787356672.0, + "47": 787412480.0, + "48": 787354112.0, + "49": 787374592.0, + "50": 787390976.0, + "51": 787376128.0, + "52": 787383296.0, + "53": 787379200.0, + "54": 787383808.0, + "55": 787389952.0, + "56": 787391488.0, + "57": 787350528.0, + "58": 787381760.0, + "59": 787374080.0, + "60": 787395584.0, + "61": 787407872.0, + "62": 787406848.0, + "63": 787373056.0, + "64": 787390464.0, + "65": 787351040.0, + "66": 787387392.0, + "67": 787394560.0, + "68": 787401216.0, + "69": 787383296.0, + "70": 787393536.0, + "71": 787406848.0, + "72": 787400192.0, + "73": 787400704.0, + "74": 787405312.0, + "75": 787442176.0, + "76": 787443712.0, + "77": 787446784.0, + "78": 787397120.0, + "79": 787430400.0, + "80": 787411968.0, + "81": 787413504.0, + "82": 787427840.0, + "83": 787430912.0, + "84": 787412992.0, + "85": 787412992.0, + "86": 787395584.0, + "87": 787453440.0, + "88": 787417088.0, + "89": 787404288.0, + "90": 787448320.0, + "91": 787447296.0, + "92": 787447296.0, + "93": 787429376.0, + "94": 787440128.0, + "95": 787450880.0, + "96": 787456000.0, + "97": 787428352.0, + "98": 787475456.0, + "99": 787421696.0, + "100": 787440128.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 3460789248.0, - "2": 3470375936.0, - "3": 3470375936.0, - "4": 3470375936.0, - "5": 3480799232.0, - "6": 3480799232.0, - "7": 3480799232.0, - "8": 3480799232.0, - "9": 3480799232.0, - "10": 3480799232.0, - "11": 3480799232.0, - "12": 3480799232.0, - "13": 3480799232.0, - "14": 3480799232.0, - "15": 3480799232.0, - "16": 3480799232.0, - "17": 3480799232.0, - "18": 3480799232.0, - "19": 3480799232.0, - "20": 3480799232.0, - "21": 3480799232.0, - "22": 3480799232.0, - "23": 3480799232.0, - "24": 3480799232.0, - "25": 3480799232.0, - "26": 3480799232.0, - "27": 3480799232.0, - "28": 3480799232.0, - "29": 3480799232.0, - "30": 3480799232.0, - "31": 3480799232.0, - "32": 3480799232.0, - "33": 3480799232.0, - "34": 3480799232.0, - "35": 3480799232.0, - "36": 3480799232.0, - "37": 3480799232.0, - "38": 3480799232.0, - "39": 3480799232.0, - "40": 3480799232.0, - "41": 3480799232.0, - "42": 3480799232.0, - "43": 3480799232.0, - "44": 3480799232.0, - "45": 3480799232.0, - "46": 3480799232.0, - "47": 3480799232.0, - "48": 3480799232.0, - "49": 3480799232.0, - "50": 3480799232.0, - "51": 3480799232.0, - "52": 3480799232.0, - "53": 3480799232.0, - "54": 3480799232.0, - "55": 3480799232.0, - "56": 3480799232.0, - "57": 3480799232.0, - "58": 3480799232.0, - "59": 3480799232.0, - "60": 3480799232.0, - "61": 3480799232.0, - "62": 3480799232.0, - "63": 3480799232.0, - "64": 3480799232.0, - "65": 3480799232.0, - "66": 3480799232.0, - "67": 3480799232.0, - "68": 3480799232.0, - "69": 3480799232.0, - "70": 3480799232.0, - "71": 3480799232.0, - "72": 3480799232.0, - "73": 3480799232.0, - "74": 3480799232.0, - "75": 3480799232.0, - "76": 3480799232.0, - "77": 3480799232.0, - "78": 3480799232.0, - "79": 3480799232.0, - "80": 3480799232.0, - "81": 3480799232.0, - "82": 3480799232.0, - "83": 3480799232.0, - "84": 3480799232.0, - "85": 3480799232.0, - "86": 3480799232.0, - "87": 3480799232.0, - "88": 3480799232.0, - "89": 3480799232.0, - "90": 3480799232.0, - "91": 3480799232.0, - "92": 3480799232.0, - "93": 3480799232.0, - "94": 3480799232.0, - "95": 3480799232.0, - "96": 3480799232.0, - "97": 3480799232.0, - "98": 3480799232.0, - "99": 3480799232.0, - "100": 3480799232.0 + "1": 2578864128.0, + "2": 2590714880.0, + "3": 2590714880.0, + "4": 2590714880.0, + "5": 2596939776.0, + "6": 2596939776.0, + "7": 2596939776.0, + "8": 2596939776.0, + "9": 2596939776.0, + "10": 2596939776.0, + "11": 2596939776.0, + "12": 2596939776.0, + "13": 2596939776.0, + "14": 2596939776.0, + "15": 2596939776.0, + "16": 2596939776.0, + "17": 2596939776.0, + "18": 2596939776.0, + "19": 2596939776.0, + "20": 2596939776.0, + "21": 2596939776.0, + "22": 2596939776.0, + "23": 2596939776.0, + "24": 2596939776.0, + "25": 2596939776.0, + "26": 2596939776.0, + "27": 2596939776.0, + "28": 2596939776.0, + "29": 2596939776.0, + "30": 2596939776.0, + "31": 2596939776.0, + "32": 2596939776.0, + "33": 2596939776.0, + "34": 2596939776.0, + "35": 2596939776.0, + "36": 2596939776.0, + "37": 2596939776.0, + "38": 2596939776.0, + "39": 2596939776.0, + "40": 2596939776.0, + "41": 2596939776.0, + "42": 2596939776.0, + "43": 2596939776.0, + "44": 2596939776.0, + "45": 2596939776.0, + "46": 2596939776.0, + "47": 2596939776.0, + "48": 2596939776.0, + "49": 2596939776.0, + "50": 2596939776.0, + "51": 2596939776.0, + "52": 2596939776.0, + "53": 2596939776.0, + "54": 2596939776.0, + "55": 2596939776.0, + "56": 2596939776.0, + "57": 2596939776.0, + "58": 2596939776.0, + "59": 2596939776.0, + "60": 2596939776.0, + "61": 2596939776.0, + "62": 2596939776.0, + "63": 2596939776.0, + "64": 2596939776.0, + "65": 2596939776.0, + "66": 2596939776.0, + "67": 2596939776.0, + "68": 2596939776.0, + "69": 2596939776.0, + "70": 2596939776.0, + "71": 2596939776.0, + "72": 2596939776.0, + "73": 2596939776.0, + "74": 2596939776.0, + "75": 2596939776.0, + "76": 2596939776.0, + "77": 2596939776.0, + "78": 2596939776.0, + "79": 2596939776.0, + "80": 2596939776.0, + "81": 2596939776.0, + "82": 2596939776.0, + "83": 2596939776.0, + "84": 2596939776.0, + "85": 2596939776.0, + "86": 2596939776.0, + "87": 2596939776.0, + "88": 2596939776.0, + "89": 2596939776.0, + "90": 2596939776.0, + "91": 2596939776.0, + "92": 2596939776.0, + "93": 2596939776.0, + "94": 2596939776.0, + "95": 2596939776.0, + "96": 2596939776.0, + "97": 2596939776.0, + "98": 2596939776.0, + "99": 2596939776.0, + "100": 2596939776.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.49667, - "2": 0.45982, - "3": 0.39283, - "4": 0.37269, - "5": 0.33438, - "6": 0.33048, - "7": 0.33351, - "8": 0.32704, - "9": 0.31789, - "10": 0.30958, - "11": 0.30791, - "12": 0.30859, - "13": 0.32053, - "14": 0.30171, - "15": 0.30843, - "16": 0.30302, - "17": 0.30464, - "18": 0.30431, - "19": 0.30467, - "20": 0.29614, - "21": 0.3034, - "22": 0.30183, - "23": 0.29505, - "24": 0.29208, - "25": 0.29678, - "26": 0.29737, - "27": 0.30864, - "28": 0.31313, - "29": 0.30795, - "30": 0.31701, - "31": 0.31516, - "32": 0.32758, - "33": 0.31728, - "34": 0.32164, - "35": 0.32366, - "36": 0.3008, - "37": 0.30816, - "38": 0.30782, - "39": 0.3097, - "40": 0.31658, - "41": 0.30749, - "42": 0.30662, - "43": 0.30452, - "44": 0.32171, - "45": 0.30874, - "46": 0.31718, - "47": 0.30947, - "48": 0.30568, - "49": 0.30559, - "50": 0.30518, - "51": 0.32349, - "52": 0.30552, - "53": 0.2972, - "54": 0.29675, - "55": 0.6806, - "56": 0.30449, - "57": 0.30268, - "58": 0.29449, - "59": 0.29915, - "60": 0.30558, - "61": 0.29817, - "62": 0.29837, - "63": 0.29648, - "64": 0.30355, - "65": 0.30526, - "66": 0.29685, - "67": 0.29607, - "68": 0.30383, - "69": 0.29497, - "70": 0.29908, - "71": 0.298, - "72": 0.29482, - "73": 0.29392, - "74": 0.29933, - "75": 0.29938, - "76": 0.29472, - "77": 0.29225, - "78": 0.29345, - "79": 0.29571, - "80": 0.29379, - "81": 0.29694, - "82": 0.29442, - "83": 0.29839, - "84": 0.30064, - "85": 0.29571, - "86": 0.30107, - "87": 0.29723, - "88": 0.29324, - "89": 0.29688, - "90": 0.29142, - "91": 0.29759, - "92": 0.29347, - "93": 0.29617, - "94": 0.29996, - "95": 0.29791, - "96": 0.29236, - "97": 0.29637, - "98": 0.29446, - "99": 0.293, - "100": 0.2937 + "1": "nan", + "2": 6.89551, + "3": 0.3993, + "4": 0.35815, + "5": 0.33602, + "6": 0.33197, + "7": 0.32865, + "8": 0.3207, + "9": 0.31349, + "10": 0.30884, + "11": 0.30331, + "12": 0.31767, + "13": 0.30954, + "14": 0.31341, + "15": 0.30056, + "16": 0.29909, + "17": 0.29535, + "18": 0.2967, + "19": 0.29826, + "20": 0.3668, + "21": 0.29117, + "22": 0.29552, + "23": 0.29522, + "24": 0.29834, + "25": 0.3027, + "26": 0.30854, + "27": 0.30293, + "28": 0.32097, + "29": 0.31935, + "30": 0.31509, + "31": 0.318, + "32": 0.3239, + "33": 0.31269, + "34": 0.31706, + "35": 0.30185, + "36": 0.3093, + "37": 0.30754, + "38": 0.3055, + "39": 0.30888, + "40": 0.30503, + "41": 0.31122, + "42": 0.30456, + "43": 0.30646, + "44": 0.30592, + "45": 0.30891, + "46": 0.30514, + "47": 0.3133, + "48": 0.30772, + "49": 0.30986, + "50": 0.30753, + "51": 0.31874, + "52": 0.31049, + "53": 0.3116, + "54": 0.30679, + "55": 0.31195, + "56": 0.30856, + "57": 0.3111, + "58": 0.31146, + "59": 0.30836, + "60": 0.30868, + "61": 0.30926, + "62": 0.31281, + "63": 0.30621, + "64": 0.30843, + "65": 0.30639, + "66": 0.30615, + "67": 0.31187, + "68": 0.30344, + "69": 0.30997, + "70": 0.30812, + "71": 0.30308, + "72": 0.30953, + "73": 0.30762, + "74": 0.30868, + "75": 0.30488, + "76": 0.30533, + "77": 0.30634, + "78": 0.30293, + "79": 0.30759, + "80": 0.3039, + "81": 0.30436, + "82": 0.30687, + "83": 0.30699, + "84": 0.30074, + "85": 0.69936, + "86": 0.30517, + "87": 0.29797, + "88": 0.29916, + "89": 0.30492, + "90": 0.30345, + "91": 0.30516, + "92": 0.3062, + "93": 0.30268, + "94": 0.30389, + "95": 0.30319, + "96": 0.30544, + "97": 0.30696, + "98": 0.30283, + "99": 0.30414, + "100": 0.30658 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_gb200.json index fe8d3f78926..da72a476bdb 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_ep8_resume_torch_dist_muon/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.80397, "2": 10.81064, - "3": 10.79812, - "4": 10.78677, - "5": 10.82981, - "6": 10.85273, - "7": 10.80976, - "8": 10.80152, - "9": 10.82476, - "10": 10.78235, - "11": 10.83837, - "12": 10.84645, - "13": 10.86121, - "14": 10.86494, - "15": 10.83809, - "16": 10.8346, - "17": 10.8121, - "18": 10.84437, - "19": 10.83592, - "20": 10.81732, - "21": 10.83519, - "22": 10.76256, - "23": 10.85511, - "24": 10.80666, - "25": 10.80025, - "26": 10.81426, - "27": 10.82995, - "28": 10.855, - "29": 10.86565, - "30": 10.79384, - "31": 10.74771, - "32": 10.84943, - "33": 10.83771, - "34": 10.80572, - "35": 10.80265, - "36": 10.79622, - "37": 10.82514, - "38": 10.79237, - "39": 10.84811, - "40": 10.77883, - "41": 10.79922, - "42": 10.81563, - "43": 10.74376, - "44": 10.76683, - "45": 10.76467, - "46": 10.77697, - "47": 10.79973, - "48": 10.77586, - "49": 10.72215, - "50": 10.78584, - "51": 10.78731, - "52": 10.7657, - "53": 10.81241, - "54": 10.79761, - "55": 10.80688, - "56": 10.75611, - "57": 10.71341, - "58": 10.78104, - "59": 10.7507, - "60": 10.72941, - "61": 10.76448, + "3": 10.79816, + "4": 10.78667, + "5": 10.83006, + "6": 10.85279, + "7": 10.81001, + "8": 10.80139, + "9": 10.82488, + "10": 10.78278, + "11": 10.83861, + "12": 10.84706, + "13": 10.86115, + "14": 10.86392, + "15": 10.83829, + "16": 10.83465, + "17": 10.81228, + "18": 10.84433, + "19": 10.83579, + "20": 10.81715, + "21": 10.83558, + "22": 10.76244, + "23": 10.85562, + "24": 10.80585, + "25": 10.80032, + "26": 10.81408, + "27": 10.82942, + "28": 10.85518, + "29": 10.86654, + "30": 10.79342, + "31": 10.74744, + "32": 10.84934, + "33": 10.83722, + "34": 10.80532, + "35": 10.80254, + "36": 10.79634, + "37": 10.82446, + "38": 10.79244, + "39": 10.8476, + "40": 10.77824, + "41": 10.79911, + "42": 10.81569, + "43": 10.74332, + "44": 10.76703, + "45": 10.76456, + "46": 10.77681, + "47": 10.79976, + "48": 10.77524, + "49": 10.72236, + "50": 10.78643, + "51": 10.78682, + "52": 10.76527, + "53": 10.81239, + "54": 10.79746, + "55": 10.80725, + "56": 10.75649, + "57": 10.71408, + "58": 10.78111, + "59": 10.75091, + "60": 10.7293, + "61": 10.76447, "62": 10.8119, - "63": 10.69242, - "64": 10.76661, - "65": 10.62474, - "66": 10.75342, - "67": 10.69134, - "68": 10.77079, - "69": 10.76029, - "70": 10.76451, - "71": 10.73531, - "72": 10.72951, - "73": 10.7174, - "74": 10.57782, - "75": 10.68245, - "76": 10.61342, - "77": 10.80749, - "78": 10.7321, - "79": 10.66078, - "80": 10.68008, - "81": 10.69796, - "82": 10.72301, - "83": 10.6413, - "84": 10.6619, - "85": 10.70249, - "86": 10.58035, - "87": 10.69015, - "88": 10.73441, - "89": 10.67777, + "63": 10.69268, + "64": 10.76676, + "65": 10.62456, + "66": 10.75377, + "67": 10.69166, + "68": 10.77137, + "69": 10.75985, + "70": 10.76522, + "71": 10.73505, + "72": 10.72934, + "73": 10.71724, + "74": 10.57755, + "75": 10.68228, + "76": 10.61297, + "77": 10.80774, + "78": 10.73233, + "79": 10.66075, + "80": 10.68018, + "81": 10.6978, + "82": 10.72298, + "83": 10.6411, + "84": 10.66201, + "85": 10.70254, + "86": 10.58044, + "87": 10.69046, + "88": 10.7346, + "89": 10.67776, "90": 10.74269, - "91": 10.62186, - "92": 10.63964, - "93": 10.56627, - "94": 10.49913, - "95": 10.65738, - "96": 10.65873, - "97": 10.57872, - "98": 10.6722, - "99": 10.4802, - "100": 10.59334 + "91": 10.62158, + "92": 10.63948, + "93": 10.56648, + "94": 10.49996, + "95": 10.65683, + "96": 10.65942, + "97": 10.57917, + "98": 10.67222, + "99": 10.4805, + "100": 10.59341 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1158.0, - "2": 1181.0, - "3": 1248.0, - "4": 1219.0, - "5": 1372.0, - "6": 1529.0, - "7": 1362.0, - "8": 1179.0, - "9": 1276.0, - "10": 1234.0, - "11": 1281.0, - "12": 1249.0, - "13": 1386.0, - "14": 1213.0, - "15": 1215.0, - "16": 1299.0, - "17": 1242.0, - "18": 1233.0, - "19": 1167.0, - "20": 1392.0, - "21": 1264.0, - "22": 1289.0, - "23": 1336.0, - "24": 1168.0, - "25": 1170.0, - "26": 1207.0, - "27": 1192.0, - "28": 1327.0, - "29": 1354.0, - "30": 1250.0, - "31": 1110.0, - "32": 1331.0, - "33": 1340.0, - "34": 1250.0, - "35": 1105.0, - "36": 1138.0, - "37": 1265.0, - "38": 1375.0, - "39": 1243.0, - "40": 1306.0, - "41": 1154.0, - "42": 1251.0, - "43": 1122.0, - "44": 1139.0, - "45": 1122.0, - "46": 1203.0, - "47": 1405.0, - "48": 1282.0, - "49": 1167.0, - "50": 1166.0, - "51": 1249.0, - "52": 1320.0, - "53": 1340.0, - "54": 1232.0, - "55": 1103.0, - "56": 1275.0, - "57": 1194.0, - "58": 1259.0, - "59": 1283.0, - "60": 1265.0, - "61": 1124.0, - "62": 1349.0, - "63": 1132.0, - "64": 1272.0, - "65": 1017.0, - "66": 1174.0, - "67": 1242.0, - "68": 1291.0, - "69": 1295.0, - "70": 1143.0, - "71": 1148.0, - "72": 1266.0, - "73": 1199.0, - "74": 1133.0, - "75": 1346.0, - "76": 1224.0, - "77": 1329.0, - "78": 1256.0, - "79": 997.0, - "80": 1093.0, - "81": 1204.0, - "82": 1213.0, - "83": 1128.0, - "84": 1228.0, - "85": 1316.0, - "86": 1101.0, - "87": 1278.0, - "88": 1286.0, - "89": 1163.0, - "90": 1415.0, - "91": 1248.0, - "92": 1137.0, - "93": 912.0, - "94": 985.0, - "95": 1097.0, - "96": 1087.0, - "97": 1098.0, - "98": 1170.0, - "99": 1047.0, - "100": 1205.0 + "1": 1199.0, + "2": 1204.0, + "3": 1185.0, + "4": 1174.0, + "5": 1364.0, + "6": 1517.0, + "7": 1302.0, + "8": 1274.0, + "9": 1298.0, + "10": 1221.0, + "11": 1232.0, + "12": 1277.0, + "13": 1578.0, + "14": 1183.0, + "15": 1288.0, + "16": 1231.0, + "17": 1329.0, + "18": 1219.0, + "19": 1099.0, + "20": 1333.0, + "21": 1305.0, + "22": 1253.0, + "23": 1297.0, + "24": 1214.0, + "25": 1187.0, + "26": 1179.0, + "27": 1150.0, + "28": 1285.0, + "29": 1419.0, + "30": 1243.0, + "31": 1040.0, + "32": 1334.0, + "33": 1386.0, + "34": 1244.0, + "35": 1063.0, + "36": 1172.0, + "37": 1294.0, + "38": 1403.0, + "39": 1349.0, + "40": 1247.0, + "41": 1124.0, + "42": 1302.0, + "43": 1128.0, + "44": 1247.0, + "45": 1190.0, + "46": 1247.0, + "47": 1377.0, + "48": 1381.0, + "49": 1079.0, + "50": 1128.0, + "51": 1295.0, + "52": 1270.0, + "53": 1415.0, + "54": 1154.0, + "55": 1124.0, + "56": 1267.0, + "57": 1261.0, + "58": 1328.0, + "59": 1396.0, + "60": 1190.0, + "61": 1141.0, + "62": 1365.0, + "63": 1186.0, + "64": 1295.0, + "65": 1079.0, + "66": 1147.0, + "67": 1205.0, + "68": 1300.0, + "69": 1256.0, + "70": 1150.0, + "71": 1179.0, + "72": 1253.0, + "73": 1187.0, + "74": 1175.0, + "75": 1406.0, + "76": 1124.0, + "77": 1378.0, + "78": 1268.0, + "79": 971.0, + "80": 1089.0, + "81": 1274.0, + "82": 1187.0, + "83": 1123.0, + "84": 1284.0, + "85": 1311.0, + "86": 1121.0, + "87": 1285.0, + "88": 1305.0, + "89": 1239.0, + "90": 1375.0, + "91": 1198.0, + "92": 1135.0, + "93": 931.0, + "94": 1013.0, + "95": 1186.0, + "96": 1152.0, + "97": 1065.0, + "98": 1173.0, + "99": 1085.0, + "100": 1193.0 } }, "mem-allocated-bytes": { @@ -221,103 +221,103 @@ "1": 1095588352.0, "2": 1095600640.0, "3": 1095576576.0, - "4": 1095606272.0, - "5": 1095514624.0, + "4": 1095604736.0, + "5": 1095514112.0, "6": 1095542272.0, - "7": 1095563776.0, - "8": 1095580160.0, - "9": 1095585792.0, - "10": 1095554048.0, - "11": 1095538176.0, - "12": 1095523328.0, - "13": 1095589888.0, - "14": 1095519744.0, - "15": 1095557120.0, - "16": 1095548928.0, - "17": 1095531008.0, - "18": 1095528448.0, - "19": 1095549440.0, - "20": 1095504384.0, - "21": 1095561728.0, - "22": 1095583232.0, - "23": 1095534592.0, - "24": 1095604736.0, + "7": 1095565824.0, + "8": 1095580672.0, + "9": 1095586816.0, + "10": 1095555072.0, + "11": 1095536640.0, + "12": 1095522304.0, + "13": 1095591424.0, + "14": 1095520256.0, + "15": 1095557632.0, + "16": 1095552000.0, + "17": 1095530496.0, + "18": 1095527936.0, + "19": 1095548416.0, + "20": 1095504896.0, + "21": 1095557632.0, + "22": 1095582720.0, + "23": 1095535104.0, + "24": 1095603712.0, "25": 1095577088.0, - "26": 1095565824.0, - "27": 1095591424.0, - "28": 1095485952.0, - "29": 1095502848.0, + "26": 1095563776.0, + "27": 1095590400.0, + "28": 1095489024.0, + "29": 1095503360.0, "30": 1095552512.0, - "31": 1095588352.0, + "31": 1095588864.0, "32": 1095491072.0, - "33": 1095547392.0, + "33": 1095546368.0, "34": 1095568384.0, - "35": 1095527424.0, - "36": 1095533568.0, - "37": 1095482880.0, - "38": 1095552000.0, - "39": 1095532544.0, + "35": 1095527936.0, + "36": 1095532544.0, + "37": 1095483904.0, + "38": 1095550464.0, + "39": 1095531520.0, "40": 1095567360.0, - "41": 1095537152.0, - "42": 1095543296.0, - "43": 1095581184.0, + "41": 1095536640.0, + "42": 1095543808.0, + "43": 1095577088.0, "44": 1095620096.0, - "45": 1095569408.0, - "46": 1095584768.0, - "47": 1095573504.0, - "48": 1095577088.0, - "49": 1095530496.0, - "50": 1095540736.0, - "51": 1095570944.0, - "52": 1095538176.0, - "53": 1095597568.0, + "45": 1095565824.0, + "46": 1095583744.0, + "47": 1095576576.0, + "48": 1095577600.0, + "49": 1095531008.0, + "50": 1095540224.0, + "51": 1095572480.0, + "52": 1095535616.0, + "53": 1095599616.0, "54": 1095536640.0, - "55": 1095517184.0, - "56": 1095566848.0, - "57": 1095645696.0, + "55": 1095517696.0, + "56": 1095569408.0, + "57": 1095646720.0, "58": 1095634944.0, - "59": 1095548928.0, - "60": 1095562752.0, - "61": 1095553536.0, - "62": 1095572480.0, - "63": 1095573504.0, - "64": 1095550464.0, - "65": 1095578112.0, - "66": 1095531008.0, - "67": 1095568896.0, - "68": 1095566848.0, - "69": 1095527424.0, - "70": 1095532032.0, - "71": 1095520768.0, + "59": 1095548416.0, + "60": 1095560704.0, + "61": 1095556096.0, + "62": 1095572992.0, + "63": 1095572480.0, + "64": 1095547904.0, + "65": 1095579136.0, + "66": 1095531520.0, + "67": 1095569408.0, + "68": 1095567872.0, + "69": 1095527936.0, + "70": 1095533568.0, + "71": 1095521280.0, "72": 1095548928.0, - "73": 1095569920.0, - "74": 1095596032.0, - "75": 1095538688.0, - "76": 1095584768.0, - "77": 1095507968.0, - "78": 1095514624.0, - "79": 1095515648.0, - "80": 1095551488.0, - "81": 1095513600.0, + "73": 1095570432.0, + "74": 1095595520.0, + "75": 1095540224.0, + "76": 1095583744.0, + "77": 1095508992.0, + "78": 1095514112.0, + "79": 1095516672.0, + "80": 1095552512.0, + "81": 1095513088.0, "82": 1095498240.0, - "83": 1095558656.0, + "83": 1095557632.0, "84": 1095569408.0, - "85": 1095576064.0, - "86": 1095590400.0, - "87": 1095523840.0, - "88": 1095517696.0, - "89": 1095539712.0, - "90": 1095528960.0, - "91": 1095550976.0, - "92": 1095561216.0, - "93": 1095579136.0, - "94": 1095564288.0, - "95": 1095510528.0, - "96": 1095502336.0, - "97": 1095537152.0, + "85": 1095574528.0, + "86": 1095589888.0, + "87": 1095524352.0, + "88": 1095519744.0, + "89": 1095540736.0, + "90": 1095529472.0, + "91": 1095549952.0, + "92": 1095560192.0, + "93": 1095576576.0, + "94": 1095566336.0, + "95": 1095512576.0, + "96": 1095500800.0, + "97": 1095538688.0, "98": 1095496192.0, - "99": 1095577600.0, - "100": 1095598592.0 + "99": 1095577088.0, + "100": 1095597056.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 3125957632.0, - "2": 3477050368.0, - "3": 3477050368.0, - "4": 3481636352.0, - "5": 3481636352.0, - "6": 3481636352.0, - "7": 3481636352.0, - "8": 3481636352.0, - "9": 3481636352.0, - "10": 3481636352.0, - "11": 3481636352.0, - "12": 3481636352.0, - "13": 3481636352.0, - "14": 3481636352.0, - "15": 3481636352.0, - "16": 3481636352.0, - "17": 3481636352.0, - "18": 3481636352.0, - "19": 3481636352.0, - "20": 3481636352.0, - "21": 3481636352.0, - "22": 3481636352.0, - "23": 3481636352.0, - "24": 3482527744.0, - "25": 3482527744.0, - "26": 3482527744.0, - "27": 3482527744.0, - "28": 3482527744.0, - "29": 3482527744.0, - "30": 3482527744.0, - "31": 3482527744.0, - "32": 3482527744.0, - "33": 3482527744.0, - "34": 3482527744.0, - "35": 3482527744.0, - "36": 3482527744.0, - "37": 3482527744.0, - "38": 3482527744.0, - "39": 3482527744.0, - "40": 3482527744.0, - "41": 3482527744.0, - "42": 3482527744.0, - "43": 3482527744.0, - "44": 3495770112.0, - "45": 3495770112.0, - "46": 3495770112.0, - "47": 3495770112.0, - "48": 3495770112.0, - "49": 3495770112.0, - "50": 3495770112.0, - "51": 3495770112.0, - "52": 3495770112.0, - "53": 3495770112.0, - "54": 3495770112.0, - "55": 3495770112.0, - "56": 3495770112.0, - "57": 3505988608.0, - "58": 3505988608.0, - "59": 3505988608.0, - "60": 3505988608.0, - "61": 3505988608.0, - "62": 3505988608.0, - "63": 3505988608.0, - "64": 3505988608.0, - "65": 3505988608.0, - "66": 3505988608.0, - "67": 3505988608.0, - "68": 3505988608.0, - "69": 3505988608.0, - "70": 3505988608.0, - "71": 3505988608.0, - "72": 3505988608.0, - "73": 3505988608.0, - "74": 3505988608.0, - "75": 3505988608.0, - "76": 3505988608.0, - "77": 3505988608.0, - "78": 3505988608.0, - "79": 3505988608.0, - "80": 3505988608.0, - "81": 3505988608.0, - "82": 3505988608.0, - "83": 3505988608.0, - "84": 3505988608.0, - "85": 3505988608.0, - "86": 3505988608.0, - "87": 3505988608.0, - "88": 3505988608.0, - "89": 3505988608.0, - "90": 3505988608.0, - "91": 3505988608.0, - "92": 3505988608.0, - "93": 3505988608.0, - "94": 3505988608.0, - "95": 3505988608.0, - "96": 3505988608.0, - "97": 3505988608.0, - "98": 3505988608.0, - "99": 3505988608.0, - "100": 3505988608.0 + "1": 3125509120.0, + "2": 3475397632.0, + "3": 3475397632.0, + "4": 3483599872.0, + "5": 3483599872.0, + "6": 3483599872.0, + "7": 3483599872.0, + "8": 3483599872.0, + "9": 3483599872.0, + "10": 3483599872.0, + "11": 3483599872.0, + "12": 3483599872.0, + "13": 3483599872.0, + "14": 3483599872.0, + "15": 3483599872.0, + "16": 3483599872.0, + "17": 3483599872.0, + "18": 3483599872.0, + "19": 3483599872.0, + "20": 3483599872.0, + "21": 3483599872.0, + "22": 3483599872.0, + "23": 3483599872.0, + "24": 3483599872.0, + "25": 3483599872.0, + "26": 3483599872.0, + "27": 3483599872.0, + "28": 3483599872.0, + "29": 3483599872.0, + "30": 3483599872.0, + "31": 3483599872.0, + "32": 3483599872.0, + "33": 3483599872.0, + "34": 3483599872.0, + "35": 3483599872.0, + "36": 3483599872.0, + "37": 3483599872.0, + "38": 3483599872.0, + "39": 3483599872.0, + "40": 3483599872.0, + "41": 3483599872.0, + "42": 3483599872.0, + "43": 3483599872.0, + "44": 3491555328.0, + "45": 3491555328.0, + "46": 3491555328.0, + "47": 3491555328.0, + "48": 3491555328.0, + "49": 3491555328.0, + "50": 3491555328.0, + "51": 3491555328.0, + "52": 3491555328.0, + "53": 3491555328.0, + "54": 3491555328.0, + "55": 3491555328.0, + "56": 3491555328.0, + "57": 3513412608.0, + "58": 3513412608.0, + "59": 3513412608.0, + "60": 3513412608.0, + "61": 3513412608.0, + "62": 3513412608.0, + "63": 3513412608.0, + "64": 3513412608.0, + "65": 3513412608.0, + "66": 3513412608.0, + "67": 3513412608.0, + "68": 3513412608.0, + "69": 3513412608.0, + "70": 3513412608.0, + "71": 3513412608.0, + "72": 3513412608.0, + "73": 3513412608.0, + "74": 3513412608.0, + "75": 3513412608.0, + "76": 3513412608.0, + "77": 3513412608.0, + "78": 3513412608.0, + "79": 3513412608.0, + "80": 3513412608.0, + "81": 3513412608.0, + "82": 3513412608.0, + "83": 3513412608.0, + "84": 3513412608.0, + "85": 3513412608.0, + "86": 3513412608.0, + "87": 3513412608.0, + "88": 3513412608.0, + "89": 3513412608.0, + "90": 3513412608.0, + "91": 3513412608.0, + "92": 3513412608.0, + "93": 3513412608.0, + "94": 3513412608.0, + "95": 3513412608.0, + "96": 3513412608.0, + "97": 3513412608.0, + "98": 3513412608.0, + "99": 3513412608.0, + "100": 3513412608.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 4.71174, - "3": 0.47502, - "4": 0.44931, - "5": 0.44277, - "6": 0.44844, - "7": 0.45785, - "8": 0.44209, - "9": 0.43757, - "10": 0.42772, - "11": 0.44315, - "12": 0.42725, - "13": 0.42666, - "14": 0.41928, - "15": 0.42831, - "16": 0.42799, - "17": 0.42051, - "18": 0.41469, - "19": 0.41876, - "20": 0.41842, - "21": 0.43095, - "22": 0.41003, - "23": 0.41066, - "24": 0.41091, - "25": 0.40849, - "26": 0.4098, - "27": 0.41447, - "28": 0.4098, - "29": 0.40395, - "30": 0.41016, - "31": 0.41347, - "32": 0.40916, - "33": 0.41299, - "34": 0.40596, - "35": 0.40696, - "36": 0.40868, - "37": 0.40718, - "38": 0.40736, - "39": 0.40604, - "40": 0.40127, - "41": 0.4, - "42": 0.40197, - "43": 0.40902, - "44": 0.40712, - "45": 0.4098, - "46": 0.40168, - "47": 0.40487, - "48": 0.40622, - "49": 0.4089, - "50": 0.40406, - "51": 0.41118, - "52": 0.40412, - "53": 0.40027, - "54": 0.40192, - "55": 0.39782, - "56": 0.39731, - "57": 0.39836, - "58": 0.40128, - "59": 0.39958, - "60": 0.39863, - "61": 0.78712, - "62": 0.39887, - "63": 0.39967, - "64": 0.40024, - "65": 0.39891, - "66": 0.40058, - "67": 0.80982, - "68": 0.39889, - "69": 0.39895, - "70": 0.40201, - "71": 0.39871, - "72": 0.39819, - "73": 0.40638, - "74": 0.40241, - "75": 0.39867, - "76": 0.40192, - "77": 0.4032, - "78": 0.39871, - "79": 0.96252, - "80": 0.39811, - "81": 0.40176, - "82": 0.39856, - "83": 0.40217, - "84": 0.3966, - "85": 0.40212, - "86": 0.40144, - "87": 0.39779, - "88": 0.3989, - "89": 0.39982, - "90": 0.40291, - "91": 0.40052, - "92": 0.39772, - "93": 0.40147, - "94": 0.40072, - "95": 0.40007, - "96": 0.40232, - "97": 0.40777, - "98": 0.4002, - "99": 0.39995, - "100": 0.39879 + "2": 7.28646, + "3": 0.47271, + "4": 0.47745, + "5": 0.46501, + "6": 0.47669, + "7": 0.46462, + "8": 0.46907, + "9": 0.44246, + "10": 0.4455, + "11": 0.46496, + "12": 0.44073, + "13": 0.43266, + "14": 0.65831, + "15": 0.43046, + "16": 0.44578, + "17": 0.43964, + "18": 0.42736, + "19": 0.42673, + "20": 0.4253, + "21": 0.43642, + "22": 0.42855, + "23": 0.4346, + "24": 0.43483, + "25": 0.66422, + "26": 0.43455, + "27": 0.92371, + "28": 0.435, + "29": 0.42648, + "30": 0.43015, + "31": 0.43893, + "32": 0.43102, + "33": 0.43175, + "34": 0.43105, + "35": 0.42836, + "36": 0.43039, + "37": 0.43009, + "38": 0.42479, + "39": 0.42802, + "40": 0.42478, + "41": 0.429, + "42": 0.42865, + "43": 0.42842, + "44": 0.4272, + "45": 0.42734, + "46": 0.42362, + "47": 0.42573, + "48": 0.42741, + "49": 0.42514, + "50": 0.42652, + "51": 0.45536, + "52": 0.44443, + "53": 0.43114, + "54": 0.43344, + "55": 0.43001, + "56": 0.434, + "57": 0.42981, + "58": 0.4385, + "59": 0.42664, + "60": 0.42897, + "61": 0.43394, + "62": 0.43264, + "63": 0.43242, + "64": 0.42941, + "65": 0.42985, + "66": 0.43279, + "67": 0.43045, + "68": 0.43095, + "69": 0.43014, + "70": 0.42803, + "71": 0.42618, + "72": 0.42723, + "73": 0.4291, + "74": 0.43729, + "75": 0.43541, + "76": 0.43098, + "77": 0.42678, + "78": 0.42621, + "79": 0.43012, + "80": 0.42998, + "81": 0.43012, + "82": 0.42825, + "83": 0.42992, + "84": 0.43213, + "85": 0.42977, + "86": 0.42829, + "87": 0.42697, + "88": 0.42891, + "89": 0.43068, + "90": 0.43026, + "91": 0.42539, + "92": 0.42817, + "93": 0.4276, + "94": 0.42846, + "95": 0.43302, + "96": 0.43007, + "97": 0.43566, + "98": 0.42972, + "99": 0.43211, + "100": 0.42947 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index d5ced620365..1f980642e72 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.06693, - "2": 11.0602, - "3": 10.16141, - "4": 10.11145, - "5": 10.47957, - "6": 10.21751, - "7": 10.56153, - "8": 12.79501, - "9": 12.96949, - "10": 13.32223, - "11": 11.63359, - "12": 11.4938, - "13": 12.46292, - "14": 12.13415, - "15": 11.90295, - "16": 12.01307, - "17": 12.17443, - "18": 12.64978, - "19": 11.81295, - "20": 12.18673, - "21": 11.24306, - "22": 11.54156, - "23": 10.98412, - "24": 11.01925, - "25": 10.73001, - "26": 10.72806, - "27": 10.79039, - "28": 10.714, - "29": 10.73974, - "30": 10.75246, - "31": 10.68874, - "32": 10.65791, - "33": 10.81137, - "34": 10.79058, - "35": 10.75368, - "36": 10.64393, - "37": 10.87492, - "38": 10.90591, - "39": 10.78825, - "40": 10.75548, - "41": 10.8955, - "42": 10.70411, - "43": 10.66907, - "44": 10.72512, - "45": 10.54927, - "46": 10.46973, - "47": 10.66311, - "48": 10.62453, - "49": 10.61656, - "50": 10.21176 + "1": 11.06701, + "2": 11.06021, + "3": 10.20005, + "4": 9.75939, + "5": 10.37343, + "6": 10.19088, + "7": 11.31502, + "8": 12.43285, + "9": 12.01458, + "10": 11.45726, + "11": 11.58577, + "12": 11.42409, + "13": 12.09212, + "14": 12.17488, + "15": 11.97062, + "16": 11.88313, + "17": 11.93875, + "18": 12.32174, + "19": 11.67153, + "20": 12.19814, + "21": 11.54287, + "22": 9.80848, + "23": 9.97818, + "24": 9.71264, + "25": 10.50518, + "26": 9.97363, + "27": 10.00439, + "28": 10.19053, + "29": 10.46412, + "30": 10.66579, + "31": 10.85612, + "32": 10.74726, + "33": 10.76846, + "34": 10.76858, + "35": 10.76094, + "36": 10.53668, + "37": 10.77826, + "38": 10.71275, + "39": 10.82689, + "40": 10.72817, + "41": 10.68451, + "42": 10.18244, + "43": 10.79574, + "44": 10.8553, + "45": 10.43881, + "46": 10.73617, + "47": 10.80059, + "48": 10.8248, + "49": 10.79974, + "50": 10.54592 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47165216.0, - "2": 46897552.0, - "3": 52682736.0, - "4": 70585808.0, - "5": 1850183680.0, - "6": 171098656.0, - "7": 436105120.0, - "8": 1850183680.0, - "9": 1850183680.0, - "10": 1850183680.0, - "11": 1850183680.0, - "12": 1850183680.0, - "13": 1850183680.0, - "14": 1850183680.0, - "15": 555857088.0, - "16": 1850183680.0, - "17": 1850183680.0, - "18": 1850183680.0, - "19": 886404992.0, - "20": 654826944.0, - "21": 603993664.0, - "22": 726709632.0, - "23": 566656896.0, - "24": 1850183680.0, - "25": 799245696.0, - "26": 978252032.0, - "27": 1850183680.0, - "28": 906183104.0, - "29": 1850183680.0, - "30": 1850183680.0, - "31": 810874112.0, - "32": 1850183680.0, - "33": 1850183680.0, - "34": 553779584.0, - "35": 565382400.0, - "36": 585787712.0, - "37": 627284160.0, - "38": 331368192.0, - "39": 638619264.0, - "40": 1850183680.0, - "41": 1850183680.0, - "42": 1850183680.0, - "43": 1850183680.0, - "44": 1850183680.0, - "45": 1850183680.0, - "46": 1850183680.0, - "47": 434842944.0, - "48": 1850183680.0, - "49": 575219328.0, - "50": 1850183680.0 + "1": 47165260.0, + "2": 46897544.0, + "3": 1815580672.0, + "4": 1815580672.0, + "5": 135092176.0, + "6": 64130344.0, + "7": 1815580672.0, + "8": 1815580672.0, + "9": 1815580672.0, + "10": 1815580672.0, + "11": 1815580672.0, + "12": 1815580672.0, + "13": 1815580672.0, + "14": 1815580672.0, + "15": 1815580672.0, + "16": 1815580672.0, + "17": 1815580672.0, + "18": 1815580672.0, + "19": 1815580672.0, + "20": 1815580672.0, + "21": 386908928.0, + "22": 1815580672.0, + "23": 1815580672.0, + "24": 1815580672.0, + "25": 534944416.0, + "26": 616509824.0, + "27": 914967936.0, + "28": 937646336.0, + "29": 924778048.0, + "30": 1815580672.0, + "31": 867456256.0, + "32": 838028800.0, + "33": 969112000.0, + "34": 503505536.0, + "35": 562247616.0, + "36": 658155200.0, + "37": 542371712.0, + "38": 1815580672.0, + "39": 1815580672.0, + "40": 1815580672.0, + "41": 1815580672.0, + "42": 1815580672.0, + "43": 1815580672.0, + "44": 1815580672.0, + "45": 1815580672.0, + "46": 1815580672.0, + "47": 1815580672.0, + "48": 1815580672.0, + "49": 490314144.0, + "50": 667091136.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5283616256.0, - "2": 5288015360.0, - "3": 5288218112.0, - "4": 5288420864.0, - "5": 5288623616.0, - "6": 5287812608.0, - "7": 5288015360.0, - "8": 5288218112.0, - "9": 5287711232.0, - "10": 5287913984.0, - "11": 5288116736.0, - "12": 5288319488.0, - "13": 5288522240.0, - "14": 5288724992.0, - "15": 5288927744.0, - "16": 5289130496.0, - "17": 5289333248.0, - "18": 5289536000.0, - "19": 5289738752.0, - "20": 5289941504.0, - "21": 5290144256.0, - "22": 5290347008.0, - "23": 5290549760.0, - "24": 5290752512.0, - "25": 5290955264.0, - "26": 5291158016.0, - "27": 5291360768.0, - "28": 5291563520.0, - "29": 5291766272.0, - "30": 5291969024.0, - "31": 5292171776.0, - "32": 5292374528.0, - "33": 5292577280.0, - "34": 5292780032.0, - "35": 5292982784.0, - "36": 5293185536.0, - "37": 5293388288.0, - "38": 5293591040.0, - "39": 5293793792.0, - "40": 5293996544.0, - "41": 5294199296.0, - "42": 5294402048.0, - "43": 5294604800.0, - "44": 5294807552.0, - "45": 5295010304.0, - "46": 5295213056.0, - "47": 5295415808.0, - "48": 5295618560.0, - "49": 5295821312.0, - "50": 5296024064.0 + "1": 5283603968.0, + "2": 5283808768.0, + "3": 5284011520.0, + "4": 5284214272.0, + "5": 5284417024.0, + "6": 5283504640.0, + "7": 5283707392.0, + "8": 5283910144.0, + "9": 5284112896.0, + "10": 5284315648.0, + "11": 5284518400.0, + "12": 5283504640.0, + "13": 5283707392.0, + "14": 5283910144.0, + "15": 5284112896.0, + "16": 5284315648.0, + "17": 5284518400.0, + "18": 5284721152.0, + "19": 5284923904.0, + "20": 5285126656.0, + "21": 5285329408.0, + "22": 5285532160.0, + "23": 5285734912.0, + "24": 5285937664.0, + "25": 5286140416.0, + "26": 5286343168.0, + "27": 5286545920.0, + "28": 5286748672.0, + "29": 5286951424.0, + "30": 5287154176.0, + "31": 5287356928.0, + "32": 5287559680.0, + "33": 5287762432.0, + "34": 5287965184.0, + "35": 5288167936.0, + "36": 5288370688.0, + "37": 5288573440.0, + "38": 5288776192.0, + "39": 5288978944.0, + "40": 5289181696.0, + "41": 5289384448.0, + "42": 5289587200.0, + "43": 5289789952.0, + "44": 5289992704.0, + "45": 5290195456.0, + "46": 5290398208.0, + "47": 5290600960.0, + "48": 5290803712.0, + "49": 5291006464.0, + "50": 5291209216.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5283618816.0, - "2": 8185453056.0, - "3": 8185453056.0, - "4": 8185453056.0, - "5": 8195318272.0, - "6": 8195318272.0, - "7": 8195318272.0, - "8": 8195318272.0, - "9": 8195318272.0, - "10": 8195318272.0, - "11": 8195318272.0, - "12": 8195318272.0, - "13": 8195318272.0, - "14": 8195318272.0, - "15": 8195318272.0, - "16": 8199233024.0, - "17": 8199233024.0, - "18": 8199233024.0, - "19": 8199233024.0, - "20": 8199233024.0, - "21": 8238446080.0, - "22": 8238446080.0, - "23": 8238446080.0, - "24": 8238446080.0, - "25": 8247293440.0, - "26": 8247293440.0, - "27": 8247293440.0, - "28": 8250185216.0, - "29": 8255527424.0, - "30": 8255527424.0, - "31": 8255527424.0, - "32": 8255527424.0, - "33": 8255527424.0, - "34": 8255527424.0, - "35": 8255527424.0, - "36": 8255527424.0, - "37": 8255527424.0, - "38": 8255527424.0, - "39": 8255527424.0, - "40": 8255527424.0, - "41": 8255527424.0, - "42": 8255527424.0, - "43": 8255527424.0, - "44": 8255527424.0, - "45": 8255527424.0, - "46": 8255527424.0, - "47": 8255527424.0, - "48": 8255527424.0, - "49": 8255527424.0, - "50": 8255527424.0 + "1": 5283606528.0, + "2": 8252578304.0, + "3": 8252578304.0, + "4": 8252578304.0, + "5": 8253250048.0, + "6": 8253250048.0, + "7": 8253250048.0, + "8": 8253250048.0, + "9": 8253250048.0, + "10": 8253250048.0, + "11": 8253250048.0, + "12": 8253250048.0, + "13": 8253250048.0, + "14": 8253250048.0, + "15": 8253250048.0, + "16": 8253250048.0, + "17": 8253250048.0, + "18": 8253250048.0, + "19": 8253250048.0, + "20": 8253250048.0, + "21": 8253250048.0, + "22": 8259284480.0, + "23": 8319884800.0, + "24": 8358525440.0, + "25": 8362294784.0, + "26": 8362294784.0, + "27": 8362294784.0, + "28": 8362294784.0, + "29": 8362294784.0, + "30": 8364191232.0, + "31": 8364191232.0, + "32": 8364191232.0, + "33": 8364191232.0, + "34": 8364191232.0, + "35": 8364191232.0, + "36": 8364191232.0, + "37": 8364191232.0, + "38": 8377395712.0, + "39": 8377395712.0, + "40": 8377395712.0, + "41": 8377395712.0, + "42": 8377395712.0, + "43": 8377395712.0, + "44": 8377395712.0, + "45": 8377395712.0, + "46": 8377395712.0, + "47": 8377395712.0, + "48": 8377395712.0, + "49": 8377395712.0, + "50": 8377395712.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07401, - "2": 11.0927, - "3": 10.83159, - "4": 10.61397, - "5": 10.85768, - "6": 9.79263, - "7": 10.90607, - "8": 10.19798, - "9": 9.82717, - "10": 9.23805, - "11": 11.0712, - "12": 11.11709, - "13": 10.03407, - "14": 10.27606, - "15": 10.73067, - "16": 10.91485, - "17": 10.76886, - "18": 10.49659, - "19": 10.96955, - "20": 10.45905, - "21": 10.91629, - "22": 10.05081, - "23": 10.44411, - "24": 9.74826, - "25": 10.81497, - "26": 10.38519, - "27": 10.31999, - "28": 10.27887, - "29": 10.40945, - "30": 10.20684, - "31": 10.54594, - "32": 8.85942, - "33": 9.75619, - "34": 10.56214, - "35": 10.59167, - "36": 9.30537, - "37": 10.59407, - "38": 10.2994, - "39": 10.69954, - "40": 10.37003, - "41": 10.248, - "42": 8.56376, - "43": 10.49224, - "44": 10.57211, - "45": 9.36238, - "46": 10.2179, - "47": 10.63449, - "48": 10.56697, - "49": 10.44093, - "50": 9.49252 + "1": 11.07397, + "2": 11.09265, + "3": 10.8306, + "4": 10.58342, + "5": 10.83769, + "6": 9.90459, + "7": 11.15094, + "8": 10.15193, + "9": 9.65551, + "10": 8.91334, + "11": 11.06242, + "12": 11.10352, + "13": 9.87343, + "14": 10.20717, + "15": 10.70026, + "16": 10.7834, + "17": 10.5832, + "18": 10.1899, + "19": 10.78252, + "20": 10.26742, + "21": 10.90867, + "22": 9.68739, + "23": 10.17903, + "24": 9.56283, + "25": 10.62358, + "26": 10.16437, + "27": 10.10325, + "28": 10.15042, + "29": 10.31854, + "30": 10.20663, + "31": 10.59866, + "32": 8.87951, + "33": 9.7619, + "34": 10.59317, + "35": 10.64661, + "36": 9.3348, + "37": 10.63017, + "38": 10.3672, + "39": 10.74352, + "40": 10.44713, + "41": 10.25471, + "42": 8.50737, + "43": 10.63395, + "44": 10.69426, + "45": 9.43185, + "46": 10.42516, + "47": 10.75514, + "48": 10.75581, + "49": 10.5988, + "50": 9.66264 } }, "iteration-time": { @@ -289,56 +289,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 71.30157, - "2": 2.34464, - "3": 2.38747, - "4": 2.10322, - "5": 2.12945, - "6": 2.0424, - "7": 2.12036, - "8": 2.0147, - "9": 2.04925, - "10": 2.02797, - "11": 1.95087, - "12": 2.04985, - "13": 1.94106, - "14": 1.90425, - "15": 1.89051, - "16": 1.89398, - "17": 1.94082, - "18": 1.93176, - "19": 1.94027, - "20": 1.90271, - "21": 1.91097, - "22": 1.90382, - "23": 1.93889, - "24": 1.90551, - "25": 1.90947, - "26": 1.92126, - "27": 1.89917, - "28": 1.89866, - "29": 1.93981, - "30": 1.90782, - "31": 1.91244, - "32": 1.93864, - "33": 1.93947, - "34": 1.96882, - "35": 1.89751, - "36": 1.94038, - "37": 1.90603, - "38": 1.94988, - "39": 1.89874, - "40": 1.90233, - "41": 1.92861, - "42": 1.93931, - "43": 1.91212, - "44": 1.92615, - "45": 1.89555, - "46": 1.94522, - "47": 1.9103, - "48": 1.94689, - "49": 1.9355, - "50": 1.89832 + "1": "nan", + "2": 34.13201, + "3": 2.31389, + "4": 2.16978, + "5": 2.33783, + "6": 2.3455, + "7": 3.06265, + "8": 1.86983, + "9": 3.03958, + "10": 1.88627, + "11": 1.86417, + "12": 2.8165, + "13": 1.87594, + "14": 1.88355, + "15": 1.86624, + "16": 1.87284, + "17": 1.85967, + "18": 1.88925, + "19": 1.87467, + "20": 1.86371, + "21": 1.88039, + "22": 1.91965, + "23": 1.8694, + "24": 2.45414, + "25": 2.28676, + "26": 1.88528, + "27": 1.86545, + "28": 1.87283, + "29": 1.87365, + "30": 1.88033, + "31": 1.87038, + "32": 1.86674, + "33": 1.86959, + "34": 1.86841, + "35": 1.86552, + "36": 1.84979, + "37": 1.87455, + "38": 1.86713, + "39": 1.86261, + "40": 1.86376, + "41": 1.86336, + "42": 1.85522, + "43": 1.86429, + "44": 1.87047, + "45": 1.86475, + "46": 1.86422, + "47": 1.86044, + "48": 1.86089, + "49": 1.86676, + "50": 1.8645 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json index 57848f8130e..e826ff174f4 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.01693, - "2": 11.06263, - "3": 10.08845, - "4": 9.73223, - "5": 10.41008, - "6": 10.46377, - "7": 11.62265, - "8": 12.30479, - "9": 12.258, - "10": 12.11321, - "11": 11.67717, - "12": 11.60724, - "13": 11.46408, - "14": 11.41026, - "15": 11.44828, - "16": 11.31999, - "17": 11.28503, - "18": 11.35547, - "19": 11.35205, - "20": 11.50757, - "21": 11.41181, - "22": 11.56383, - "23": 11.41906, - "24": 11.39788, - "25": 11.26438, - "26": 11.36733, - "27": 11.37099, - "28": 11.40035, - "29": 11.42808, - "30": 11.53613, - "31": 11.3981, - "32": 12.00058, - "33": 11.68213, - "34": 11.38046, - "35": 11.36734, - "36": 11.77291, - "37": 11.34584, - "38": 11.4654, - "39": 11.33231, - "40": 11.43538, - "41": 11.47405, - "42": 12.09241, - "43": 11.39968, - "44": 11.38762, - "45": 11.79356, - "46": 11.4469, - "47": 11.3507, - "48": 11.30787, - "49": 11.39251, - "50": 11.7264 + "1": 11.01695, + "2": 11.0627, + "3": 10.08254, + "4": 9.77341, + "5": 10.42209, + "6": 9.50215, + "7": 10.51122, + "8": 9.93361, + "9": 9.96346, + "10": 9.78405, + "11": 10.83484, + "12": 10.9711, + "13": 9.37879, + "14": 9.60221, + "15": 9.74747, + "16": 9.85611, + "17": 9.91366, + "18": 9.67222, + "19": 10.12837, + "20": 10.07093, + "21": 10.22833, + "22": 10.00916, + "23": 10.3176, + "24": 10.249, + "25": 10.57076, + "26": 10.3543, + "27": 10.38498, + "28": 10.44036, + "29": 10.42095, + "30": 10.39991, + "31": 10.47396, + "32": 9.9548, + "33": 10.16366, + "34": 10.45178, + "35": 10.38486, + "36": 9.7646, + "37": 10.43313, + "38": 10.24672, + "39": 10.43639, + "40": 10.27337, + "41": 10.2693, + "42": 9.41843, + "43": 10.39381, + "44": 10.49775, + "45": 9.91264, + "46": 10.30142, + "47": 10.50805, + "48": 10.49407, + "49": 10.46356, + "50": 10.05295 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47167880.0, - "2": 46899772.0, - "3": 1722086400.0, - "4": 1722086400.0, - "5": 188597600.0, - "6": 120779000.0, - "7": 527310080.0, - "8": 1722086400.0, - "9": 1722086400.0, - "10": 321966144.0, - "11": 493484608.0, - "12": 1722086400.0, - "13": 529395136.0, - "14": 1722086400.0, - "15": 1722086400.0, - "16": 723018944.0, - "17": 233377744.0, - "18": 642084544.0, - "19": 1722086400.0, - "20": 1722086400.0, - "21": 578776704.0, - "22": 396416192.0, - "23": 506872960.0, - "24": 670044160.0, - "25": 884090624.0, - "26": 912192512.0, - "27": 764026112.0, - "28": 972234112.0, - "29": 915345600.0, - "30": 937728768.0, - "31": 1722086400.0, - "32": 976440512.0, - "33": 984833664.0, - "34": 802321088.0, - "35": 1722086400.0, - "36": 931810816.0, - "37": 897772032.0, - "38": 982505792.0, - "39": 704699008.0, - "40": 688513344.0, - "41": 946725760.0, - "42": 1722086400.0, - "43": 1722086400.0, - "44": 875336384.0, - "45": 1722086400.0, - "46": 909066432.0, - "47": 900409280.0, - "48": 890279744.0, - "49": 597272192.0, - "50": 921883712.0 + "1": 47167740.0, + "2": 46899720.0, + "3": 58977560.0, + "4": 108338976.0, + "5": 179138592.0, + "6": 1689646080.0, + "7": 1689646080.0, + "8": 1689646080.0, + "9": 1689646080.0, + "10": 233872368.0, + "11": 1689646080.0, + "12": 411751104.0, + "13": 1689646080.0, + "14": 612875008.0, + "15": 596709440.0, + "16": 546873536.0, + "17": 522787808.0, + "18": 742742272.0, + "19": 1689646080.0, + "20": 1689646080.0, + "21": 1689646080.0, + "22": 1689646080.0, + "23": 1689646080.0, + "24": 1689646080.0, + "25": 437427808.0, + "26": 949940032.0, + "27": 1689646080.0, + "28": 553880064.0, + "29": 673135104.0, + "30": 921992576.0, + "31": 578061184.0, + "32": 731112064.0, + "33": 937648896.0, + "34": 648171264.0, + "35": 845326400.0, + "36": 1689646080.0, + "37": 1689646080.0, + "38": 557866752.0, + "39": 953183360.0, + "40": 657044800.0, + "41": 780078464.0, + "42": 639694272.0, + "43": 1689646080.0, + "44": 1689646080.0, + "45": 597456384.0, + "46": 635425088.0, + "47": 538688640.0, + "48": 525364640.0, + "49": 795455232.0, + "50": 890439424.0 } }, "mem-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 4313449472.0, - "2": 7108272640.0, - "3": 7108272640.0, - "4": 7108272640.0, - "5": 7119571456.0, - "6": 7119571456.0, - "7": 7129409024.0, - "8": 7158368768.0, - "9": 7158368768.0, - "10": 7158838784.0, - "11": 7202046464.0, - "12": 7202046464.0, - "13": 7202046464.0, - "14": 7202046464.0, - "15": 7202046464.0, - "16": 7202046464.0, - "17": 7202046464.0, - "18": 7202046464.0, - "19": 7202046464.0, - "20": 7202046464.0, - "21": 7202046464.0, - "22": 7202046464.0, - "23": 7202046464.0, - "24": 7202046464.0, - "25": 7202046464.0, - "26": 7202046464.0, - "27": 7202046464.0, - "28": 7202046464.0, - "29": 7202046464.0, - "30": 7202046464.0, - "31": 7202046464.0, - "32": 7202046464.0, - "33": 7202046464.0, - "34": 7202046464.0, - "35": 7202046464.0, - "36": 7202046464.0, - "37": 7202046464.0, - "38": 7202046464.0, - "39": 7202046464.0, - "40": 7202046464.0, - "41": 7202046464.0, - "42": 7202046464.0, - "43": 7202046464.0, - "44": 7202046464.0, - "45": 7202046464.0, - "46": 7202046464.0, - "47": 7202046464.0, - "48": 7202046464.0, - "49": 7202046464.0, - "50": 7202046464.0 + "2": 7108321792.0, + "3": 7108321792.0, + "4": 7108321792.0, + "5": 7108321792.0, + "6": 7108321792.0, + "7": 7108321792.0, + "8": 7108321792.0, + "9": 7108321792.0, + "10": 7125704192.0, + "11": 7125704192.0, + "12": 7125704192.0, + "13": 7125704192.0, + "14": 7125704192.0, + "15": 7125704192.0, + "16": 7125704192.0, + "17": 7125704192.0, + "18": 7125704192.0, + "19": 7125704192.0, + "20": 7125704192.0, + "21": 7125704192.0, + "22": 7125704192.0, + "23": 7125704192.0, + "24": 7125704192.0, + "25": 7125704192.0, + "26": 7125704192.0, + "27": 7125704192.0, + "28": 7125704192.0, + "29": 7125704192.0, + "30": 7125704192.0, + "31": 7125704192.0, + "32": 7125704192.0, + "33": 7125704192.0, + "34": 7125704192.0, + "35": 7125704192.0, + "36": 7125704192.0, + "37": 7125704192.0, + "38": 7125704192.0, + "39": 7125704192.0, + "40": 7125704192.0, + "41": 7125704192.0, + "42": 7125704192.0, + "43": 7125704192.0, + "44": 7125704192.0, + "45": 7125704192.0, + "46": 7125704192.0, + "47": 7125704192.0, + "48": 7125704192.0, + "49": 7125704192.0, + "50": 7125704192.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 90.31742, - "2": 2.522, - "3": 2.42029, - "4": 2.06158, - "5": 2.28893, - "6": 3.01447, - "7": 3.96389, - "8": 3.20878, - "9": 2.43815, - "10": 1.94158, - "11": 1.95031, - "12": 1.98877, - "13": 1.92978, - "14": 1.93494, - "15": 1.92559, - "16": 1.95925, - "17": 2.59672, - "18": 1.94175, - "19": 1.92388, - "20": 1.92283, - "21": 1.92623, - "22": 1.92561, - "23": 1.92611, - "24": 1.94339, - "25": 2.02939, - "26": 1.93181, - "27": 1.92433, - "28": 1.96842, - "29": 1.92479, - "30": 1.93949, - "31": 1.96151, - "32": 1.93071, - "33": 1.92266, - "34": 1.92587, - "35": 1.92251, - "36": 1.92324, - "37": 1.93141, - "38": 1.92431, - "39": 1.93685, - "40": 1.92592, - "41": 1.92962, - "42": 1.92986, - "43": 1.92956, - "44": 1.93019, - "45": 1.93251, - "46": 1.92915, - "47": 1.93714, - "48": 1.93564, - "49": 1.94035, - "50": 1.93018 + "1": "nan", + "2": 34.30859, + "3": 2.40715, + "4": 2.25303, + "5": 2.10844, + "6": 3.04985, + "7": 2.57498, + "8": 1.96238, + "9": 2.51648, + "10": 1.96358, + "11": 3.22098, + "12": 2.58112, + "13": 2.58339, + "14": 1.93058, + "15": 2.00134, + "16": 1.93882, + "17": 2.4041, + "18": 1.92447, + "19": 1.95655, + "20": 1.92675, + "21": 1.93097, + "22": 1.94756, + "23": 1.93203, + "24": 1.92845, + "25": 1.92893, + "26": 1.93383, + "27": 1.92816, + "28": 1.97235, + "29": 1.92972, + "30": 1.92497, + "31": 1.92667, + "32": 1.92464, + "33": 1.92813, + "34": 1.93393, + "35": 1.92662, + "36": 1.92665, + "37": 1.93412, + "38": 1.92479, + "39": 1.92275, + "40": 1.93356, + "41": 1.93707, + "42": 1.92834, + "43": 1.92982, + "44": 1.93937, + "45": 1.93285, + "46": 1.93985, + "47": 1.93106, + "48": 1.93834, + "49": 1.92661, + "50": 1.9285 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json index a35a7574e59..543e2cc2a10 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.81233, "2": 10.82416, - "3": 10.81841, - "4": 10.81357, - "5": 10.85116, - "6": 10.85502, - "7": 10.84363, - "8": 10.83621, - "9": 10.84178, - "10": 10.77391, - "11": 10.86217, - "12": 10.84672, - "13": 10.85692, - "14": 10.8614, - "15": 10.80709, - "16": 10.78544, - "17": 10.7701, - "18": 10.79072, - "19": 10.78529, - "20": 10.71496, - "21": 10.67362, - "22": 10.5386, - "23": 10.69608, - "24": 10.58118, - "25": 10.52212, - "26": 10.58665, - "27": 10.60344, - "28": 10.5676, - "29": 10.5868, - "30": 10.36177, - "31": 10.09661, - "32": 10.45911, - "33": 10.45926, - "34": 10.21524, - "35": 10.2617, - "36": 10.22327, - "37": 10.35631, - "38": 10.20637, - "39": 10.40825, + "3": 10.81831, + "4": 10.81379, + "5": 10.85121, + "6": 10.8547, + "7": 10.84381, + "8": 10.8365, + "9": 10.84218, + "10": 10.77383, + "11": 10.8619, + "12": 10.84697, + "13": 10.85639, + "14": 10.86148, + "15": 10.80747, + "16": 10.78545, + "17": 10.76973, + "18": 10.79049, + "19": 10.78507, + "20": 10.71455, + "21": 10.67322, + "22": 10.53843, + "23": 10.69589, + "24": 10.58179, + "25": 10.52248, + "26": 10.58696, + "27": 10.60345, + "28": 10.56762, + "29": 10.58671, + "30": 10.36179, + "31": 10.09658, + "32": 10.45901, + "33": 10.45915, + "34": 10.21515, + "35": 10.26158, + "36": 10.22329, + "37": 10.35641, + "38": 10.20668, + "39": 10.40815, "40": 10.08881, - "41": 10.13871, - "42": 10.22236, - "43": 9.82978, - "44": 9.96931, - "45": 9.83925, - "46": 9.81008, - "47": 10.16408, - "48": 9.84608, - "49": 9.53674, - "50": 9.91754, - "51": 9.86341, - "52": 9.74862, - "53": 10.08034, - "54": 9.96286, - "55": 9.89221, - "56": 9.64295, - "57": 9.48196, - "58": 9.85327, - "59": 9.58985, - "60": 9.5157, - "61": 9.70142, - "62": 10.01153, + "41": 10.13887, + "42": 10.22235, + "43": 9.82988, + "44": 9.96906, + "45": 9.83893, + "46": 9.80989, + "47": 10.16382, + "48": 9.84595, + "49": 9.53661, + "50": 9.91749, + "51": 9.86296, + "52": 9.74879, + "53": 10.08035, + "54": 9.96298, + "55": 9.89248, + "56": 9.64293, + "57": 9.48223, + "58": 9.85312, + "59": 9.59002, + "60": 9.51552, + "61": 9.70147, + "62": 10.01171, "63": 9.40557, - "64": 9.78559, - "65": 8.96047, - "66": 9.72678, - "67": 9.38244, - "68": 9.79903, - "69": 9.81114, - "70": 9.74788, - "71": 9.6452, - "72": 9.6027, - "73": 9.51692, - "74": 8.95583, - "75": 9.43449, - "76": 9.10005, - "77": 10.07816, - "78": 9.72912, - "79": 9.39357, - "80": 9.41584, - "81": 9.49174, - "82": 9.71087, - "83": 9.32591, - "84": 9.42272, - "85": 9.62054, - "86": 9.08096, - "87": 9.59797, - "88": 9.7551, - "89": 9.6096, - "90": 9.83264, - "91": 9.34163, - "92": 9.3578, - "93": 9.09025, - "94": 8.83205, - "95": 9.52868, - "96": 9.5278, - "97": 9.30277, - "98": 9.66393, - "99": 8.89773, - "100": 9.404 + "64": 9.78551, + "65": 8.96036, + "66": 9.7268, + "67": 9.38247, + "68": 9.79892, + "69": 9.81119, + "70": 9.74796, + "71": 9.64522, + "72": 9.60256, + "73": 9.51699, + "74": 8.95574, + "75": 9.4345, + "76": 9.10006, + "77": 10.07821, + "78": 9.72914, + "79": 9.39348, + "80": 9.41577, + "81": 9.49167, + "82": 9.71091, + "83": 9.32596, + "84": 9.42284, + "85": 9.62068, + "86": 9.08105, + "87": 9.5979, + "88": 9.75496, + "89": 9.60965, + "90": 9.83256, + "91": 9.3416, + "92": 9.35793, + "93": 9.09033, + "94": 8.83208, + "95": 9.52869, + "96": 9.528, + "97": 9.30259, + "98": 9.66375, + "99": 8.89765, + "100": 9.40386 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 5532.0, - "2": 5934.0, - "3": 5812.0, - "4": 5817.0, - "5": 6435.0, - "6": 6641.0, - "7": 5880.0, - "8": 5900.0, - "9": 6317.0, - "10": 5314.0, - "11": 6659.0, - "12": 6393.0, - "13": 6585.0, - "14": 6649.0, - "15": 6237.0, - "16": 6606.0, - "17": 6232.0, - "18": 6059.0, - "19": 6380.0, - "20": 5723.0, - "21": 6197.0, - "22": 5714.0, - "23": 6527.0, - "24": 5948.0, - "25": 5822.0, - "26": 6271.0, - "27": 6493.0, - "28": 6789.0, - "29": 6971.0, - "30": 6252.0, - "31": 5836.0, - "32": 6830.0, - "33": 7155.0, - "34": 6428.0, - "35": 6909.0, - "36": 6559.0, - "37": 7582.0, - "38": 7325.0, - "39": 8189.0, - "40": 7156.0, - "41": 7113.0, - "42": 7783.0, - "43": 7236.0, - "44": 6958.0, - "45": 7093.0, - "46": 7385.0, - "47": 7634.0, - "48": 7916.0, - "49": 7565.0, - "50": 7795.0, - "51": 7967.0, - "52": 7869.0, - "53": 9001.0, - "54": 8408.0, - "55": 7734.0, - "56": 8108.0, - "57": 7339.0, - "58": 8677.0, - "59": 8299.0, - "60": 7790.0, - "61": 8347.0, - "62": 8345.0, - "63": 7835.0, - "64": 8861.0, - "65": 8293.0, - "66": 9180.0, - "67": 8276.0, - "68": 8251.0, - "69": 8666.0, - "70": 9836.0, - "71": 9020.0, - "72": 8503.0, - "73": 8996.0, - "74": 6967.0, - "75": 7749.0, - "76": 8534.0, - "77": 10688.0, - "78": 48163.0, - "79": 9603.0, - "80": 9991.0, - "81": 55995.0, - "82": 9533.0, - "83": 65535.0, - "84": 9876.0, - "85": 15848.0, - "86": 8732.0, - "87": 10574.0, - "88": 12165.0, - "89": 9808.0, - "90": 9644.0, - "91": 8584.0, - "92": 9300.0, - "93": 8081.0, - "94": 9101.0, - "95": 9919.0, - "96": 9755.0, - "97": 11113.0, - "98": 10522.0, - "99": 8739.0, - "100": 9616.0 + "1": 5448.0, + "2": 5819.0, + "3": 6069.0, + "4": 5978.0, + "5": 6540.0, + "6": 6651.0, + "7": 5815.0, + "8": 5985.0, + "9": 6368.0, + "10": 5193.0, + "11": 6645.0, + "12": 6164.0, + "13": 6700.0, + "14": 6605.0, + "15": 6319.0, + "16": 6433.0, + "17": 6273.0, + "18": 5911.0, + "19": 6190.0, + "20": 5738.0, + "21": 6336.0, + "22": 5644.0, + "23": 6709.0, + "24": 6018.0, + "25": 5779.0, + "26": 6302.0, + "27": 6353.0, + "28": 6892.0, + "29": 7096.0, + "30": 6363.0, + "31": 5753.0, + "32": 6799.0, + "33": 7283.0, + "34": 6419.0, + "35": 6803.0, + "36": 6746.0, + "37": 7579.0, + "38": 7254.0, + "39": 8116.0, + "40": 7091.0, + "41": 7055.0, + "42": 7823.0, + "43": 7063.0, + "44": 7019.0, + "45": 6939.0, + "46": 7526.0, + "47": 7705.0, + "48": 7820.0, + "49": 7562.0, + "50": 7630.0, + "51": 7871.0, + "52": 7817.0, + "53": 8811.0, + "54": 8685.0, + "55": 7627.0, + "56": 7955.0, + "57": 7531.0, + "58": 8601.0, + "59": 8141.0, + "60": 7853.0, + "61": 8454.0, + "62": 8523.0, + "63": 7455.0, + "64": 8842.0, + "65": 8319.0, + "66": 9091.0, + "67": 8265.0, + "68": 8239.0, + "69": 8628.0, + "70": 9305.0, + "71": 9045.0, + "72": 8274.0, + "73": 9002.0, + "74": 6733.0, + "75": 7819.0, + "76": 8476.0, + "77": 14557.0, + "78": 48217.0, + "79": 9749.0, + "80": 16412.0, + "81": 56085.0, + "82": 9891.0, + "83": 65785.0, + "84": 9986.0, + "85": 15853.0, + "86": 9132.0, + "87": 10472.0, + "88": 12159.0, + "89": 9972.0, + "90": 9657.0, + "91": 8619.0, + "92": 9395.0, + "93": 8287.0, + "94": 9107.0, + "95": 10227.0, + "96": 9643.0, + "97": 14728.0, + "98": 10199.0, + "99": 8877.0, + "100": 9678.0 } }, "mem-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 982504960.0, - "2": 1156255744.0, - "3": 1156255744.0, - "4": 1156255744.0, - "5": 1156255744.0, - "6": 1156255744.0, - "7": 1156255744.0, - "8": 1156255744.0, - "9": 1156255744.0, - "10": 1156255744.0, - "11": 1156255744.0, - "12": 1156255744.0, - "13": 1156255744.0, - "14": 1156255744.0, - "15": 1156255744.0, - "16": 1156255744.0, - "17": 1156255744.0, - "18": 1156255744.0, - "19": 1156255744.0, - "20": 1156255744.0, - "21": 1156255744.0, - "22": 1156255744.0, - "23": 1156255744.0, - "24": 1156255744.0, - "25": 1156255744.0, - "26": 1156255744.0, - "27": 1157233664.0, - "28": 1157233664.0, - "29": 1157233664.0, - "30": 1157233664.0, - "31": 1157233664.0, - "32": 1157233664.0, - "33": 1157233664.0, - "34": 1157233664.0, - "35": 1157233664.0, - "36": 1157233664.0, - "37": 1157233664.0, - "38": 1157233664.0, - "39": 1157233664.0, - "40": 1157233664.0, - "41": 1158865408.0, - "42": 1158865408.0, - "43": 1158865408.0, - "44": 1158865408.0, - "45": 1158865408.0, - "46": 1158865408.0, - "47": 1158865408.0, - "48": 1158865408.0, - "49": 1158865408.0, - "50": 1158865408.0, - "51": 1158865408.0, - "52": 1158865408.0, - "53": 1158865408.0, - "54": 1158865408.0, - "55": 1159034368.0, - "56": 1159063040.0, - "57": 1159542784.0, - "58": 1159542784.0, - "59": 1159542784.0, - "60": 1159542784.0, - "61": 1165075456.0, - "62": 1165075456.0, - "63": 1165075456.0, - "64": 1165075456.0, - "65": 1165075456.0, - "66": 1165075456.0, - "67": 1165075456.0, - "68": 1165075456.0, - "69": 1165075456.0, - "70": 1165075456.0, - "71": 1165075456.0, - "72": 1165075456.0, - "73": 1165075456.0, - "74": 1165075456.0, - "75": 1165075456.0, - "76": 1166216192.0, - "77": 1166216192.0, - "78": 1166216192.0, - "79": 1166216192.0, - "80": 1166216192.0, - "81": 1166216192.0, - "82": 1166216192.0, - "83": 1166639104.0, - "84": 1166639104.0, - "85": 1166639104.0, - "86": 1166639104.0, - "87": 1166639104.0, - "88": 1166639104.0, - "89": 1166639104.0, - "90": 1166639104.0, - "91": 1166639104.0, - "92": 1166639104.0, - "93": 1166639104.0, - "94": 1166639104.0, - "95": 1166639104.0, - "96": 1166639104.0, - "97": 1166639104.0, - "98": 1166639104.0, - "99": 1166639104.0, - "100": 1166639104.0 + "1": 983094784.0, + "2": 1154649088.0, + "3": 1154649088.0, + "4": 1156098560.0, + "5": 1156098560.0, + "6": 1156098560.0, + "7": 1156098560.0, + "8": 1156098560.0, + "9": 1156246528.0, + "10": 1156246528.0, + "11": 1156246528.0, + "12": 1156246528.0, + "13": 1156246528.0, + "14": 1156246528.0, + "15": 1156246528.0, + "16": 1156246528.0, + "17": 1156246528.0, + "18": 1156246528.0, + "19": 1156246528.0, + "20": 1156246528.0, + "21": 1156246528.0, + "22": 1156246528.0, + "23": 1156246528.0, + "24": 1156246528.0, + "25": 1156246528.0, + "26": 1156246528.0, + "27": 1158345728.0, + "28": 1158345728.0, + "29": 1158345728.0, + "30": 1158345728.0, + "31": 1158345728.0, + "32": 1158345728.0, + "33": 1158345728.0, + "34": 1158345728.0, + "35": 1158345728.0, + "36": 1158345728.0, + "37": 1158345728.0, + "38": 1158345728.0, + "39": 1158345728.0, + "40": 1158345728.0, + "41": 1158345728.0, + "42": 1158345728.0, + "43": 1158345728.0, + "44": 1158345728.0, + "45": 1158345728.0, + "46": 1158345728.0, + "47": 1158345728.0, + "48": 1158345728.0, + "49": 1158345728.0, + "50": 1158345728.0, + "51": 1158345728.0, + "52": 1158345728.0, + "53": 1158345728.0, + "54": 1158345728.0, + "55": 1158345728.0, + "56": 1158651392.0, + "57": 1158651392.0, + "58": 1158651392.0, + "59": 1158651392.0, + "60": 1159026176.0, + "61": 1164139008.0, + "62": 1164139008.0, + "63": 1164139008.0, + "64": 1164139008.0, + "65": 1164139008.0, + "66": 1164139008.0, + "67": 1164139008.0, + "68": 1164139008.0, + "69": 1164139008.0, + "70": 1164139008.0, + "71": 1164139008.0, + "72": 1164139008.0, + "73": 1164139008.0, + "74": 1164139008.0, + "75": 1164139008.0, + "76": 1167156224.0, + "77": 1167156224.0, + "78": 1167156224.0, + "79": 1167156224.0, + "80": 1167156224.0, + "81": 1167156224.0, + "82": 1167156224.0, + "83": 1167156224.0, + "84": 1167156224.0, + "85": 1167156224.0, + "86": 1167156224.0, + "87": 1167156224.0, + "88": 1167156224.0, + "89": 1167156224.0, + "90": 1167156224.0, + "91": 1167156224.0, + "92": 1167156224.0, + "93": 1167156224.0, + "94": 1167156224.0, + "95": 1167156224.0, + "96": 1167156224.0, + "97": 1167156224.0, + "98": 1167156224.0, + "99": 1167156224.0, + "100": 1167156224.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 7.77547, - "3": 1.01252, - "4": 1.00639, - "5": 0.9897, - "6": 0.99553, - "7": 0.99796, - "8": 1.00873, - "9": 0.99009, - "10": 0.99264, - "11": 0.98765, - "12": 0.99024, - "13": 0.98319, - "14": 0.98552, - "15": 0.99368, - "16": 0.98342, - "17": 0.97729, - "18": 0.97272, - "19": 0.97308, - "20": 0.96906, - "21": 0.9751, - "22": 0.97375, - "23": 0.97447, - "24": 0.98494, - "25": 0.9779, - "26": 1.30939, - "27": 0.9766, - "28": 0.9856, - "29": 0.99223, - "30": 1.27178, - "31": 0.98025, - "32": 1.22425, - "33": 1.27653, - "34": 0.99358, - "35": 1.00171, - "36": 1.25408, - "37": 1.60005, - "38": 1.00572, - "39": 0.98676, - "40": 0.97218, - "41": 1.30266, - "42": 1.29066, - "43": 0.99057, - "44": 0.98517, - "45": 0.97968, - "46": 0.97289, - "47": 0.98145, - "48": 0.9804, - "49": 0.98022, - "50": 0.97431, - "51": 0.97593, - "52": 0.97255, - "53": 0.97424, - "54": 0.97043, - "55": 0.96887, - "56": 0.97492, - "57": 0.97623, - "58": 0.97423, - "59": 0.98879, - "60": 0.97992, - "61": 0.97895, - "62": 0.98829, - "63": 0.98719, - "64": 0.98651, - "65": 0.97852, - "66": 0.98045, - "67": 0.97825, - "68": 0.9795, - "69": 0.97812, - "70": 0.96297, - "71": 0.96718, - "72": 0.98343, - "73": 0.978, - "74": 0.99341, - "75": 0.97768, - "76": 0.97508, - "77": 0.97891, - "78": 0.9739, - "79": 0.96825, - "80": 0.96595, - "81": 0.95551, - "82": 0.97223, - "83": 0.9633, - "84": 0.96539, - "85": 0.97065, - "86": 0.97198, - "87": 0.97978, - "88": 0.98268, - "89": 0.99894, - "90": 1.00246, - "91": 0.98763, - "92": 0.98552, - "93": 0.99698, - "94": 0.99827, - "95": 0.99936, - "96": 0.99295, - "97": 0.99144, - "98": 0.99227, - "99": 0.98859, - "100": 0.99158 + "2": 12.89078, + "3": 0.98029, + "4": 0.95785, + "5": 0.95711, + "6": 0.94649, + "7": 0.94952, + "8": 0.9488, + "9": 0.93732, + "10": 0.94311, + "11": 0.93981, + "12": 0.9352, + "13": 0.97845, + "14": 1.03965, + "15": 0.93797, + "16": 0.93755, + "17": 0.96077, + "18": 0.92839, + "19": 0.93505, + "20": 0.93144, + "21": 0.93252, + "22": 0.93218, + "23": 0.93281, + "24": 0.93339, + "25": 0.93354, + "26": 0.9364, + "27": 0.93329, + "28": 0.93888, + "29": 0.95358, + "30": 0.94008, + "31": 0.9377, + "32": 0.93439, + "33": 0.93986, + "34": 0.931, + "35": 0.93755, + "36": 0.93189, + "37": 0.9332, + "38": 0.93386, + "39": 2.21396, + "40": 1.02952, + "41": 1.61943, + "42": 1.53973, + "43": 0.95188, + "44": 1.02544, + "45": 1.83976, + "46": 3.40015, + "47": 0.94133, + "48": 0.9376, + "49": 0.94372, + "50": 0.94132, + "51": 0.98326, + "52": 0.95038, + "53": 0.94956, + "54": 0.95341, + "55": 0.95514, + "56": 0.95325, + "57": 0.9515, + "58": 0.95749, + "59": 0.95387, + "60": 0.95558, + "61": 0.95588, + "62": 0.9448, + "63": 0.95476, + "64": 1.00066, + "65": 0.95721, + "66": 0.95057, + "67": 1.02324, + "68": 0.95344, + "69": 0.97302, + "70": 0.95308, + "71": 0.94573, + "72": 0.95296, + "73": 0.94822, + "74": 0.94914, + "75": 0.94558, + "76": 0.95045, + "77": 0.94721, + "78": 0.94215, + "79": 0.95306, + "80": 0.95305, + "81": 0.95102, + "82": 0.95071, + "83": 0.95055, + "84": 0.9467, + "85": 0.95308, + "86": 0.9457, + "87": 0.94835, + "88": 0.94623, + "89": 0.95127, + "90": 0.94161, + "91": 0.9565, + "92": 0.9496, + "93": 0.94508, + "94": 0.94308, + "95": 0.94835, + "96": 0.95356, + "97": 0.951, + "98": 0.94191, + "99": 0.93828, + "100": 0.94527 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json index dc836c3d699..31acd27983c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer/golden_values_dev_dgx_h100.json @@ -6,103 +6,103 @@ "values": { "1": 10.81455, "2": 10.81846, - "3": 10.81528, - "4": 10.80297, - "5": 10.8513, - "6": 10.85011, - "7": 10.83843, - "8": 10.83961, - "9": 10.82224, - "10": 10.77788, - "11": 10.86443, - "12": 10.83746, - "13": 10.85841, - "14": 10.86315, - "15": 10.79766, - "16": 10.79525, - "17": 10.77133, - "18": 10.78938, - "19": 10.78311, - "20": 10.71655, - "21": 10.68376, - "22": 10.53038, - "23": 10.69869, - "24": 10.5858, - "25": 10.52379, - "26": 10.58281, - "27": 10.6097, - "28": 10.57173, - "29": 10.59005, - "30": 10.35671, - "31": 10.09391, - "32": 10.45878, - "33": 10.45658, - "34": 10.20481, - "35": 10.26727, + "3": 10.81516, + "4": 10.80317, + "5": 10.85115, + "6": 10.8502, + "7": 10.83883, + "8": 10.83934, + "9": 10.82201, + "10": 10.77778, + "11": 10.86392, + "12": 10.8371, + "13": 10.85834, + "14": 10.86343, + "15": 10.79796, + "16": 10.79498, + "17": 10.77137, + "18": 10.78909, + "19": 10.78316, + "20": 10.71669, + "21": 10.68357, + "22": 10.53007, + "23": 10.69871, + "24": 10.58548, + "25": 10.52389, + "26": 10.58298, + "27": 10.60958, + "28": 10.57159, + "29": 10.58943, + "30": 10.35618, + "31": 10.09392, + "32": 10.459, + "33": 10.45662, + "34": 10.20486, + "35": 10.26731, "36": 10.22341, - "37": 10.35319, - "38": 10.19446, - "39": 10.41712, - "40": 10.08932, - "41": 10.12772, - "42": 10.21193, - "43": 9.83111, - "44": 9.96933, - "45": 9.83615, - "46": 9.81673, - "47": 10.15426, - "48": 9.85308, - "49": 9.53436, - "50": 9.91912, - "51": 9.85363, - "52": 9.74288, + "37": 10.35281, + "38": 10.19478, + "39": 10.41737, + "40": 10.08939, + "41": 10.12783, + "42": 10.21195, + "43": 9.83125, + "44": 9.96948, + "45": 9.83599, + "46": 9.81675, + "47": 10.15396, + "48": 9.85302, + "49": 9.53451, + "50": 9.91905, + "51": 9.85374, + "52": 9.74267, "53": 10.07163, - "54": 9.96275, - "55": 9.88233, - "56": 9.63455, - "57": 9.48649, - "58": 9.84879, - "59": 9.589, - "60": 9.5109, - "61": 9.703, - "62": 9.99634, - "63": 9.40054, - "64": 9.78477, - "65": 8.95365, - "66": 9.71813, - "67": 9.36915, - "68": 9.79814, + "54": 9.96271, + "55": 9.88222, + "56": 9.63469, + "57": 9.48658, + "58": 9.84875, + "59": 9.58895, + "60": 9.5107, + "61": 9.70312, + "62": 9.99631, + "63": 9.40047, + "64": 9.78468, + "65": 8.95368, + "66": 9.71803, + "67": 9.36918, + "68": 9.79825, "69": 9.79674, - "70": 9.74886, - "71": 9.63185, - "72": 9.59951, - "73": 9.50305, - "74": 8.95217, - "75": 9.43098, - "76": 9.09068, - "77": 10.08086, - "78": 9.7353, - "79": 9.38859, - "80": 9.41418, - "81": 9.48423, - "82": 9.70903, - "83": 9.3151, - "84": 9.41846, - "85": 9.62239, - "86": 9.07953, - "87": 9.59204, - "88": 9.74948, + "70": 9.74901, + "71": 9.63189, + "72": 9.59967, + "73": 9.50302, + "74": 8.95201, + "75": 9.43099, + "76": 9.09061, + "77": 10.08076, + "78": 9.73539, + "79": 9.38866, + "80": 9.41411, + "81": 9.48409, + "82": 9.70901, + "83": 9.31516, + "84": 9.41834, + "85": 9.6223, + "86": 9.07933, + "87": 9.592, + "88": 9.74931, "89": 9.60436, - "90": 9.82573, - "91": 9.34231, - "92": 9.35857, - "93": 9.07976, - "94": 8.82788, - "95": 9.50877, - "96": 9.52129, - "97": 9.30597, - "98": 9.66586, - "99": 8.87711, + "90": 9.82577, + "91": 9.34226, + "92": 9.3584, + "93": 9.07981, + "94": 8.82786, + "95": 9.50871, + "96": 9.5212, + "97": 9.30611, + "98": 9.66576, + "99": 8.87717, "100": 9.38978 } }, @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 5566.0, - "2": 5749.0, - "3": 5881.0, - "4": 5840.0, - "5": 6476.0, - "6": 6425.0, - "7": 5900.0, - "8": 5783.0, - "9": 6426.0, - "10": 5252.0, - "11": 6722.0, - "12": 6169.0, - "13": 6556.0, - "14": 6524.0, - "15": 6116.0, - "16": 6245.0, - "17": 6139.0, - "18": 5888.0, - "19": 6375.0, - "20": 5773.0, - "21": 6188.0, - "22": 5742.0, - "23": 6768.0, - "24": 6000.0, - "25": 5852.0, - "26": 6285.0, - "27": 6357.0, - "28": 6586.0, - "29": 6742.0, - "30": 6214.0, - "31": 5775.0, - "32": 6746.0, - "33": 7205.0, - "34": 6344.0, - "35": 6686.0, - "36": 6743.0, - "37": 7281.0, - "38": 7228.0, - "39": 7810.0, - "40": 7116.0, - "41": 6902.0, - "42": 7809.0, - "43": 7110.0, - "44": 7040.0, - "45": 7058.0, - "46": 7292.0, - "47": 7813.0, - "48": 7672.0, - "49": 7601.0, - "50": 7605.0, - "51": 8105.0, - "52": 7792.0, - "53": 8870.0, - "54": 8700.0, - "55": 7685.0, - "56": 7975.0, - "57": 7544.0, - "58": 8539.0, - "59": 8275.0, - "60": 7822.0, - "61": 8316.0, - "62": 8493.0, - "63": 7748.0, - "64": 8801.0, - "65": 8269.0, - "66": 9209.0, - "67": 8382.0, - "68": 8362.0, - "69": 8644.0, - "70": 9785.0, - "71": 9060.0, - "72": 8909.0, - "73": 9217.0, - "74": 6949.0, - "75": 7960.0, - "76": 8489.0, - "77": 12484.0, - "78": 9598.0, - "79": 12984.0, - "80": 11398.0, - "81": 10221.0, - "82": 9615.0, - "83": 62741.0, - "84": 9936.0, - "85": 46541.0, - "86": 8528.0, - "87": 14916.0, - "88": 9710.0, - "89": 10273.0, - "90": 11178.0, - "91": 8856.0, - "92": 9337.0, - "93": 8404.0, - "94": 9649.0, - "95": 9657.0, - "96": 13226.0, - "97": 9093.0, - "98": 10575.0, - "99": 15320.0, - "100": 9363.0 + "1": 5523.0, + "2": 5667.0, + "3": 5701.0, + "4": 5756.0, + "5": 6371.0, + "6": 6769.0, + "7": 5895.0, + "8": 5804.0, + "9": 6388.0, + "10": 5108.0, + "11": 6621.0, + "12": 6222.0, + "13": 6441.0, + "14": 6571.0, + "15": 6070.0, + "16": 6585.0, + "17": 6269.0, + "18": 6078.0, + "19": 6150.0, + "20": 5586.0, + "21": 6218.0, + "22": 5649.0, + "23": 6873.0, + "24": 6095.0, + "25": 5713.0, + "26": 6306.0, + "27": 6546.0, + "28": 6930.0, + "29": 6915.0, + "30": 6305.0, + "31": 5768.0, + "32": 6676.0, + "33": 7230.0, + "34": 6375.0, + "35": 6738.0, + "36": 6731.0, + "37": 7520.0, + "38": 7182.0, + "39": 7804.0, + "40": 7077.0, + "41": 7052.0, + "42": 7885.0, + "43": 7096.0, + "44": 7056.0, + "45": 7021.0, + "46": 7445.0, + "47": 7741.0, + "48": 7703.0, + "49": 7502.0, + "50": 7554.0, + "51": 8030.0, + "52": 7992.0, + "53": 8904.0, + "54": 8607.0, + "55": 7373.0, + "56": 8015.0, + "57": 7549.0, + "58": 8401.0, + "59": 8151.0, + "60": 8060.0, + "61": 8592.0, + "62": 8229.0, + "63": 7692.0, + "64": 8841.0, + "65": 8367.0, + "66": 9351.0, + "67": 8290.0, + "68": 8239.0, + "69": 8713.0, + "70": 9738.0, + "71": 9146.0, + "72": 8792.0, + "73": 9254.0, + "74": 7055.0, + "75": 7845.0, + "76": 8463.0, + "77": 19085.0, + "78": 9710.0, + "79": 13200.0, + "80": 11524.0, + "81": 10051.0, + "82": 9649.0, + "83": 62751.0, + "84": 9640.0, + "85": 46534.0, + "86": 8760.0, + "87": 15074.0, + "88": 9396.0, + "89": 10096.0, + "90": 11139.0, + "91": 9081.0, + "92": 9452.0, + "93": 8138.0, + "94": 9395.0, + "95": 9646.0, + "96": 13083.0, + "97": 9003.0, + "98": 10560.0, + "99": 15300.0, + "100": 9434.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 628645888.0, - "2": 628646912.0, - "3": 628646912.0, - "4": 628646912.0, - "5": 628646912.0, - "6": 628646912.0, - "7": 628646912.0, - "8": 628646912.0, - "9": 628646912.0, - "10": 628646912.0, - "11": 628646912.0, - "12": 628646912.0, - "13": 628646912.0, - "14": 628646912.0, - "15": 628646912.0, - "16": 628646912.0, - "17": 628646912.0, - "18": 628646912.0, - "19": 628646912.0, - "20": 628646912.0, - "21": 628646912.0, - "22": 628646912.0, - "23": 628646912.0, - "24": 628646912.0, - "25": 628646912.0, - "26": 628646912.0, - "27": 628646912.0, - "28": 628646912.0, - "29": 628646912.0, - "30": 628646912.0, - "31": 628646912.0, - "32": 628646912.0, - "33": 628646912.0, - "34": 628646912.0, - "35": 628646912.0, - "36": 628646912.0, - "37": 628646912.0, - "38": 628646912.0, - "39": 628646912.0, - "40": 628646912.0, - "41": 628646912.0, - "42": 628646912.0, - "43": 628646912.0, - "44": 628646912.0, - "45": 628646912.0, - "46": 628646912.0, - "47": 628646912.0, - "48": 628646912.0, - "49": 628646912.0, - "50": 628646912.0, - "51": 628646912.0, - "52": 628646912.0, - "53": 628646912.0, - "54": 628646912.0, - "55": 628646912.0, - "56": 628646912.0, - "57": 628646912.0, - "58": 628646912.0, - "59": 628646912.0, - "60": 628646912.0, - "61": 628646912.0, - "62": 628646912.0, - "63": 628646912.0, - "64": 628646912.0, - "65": 628646912.0, - "66": 628646912.0, - "67": 628646912.0, - "68": 628646912.0, - "69": 628646912.0, - "70": 628646912.0, - "71": 628646912.0, - "72": 628646912.0, - "73": 628646912.0, - "74": 628646912.0, - "75": 628646912.0, - "76": 628646912.0, - "77": 628646912.0, - "78": 628646912.0, - "79": 628646912.0, - "80": 628646912.0, - "81": 628646912.0, - "82": 628646912.0, - "83": 628646912.0, - "84": 628646912.0, - "85": 628646912.0, - "86": 628646912.0, - "87": 628646912.0, - "88": 628646912.0, - "89": 628646912.0, - "90": 628646912.0, - "91": 628646912.0, - "92": 628646912.0, - "93": 628646912.0, - "94": 628646912.0, - "95": 628646912.0, - "96": 628646912.0, - "97": 628646912.0, - "98": 628646912.0, - "99": 628646912.0, - "100": 628646912.0 + "1": 628965376.0, + "2": 628966400.0, + "3": 628966400.0, + "4": 628966400.0, + "5": 628966400.0, + "6": 628966400.0, + "7": 628966400.0, + "8": 628966400.0, + "9": 628966400.0, + "10": 628966400.0, + "11": 628966400.0, + "12": 628966400.0, + "13": 628966400.0, + "14": 628966400.0, + "15": 628966400.0, + "16": 628966400.0, + "17": 628966400.0, + "18": 628966400.0, + "19": 628966400.0, + "20": 628966400.0, + "21": 628966400.0, + "22": 628966400.0, + "23": 628966400.0, + "24": 628966400.0, + "25": 628966400.0, + "26": 628966400.0, + "27": 628966400.0, + "28": 628966400.0, + "29": 628966400.0, + "30": 628966400.0, + "31": 628966400.0, + "32": 628966400.0, + "33": 628966400.0, + "34": 628966400.0, + "35": 628966400.0, + "36": 628966400.0, + "37": 628966400.0, + "38": 628966400.0, + "39": 628966400.0, + "40": 628966400.0, + "41": 628966400.0, + "42": 628966400.0, + "43": 628966400.0, + "44": 628966400.0, + "45": 628966400.0, + "46": 628966400.0, + "47": 628966400.0, + "48": 628966400.0, + "49": 628966400.0, + "50": 628966400.0, + "51": 628966400.0, + "52": 628966400.0, + "53": 628966400.0, + "54": 628966400.0, + "55": 628966400.0, + "56": 628966400.0, + "57": 628966400.0, + "58": 628966400.0, + "59": 628966400.0, + "60": 628966400.0, + "61": 628966400.0, + "62": 628966400.0, + "63": 628966400.0, + "64": 628966400.0, + "65": 628966400.0, + "66": 628966400.0, + "67": 628966400.0, + "68": 628966400.0, + "69": 628966400.0, + "70": 628966400.0, + "71": 628966400.0, + "72": 628966400.0, + "73": 628966400.0, + "74": 628966400.0, + "75": 628966400.0, + "76": 628966400.0, + "77": 628966400.0, + "78": 628966400.0, + "79": 628966400.0, + "80": 628966400.0, + "81": 628966400.0, + "82": 628966400.0, + "83": 628966400.0, + "84": 628966400.0, + "85": 628966400.0, + "86": 628966400.0, + "87": 628966400.0, + "88": 628966400.0, + "89": 628966400.0, + "90": 628966400.0, + "91": 628966400.0, + "92": 628966400.0, + "93": 628966400.0, + "94": 628966400.0, + "95": 628966400.0, + "96": 628966400.0, + "97": 628966400.0, + "98": 628966400.0, + "99": 628966400.0, + "100": 628966400.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 982203392.0, - "2": 1149396992.0, - "3": 1149396992.0, - "4": 1155475456.0, - "5": 1155475456.0, - "6": 1155475456.0, - "7": 1155475456.0, - "8": 1155475456.0, - "9": 1155475456.0, - "10": 1155475456.0, - "11": 1155475456.0, - "12": 1155475456.0, - "13": 1155475456.0, - "14": 1155475456.0, - "15": 1155475456.0, - "16": 1155475456.0, - "17": 1155475456.0, - "18": 1155475456.0, - "19": 1155475456.0, - "20": 1155475456.0, - "21": 1155475456.0, - "22": 1155475456.0, - "23": 1155475456.0, - "24": 1155475456.0, - "25": 1155475456.0, - "26": 1155475456.0, - "27": 1155475456.0, - "28": 1155475456.0, - "29": 1155475456.0, - "30": 1155475456.0, - "31": 1155475456.0, - "32": 1155475456.0, - "33": 1155475456.0, - "34": 1155475456.0, - "35": 1155475456.0, - "36": 1155475456.0, - "37": 1155475456.0, - "38": 1155475456.0, - "39": 1155475456.0, - "40": 1155475456.0, - "41": 1155475456.0, - "42": 1155475456.0, - "43": 1155475456.0, - "44": 1155475456.0, - "45": 1155475456.0, - "46": 1155475456.0, - "47": 1155475456.0, - "48": 1155475456.0, - "49": 1155475456.0, - "50": 1155475456.0, - "51": 1155475456.0, - "52": 1155475456.0, - "53": 1155475456.0, - "54": 1155475456.0, - "55": 1155475456.0, - "56": 1155475456.0, - "57": 1155475456.0, - "58": 1155475456.0, - "59": 1155475456.0, - "60": 1155975680.0, - "61": 1159303168.0, - "62": 1159303168.0, - "63": 1159303168.0, - "64": 1159303168.0, - "65": 1159303168.0, - "66": 1159303168.0, - "67": 1159303168.0, - "68": 1159303168.0, - "69": 1159303168.0, - "70": 1159303168.0, - "71": 1159303168.0, - "72": 1159303168.0, - "73": 1159303168.0, - "74": 1159303168.0, - "75": 1159303168.0, - "76": 1164697088.0, - "77": 1164697088.0, - "78": 1164697088.0, - "79": 1164697088.0, - "80": 1164697088.0, - "81": 1164697088.0, - "82": 1164697088.0, - "83": 1164697088.0, - "84": 1164697088.0, - "85": 1164697088.0, - "86": 1164697088.0, - "87": 1164697088.0, - "88": 1164697088.0, - "89": 1164697088.0, - "90": 1164697088.0, - "91": 1164697088.0, - "92": 1164697088.0, - "93": 1164697088.0, - "94": 1164697088.0, - "95": 1164697088.0, - "96": 1164697088.0, - "97": 1164697088.0, - "98": 1164697088.0, - "99": 1164697088.0, - "100": 1164697088.0 + "1": 982171648.0, + "2": 1151529984.0, + "3": 1151529984.0, + "4": 1157186560.0, + "5": 1157186560.0, + "6": 1157186560.0, + "7": 1157186560.0, + "8": 1157186560.0, + "9": 1157186560.0, + "10": 1157186560.0, + "11": 1157186560.0, + "12": 1157186560.0, + "13": 1157186560.0, + "14": 1157186560.0, + "15": 1157186560.0, + "16": 1157186560.0, + "17": 1157186560.0, + "18": 1157186560.0, + "19": 1157186560.0, + "20": 1157186560.0, + "21": 1157186560.0, + "22": 1157186560.0, + "23": 1157186560.0, + "24": 1157186560.0, + "25": 1157186560.0, + "26": 1157186560.0, + "27": 1157186560.0, + "28": 1157186560.0, + "29": 1157186560.0, + "30": 1157186560.0, + "31": 1157186560.0, + "32": 1157186560.0, + "33": 1157186560.0, + "34": 1157186560.0, + "35": 1157186560.0, + "36": 1157186560.0, + "37": 1157186560.0, + "38": 1157186560.0, + "39": 1157186560.0, + "40": 1157186560.0, + "41": 1157186560.0, + "42": 1157186560.0, + "43": 1157186560.0, + "44": 1157186560.0, + "45": 1157186560.0, + "46": 1157186560.0, + "47": 1157186560.0, + "48": 1157186560.0, + "49": 1157186560.0, + "50": 1157186560.0, + "51": 1157186560.0, + "52": 1157186560.0, + "53": 1157186560.0, + "54": 1157186560.0, + "55": 1157186560.0, + "56": 1157186560.0, + "57": 1157186560.0, + "58": 1157186560.0, + "59": 1157186560.0, + "60": 1158689280.0, + "61": 1160906752.0, + "62": 1160906752.0, + "63": 1160906752.0, + "64": 1160906752.0, + "65": 1160906752.0, + "66": 1160906752.0, + "67": 1160906752.0, + "68": 1160906752.0, + "69": 1160906752.0, + "70": 1160906752.0, + "71": 1160906752.0, + "72": 1160906752.0, + "73": 1160906752.0, + "74": 1160906752.0, + "75": 1160906752.0, + "76": 1164090880.0, + "77": 1164090880.0, + "78": 1164090880.0, + "79": 1164090880.0, + "80": 1164090880.0, + "81": 1164090880.0, + "82": 1164090880.0, + "83": 1165244928.0, + "84": 1165244928.0, + "85": 1165244928.0, + "86": 1165244928.0, + "87": 1165244928.0, + "88": 1165244928.0, + "89": 1165244928.0, + "90": 1165244928.0, + "91": 1165244928.0, + "92": 1165244928.0, + "93": 1165244928.0, + "94": 1165244928.0, + "95": 1165244928.0, + "96": 1165244928.0, + "97": 1165244928.0, + "98": 1165244928.0, + "99": 1165244928.0, + "100": 1165244928.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 19.23269, - "2": 0.72886, - "3": 0.65505, - "4": 0.57926, - "5": 0.56473, - "6": 0.56262, - "7": 0.55541, - "8": 0.55169, - "9": 0.54588, + "1": "nan", + "2": 11.36152, + "3": 0.60076, + "4": 0.57623, + "5": 0.56627, + "6": 0.55559, + "7": 0.5579, + "8": 0.54703, + "9": 0.54236, "10": 0.54513, - "11": 0.54209, - "12": 0.55074, - "13": 0.54861, - "14": 0.54825, - "15": 0.54517, - "16": 0.54378, - "17": 0.54038, - "18": 0.53418, - "19": 0.54272, - "20": 0.53786, - "21": 0.5453, - "22": 0.53544, - "23": 0.5385, - "24": 0.5306, - "25": 0.53752, - "26": 0.53028, - "27": 1.14331, - "28": 0.55476, - "29": 0.55192, - "30": 0.53922, - "31": 0.53776, - "32": 0.53422, - "33": 0.53153, - "34": 0.53781, - "35": 0.53428, - "36": 0.5321, - "37": 0.53103, - "38": 0.53328, - "39": 0.53189, - "40": 1.26265, - "41": 0.53531, - "42": 0.53252, - "43": 0.53665, - "44": 0.88396, - "45": 0.53586, - "46": 0.89593, - "47": 0.53907, - "48": 0.5309, - "49": 0.53767, - "50": 0.53491, - "51": 0.55263, - "52": 0.53343, - "53": 0.53673, - "54": 0.53859, - "55": 0.5329, - "56": 0.52954, - "57": 0.53085, - "58": 0.53458, - "59": 0.53132, - "60": 0.53967, - "61": 0.53205, - "62": 0.53559, - "63": 0.53393, - "64": 0.53143, - "65": 0.5339, - "66": 0.53358, - "67": 0.53117, - "68": 0.53709, - "69": 0.53768, - "70": 0.53628, - "71": 0.53275, - "72": 0.54058, - "73": 0.53091, - "74": 0.53069, - "75": 0.53307, - "76": 0.53389, - "77": 0.53403, - "78": 0.53188, - "79": 0.53173, - "80": 0.532, - "81": 0.53145, - "82": 0.5358, - "83": 0.53475, - "84": 0.5323, - "85": 0.54048, - "86": 0.53766, - "87": 0.53212, - "88": 0.53119, - "89": 0.53372, - "90": 0.53371, - "91": 0.53164, - "92": 0.53327, - "93": 0.54146, - "94": 0.53517, - "95": 0.53542, - "96": 0.5306, - "97": 0.53654, - "98": 0.53425, - "99": 0.53223, - "100": 0.53446 + "11": 0.54262, + "12": 0.54343, + "13": 0.53172, + "14": 0.52774, + "15": 0.52942, + "16": 0.53484, + "17": 0.53475, + "18": 0.52981, + "19": 0.5229, + "20": 0.53117, + "21": 0.53012, + "22": 0.52621, + "23": 0.52383, + "24": 0.52014, + "25": 0.52825, + "26": 0.52116, + "27": 0.51966, + "28": 0.54223, + "29": 0.53546, + "30": 0.53621, + "31": 0.53176, + "32": 0.52757, + "33": 0.52512, + "34": 0.53383, + "35": 0.53153, + "36": 0.52848, + "37": 0.52914, + "38": 0.536, + "39": 0.52705, + "40": 0.53726, + "41": 0.53047, + "42": 0.53508, + "43": 0.54472, + "44": 0.53321, + "45": 0.53635, + "46": 0.52755, + "47": 0.53103, + "48": 0.52361, + "49": 0.52997, + "50": 0.53743, + "51": 0.54995, + "52": 0.52978, + "53": 0.53361, + "54": 0.53015, + "55": 0.53133, + "56": 0.52891, + "57": 0.53227, + "58": 0.53599, + "59": 0.53237, + "60": 0.52042, + "61": 0.52388, + "62": 0.52838, + "63": 0.5226, + "64": 0.52309, + "65": 0.52284, + "66": 0.52422, + "67": 0.51996, + "68": 0.52685, + "69": 0.52571, + "70": 0.52449, + "71": 0.51945, + "72": 0.52323, + "73": 0.52092, + "74": 0.52309, + "75": 0.51979, + "76": 0.52928, + "77": 0.5212, + "78": 0.52179, + "79": 0.52614, + "80": 0.52173, + "81": 0.51808, + "82": 0.51906, + "83": 0.53309, + "84": 0.62023, + "85": 0.52496, + "86": 0.51903, + "87": 0.51763, + "88": 0.51983, + "89": 0.52128, + "90": 0.52168, + "91": 0.51921, + "92": 0.51729, + "93": 0.51835, + "94": 0.5177, + "95": 0.51686, + "96": 0.51883, + "97": 0.51846, + "98": 0.51489, + "99": 0.51984, + "100": 0.52188 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json index f4357530aed..00ed37733ae 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp1_pp1/golden_values_dev_dgx_h100.json @@ -6,53 +6,53 @@ "values": { "1": 9.14877, "2": 9.15171, - "3": 9.14691, - "4": 9.15346, + "3": 9.1469, + "4": 9.15345, "5": 9.15057, - "6": 9.14683, + "6": 9.14688, "7": 9.14378, "8": 9.14363, - "9": 9.15069, - "10": 9.15231, - "11": 9.14609, + "9": 9.15072, + "10": 9.15239, + "11": 9.14608, "12": 9.14125, - "13": 9.1414, - "14": 9.14248, - "15": 9.13419, - "16": 9.12601, - "17": 9.12407, - "18": 9.12053, - "19": 9.11789, - "20": 9.09777, - "21": 9.06948, - "22": 9.06985, - "23": 9.07079, - "24": 9.06043, - "25": 9.05505, - "26": 9.05713, + "13": 9.14146, + "14": 9.14247, + "15": 9.13422, + "16": 9.12606, + "17": 9.12413, + "18": 9.12057, + "19": 9.1179, + "20": 9.09773, + "21": 9.0695, + "22": 9.06984, + "23": 9.07077, + "24": 9.06045, + "25": 9.05509, + "26": 9.05714, "27": 9.04089, "28": 9.0186, - "29": 9.00353, - "30": 8.99697, - "31": 8.99484, - "32": 8.98416, - "33": 8.97763, - "34": 8.98617, - "35": 8.94993, - "36": 8.94557, - "37": 8.92133, - "38": 8.94104, - "39": 8.92482, - "40": 8.87122, - "41": 8.89627, - "42": 8.87601, + "29": 9.00351, + "30": 8.99698, + "31": 8.9948, + "32": 8.98417, + "33": 8.97761, + "34": 8.9862, + "35": 8.94992, + "36": 8.9456, + "37": 8.92135, + "38": 8.94106, + "39": 8.92485, + "40": 8.87125, + "41": 8.89626, + "42": 8.87604, "43": 8.87414, - "44": 8.8411, - "45": 8.81228, - "46": 8.79564, + "44": 8.84111, + "45": 8.81225, + "46": 8.79568, "47": 8.84576, - "48": 8.77191, - "49": 8.78047, + "48": 8.77194, + "49": 8.78043, "50": 8.76196 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3477955.0, - "2": 3392302.0, - "3": 3630021.0, - "4": 3532452.0, - "5": 3783960.0, - "6": 3584449.0, - "7": 3478372.0, - "8": 3414330.0, - "9": 3511649.0, - "10": 3544311.0, - "11": 3475468.0, - "12": 3518965.0, - "13": 3591786.0, - "14": 3549396.0, - "15": 3421163.0, - "16": 3383319.0, - "17": 3424120.0, - "18": 3509184.0, - "19": 3426107.0, - "20": 3465915.0, - "21": 3700118.0, - "22": 3474397.0, - "23": 3693474.0, - "24": 3405657.0, - "25": 3457588.0, - "26": 3479130.0, - "27": 3555371.0, - "28": 3496999.0, - "29": 3561842.0, - "30": 3708011.0, - "31": 3397663.0, - "32": 3467970.0, - "33": 3515742.0, - "34": 3501589.0, - "35": 3432484.0, - "36": 3453953.0, - "37": 3958777.0, - "38": 3488640.0, - "39": 3409958.0, - "40": 3614258.0, - "41": 3425709.0, - "42": 3643603.0, - "43": 3473029.0, - "44": 3448331.0, - "45": 3452202.0, - "46": 3585738.0, - "47": 3467386.0, - "48": 3462962.0, - "49": 3529813.0, - "50": 3412019.0 + "1": 3478044.0, + "2": 3392170.0, + "3": 3630100.0, + "4": 3532464.0, + "5": 3783909.0, + "6": 3584604.0, + "7": 3478292.0, + "8": 3414214.0, + "9": 3511551.0, + "10": 3544353.0, + "11": 3475513.0, + "12": 3519004.0, + "13": 3591792.0, + "14": 3549575.0, + "15": 3421322.0, + "16": 3383312.0, + "17": 3424142.0, + "18": 3509310.0, + "19": 3426210.0, + "20": 3465844.0, + "21": 3699866.0, + "22": 3474417.0, + "23": 3693512.0, + "24": 3405590.0, + "25": 3457789.0, + "26": 3479283.0, + "27": 3555496.0, + "28": 3497078.0, + "29": 3561734.0, + "30": 3708144.0, + "31": 3397570.0, + "32": 3467832.0, + "33": 3515682.0, + "34": 3501518.0, + "35": 3432575.0, + "36": 3454076.0, + "37": 3958864.0, + "38": 3488540.0, + "39": 3410013.0, + "40": 3614392.0, + "41": 3425670.0, + "42": 3643700.0, + "43": 3472723.0, + "44": 3448423.0, + "45": 3452103.0, + "46": 3585686.0, + "47": 3467299.0, + "48": 3462916.0, + "49": 3529603.0, + "50": 3411958.0 } }, "mem-allocated-bytes": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 21.47107, - "2": 0.21426, - "3": 0.18485, - "4": 0.1655, - "5": 0.16764, - "6": 0.16482, - "7": 0.16761, - "8": 0.16451, - "9": 0.16762, - "10": 0.16536, - "11": 0.17999, - "12": 0.18657, - "13": 0.16983, - "14": 0.16676, - "15": 0.16908, - "16": 0.16963, - "17": 0.17346, - "18": 0.17019, - "19": 0.17052, - "20": 0.17018, - "21": 0.16541, - "22": 0.16566, - "23": 0.16521, - "24": 0.16662, - "25": 0.16493, - "26": 0.16377, - "27": 0.16515, - "28": 0.16469, - "29": 0.16683, - "30": 0.16435, - "31": 0.1697, - "32": 0.16472, - "33": 0.1693, - "34": 0.16637, - "35": 0.16593, - "36": 0.16439, - "37": 0.16693, - "38": 0.16653, - "39": 0.16645, - "40": 0.16669, - "41": 0.16547, - "42": 0.16438, - "43": 0.16787, - "44": 0.16848, - "45": 0.16631, - "46": 0.16902, - "47": 0.16588, - "48": 0.16644, - "49": 0.16691, - "50": 0.1671 + "1": "nan", + "2": 4.3752, + "3": 0.18438, + "4": 0.17408, + "5": 0.17426, + "6": 0.17324, + "7": 0.17437, + "8": 0.16591, + "9": 0.16608, + "10": 0.16563, + "11": 0.16603, + "12": 0.1654, + "13": 0.16746, + "14": 0.16557, + "15": 0.16692, + "16": 0.16648, + "17": 0.16679, + "18": 0.1661, + "19": 0.16846, + "20": 0.17421, + "21": 0.16648, + "22": 0.16578, + "23": 0.16601, + "24": 0.16672, + "25": 0.16647, + "26": 0.16552, + "27": 0.16788, + "28": 0.16751, + "29": 0.1673, + "30": 0.1667, + "31": 0.16952, + "32": 0.16693, + "33": 0.16603, + "34": 0.17134, + "35": 0.16658, + "36": 0.16674, + "37": 0.16801, + "38": 0.16643, + "39": 0.1679, + "40": 0.16548, + "41": 0.16743, + "42": 0.16511, + "43": 0.16745, + "44": 0.16536, + "45": 0.16772, + "46": 0.16513, + "47": 0.16764, + "48": 0.16926, + "49": 0.16982, + "50": 0.16849 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json index b0c23087659..8fb704f4745 100644 --- a/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/multimodal-llava/multimodal_llava_mcore_te_tp4_sp_cp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.28644, - "2": 9.28396, - "3": 9.28076, - "4": 9.28856, - "5": 9.27699, - "6": 9.28726, - "7": 9.27831, - "8": 9.28266, - "9": 9.28518, - "10": 9.28294, - "11": 9.28326, - "12": 9.27377, - "13": 9.27113, - "14": 9.27209, - "15": 9.25297, - "16": 9.24499, - "17": 9.24857, - "18": 9.2295, - "19": 9.23151, - "20": 9.20818, - "21": 9.1704, + "1": 9.28648, + "2": 9.28389, + "3": 9.2807, + "4": 9.2885, + "5": 9.27705, + "6": 9.28737, + "7": 9.27833, + "8": 9.2826, + "9": 9.28528, + "10": 9.28289, + "11": 9.28325, + "12": 9.27371, + "13": 9.27127, + "14": 9.27213, + "15": 9.253, + "16": 9.24492, + "17": 9.24859, + "18": 9.22958, + "19": 9.23149, + "20": 9.20816, + "21": 9.17058, "22": 9.15059, - "23": 9.16837, - "24": 9.15073, - "25": 9.14424, - "26": 9.14738, - "27": 9.12308, - "28": 9.09717, - "29": 9.09386, - "30": 9.07826, - "31": 8.97181, - "32": 9.0315, - "33": 9.02023, - "34": 8.98663, - "35": 8.95928, - "36": 8.97134, - "37": 8.91442, + "23": 9.16841, + "24": 9.15088, + "25": 9.14428, + "26": 9.14731, + "27": 9.12298, + "28": 9.09703, + "29": 9.09381, + "30": 9.07824, + "31": 8.97182, + "32": 9.03154, + "33": 9.02015, + "34": 8.98673, + "35": 8.95912, + "36": 8.97146, + "37": 8.91452, "38": 8.88791, - "39": 8.88879, - "40": 8.90639, - "41": 8.81803, - "42": 8.87405, - "43": 8.85655, - "44": 8.81693, - "45": 8.81356, - "46": 8.84453, - "47": 8.73701, - "48": 8.66923, - "49": 8.70104, - "50": 8.73489 + "39": 8.88878, + "40": 8.90648, + "41": 8.81814, + "42": 8.87399, + "43": 8.85661, + "44": 8.81711, + "45": 8.8137, + "46": 8.84467, + "47": 8.73715, + "48": 8.66933, + "49": 8.70117, + "50": 8.73514 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5959428.0, - "2": 6553739.0, - "3": 7313558.0, - "4": 6377212.0, - "5": 6498220.0, - "6": 7152015.0, - "7": 6210260.0, - "8": 6334672.0, - "9": 6624655.0, - "10": 6529106.0, - "11": 7466660.0, - "12": 6471717.0, - "13": 6003465.0, - "14": 8072041.0, - "15": 6529968.0, - "16": 7526852.0, - "17": 6035134.0, - "18": 6289690.0, - "19": 6162498.0, - "20": 6527712.0, - "21": 6981897.0, - "22": 7132920.0, - "23": 5928645.0, - "24": 6210340.0, - "25": 6993116.0, - "26": 6471329.0, - "27": 6355333.0, - "28": 6876968.0, - "29": 6380137.0, - "30": 6468615.0, - "31": 8165212.0, - "32": 6765571.0, - "33": 6355561.0, - "34": 6662287.0, - "35": 7065313.0, - "36": 6076925.0, - "37": 7785462.0, - "38": 6727049.0, - "39": 7315988.0, - "40": 6555018.0, - "41": 7314645.0, - "42": 6591992.0, - "43": 6928020.0, - "44": 7274444.0, - "45": 6680179.0, - "46": 6232560.0, - "47": 6496796.0, - "48": 6809653.0, - "49": 6753531.0, - "50": 6238141.0 + "1": 5959426.0, + "2": 6553765.0, + "3": 7313448.0, + "4": 6377019.0, + "5": 6498265.0, + "6": 7151976.0, + "7": 6210453.0, + "8": 6334691.0, + "9": 6624555.0, + "10": 6529053.0, + "11": 7466628.0, + "12": 6471518.0, + "13": 6003450.0, + "14": 8071967.0, + "15": 6529964.0, + "16": 7526726.0, + "17": 6035087.0, + "18": 6289754.0, + "19": 6162432.0, + "20": 6527695.0, + "21": 6981984.0, + "22": 7132788.0, + "23": 5928504.0, + "24": 6210076.0, + "25": 6993073.0, + "26": 6471296.0, + "27": 6355325.0, + "28": 6877023.0, + "29": 6380286.0, + "30": 6468637.0, + "31": 8165049.0, + "32": 6765693.0, + "33": 6355607.0, + "34": 6662298.0, + "35": 7065050.0, + "36": 6077046.0, + "37": 7785469.0, + "38": 6727037.0, + "39": 7315675.0, + "40": 6555109.0, + "41": 7314543.0, + "42": 6591977.0, + "43": 6927941.0, + "44": 7274324.0, + "45": 6680295.0, + "46": 6232304.0, + "47": 6496696.0, + "48": 6809772.0, + "49": 6753632.0, + "50": 6238092.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1653821440.0, - "2": 1653821440.0, - "3": 1653821440.0, - "4": 1653821440.0, - "5": 1653821440.0, - "6": 1653821440.0, - "7": 1653821440.0, - "8": 1653821440.0, - "9": 1653821440.0, - "10": 1653821440.0, - "11": 1653821440.0, - "12": 1653821440.0, - "13": 1653821440.0, - "14": 1653821440.0, - "15": 1653821440.0, - "16": 1653821440.0, - "17": 1653821440.0, - "18": 1653821440.0, - "19": 1653821440.0, - "20": 1653821440.0, - "21": 1653821440.0, - "22": 1653821440.0, - "23": 1653821440.0, - "24": 1653821440.0, - "25": 1653821440.0, - "26": 1653821440.0, - "27": 1653821440.0, - "28": 1653821440.0, - "29": 1653821440.0, - "30": 1653821440.0, - "31": 1653821440.0, - "32": 1653821440.0, - "33": 1653821440.0, - "34": 1653821440.0, - "35": 1653821440.0, - "36": 1653821440.0, - "37": 1653821440.0, - "38": 1653821440.0, - "39": 1653821440.0, - "40": 1653821440.0, - "41": 1653821440.0, - "42": 1653821440.0, - "43": 1653821440.0, - "44": 1653821440.0, - "45": 1653821440.0, - "46": 1653821440.0, - "47": 1653821440.0, - "48": 1653821440.0, - "49": 1653821440.0, - "50": 1653821440.0 + "1": 1650282496.0, + "2": 1650282496.0, + "3": 1650282496.0, + "4": 1650282496.0, + "5": 1650282496.0, + "6": 1650282496.0, + "7": 1650282496.0, + "8": 1650282496.0, + "9": 1650282496.0, + "10": 1650282496.0, + "11": 1650282496.0, + "12": 1650282496.0, + "13": 1650282496.0, + "14": 1650282496.0, + "15": 1650282496.0, + "16": 1650282496.0, + "17": 1650282496.0, + "18": 1650282496.0, + "19": 1650282496.0, + "20": 1650282496.0, + "21": 1650282496.0, + "22": 1650282496.0, + "23": 1650282496.0, + "24": 1650282496.0, + "25": 1650282496.0, + "26": 1650282496.0, + "27": 1650282496.0, + "28": 1650282496.0, + "29": 1650282496.0, + "30": 1650282496.0, + "31": 1650282496.0, + "32": 1650282496.0, + "33": 1650282496.0, + "34": 1650282496.0, + "35": 1650282496.0, + "36": 1650282496.0, + "37": 1650282496.0, + "38": 1650282496.0, + "39": 1650282496.0, + "40": 1650282496.0, + "41": 1650282496.0, + "42": 1650282496.0, + "43": 1650282496.0, + "44": 1650282496.0, + "45": 1650282496.0, + "46": 1650282496.0, + "47": 1650282496.0, + "48": 1650282496.0, + "49": 1650282496.0, + "50": 1650282496.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1653825536.0, - "2": 2142998016.0, - "3": 2142998016.0, - "4": 2142998016.0, - "5": 2142998016.0, - "6": 2142998016.0, - "7": 2142998016.0, - "8": 2142998016.0, - "9": 2142998016.0, - "10": 2142998016.0, - "11": 2142998016.0, - "12": 2142998016.0, - "13": 2142998016.0, - "14": 2142998016.0, - "15": 2142998016.0, - "16": 2142998016.0, - "17": 2142998016.0, - "18": 2142998016.0, - "19": 2142998016.0, - "20": 2142998016.0, - "21": 2142998016.0, - "22": 2142998016.0, - "23": 2142998016.0, - "24": 2142998016.0, - "25": 2142998016.0, - "26": 2142998016.0, - "27": 2142998016.0, - "28": 2142998016.0, - "29": 2142998016.0, - "30": 2142998016.0, - "31": 2142998016.0, - "32": 2142998016.0, - "33": 2142998016.0, - "34": 2142998016.0, - "35": 2142998016.0, - "36": 2142998016.0, - "37": 2142998016.0, - "38": 2142998016.0, - "39": 2142998016.0, - "40": 2142998016.0, - "41": 2142998016.0, - "42": 2142998016.0, - "43": 2142998016.0, - "44": 2142998016.0, - "45": 2142998016.0, - "46": 2142998016.0, - "47": 2142998016.0, - "48": 2142998016.0, - "49": 2142998016.0, - "50": 2142998016.0 + "1": 1650286592.0, + "2": 2137679360.0, + "3": 2137679360.0, + "4": 2137679360.0, + "5": 2137679360.0, + "6": 2137679360.0, + "7": 2137679360.0, + "8": 2137679360.0, + "9": 2137679360.0, + "10": 2137679360.0, + "11": 2137679360.0, + "12": 2137679360.0, + "13": 2137679360.0, + "14": 2137679360.0, + "15": 2137679360.0, + "16": 2137679360.0, + "17": 2137679360.0, + "18": 2137679360.0, + "19": 2137679360.0, + "20": 2137679360.0, + "21": 2137679360.0, + "22": 2137679360.0, + "23": 2137679360.0, + "24": 2137679360.0, + "25": 2137679360.0, + "26": 2137679360.0, + "27": 2137679360.0, + "28": 2137679360.0, + "29": 2137679360.0, + "30": 2137679360.0, + "31": 2137679360.0, + "32": 2137679360.0, + "33": 2137679360.0, + "34": 2137679360.0, + "35": 2137679360.0, + "36": 2137679360.0, + "37": 2137679360.0, + "38": 2137679360.0, + "39": 2137679360.0, + "40": 2137679360.0, + "41": 2137679360.0, + "42": 2137679360.0, + "43": 2137679360.0, + "44": 2137679360.0, + "45": 2137679360.0, + "46": 2137679360.0, + "47": 2137679360.0, + "48": 2137679360.0, + "49": 2137679360.0, + "50": 2137679360.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 28.88794, - "2": 1.3875, - "3": 1.3655, - "4": 0.91436, - "5": 0.92323, - "6": 0.90862, - "7": 0.90351, - "8": 0.90087, - "9": 0.90804, - "10": 0.90099, - "11": 1.44829, - "12": 1.27198, - "13": 1.47603, - "14": 0.90715, - "15": 0.90169, - "16": 0.8955, - "17": 0.91977, - "18": 0.91161, - "19": 0.90173, - "20": 0.89581, - "21": 0.89026, - "22": 0.88949, - "23": 0.91159, - "24": 0.90975, - "25": 0.90708, - "26": 0.89948, - "27": 0.89544, - "28": 0.89745, - "29": 0.90068, - "30": 0.89534, - "31": 0.90066, - "32": 0.91859, - "33": 0.91419, - "34": 0.89878, - "35": 0.89846, - "36": 0.8945, - "37": 0.89356, - "38": 0.89475, - "39": 0.89372, - "40": 0.90674, - "41": 0.90461, - "42": 0.93092, - "43": 0.90002, - "44": 0.89721, - "45": 0.89453, - "46": 0.89499, - "47": 0.90828, - "48": 0.89629, - "49": 0.90644, - "50": 0.90588 + "1": "nan", + "2": 7.91637, + "3": 0.92764, + "4": 0.91334, + "5": 0.91263, + "6": 0.91881, + "7": 0.91512, + "8": 0.91691, + "9": 0.91241, + "10": 0.91216, + "11": 0.90894, + "12": 0.90923, + "13": 0.91112, + "14": 0.91514, + "15": 0.91364, + "16": 0.91332, + "17": 0.91209, + "18": 0.9107, + "19": 1.32194, + "20": 0.91463, + "21": 0.91083, + "22": 0.91645, + "23": 0.91042, + "24": 0.91104, + "25": 0.90961, + "26": 0.90996, + "27": 0.91215, + "28": 1.30488, + "29": 0.91859, + "30": 0.91091, + "31": 0.91133, + "32": 0.91333, + "33": 0.9091, + "34": 0.91409, + "35": 1.30333, + "36": 0.91422, + "37": 0.91635, + "38": 0.91473, + "39": 1.50529, + "40": 0.97326, + "41": 1.31231, + "42": 0.91182, + "43": 1.30555, + "44": 0.91651, + "45": 0.91568, + "46": 1.35255, + "47": 0.91348, + "48": 0.91258, + "49": 0.91001, + "50": 0.90511 } } } \ No newline at end of file From 30f0581147f40c0eee92877f539686be42a2d45c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Mon, 23 Feb 2026 10:32:53 +0000 Subject: [PATCH 06/12] remove marker MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../distributed/megatron_fsdp/test_mfsdp_fully_shard.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py b/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py index ea58411333f..cbca505b405 100644 --- a/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py +++ b/tests/unit_tests/distributed/megatron_fsdp/test_mfsdp_fully_shard.py @@ -395,7 +395,6 @@ def test_fully_shard( # Required to reset the parallelism environment. destroy_device_mesh(device_mesh) - @pytest.mark.flaky_in_dev @pytest.mark.skipif( version.parse(torch.__version__) < version.parse('2.4.0'), reason="Requires DTensor and DeviceMesh support in (approximately) PyTorch 2.4.0 or later. Should not be run on 2.2.0a0+81ea7a4 (LTS).", From c7f4da3b02afc49cf34901e522796b1dd53d2955 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Tue, 24 Feb 2026 10:30:49 +0000 Subject: [PATCH 07/12] update golden values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 500 +++---- .../golden_values_dev_dgx_h100.json | 500 +++---- .../golden_values_dev_dgx_h100.json | 500 +++---- .../golden_values_dev_dgx_h100.json | 500 +++---- .../golden_values_dev_dgx_h100.json | 1000 +++++++------- .../golden_values_dev_dgx_h100.json | 1000 +++++++------- .../golden_values_dev_dgx_h100.json | 500 +++---- .../golden_values_dev_dgx_gb200.json | 116 +- .../golden_values_dev_dgx_gb200.json | 116 +- .../golden_values_dev_dgx_gb200.json | 280 ++-- .../golden_values_dev_dgx_gb200.json | 278 ++-- .../golden_values_dev_dgx_gb200.json | 568 ++++---- .../golden_values_dev_dgx_gb200.json | 568 ++++---- .../golden_values_dev_dgx_gb200.json | 968 +++++++------- .../golden_values_dev_dgx_gb200.json | 278 ++-- .../golden_values_dev_dgx_gb200.json | 570 ++++---- .../golden_values_dev_dgx_gb200.json | 568 ++++---- .../golden_values_dev_dgx_gb200.json | 968 +++++++------- .../golden_values_dev_dgx_gb200.json | 288 ++-- .../golden_values_dev_dgx_gb200.json | 376 +++--- .../golden_values_dev_dgx_gb200.json | 274 ++-- .../golden_values_dev_dgx_gb200.json | 972 +++++++------- .../golden_values_dev_dgx_gb200.json | 972 +++++++------- .../golden_values_dev_dgx_gb200.json | 566 ++++---- .../golden_values_dev_dgx_gb200.json | 390 +++--- .../golden_values_dev_dgx_gb200.json | 280 ++-- .../golden_values_dev_dgx_h100.json | 488 +++---- .../golden_values_dev_dgx_gb200.json | 940 ++++++------- .../golden_values_dev_dgx_gb200.json | 936 ++++++------- .../golden_values_dev_dgx_gb200.json | 438 +++--- .../golden_values_dev_dgx_gb200.json | 438 +++--- .../golden_values_dev_dgx_gb200.json | 494 +++---- .../golden_values_dev_dgx_gb200.json | 934 ++++++------- .../golden_values_dev_dgx_a100.json | 424 +++--- .../golden_values_dev_dgx_gb200.json | 492 +++---- .../golden_values_dev_dgx_a100.json | 424 +++--- .../golden_values_dev_dgx_gb200.json | 492 +++---- .../golden_values_dev_dgx_a100.json | 436 +++--- .../golden_values_dev_dgx_h100.json | 496 +++---- .../golden_values_dev_dgx_a100.json | 436 +++--- .../golden_values_dev_dgx_h100.json | 496 +++---- .../golden_values_dev_dgx_gb200.json | 494 +++---- .../golden_values_dev_dgx_h100.json | 496 +++---- .../golden_values_dev_dgx_gb200.json | 1186 ++++++++--------- .../golden_values_dev_dgx_h100.json | 198 +-- 45 files changed, 12317 insertions(+), 12317 deletions(-) diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json index b9b1236875c..403d1fe1253 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.47723, - "2": 10.47576, - "3": 10.46809, - "4": 10.47326, - "5": 10.47148, - "6": 10.46049, - "7": 10.46357, - "8": 10.47334, - "9": 10.48063, - "10": 10.46319, - "11": 10.47102, - "12": 10.45502, - "13": 10.44665, - "14": 10.451, - "15": 10.48846, - "16": 10.4509, - "17": 10.44648, - "18": 10.44272, - "19": 10.43057, - "20": 10.44534, - "21": 10.41771, - "22": 10.38656, - "23": 10.39328, - "24": 10.37849, - "25": 10.35466, - "26": 10.35965, - "27": 10.34523, - "28": 10.33556, - "29": 10.25418, - "30": 10.23008, - "31": 10.14093, - "32": 10.13603, - "33": 10.13936, - "34": 10.11381, - "35": 10.08888, - "36": 10.09238, - "37": 10.06851, - "38": 10.0466, - "39": 9.97582, - "40": 9.93764, - "41": 9.90872, - "42": 9.84882, - "43": 9.85772, - "44": 9.7925, - "45": 9.80329, - "46": 9.70285, - "47": 9.73423, - "48": 9.70106, - "49": 9.69966, - "50": 9.70252 + "1": 10.60763, + "2": 10.59402, + "3": 10.60795, + "4": 10.59589, + "5": 10.60931, + "6": 10.59415, + "7": 10.58965, + "8": 10.59915, + "9": 10.59851, + "10": 10.5837, + "11": 10.58821, + "12": 10.58578, + "13": 10.5893, + "14": 10.58439, + "15": 10.594, + "16": 10.57546, + "17": 10.56566, + "18": 10.57411, + "19": 10.56959, + "20": 10.56742, + "21": 10.55501, + "22": 10.50754, + "23": 10.4905, + "24": 10.48088, + "25": 10.46596, + "26": 10.47202, + "27": 10.46396, + "28": 10.45992, + "29": 10.41203, + "30": 10.31839, + "31": 10.27417, + "32": 10.2348, + "33": 10.24377, + "34": 10.18839, + "35": 10.21156, + "36": 10.17092, + "37": 10.15115, + "38": 10.13027, + "39": 10.09771, + "40": 10.05356, + "41": 9.9979, + "42": 9.94057, + "43": 9.92471, + "44": 9.87692, + "45": 9.85234, + "46": 9.79379, + "47": 9.78024, + "48": 9.75534, + "49": 9.79726, + "50": 9.75564 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2137.0, - "2": 1618.0, - "3": 1561.0, - "4": 1871.0, - "5": 1983.0, - "6": 1565.0, - "7": 2779.0, - "8": 2108.0, - "9": 2008.0, - "10": 2086.0, - "11": 2534.0, - "12": 1686.0, - "13": 2120.0, - "14": 2814.0, - "15": 1735.0, - "16": 2535.0, - "17": 2409.0, - "18": 2345.0, - "19": 2374.0, - "20": 2743.0, - "21": 2039.0, - "22": 2925.0, - "23": 2630.0, - "24": 2821.0, - "25": 2366.0, - "26": 2633.0, - "27": 2921.0, - "28": 2760.0, - "29": 2635.0, - "30": 2614.0, - "31": 2073.0, - "32": 2275.0, - "33": 2130.0, - "34": 2185.0, - "35": 2312.0, - "36": 2789.0, - "37": 2937.0, - "38": 2652.0, - "39": 2929.0, - "40": 3348.0, - "41": 1812.0, - "42": 1441.0, - "43": 1726.0, - "44": 2437.0, - "45": 3263.0, - "46": 2813.0, - "47": 2668.0, - "48": 3411.0, - "49": 3174.0, - "50": 2441.0 + "1": 2282.0, + "2": 2715.0, + "3": 2495.0, + "4": 2373.0, + "5": 2854.0, + "6": 2551.0, + "7": 1613.0, + "8": 2242.0, + "9": 2185.0, + "10": 2245.0, + "11": 2404.0, + "12": 2675.0, + "13": 2384.0, + "14": 2122.0, + "15": 2765.0, + "16": 2433.0, + "17": 2643.0, + "18": 2686.0, + "19": 2739.0, + "20": 2298.0, + "21": 2297.0, + "22": 2666.0, + "23": 2414.0, + "24": 2554.0, + "25": 2425.0, + "26": 2476.0, + "27": 2675.0, + "28": 2528.0, + "29": 2604.0, + "30": 2574.0, + "31": 3120.0, + "32": 2892.0, + "33": 2709.0, + "34": 2790.0, + "35": 3081.0, + "36": 3174.0, + "37": 3030.0, + "38": 2712.0, + "39": 2550.0, + "40": 2185.0, + "41": 3638.0, + "42": 3606.0, + "43": 3383.0, + "44": 3558.0, + "45": 3606.0, + "46": 2694.0, + "47": 2103.0, + "48": 3161.0, + "49": 3268.0, + "50": 3644.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3405920768.0, - "2": 3405920768.0, - "3": 3405920768.0, - "4": 3405920768.0, - "5": 3405920768.0, - "6": 3405920768.0, - "7": 3405920768.0, - "8": 3405920768.0, - "9": 3405920768.0, - "10": 3405920768.0, - "11": 3405920768.0, - "12": 3405920768.0, - "13": 3405920768.0, - "14": 3405920768.0, - "15": 3405920768.0, - "16": 3405920768.0, - "17": 3405920768.0, - "18": 3405920768.0, - "19": 3405920768.0, - "20": 3405920768.0, - "21": 3405920768.0, - "22": 3405920768.0, - "23": 3405920768.0, - "24": 3405920768.0, - "25": 3405920768.0, - "26": 3405920768.0, - "27": 3405920768.0, - "28": 3405920768.0, - "29": 3405920768.0, - "30": 3405920768.0, - "31": 3405920768.0, - "32": 3405920768.0, - "33": 3405920768.0, - "34": 3405920768.0, - "35": 3405920768.0, - "36": 3405920768.0, - "37": 3405920768.0, - "38": 3405920768.0, - "39": 3405920768.0, - "40": 3405920768.0, - "41": 3405920768.0, - "42": 3405920768.0, - "43": 3405920768.0, - "44": 3405920768.0, - "45": 3405920768.0, - "46": 3405920768.0, - "47": 3405920768.0, - "48": 3405920768.0, - "49": 3405920768.0, - "50": 3405920768.0 + "1": 3434522112.0, + "2": 3435308544.0, + "3": 3435308544.0, + "4": 3435308544.0, + "5": 3435308544.0, + "6": 3435308544.0, + "7": 3434522112.0, + "8": 3434522112.0, + "9": 3434522112.0, + "10": 3435308544.0, + "11": 3434522112.0, + "12": 3434522112.0, + "13": 3434522112.0, + "14": 3434522112.0, + "15": 3435308544.0, + "16": 3435308544.0, + "17": 3435308544.0, + "18": 3434522112.0, + "19": 3435308544.0, + "20": 3435308544.0, + "21": 3435308544.0, + "22": 3434522112.0, + "23": 3435308544.0, + "24": 3434522112.0, + "25": 3435308544.0, + "26": 3434522112.0, + "27": 3434522112.0, + "28": 3434522112.0, + "29": 3434522112.0, + "30": 3435308544.0, + "31": 3434522112.0, + "32": 3435308544.0, + "33": 3434522112.0, + "34": 3435308544.0, + "35": 3435308544.0, + "36": 3434522112.0, + "37": 3434522112.0, + "38": 3434522112.0, + "39": 3434522112.0, + "40": 3434522112.0, + "41": 3435308544.0, + "42": 3435308544.0, + "43": 3435308544.0, + "44": 3434522112.0, + "45": 3434522112.0, + "46": 3435308544.0, + "47": 3435308544.0, + "48": 3434522112.0, + "49": 3434522112.0, + "50": 3434522112.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4195575808.0, - "2": 5662015488.0, - "3": 5662015488.0, - "4": 5662015488.0, - "5": 5662015488.0, - "6": 5662015488.0, - "7": 5662015488.0, - "8": 5662015488.0, - "9": 5662015488.0, - "10": 5662015488.0, - "11": 5662015488.0, - "12": 5662015488.0, - "13": 5662015488.0, - "14": 5662015488.0, - "15": 5662015488.0, - "16": 5662015488.0, - "17": 5662015488.0, - "18": 5662015488.0, - "19": 5662015488.0, - "20": 5662015488.0, - "21": 5662015488.0, - "22": 5662015488.0, - "23": 5662015488.0, - "24": 5662015488.0, - "25": 5662015488.0, - "26": 5662015488.0, - "27": 5662015488.0, - "28": 5662015488.0, - "29": 5662015488.0, - "30": 5662015488.0, - "31": 5662015488.0, - "32": 5662015488.0, - "33": 5662015488.0, - "34": 5662015488.0, - "35": 5662015488.0, - "36": 5662015488.0, - "37": 5662015488.0, - "38": 5662015488.0, - "39": 5662015488.0, - "40": 5662015488.0, - "41": 5662015488.0, - "42": 5662015488.0, - "43": 5662015488.0, - "44": 5662015488.0, - "45": 5662015488.0, - "46": 5662015488.0, - "47": 5662015488.0, - "48": 5662015488.0, - "49": 5662015488.0, - "50": 5662015488.0 + "1": 4230456320.0, + "2": 5708704256.0, + "3": 5708704256.0, + "4": 5708704256.0, + "5": 5708704256.0, + "6": 5708704256.0, + "7": 5708704768.0, + "8": 5708704768.0, + "9": 5708704768.0, + "10": 5708704768.0, + "11": 5708704768.0, + "12": 5708704768.0, + "13": 5708704768.0, + "14": 5708704768.0, + "15": 5708704768.0, + "16": 5708704768.0, + "17": 5708704768.0, + "18": 5708704768.0, + "19": 5708704768.0, + "20": 5708704768.0, + "21": 5708704768.0, + "22": 5708704768.0, + "23": 5708704768.0, + "24": 5708704768.0, + "25": 5708704768.0, + "26": 5708704768.0, + "27": 5708704768.0, + "28": 5708704768.0, + "29": 5708704768.0, + "30": 5708704768.0, + "31": 5708704768.0, + "32": 5708704768.0, + "33": 5708704768.0, + "34": 5708704768.0, + "35": 5708704768.0, + "36": 5708704768.0, + "37": 5708704768.0, + "38": 5708704768.0, + "39": 5708704768.0, + "40": 5708704768.0, + "41": 5708704768.0, + "42": 5708704768.0, + "43": 5709227520.0, + "44": 5709227520.0, + "45": 5709227520.0, + "46": 5709227520.0, + "47": 5709227520.0, + "48": 5709229056.0, + "49": 5709229056.0, + "50": 5709229056.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 9.33953, - "2": 0.53319, - "3": 0.47492, - "4": 0.43971, - "5": 0.43812, - "6": 0.43852, - "7": 0.4386, - "8": 0.43696, - "9": 0.4374, - "10": 0.43581, - "11": 0.71474, - "12": 0.44321, - "13": 0.73975, - "14": 0.44195, - "15": 0.43796, - "16": 0.43687, - "17": 0.43648, - "18": 0.43733, - "19": 0.43826, - "20": 0.44179, - "21": 1.02916, - "22": 0.7107, - "23": 0.70393, - "24": 0.904, - "25": 0.43822, - "26": 0.43864, - "27": 0.46131, - "28": 0.44753, - "29": 0.43372, - "30": 0.43644, - "31": 0.45145, - "32": 0.44608, - "33": 0.43714, - "34": 0.43395, - "35": 0.43358, - "36": 0.43471, - "37": 0.43343, - "38": 0.43378, - "39": 0.43774, - "40": 0.43399, - "41": 0.43662, - "42": 0.43501, - "43": 0.43703, - "44": 0.44084, - "45": 0.43443, - "46": 0.43652, - "47": 0.84278, - "48": 0.44024, - "49": 0.4409, - "50": 0.43833 + "1": "nan", + "2": 5.98593, + "3": 0.46994, + "4": 0.44538, + "5": 0.44329, + "6": 0.45802, + "7": 0.44298, + "8": 0.43592, + "9": 0.43535, + "10": 0.43911, + "11": 0.43744, + "12": 0.43512, + "13": 0.43522, + "14": 0.43513, + "15": 0.43723, + "16": 0.43945, + "17": 0.43667, + "18": 0.43792, + "19": 0.43525, + "20": 0.43711, + "21": 0.4375, + "22": 0.43715, + "23": 0.43657, + "24": 0.43624, + "25": 0.43653, + "26": 0.43652, + "27": 0.43638, + "28": 0.43842, + "29": 0.43702, + "30": 0.43458, + "31": 0.43557, + "32": 0.43539, + "33": 0.43697, + "34": 0.43601, + "35": 0.43849, + "36": 0.70822, + "37": 0.69532, + "38": 0.43677, + "39": 0.74727, + "40": 0.4357, + "41": 0.46813, + "42": 0.88067, + "43": 0.89866, + "44": 0.43802, + "45": 0.85415, + "46": 0.45011, + "47": 0.44994, + "48": 0.72982, + "49": 0.66393, + "50": 0.46635 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json index 30fa7e80d5a..78cfbf66189 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.55236, - "2": 10.52891, - "3": 10.55085, - "4": 10.55035, - "5": 10.52311, - "6": 10.53328, - "7": 10.53097, - "8": 10.54323, - "9": 10.54514, - "10": 10.53676, - "11": 10.53791, - "12": 10.54319, - "13": 10.5263, - "14": 10.5316, - "15": 10.52714, - "16": 10.50594, - "17": 10.5009, - "18": 10.51024, - "19": 10.49283, - "20": 10.48852, - "21": 10.47463, - "22": 10.42802, - "23": 10.42674, - "24": 10.40359, - "25": 10.39998, - "26": 10.38464, - "27": 10.38236, - "28": 10.36891, - "29": 10.32202, - "30": 10.22049, - "31": 10.17103, - "32": 10.12583, - "33": 10.10622, - "34": 10.09458, - "35": 10.07043, - "36": 10.07484, - "37": 10.03646, - "38": 10.0182, - "39": 9.9686, - "40": 9.93086, - "41": 9.87312, - "42": 9.8185, - "43": 9.81546, - "44": 9.73852, - "45": 9.76279, - "46": 9.67679, - "47": 9.68692, - "48": 9.66292, - "49": 9.67587, - "50": 9.67447 + "1": 10.50326, + "2": 10.50525, + "3": 10.50338, + "4": 10.50978, + "5": 10.49527, + "6": 10.5069, + "7": 10.50655, + "8": 10.49761, + "9": 10.50205, + "10": 10.51242, + "11": 10.51237, + "12": 10.50319, + "13": 10.50028, + "14": 10.49326, + "15": 10.497, + "16": 10.46708, + "17": 10.46988, + "18": 10.4867, + "19": 10.47169, + "20": 10.47363, + "21": 10.45966, + "22": 10.41973, + "23": 10.41804, + "24": 10.41547, + "25": 10.37511, + "26": 10.38999, + "27": 10.35241, + "28": 10.37465, + "29": 10.32145, + "30": 10.22366, + "31": 10.17929, + "32": 10.15937, + "33": 10.16243, + "34": 10.1286, + "35": 10.10432, + "36": 10.08333, + "37": 10.076, + "38": 10.08394, + "39": 10.02227, + "40": 9.98037, + "41": 9.93077, + "42": 9.87377, + "43": 9.87387, + "44": 9.83581, + "45": 9.81097, + "46": 9.74786, + "47": 9.73156, + "48": 9.71596, + "49": 9.7651, + "50": 9.72689 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2320.0, - "2": 2645.0, - "3": 2441.0, - "4": 2417.0, - "5": 2730.0, - "6": 2332.0, - "7": 1661.0, - "8": 2386.0, - "9": 2256.0, - "10": 2428.0, - "11": 2152.0, - "12": 2337.0, - "13": 2643.0, - "14": 2209.0, - "15": 2607.0, - "16": 2411.0, - "17": 2529.0, - "18": 2392.0, - "19": 2417.0, - "20": 2269.0, - "21": 2382.0, - "22": 2652.0, - "23": 2420.0, - "24": 2251.0, - "25": 2616.0, - "26": 2433.0, - "27": 2470.0, - "28": 2335.0, - "29": 2270.0, - "30": 2689.0, - "31": 2960.0, - "32": 2808.0, - "33": 2659.0, - "34": 2932.0, - "35": 2926.0, - "36": 3103.0, - "37": 3227.0, - "38": 2634.0, - "39": 2132.0, - "40": 2236.0, - "41": 3589.0, - "42": 3470.0, - "43": 3467.0, - "44": 4038.0, - "45": 4173.0, - "46": 2993.0, - "47": 1996.0, - "48": 3318.0, - "49": 3662.0, - "50": 3572.0 + "1": 2491.0, + "2": 1900.0, + "3": 1708.0, + "4": 2380.0, + "5": 2253.0, + "6": 1958.0, + "7": 2398.0, + "8": 2384.0, + "9": 2206.0, + "10": 2330.0, + "11": 2833.0, + "12": 1956.0, + "13": 2178.0, + "14": 2805.0, + "15": 2182.0, + "16": 2715.0, + "17": 2621.0, + "18": 2629.0, + "19": 2501.0, + "20": 2632.0, + "21": 2047.0, + "22": 2655.0, + "23": 2487.0, + "24": 2912.0, + "25": 2624.0, + "26": 2640.0, + "27": 2746.0, + "28": 2787.0, + "29": 2569.0, + "30": 3013.0, + "31": 2385.0, + "32": 2773.0, + "33": 2477.0, + "34": 2594.0, + "35": 2823.0, + "36": 3033.0, + "37": 3108.0, + "38": 2842.0, + "39": 2647.0, + "40": 3551.0, + "41": 1852.0, + "42": 1566.0, + "43": 1733.0, + "44": 3130.0, + "45": 3843.0, + "46": 3331.0, + "47": 2862.0, + "48": 3103.0, + "49": 2820.0, + "50": 2198.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2062574080.0, - "2": 2062574080.0, - "3": 2062574080.0, - "4": 2062574080.0, - "5": 2062574080.0, - "6": 2062574080.0, - "7": 2062574080.0, - "8": 2062574080.0, - "9": 2062574080.0, - "10": 2062574080.0, - "11": 2062574080.0, - "12": 2062574080.0, - "13": 2062574080.0, - "14": 2062574080.0, - "15": 2062574080.0, - "16": 2062574080.0, - "17": 2062574080.0, - "18": 2062574080.0, - "19": 2062574080.0, - "20": 2062574080.0, - "21": 2062574080.0, - "22": 2062574080.0, - "23": 2062574080.0, - "24": 2062574080.0, - "25": 2062574080.0, - "26": 2062574080.0, - "27": 2062574080.0, - "28": 2062574080.0, - "29": 2062574080.0, - "30": 2062574080.0, - "31": 2062574080.0, - "32": 2062574080.0, - "33": 2062574080.0, - "34": 2062574080.0, - "35": 2062574080.0, - "36": 2062574080.0, - "37": 2062574080.0, - "38": 2062574080.0, - "39": 2062574080.0, - "40": 2062574080.0, - "41": 2062574080.0, - "42": 2062574080.0, - "43": 2062574080.0, - "44": 2062574080.0, - "45": 2062574080.0, - "46": 2062574080.0, - "47": 2062574080.0, - "48": 2062574080.0, - "49": 2062574080.0, - "50": 2062574080.0 + "1": 2091169280.0, + "2": 2091169280.0, + "3": 2091169280.0, + "4": 2091169280.0, + "5": 2091169280.0, + "6": 2091169280.0, + "7": 2091169280.0, + "8": 2091169280.0, + "9": 2091169280.0, + "10": 2091169280.0, + "11": 2091169280.0, + "12": 2091169280.0, + "13": 2091169280.0, + "14": 2091169280.0, + "15": 2091169280.0, + "16": 2091169280.0, + "17": 2091169280.0, + "18": 2091169280.0, + "19": 2091169280.0, + "20": 2091169280.0, + "21": 2091169280.0, + "22": 2091169280.0, + "23": 2091169280.0, + "24": 2091169280.0, + "25": 2091169280.0, + "26": 2091169280.0, + "27": 2091169280.0, + "28": 2091169280.0, + "29": 2091169280.0, + "30": 2091169280.0, + "31": 2091693568.0, + "32": 2091169280.0, + "33": 2091169280.0, + "34": 2091169280.0, + "35": 2091169280.0, + "36": 2091169280.0, + "37": 2091169280.0, + "38": 2091169280.0, + "39": 2091169280.0, + "40": 2091169280.0, + "41": 2091169280.0, + "42": 2091169280.0, + "43": 2091169280.0, + "44": 2091169280.0, + "45": 2091169280.0, + "46": 2091169280.0, + "47": 2091169280.0, + "48": 2091169280.0, + "49": 2091169280.0, + "50": 2091169280.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4386474496.0, - "2": 5246722560.0, - "3": 5246722560.0, - "4": 5246722560.0, - "5": 5246722560.0, - "6": 5246722560.0, - "7": 5246722560.0, - "8": 5246722560.0, - "9": 5246722560.0, - "10": 5246722560.0, - "11": 5246722560.0, - "12": 5246722560.0, - "13": 5246722560.0, - "14": 5246722560.0, - "15": 5246722560.0, - "16": 5246722560.0, - "17": 5246722560.0, - "18": 5246722560.0, - "19": 5246722560.0, - "20": 5246722560.0, - "21": 5246722560.0, - "22": 5246722560.0, - "23": 5246722560.0, - "24": 5246722560.0, - "25": 5246722560.0, - "26": 5246722560.0, - "27": 5246722560.0, - "28": 5246722560.0, - "29": 5246722560.0, - "30": 5246722560.0, - "31": 5246722560.0, - "32": 5246722560.0, - "33": 5246722560.0, - "34": 5246722560.0, - "35": 5246722560.0, - "36": 5246722560.0, - "37": 5246722560.0, - "38": 5246722560.0, - "39": 5246722560.0, - "40": 5246722560.0, - "41": 5246722560.0, - "42": 5246722560.0, - "43": 5246722560.0, - "44": 5246722560.0, - "45": 5246722560.0, - "46": 5246722560.0, - "47": 5246722560.0, - "48": 5246722560.0, - "49": 5246722560.0, - "50": 5246722560.0 + "1": 4421348864.0, + "2": 5294192128.0, + "3": 5294716416.0, + "4": 5294716416.0, + "5": 5294716416.0, + "6": 5294716416.0, + "7": 5294716416.0, + "8": 5294716416.0, + "9": 5294716416.0, + "10": 5294716416.0, + "11": 5294716416.0, + "12": 5294716416.0, + "13": 5294716416.0, + "14": 5294716416.0, + "15": 5294716416.0, + "16": 5294716416.0, + "17": 5294716416.0, + "18": 5294716416.0, + "19": 5294716416.0, + "20": 5294716416.0, + "21": 5294716416.0, + "22": 5294716416.0, + "23": 5294716416.0, + "24": 5294716416.0, + "25": 5294716416.0, + "26": 5294716416.0, + "27": 5294716416.0, + "28": 5294716416.0, + "29": 5294716416.0, + "30": 5294716416.0, + "31": 5294716416.0, + "32": 5294716416.0, + "33": 5294716416.0, + "34": 5294716416.0, + "35": 5294716416.0, + "36": 5294716416.0, + "37": 5294716416.0, + "38": 5294716416.0, + "39": 5294716416.0, + "40": 5294716416.0, + "41": 5294716416.0, + "42": 5294716416.0, + "43": 5294716416.0, + "44": 5294716416.0, + "45": 5294716416.0, + "46": 5294716416.0, + "47": 5294716416.0, + "48": 5294716416.0, + "49": 5294716416.0, + "50": 5294716416.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12.53778, - "2": 0.64042, - "3": 0.57704, - "4": 0.56942, - "5": 0.55857, - "6": 1.5214, - "7": 0.8799, - "8": 0.58802, - "9": 0.58845, - "10": 0.91566, - "11": 1.66597, - "12": 1.31669, - "13": 0.9054, - "14": 0.55959, - "15": 0.55349, - "16": 0.56731, - "17": 0.54994, - "18": 0.56124, - "19": 0.54032, - "20": 0.54467, - "21": 0.56577, - "22": 0.59073, - "23": 0.55848, - "24": 0.5515, - "25": 0.56783, - "26": 0.58223, - "27": 0.56278, - "28": 0.55385, - "29": 0.54473, - "30": 0.54779, - "31": 0.54239, - "32": 0.53324, - "33": 0.54812, - "34": 0.57008, - "35": 0.56814, - "36": 0.55146, - "37": 0.56138, - "38": 0.80574, - "39": 0.5919, - "40": 0.83084, - "41": 0.9006, - "42": 0.82734, - "43": 0.98233, - "44": 1.08635, - "45": 1.33415, - "46": 1.29362, - "47": 1.03481, - "48": 1.02838, - "49": 0.56104, - "50": 0.57748 + "1": "nan", + "2": 8.01999, + "3": 0.56866, + "4": 0.53972, + "5": 0.54716, + "6": 0.55645, + "7": 0.54019, + "8": 0.54218, + "9": 0.55986, + "10": 0.56452, + "11": 0.5598, + "12": 0.53842, + "13": 0.55086, + "14": 0.54615, + "15": 0.56416, + "16": 0.56092, + "17": 1.0193, + "18": 1.23706, + "19": 1.271, + "20": 1.26275, + "21": 1.76427, + "22": 1.84856, + "23": 1.41201, + "24": 0.92392, + "25": 0.54441, + "26": 0.53908, + "27": 0.5709, + "28": 0.5559, + "29": 0.56284, + "30": 0.53843, + "31": 0.54401, + "32": 0.52817, + "33": 0.54638, + "34": 0.57179, + "35": 0.58599, + "36": 0.56335, + "37": 0.56457, + "38": 0.55358, + "39": 0.5496, + "40": 0.55457, + "41": 0.57059, + "42": 0.53866, + "43": 0.55125, + "44": 0.55925, + "45": 0.54021, + "46": 1.06149, + "47": 0.84305, + "48": 1.10614, + "49": 1.45848, + "50": 1.12052 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json index 7a21f7ae2f9..48744b5c021 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.48367, - "2": 10.48426, - "3": 10.48254, - "4": 10.48311, - "5": 10.4764, - "6": 10.4844, - "7": 10.48458, - "8": 10.48829, - "9": 10.49008, - "10": 10.47268, - "11": 10.47256, - "12": 10.48259, - "13": 10.47857, - "14": 10.45154, - "15": 10.47925, - "16": 10.45346, - "17": 10.45145, - "18": 10.46238, - "19": 10.44113, - "20": 10.45448, - "21": 10.43454, - "22": 10.40591, - "23": 10.39975, - "24": 10.37583, - "25": 10.38168, - "26": 10.3515, - "27": 10.35388, - "28": 10.34965, - "29": 10.28701, - "30": 10.21143, - "31": 10.17272, - "32": 10.13416, - "33": 10.14725, - "34": 10.10738, - "35": 10.10592, - "36": 10.08739, - "37": 10.08157, - "38": 10.07245, - "39": 10.00093, - "40": 9.98138, - "41": 9.92543, - "42": 9.87534, - "43": 9.88716, - "44": 9.80646, - "45": 9.82342, - "46": 9.73786, - "47": 9.74811, - "48": 9.71614, - "49": 9.74493, - "50": 9.73 + "1": 10.52605, + "2": 10.54515, + "3": 10.53013, + "4": 10.54119, + "5": 10.53738, + "6": 10.54198, + "7": 10.54842, + "8": 10.54207, + "9": 10.53241, + "10": 10.52878, + "11": 10.53233, + "12": 10.53794, + "13": 10.54081, + "14": 10.52465, + "15": 10.52208, + "16": 10.50333, + "17": 10.51348, + "18": 10.50677, + "19": 10.50509, + "20": 10.50927, + "21": 10.50899, + "22": 10.44601, + "23": 10.43118, + "24": 10.44177, + "25": 10.40785, + "26": 10.41558, + "27": 10.39996, + "28": 10.40992, + "29": 10.36533, + "30": 10.28201, + "31": 10.23061, + "32": 10.20558, + "33": 10.21632, + "34": 10.17137, + "35": 10.14581, + "36": 10.1275, + "37": 10.11512, + "38": 10.11943, + "39": 10.0799, + "40": 10.01106, + "41": 9.96694, + "42": 9.92772, + "43": 9.92016, + "44": 9.86405, + "45": 9.83804, + "46": 9.77979, + "47": 9.77207, + "48": 9.7488, + "49": 9.77845, + "50": 9.75235 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2570.0, - "2": 1923.0, - "3": 1512.0, - "4": 2322.0, - "5": 2033.0, - "6": 1774.0, - "7": 2781.0, - "8": 2460.0, - "9": 2308.0, - "10": 2635.0, - "11": 2397.0, - "12": 1817.0, - "13": 2348.0, - "14": 2749.0, - "15": 2027.0, - "16": 2719.0, - "17": 2487.0, - "18": 2533.0, - "19": 2547.0, - "20": 2850.0, - "21": 1990.0, - "22": 2964.0, - "23": 2695.0, - "24": 2772.0, - "25": 2524.0, - "26": 2977.0, - "27": 2627.0, - "28": 2776.0, - "29": 2514.0, - "30": 2843.0, - "31": 2070.0, - "32": 2362.0, - "33": 2211.0, - "34": 2574.0, - "35": 2499.0, - "36": 2943.0, - "37": 3347.0, - "38": 2628.0, - "39": 2781.0, - "40": 3335.0, - "41": 1800.0, - "42": 1598.0, - "43": 1719.0, - "44": 2631.0, - "45": 3492.0, - "46": 2988.0, - "47": 2784.0, - "48": 2951.0, - "49": 2907.0, - "50": 2113.0 + "1": 2665.0, + "2": 2118.0, + "3": 1770.0, + "4": 2558.0, + "5": 2317.0, + "6": 2052.0, + "7": 2434.0, + "8": 2608.0, + "9": 2485.0, + "10": 2440.0, + "11": 2673.0, + "12": 2025.0, + "13": 2299.0, + "14": 2842.0, + "15": 2136.0, + "16": 2721.0, + "17": 2652.0, + "18": 2538.0, + "19": 2662.0, + "20": 2845.0, + "21": 2242.0, + "22": 2745.0, + "23": 2672.0, + "24": 2739.0, + "25": 2663.0, + "26": 2756.0, + "27": 2696.0, + "28": 2775.0, + "29": 2509.0, + "30": 2967.0, + "31": 2508.0, + "32": 2862.0, + "33": 2542.0, + "34": 2619.0, + "35": 2758.0, + "36": 3043.0, + "37": 3327.0, + "38": 2725.0, + "39": 2853.0, + "40": 3511.0, + "41": 1709.0, + "42": 1452.0, + "43": 1696.0, + "44": 2955.0, + "45": 3662.0, + "46": 3297.0, + "47": 3146.0, + "48": 2674.0, + "49": 2478.0, + "50": 2078.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1785063936.0, - "2": 1785063936.0, - "3": 1785063936.0, - "4": 1785063936.0, - "5": 1785063936.0, - "6": 1785063936.0, - "7": 1785063936.0, - "8": 1785063936.0, - "9": 1785063936.0, - "10": 1785063936.0, - "11": 1785063936.0, - "12": 1785063936.0, - "13": 1785063936.0, - "14": 1785063936.0, - "15": 1785063936.0, - "16": 1785063936.0, - "17": 1785063936.0, - "18": 1785063936.0, - "19": 1785063936.0, - "20": 1785063936.0, - "21": 1785063936.0, - "22": 1785063936.0, - "23": 1785063936.0, - "24": 1785063936.0, - "25": 1785063936.0, - "26": 1785063936.0, - "27": 1785063936.0, - "28": 1785063936.0, - "29": 1785063936.0, - "30": 1785063936.0, - "31": 1785063936.0, - "32": 1785063936.0, - "33": 1785063936.0, - "34": 1785063936.0, - "35": 1785063936.0, - "36": 1785063936.0, - "37": 1785063936.0, - "38": 1785063936.0, - "39": 1785063936.0, - "40": 1785063936.0, - "41": 1785063936.0, - "42": 1785063936.0, - "43": 1785063936.0, - "44": 1785063936.0, - "45": 1785063936.0, - "46": 1785063936.0, - "47": 1785063936.0, - "48": 1785063936.0, - "49": 1785063936.0, - "50": 1785063936.0 + "1": 1796646400.0, + "2": 1796646400.0, + "3": 1796646400.0, + "4": 1796646400.0, + "5": 1796646400.0, + "6": 1796646400.0, + "7": 1796646400.0, + "8": 1796646400.0, + "9": 1796646400.0, + "10": 1796646400.0, + "11": 1796646400.0, + "12": 1796646400.0, + "13": 1796646400.0, + "14": 1796646400.0, + "15": 1796646400.0, + "16": 1796646400.0, + "17": 1796646400.0, + "18": 1796646400.0, + "19": 1796646400.0, + "20": 1796646400.0, + "21": 1796646400.0, + "22": 1796646400.0, + "23": 1796646400.0, + "24": 1796646400.0, + "25": 1796646400.0, + "26": 1796646400.0, + "27": 1796646400.0, + "28": 1796646400.0, + "29": 1796646400.0, + "30": 1796646400.0, + "31": 1796646400.0, + "32": 1796646400.0, + "33": 1796646400.0, + "34": 1796646400.0, + "35": 1797694976.0, + "36": 1796646400.0, + "37": 1796646400.0, + "38": 1796646400.0, + "39": 1796646400.0, + "40": 1796646400.0, + "41": 1796646400.0, + "42": 1796646400.0, + "43": 1796646400.0, + "44": 1796646400.0, + "45": 1796646400.0, + "46": 1796646400.0, + "47": 1796646400.0, + "48": 1796646400.0, + "49": 1796646400.0, + "50": 1796646400.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2366910464.0, - "2": 3109894144.0, - "3": 3109894144.0, - "4": 3109894144.0, - "5": 3109894144.0, - "6": 3109894144.0, - "7": 3109894144.0, - "8": 3109894144.0, - "9": 3109894144.0, - "10": 3109894144.0, - "11": 3109894144.0, - "12": 3109894144.0, - "13": 3109894144.0, - "14": 3109894144.0, - "15": 3109897216.0, - "16": 3109897216.0, - "17": 3109897216.0, - "18": 3109897216.0, - "19": 3109897216.0, - "20": 3109897216.0, - "21": 3109897216.0, - "22": 3109897216.0, - "23": 3109897216.0, - "24": 3109897216.0, - "25": 3109897216.0, - "26": 3109897216.0, - "27": 3109897216.0, - "28": 3109897216.0, - "29": 3109897216.0, - "30": 3109897216.0, - "31": 3109897216.0, - "32": 3109897216.0, - "33": 3109897216.0, - "34": 3109897216.0, - "35": 3109897216.0, - "36": 3109897216.0, - "37": 3109897216.0, - "38": 3109897216.0, - "39": 3109897216.0, - "40": 3109897216.0, - "41": 3109897216.0, - "42": 3109897216.0, - "43": 3109897216.0, - "44": 3109897216.0, - "45": 3109897216.0, - "46": 3109897216.0, - "47": 3109897216.0, - "48": 3109897216.0, - "49": 3109897216.0, - "50": 3109897216.0 + "1": 2376915456.0, + "2": 3126723584.0, + "3": 3126724096.0, + "4": 3126724096.0, + "5": 3126724096.0, + "6": 3126724096.0, + "7": 3126724096.0, + "8": 3126724096.0, + "9": 3126724096.0, + "10": 3126724096.0, + "11": 3126724096.0, + "12": 3126724096.0, + "13": 3126724096.0, + "14": 3126724096.0, + "15": 3126724096.0, + "16": 3126724096.0, + "17": 3126724096.0, + "18": 3126724096.0, + "19": 3126724096.0, + "20": 3126724096.0, + "21": 3126724096.0, + "22": 3126724096.0, + "23": 3126724096.0, + "24": 3126724096.0, + "25": 3126724096.0, + "26": 3126724096.0, + "27": 3126724096.0, + "28": 3126724096.0, + "29": 3126724096.0, + "30": 3128821248.0, + "31": 3128821248.0, + "32": 3128821248.0, + "33": 3128821248.0, + "34": 3128821248.0, + "35": 3128821248.0, + "36": 3128821248.0, + "37": 3128821248.0, + "38": 3128821248.0, + "39": 3128821248.0, + "40": 3128821248.0, + "41": 3128821248.0, + "42": 3128821248.0, + "43": 3128821248.0, + "44": 3128821248.0, + "45": 3128821248.0, + "46": 3128821248.0, + "47": 3128821248.0, + "48": 3128821248.0, + "49": 3128821248.0, + "50": 3128821248.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.5121, - "2": 1.00958, - "3": 0.92732, - "4": 0.90421, - "5": 0.90504, - "6": 0.89943, - "7": 0.90319, - "8": 1.1748, - "9": 1.95208, - "10": 0.92148, - "11": 0.91859, - "12": 0.92137, - "13": 0.92531, - "14": 1.25591, - "15": 0.92418, - "16": 0.91961, - "17": 0.90838, - "18": 0.90766, - "19": 0.90747, - "20": 0.9061, - "21": 0.93723, - "22": 0.90644, - "23": 0.91067, - "24": 1.66749, - "25": 0.91188, - "26": 0.91194, - "27": 0.988, - "28": 0.92516, - "29": 0.91117, - "30": 1.435, - "31": 0.89868, - "32": 0.90735, - "33": 1.29737, - "34": 1.32235, - "35": 0.91506, - "36": 0.91851, - "37": 0.92715, - "38": 0.92769, - "39": 0.92632, - "40": 1.26827, - "41": 1.07193, - "42": 1.07217, - "43": 0.98674, - "44": 1.07179, - "45": 1.09756, - "46": 1.10568, - "47": 0.92215, - "48": 0.92051, - "49": 0.92335, - "50": 0.92251 + "1": "nan", + "2": 7.30404, + "3": 0.94091, + "4": 0.92204, + "5": 0.92114, + "6": 0.92111, + "7": 0.90621, + "8": 0.90764, + "9": 0.90745, + "10": 0.90814, + "11": 0.91042, + "12": 0.90559, + "13": 0.90811, + "14": 0.9575, + "15": 0.92282, + "16": 0.92228, + "17": 0.91866, + "18": 0.91983, + "19": 0.92061, + "20": 0.91825, + "21": 1.25693, + "22": 1.10637, + "23": 1.15254, + "24": 0.90684, + "25": 0.9132, + "26": 1.16081, + "27": 0.90621, + "28": 1.16419, + "29": 0.90868, + "30": 1.24759, + "31": 0.94976, + "32": 0.9232, + "33": 0.90997, + "34": 0.90937, + "35": 0.90824, + "36": 0.91023, + "37": 0.90952, + "38": 0.91076, + "39": 0.90936, + "40": 0.9063, + "41": 0.92378, + "42": 0.92283, + "43": 0.92095, + "44": 0.91236, + "45": 0.90954, + "46": 0.90765, + "47": 0.90877, + "48": 1.08773, + "49": 0.9072, + "50": 0.91697 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json index d034c6bf7d8..5fcd4069a97 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.4837, - "2": 10.48435, - "3": 10.48251, - "4": 10.48303, - "5": 10.47647, - "6": 10.48423, - "7": 10.48457, - "8": 10.48837, - "9": 10.49003, - "10": 10.47255, - "11": 10.47245, - "12": 10.4828, - "13": 10.47855, - "14": 10.45162, - "15": 10.47936, - "16": 10.45364, - "17": 10.45143, - "18": 10.46239, - "19": 10.44136, - "20": 10.45438, - "21": 10.43469, - "22": 10.40587, - "23": 10.39982, - "24": 10.37585, - "25": 10.38173, - "26": 10.35154, - "27": 10.35401, - "28": 10.3497, - "29": 10.28714, - "30": 10.21194, - "31": 10.17274, - "32": 10.13439, - "33": 10.14753, - "34": 10.10759, - "35": 10.10592, - "36": 10.08756, - "37": 10.08177, - "38": 10.07257, - "39": 10.0013, - "40": 9.9816, - "41": 9.92551, - "42": 9.87537, - "43": 9.88725, - "44": 9.80659, - "45": 9.82349, - "46": 9.73821, - "47": 9.74829, - "48": 9.71628, - "49": 9.74489, - "50": 9.73004 + "1": 10.52646, + "2": 10.54515, + "3": 10.5313, + "4": 10.53837, + "5": 10.53725, + "6": 10.54437, + "7": 10.54661, + "8": 10.54186, + "9": 10.53417, + "10": 10.53023, + "11": 10.53138, + "12": 10.53806, + "13": 10.53901, + "14": 10.52435, + "15": 10.52291, + "16": 10.50388, + "17": 10.51258, + "18": 10.50728, + "19": 10.505, + "20": 10.50929, + "21": 10.51045, + "22": 10.4465, + "23": 10.43169, + "24": 10.44107, + "25": 10.40841, + "26": 10.4161, + "27": 10.39933, + "28": 10.41056, + "29": 10.36605, + "30": 10.28358, + "31": 10.23042, + "32": 10.205, + "33": 10.21542, + "34": 10.17025, + "35": 10.14669, + "36": 10.12668, + "37": 10.11652, + "38": 10.11826, + "39": 10.08113, + "40": 10.01198, + "41": 9.9667, + "42": 9.92772, + "43": 9.91963, + "44": 9.86358, + "45": 9.83787, + "46": 9.77904, + "47": 9.77267, + "48": 9.74922, + "49": 9.77992, + "50": 9.75251 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2554.0, - "2": 1919.0, - "3": 1521.0, - "4": 2330.0, - "5": 2010.0, - "6": 1725.0, - "7": 2803.0, - "8": 2435.0, - "9": 2286.0, - "10": 2570.0, - "11": 2438.0, - "12": 1829.0, - "13": 2332.0, - "14": 2832.0, - "15": 2008.0, - "16": 2659.0, - "17": 2454.0, - "18": 2500.0, - "19": 2588.0, - "20": 2834.0, - "21": 2042.0, - "22": 3037.0, - "23": 2702.0, - "24": 2700.0, - "25": 2568.0, - "26": 2896.0, - "27": 2735.0, - "28": 2699.0, - "29": 2548.0, - "30": 2843.0, - "31": 2160.0, - "32": 2458.0, - "33": 2130.0, - "34": 2517.0, - "35": 2597.0, - "36": 3001.0, - "37": 3305.0, - "38": 2682.0, - "39": 2805.0, - "40": 3430.0, - "41": 1767.0, - "42": 1516.0, - "43": 1798.0, - "44": 2790.0, - "45": 3578.0, - "46": 3016.0, - "47": 2890.0, - "48": 3065.0, - "49": 2914.0, - "50": 2208.0 + "1": 2680.0, + "2": 2049.0, + "3": 1714.0, + "4": 2460.0, + "5": 2301.0, + "6": 1981.0, + "7": 2388.0, + "8": 2533.0, + "9": 2517.0, + "10": 2512.0, + "11": 2671.0, + "12": 1939.0, + "13": 2323.0, + "14": 2763.0, + "15": 2225.0, + "16": 2787.0, + "17": 2755.0, + "18": 2621.0, + "19": 2763.0, + "20": 2794.0, + "21": 2179.0, + "22": 2877.0, + "23": 2632.0, + "24": 2826.0, + "25": 2691.0, + "26": 2767.0, + "27": 2730.0, + "28": 2782.0, + "29": 2521.0, + "30": 2921.0, + "31": 2472.0, + "32": 2921.0, + "33": 2388.0, + "34": 2551.0, + "35": 2647.0, + "36": 3023.0, + "37": 3267.0, + "38": 2786.0, + "39": 3010.0, + "40": 3454.0, + "41": 1758.0, + "42": 1488.0, + "43": 1763.0, + "44": 2999.0, + "45": 3594.0, + "46": 3353.0, + "47": 3172.0, + "48": 2692.0, + "49": 2463.0, + "50": 2115.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1768285696.0, - "2": 1768285696.0, - "3": 1768285696.0, - "4": 1768285696.0, - "5": 1768285696.0, - "6": 1768285696.0, - "7": 1768285696.0, - "8": 1768285696.0, - "9": 1768285696.0, - "10": 1768285696.0, - "11": 1768285696.0, - "12": 1768285696.0, - "13": 1768285696.0, - "14": 1768285696.0, - "15": 1768285696.0, - "16": 1768285696.0, - "17": 1768285696.0, - "18": 1768285696.0, - "19": 1768285696.0, - "20": 1768285696.0, - "21": 1768285696.0, - "22": 1768285696.0, - "23": 1768285696.0, - "24": 1768285696.0, - "25": 1768285696.0, - "26": 1768285696.0, - "27": 1768285696.0, - "28": 1768285696.0, - "29": 1768285696.0, - "30": 1768285696.0, - "31": 1768285696.0, - "32": 1768285696.0, - "33": 1768285696.0, - "34": 1768285696.0, - "35": 1768285696.0, - "36": 1768285696.0, - "37": 1768285696.0, - "38": 1768285696.0, - "39": 1768285696.0, - "40": 1768285696.0, - "41": 1768285696.0, - "42": 1768285696.0, - "43": 1768285696.0, - "44": 1768285696.0, - "45": 1768285696.0, - "46": 1768285696.0, - "47": 1768285696.0, - "48": 1768285696.0, - "49": 1768285696.0, - "50": 1768285696.0 + "1": 1779868160.0, + "2": 1779868160.0, + "3": 1779868160.0, + "4": 1779868160.0, + "5": 1779868160.0, + "6": 1779868160.0, + "7": 1779868160.0, + "8": 1779868160.0, + "9": 1779868160.0, + "10": 1779868160.0, + "11": 1779868160.0, + "12": 1779868160.0, + "13": 1779868160.0, + "14": 1779868160.0, + "15": 1779868160.0, + "16": 1779868160.0, + "17": 1779868160.0, + "18": 1779868160.0, + "19": 1779868160.0, + "20": 1779868160.0, + "21": 1779868160.0, + "22": 1779868160.0, + "23": 1779868160.0, + "24": 1779868160.0, + "25": 1779868160.0, + "26": 1779868160.0, + "27": 1779868160.0, + "28": 1779868160.0, + "29": 1779868160.0, + "30": 1779868160.0, + "31": 1779868160.0, + "32": 1779868160.0, + "33": 1779868160.0, + "34": 1779868160.0, + "35": 1779868160.0, + "36": 1779868160.0, + "37": 1779868160.0, + "38": 1779868160.0, + "39": 1779868160.0, + "40": 1779868160.0, + "41": 1779868160.0, + "42": 1779868160.0, + "43": 1779868160.0, + "44": 1779868160.0, + "45": 1779868160.0, + "46": 1779868160.0, + "47": 1779868160.0, + "48": 1779868160.0, + "49": 1779868160.0, + "50": 1779868160.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2337549312.0, - "2": 3080536064.0, - "3": 3082107392.0, - "4": 3082107392.0, - "5": 3082107392.0, - "6": 3082107392.0, - "7": 3082107392.0, - "8": 3082107392.0, - "9": 3082107392.0, - "10": 3082107392.0, - "11": 3082107392.0, - "12": 3082107392.0, - "13": 3082107392.0, - "14": 3082107392.0, - "15": 3082107392.0, - "16": 3082108928.0, - "17": 3082108928.0, - "18": 3082108928.0, - "19": 3082108928.0, - "20": 3082108928.0, - "21": 3082108928.0, - "22": 3082108928.0, - "23": 3082108928.0, - "24": 3082108928.0, - "25": 3082108928.0, - "26": 3082108928.0, - "27": 3082108928.0, - "28": 3082108928.0, - "29": 3082108928.0, - "30": 3082108928.0, - "31": 3082108928.0, - "32": 3082108928.0, - "33": 3082108928.0, - "34": 3082108928.0, - "35": 3082108928.0, - "36": 3082108928.0, - "37": 3082108928.0, - "38": 3082108928.0, - "39": 3082108928.0, - "40": 3082108928.0, - "41": 3082108928.0, - "42": 3082108928.0, - "43": 3082108928.0, - "44": 3082108928.0, - "45": 3082108928.0, - "46": 3082108928.0, - "47": 3082108928.0, - "48": 3082108928.0, - "49": 3082108928.0, - "50": 3082108928.0 + "1": 2347554304.0, + "2": 3097360384.0, + "3": 3097360384.0, + "4": 3097360384.0, + "5": 3097360384.0, + "6": 3097360384.0, + "7": 3097360384.0, + "8": 3097360384.0, + "9": 3097362944.0, + "10": 3097362944.0, + "11": 3097362944.0, + "12": 3097362944.0, + "13": 3097362944.0, + "14": 3097362944.0, + "15": 3097362944.0, + "16": 3097362944.0, + "17": 3097362944.0, + "18": 3097362944.0, + "19": 3097362944.0, + "20": 3097362944.0, + "21": 3097362944.0, + "22": 3097362944.0, + "23": 3097362944.0, + "24": 3097362944.0, + "25": 3097362944.0, + "26": 3097362944.0, + "27": 3097362944.0, + "28": 3097362944.0, + "29": 3097362944.0, + "30": 3097362944.0, + "31": 3097362944.0, + "32": 3097362944.0, + "33": 3097362944.0, + "34": 3097362944.0, + "35": 3097362944.0, + "36": 3097362944.0, + "37": 3097362944.0, + "38": 3097362944.0, + "39": 3097362944.0, + "40": 3097362944.0, + "41": 3097362944.0, + "42": 3097362944.0, + "43": 3097362944.0, + "44": 3097362944.0, + "45": 3097362944.0, + "46": 3097362944.0, + "47": 3097362944.0, + "48": 3097362944.0, + "49": 3097362944.0, + "50": 3097362944.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.51798, - "2": 0.89864, - "3": 0.7978, - "4": 0.74774, - "5": 0.73987, - "6": 0.74277, - "7": 0.76779, - "8": 0.74313, - "9": 1.58315, - "10": 0.73453, - "11": 0.73215, - "12": 0.72957, - "13": 0.72967, - "14": 0.73868, - "15": 0.73216, - "16": 1.10392, - "17": 0.73363, - "18": 0.73647, - "19": 0.76464, - "20": 0.73565, - "21": 0.72858, - "22": 0.72652, - "23": 0.72858, - "24": 0.74508, - "25": 0.74166, - "26": 0.7704, - "27": 1.15428, - "28": 1.146, - "29": 0.73283, - "30": 0.73304, - "31": 0.73237, - "32": 0.7343, - "33": 0.73304, - "34": 0.72879, - "35": 0.73286, - "36": 1.74169, - "37": 1.10377, - "38": 0.73148, - "39": 0.73227, - "40": 0.73028, - "41": 0.73026, - "42": 1.15127, - "43": 1.11655, - "44": 0.73185, - "45": 1.17599, - "46": 1.07292, - "47": 0.72983, - "48": 0.72804, - "49": 0.73205, - "50": 0.72929 + "1": "nan", + "2": 6.65278, + "3": 0.80729, + "4": 0.75309, + "5": 0.75657, + "6": 0.75567, + "7": 0.75051, + "8": 0.79758, + "9": 0.74801, + "10": 0.74907, + "11": 0.75864, + "12": 0.77822, + "13": 0.77353, + "14": 0.7675, + "15": 0.74681, + "16": 0.74699, + "17": 0.74713, + "18": 0.74909, + "19": 1.17339, + "20": 1.04917, + "21": 1.0602, + "22": 0.75705, + "23": 1.40883, + "24": 1.35546, + "25": 0.77334, + "26": 0.77065, + "27": 0.7506, + "28": 0.76835, + "29": 0.75344, + "30": 0.7614, + "31": 0.76286, + "32": 0.76206, + "33": 0.7584, + "34": 0.76586, + "35": 0.79058, + "36": 0.74744, + "37": 0.75671, + "38": 0.75006, + "39": 0.75076, + "40": 0.74876, + "41": 0.74984, + "42": 0.75073, + "43": 0.87349, + "44": 0.8073, + "45": 0.77888, + "46": 0.99536, + "47": 0.76688, + "48": 0.77466, + "49": 1.15924, + "50": 0.74701 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json index 4302b8e40ca..40bea4ac462 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.48367, - "2": 10.48426, - "3": 10.48254, - "4": 10.48311, - "5": 10.4764, - "6": 10.4844, - "7": 10.48458, - "8": 10.48829, - "9": 10.49008, - "10": 10.47268, - "11": 10.47256, - "12": 10.48259, - "13": 10.47857, - "14": 10.45154, - "15": 10.47925, - "16": 10.45346, - "17": 10.45145, - "18": 10.46238, - "19": 10.44113, - "20": 10.45448, - "21": 10.43454, - "22": 10.40591, - "23": 10.39975, - "24": 10.37583, - "25": 10.38168, - "26": 10.3515, - "27": 10.35388, - "28": 10.34965, - "29": 10.28701, - "30": 10.21143, - "31": 10.17272, - "32": 10.13416, - "33": 10.14725, - "34": 10.10738, - "35": 10.10592, - "36": 10.08739, - "37": 10.08157, - "38": 10.07245, - "39": 10.00093, - "40": 9.98138, - "41": 9.92543, - "42": 9.87534, - "43": 9.88716, - "44": 9.80646, - "45": 9.82342, - "46": 9.73786, - "47": 9.74811, - "48": 9.71614, - "49": 9.74493, - "50": 9.73, - "51": 9.71492, - "52": 9.66464, - "53": 9.60912, - "54": 9.62726, - "55": 9.6101, - "56": 9.61721, - "57": 9.56794, - "58": 9.52741, - "59": 9.51674, - "60": 9.51863, - "61": 9.53132, - "62": 9.45018, - "63": 9.4572, - "64": 9.43437, - "65": 9.45816, - "66": 9.43669, - "67": 9.39678, - "68": 9.36478, - "69": 9.40956, - "70": 9.37595, - "71": 9.41738, - "72": 9.42564, - "73": 9.37611, - "74": 9.41543, - "75": 9.3788, - "76": 9.28012, - "77": 9.32212, - "78": 9.35744, - "79": 9.3215, - "80": 9.31497, - "81": 9.26785, - "82": 9.34183, - "83": 9.32151, - "84": 9.24796, - "85": 9.35033, - "86": 9.224, - "87": 9.30611, - "88": 9.29894, - "89": 9.22704, - "90": 9.28479, - "91": 9.2311, - "92": 9.27474, - "93": 9.19219, - "94": 9.23969, - "95": 9.28, - "96": 9.17525, - "97": 9.21888, - "98": 9.1721, - "99": 9.16455, - "100": 9.1482 + "1": 10.52515, + "2": 10.54464, + "3": 10.53225, + "4": 10.53963, + "5": 10.53658, + "6": 10.54252, + "7": 10.54919, + "8": 10.54127, + "9": 10.53395, + "10": 10.52862, + "11": 10.53067, + "12": 10.53697, + "13": 10.53945, + "14": 10.52443, + "15": 10.52179, + "16": 10.50323, + "17": 10.5127, + "18": 10.50939, + "19": 10.50594, + "20": 10.50997, + "21": 10.51032, + "22": 10.44577, + "23": 10.43367, + "24": 10.44118, + "25": 10.40813, + "26": 10.41475, + "27": 10.39983, + "28": 10.41071, + "29": 10.36795, + "30": 10.28189, + "31": 10.23052, + "32": 10.20533, + "33": 10.21364, + "34": 10.17063, + "35": 10.14593, + "36": 10.12739, + "37": 10.11585, + "38": 10.11919, + "39": 10.07882, + "40": 10.01252, + "41": 9.96692, + "42": 9.92758, + "43": 9.92049, + "44": 9.86484, + "45": 9.83822, + "46": 9.77998, + "47": 9.77239, + "48": 9.74982, + "49": 9.778, + "50": 9.75365, + "51": 9.76005, + "52": 9.70986, + "53": 9.67159, + "54": 9.69242, + "55": 9.67995, + "56": 9.67294, + "57": 9.60193, + "58": 9.61969, + "59": 9.54603, + "60": 9.6122, + "61": 9.54787, + "62": 9.53722, + "63": 9.52349, + "64": 9.51067, + "65": 9.52347, + "66": 9.49062, + "67": 9.45803, + "68": 9.44212, + "69": 9.44324, + "70": 9.43922, + "71": 9.47041, + "72": 9.45725, + "73": 9.40536, + "74": 9.45635, + "75": 9.40507, + "76": 9.37316, + "77": 9.34396, + "78": 9.37786, + "79": 9.41166, + "80": 9.34493, + "81": 9.33096, + "82": 9.34642, + "83": 9.31611, + "84": 9.29902, + "85": 9.33654, + "86": 9.26861, + "87": 9.31388, + "88": 9.29805, + "89": 9.26894, + "90": 9.34087, + "91": 9.25631, + "92": 9.29651, + "93": 9.29935, + "94": 9.27574, + "95": 9.28048, + "96": 9.18131, + "97": 9.26438, + "98": 9.19722, + "99": 9.21951, + "100": 9.22923 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2570.0, - "2": 1923.0, - "3": 1512.0, - "4": 2322.0, - "5": 2033.0, - "6": 1774.0, - "7": 2781.0, - "8": 2460.0, - "9": 2308.0, - "10": 2635.0, - "11": 2397.0, - "12": 1817.0, - "13": 2348.0, - "14": 2749.0, - "15": 2027.0, - "16": 2719.0, - "17": 2487.0, - "18": 2533.0, - "19": 2547.0, - "20": 2850.0, - "21": 1990.0, - "22": 2964.0, - "23": 2695.0, - "24": 2772.0, - "25": 2524.0, - "26": 2977.0, - "27": 2627.0, - "28": 2776.0, - "29": 2514.0, - "30": 2843.0, - "31": 2070.0, - "32": 2362.0, - "33": 2211.0, - "34": 2574.0, - "35": 2499.0, - "36": 2943.0, - "37": 3347.0, - "38": 2628.0, - "39": 2781.0, - "40": 3335.0, - "41": 1800.0, - "42": 1598.0, - "43": 1719.0, - "44": 2631.0, - "45": 3492.0, - "46": 2988.0, - "47": 2784.0, - "48": 2951.0, - "49": 2907.0, - "50": 2113.0, - "51": 1961.0, - "52": 2445.0, - "53": 3654.0, - "54": 3489.0, - "55": 3419.0, - "56": 4364.0, - "57": 4145.0, - "58": 4155.0, - "59": 1699.0, - "60": 2358.0, - "61": 2070.0, - "62": 4094.0, - "63": 3516.0, - "64": 4287.0, - "65": 2891.0, - "66": 1733.0, - "67": 1914.0, - "68": 4420.0, - "69": 4479.0, - "70": 4656.0, - "71": 2135.0, - "72": 4476.0, - "73": 4048.0, - "74": 3199.0, - "75": 4735.0, - "76": 2218.0, - "77": 4952.0, - "78": 4158.0, - "79": 2657.0, - "80": 3846.0, - "81": 3472.0, - "82": 2979.0, - "83": 5364.0, - "84": 4430.0, - "85": 4249.0, - "86": 3509.0, - "87": 4817.0, - "88": 3434.0, - "89": 4711.0, - "90": 4448.0, - "91": 4374.0, - "92": 3507.0, - "93": 5549.0, - "94": 3635.0, - "95": 4540.0, - "96": 3659.0, - "97": 3756.0, - "98": 4513.0, - "99": 4491.0, - "100": 3445.0 + "1": 2694.0, + "2": 2038.0, + "3": 1725.0, + "4": 2486.0, + "5": 2286.0, + "6": 2006.0, + "7": 2358.0, + "8": 2557.0, + "9": 2439.0, + "10": 2467.0, + "11": 2695.0, + "12": 2001.0, + "13": 2274.0, + "14": 2852.0, + "15": 2293.0, + "16": 2718.0, + "17": 2764.0, + "18": 2543.0, + "19": 2783.0, + "20": 2742.0, + "21": 2199.0, + "22": 2754.0, + "23": 2704.0, + "24": 2827.0, + "25": 2664.0, + "26": 2810.0, + "27": 2678.0, + "28": 2751.0, + "29": 2624.0, + "30": 2875.0, + "31": 2498.0, + "32": 2970.0, + "33": 2380.0, + "34": 2542.0, + "35": 2774.0, + "36": 2985.0, + "37": 3282.0, + "38": 2706.0, + "39": 2924.0, + "40": 3560.0, + "41": 1578.0, + "42": 1531.0, + "43": 1744.0, + "44": 2892.0, + "45": 3574.0, + "46": 3340.0, + "47": 3082.0, + "48": 2605.0, + "49": 2496.0, + "50": 2102.0, + "51": 1769.0, + "52": 2645.0, + "53": 3852.0, + "54": 3615.0, + "55": 3365.0, + "56": 4146.0, + "57": 3860.0, + "58": 4285.0, + "59": 1773.0, + "60": 2698.0, + "61": 2192.0, + "62": 3981.0, + "63": 3916.0, + "64": 4576.0, + "65": 3081.0, + "66": 1913.0, + "67": 2157.0, + "68": 4109.0, + "69": 4392.0, + "70": 4007.0, + "71": 2078.0, + "72": 4121.0, + "73": 3482.0, + "74": 2587.0, + "75": 5381.0, + "76": 2630.0, + "77": 4087.0, + "78": 4282.0, + "79": 2267.0, + "80": 3523.0, + "81": 3970.0, + "82": 3684.0, + "83": 4798.0, + "84": 5334.0, + "85": 4550.0, + "86": 4009.0, + "87": 3707.0, + "88": 4522.0, + "89": 3812.0, + "90": 4600.0, + "91": 4730.0, + "92": 3955.0, + "93": 3787.0, + "94": 2962.0, + "95": 4073.0, + "96": 3648.0, + "97": 3327.0, + "98": 4531.0, + "99": 3795.0, + "100": 3279.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1785063936.0, - "2": 1785063936.0, - "3": 1785063936.0, - "4": 1785063936.0, - "5": 1785063936.0, - "6": 1785063936.0, - "7": 1785063936.0, - "8": 1785063936.0, - "9": 1785063936.0, - "10": 1785063936.0, - "11": 1785063936.0, - "12": 1785063936.0, - "13": 1785063936.0, - "14": 1785063936.0, - "15": 1785063936.0, - "16": 1785063936.0, - "17": 1785063936.0, - "18": 1785063936.0, - "19": 1785063936.0, - "20": 1785063936.0, - "21": 1785063936.0, - "22": 1785063936.0, - "23": 1785063936.0, - "24": 1785063936.0, - "25": 1785063936.0, - "26": 1785063936.0, - "27": 1785063936.0, - "28": 1785588224.0, - "29": 1785063936.0, - "30": 1785063936.0, - "31": 1785063936.0, - "32": 1785063936.0, - "33": 1785063936.0, - "34": 1785063936.0, - "35": 1785063936.0, - "36": 1785063936.0, - "37": 1785063936.0, - "38": 1785063936.0, - "39": 1785063936.0, - "40": 1785063936.0, - "41": 1785063936.0, - "42": 1785063936.0, - "43": 1785063936.0, - "44": 1785063936.0, - "45": 1785063936.0, - "46": 1785063936.0, - "47": 1785063936.0, - "48": 1785063936.0, - "49": 1785063936.0, - "50": 1785063936.0, - "51": 1785063936.0, - "52": 1785063936.0, - "53": 1785063936.0, - "54": 1785063936.0, - "55": 1785063936.0, - "56": 1785063936.0, - "57": 1785063936.0, - "58": 1785063936.0, - "59": 1785063936.0, - "60": 1785063936.0, - "61": 1785063936.0, - "62": 1785063936.0, - "63": 1785063936.0, - "64": 1785063936.0, - "65": 1785063936.0, - "66": 1785063936.0, - "67": 1785063936.0, - "68": 1785063936.0, - "69": 1785063936.0, - "70": 1785063936.0, - "71": 1785063936.0, - "72": 1785063936.0, - "73": 1785063936.0, - "74": 1785063936.0, - "75": 1785063936.0, - "76": 1785063936.0, - "77": 1785063936.0, - "78": 1785063936.0, - "79": 1785063936.0, - "80": 1785063936.0, - "81": 1785063936.0, - "82": 1785063936.0, - "83": 1785063936.0, - "84": 1785063936.0, - "85": 1785063936.0, - "86": 1785063936.0, - "87": 1785063936.0, - "88": 1785063936.0, - "89": 1785063936.0, - "90": 1785063936.0, - "91": 1785063936.0, - "92": 1785063936.0, - "93": 1785063936.0, - "94": 1785063936.0, - "95": 1785063936.0, - "96": 1785063936.0, - "97": 1785063936.0, - "98": 1785063936.0, - "99": 1785063936.0, - "100": 1785063936.0 + "1": 1796646400.0, + "2": 1796646400.0, + "3": 1796646400.0, + "4": 1796646400.0, + "5": 1796646400.0, + "6": 1796646400.0, + "7": 1796646400.0, + "8": 1796646400.0, + "9": 1796646400.0, + "10": 1796646400.0, + "11": 1796646400.0, + "12": 1796646400.0, + "13": 1796646400.0, + "14": 1796646400.0, + "15": 1796646400.0, + "16": 1796646400.0, + "17": 1796646400.0, + "18": 1796646400.0, + "19": 1796646400.0, + "20": 1796646400.0, + "21": 1796646400.0, + "22": 1796646400.0, + "23": 1796646400.0, + "24": 1796646400.0, + "25": 1796646400.0, + "26": 1796646400.0, + "27": 1796646400.0, + "28": 1796646400.0, + "29": 1796646400.0, + "30": 1796646400.0, + "31": 1796646400.0, + "32": 1796646400.0, + "33": 1796646400.0, + "34": 1796646400.0, + "35": 1796646400.0, + "36": 1796646400.0, + "37": 1796646400.0, + "38": 1796646400.0, + "39": 1796646400.0, + "40": 1796646400.0, + "41": 1796646400.0, + "42": 1796646400.0, + "43": 1796646400.0, + "44": 1796646400.0, + "45": 1796646400.0, + "46": 1796646400.0, + "47": 1796646400.0, + "48": 1796646400.0, + "49": 1796646400.0, + "50": 1796646400.0, + "51": 1796646400.0, + "52": 1796646400.0, + "53": 1796646400.0, + "54": 1796646400.0, + "55": 1796646400.0, + "56": 1796646400.0, + "57": 1796646400.0, + "58": 1796646400.0, + "59": 1796646400.0, + "60": 1796646400.0, + "61": 1796646400.0, + "62": 1796646400.0, + "63": 1796646400.0, + "64": 1796646400.0, + "65": 1796646400.0, + "66": 1796646400.0, + "67": 1796646400.0, + "68": 1796646400.0, + "69": 1796646400.0, + "70": 1796646400.0, + "71": 1796646400.0, + "72": 1796646400.0, + "73": 1796646400.0, + "74": 1796646400.0, + "75": 1796646400.0, + "76": 1796646400.0, + "77": 1796646400.0, + "78": 1796646400.0, + "79": 1796646400.0, + "80": 1796646400.0, + "81": 1796646400.0, + "82": 1796646400.0, + "83": 1796646400.0, + "84": 1796646400.0, + "85": 1796646400.0, + "86": 1796646400.0, + "87": 1796646400.0, + "88": 1796646400.0, + "89": 1796646400.0, + "90": 1796646400.0, + "91": 1796646400.0, + "92": 1796646400.0, + "93": 1796646400.0, + "94": 1796646400.0, + "95": 1796646400.0, + "96": 1796646400.0, + "97": 1796646400.0, + "98": 1796646400.0, + "99": 1796646400.0, + "100": 1796646400.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2366910464.0, - "2": 3109372928.0, - "3": 3109372928.0, - "4": 3109372928.0, - "5": 3109372928.0, - "6": 3109892608.0, - "7": 3109892608.0, - "8": 3111465472.0, - "9": 3111465472.0, - "10": 3111465472.0, - "11": 3111465472.0, - "12": 3111470080.0, - "13": 3111470080.0, - "14": 3111470080.0, - "15": 3111470080.0, - "16": 3111470080.0, - "17": 3111470080.0, - "18": 3111470080.0, - "19": 3111470080.0, - "20": 3111470080.0, - "21": 3111470080.0, - "22": 3111470080.0, - "23": 3111470080.0, - "24": 3111470080.0, - "25": 3111470080.0, - "26": 3111470080.0, - "27": 3111470080.0, - "28": 3111470080.0, - "29": 3111470080.0, - "30": 3111470080.0, - "31": 3111470080.0, - "32": 3111470080.0, - "33": 3111470080.0, - "34": 3111470080.0, - "35": 3111470080.0, - "36": 3111988224.0, - "37": 3111988224.0, - "38": 3111988224.0, - "39": 3111988224.0, - "40": 3111988224.0, - "41": 3111988224.0, - "42": 3111988224.0, - "43": 3111988224.0, - "44": 3111988224.0, - "45": 3111988224.0, - "46": 3111988224.0, - "47": 3111988224.0, - "48": 3111988224.0, - "49": 3111988224.0, - "50": 3111988224.0, - "51": 3111988224.0, - "52": 3111988224.0, - "53": 3111988224.0, - "54": 3111988224.0, - "55": 3111988224.0, - "56": 3111988224.0, - "57": 3111988224.0, - "58": 3111988224.0, - "59": 3111988224.0, - "60": 3111988224.0, - "61": 3111988224.0, - "62": 3111988224.0, - "63": 3111988224.0, - "64": 3111988224.0, - "65": 3111988224.0, - "66": 3111988224.0, - "67": 3111988224.0, - "68": 3111988224.0, - "69": 3111988224.0, - "70": 3111988224.0, - "71": 3111988224.0, - "72": 3111988224.0, - "73": 3111988224.0, - "74": 3111988224.0, - "75": 3111988224.0, - "76": 3111988224.0, - "77": 3111988224.0, - "78": 3111988224.0, - "79": 3111988224.0, - "80": 3111988224.0, - "81": 3111988224.0, - "82": 3111988224.0, - "83": 3111988224.0, - "84": 3111988224.0, - "85": 3111988224.0, - "86": 3111988224.0, - "87": 3111988224.0, - "88": 3111988224.0, - "89": 3111988224.0, - "90": 3111988224.0, - "91": 3111988224.0, - "92": 3111988224.0, - "93": 3111988224.0, - "94": 3111988224.0, - "95": 3111988224.0, - "96": 3111988224.0, - "97": 3111988224.0, - "98": 3111988224.0, - "99": 3111988224.0, - "100": 3111988224.0 + "1": 2376915456.0, + "2": 3124626944.0, + "3": 3124626944.0, + "4": 3124626944.0, + "5": 3124626944.0, + "6": 3124626944.0, + "7": 3124626944.0, + "8": 3124626944.0, + "9": 3124626944.0, + "10": 3124626944.0, + "11": 3124626944.0, + "12": 3124626944.0, + "13": 3124626944.0, + "14": 3124626944.0, + "15": 3124626944.0, + "16": 3124626944.0, + "17": 3124626944.0, + "18": 3124626944.0, + "19": 3124626944.0, + "20": 3124626944.0, + "21": 3124626944.0, + "22": 3124626944.0, + "23": 3124626944.0, + "24": 3124626944.0, + "25": 3124626944.0, + "26": 3124626944.0, + "27": 3124626944.0, + "28": 3124626944.0, + "29": 3124626944.0, + "30": 3124626944.0, + "31": 3124626944.0, + "32": 3124626944.0, + "33": 3124626944.0, + "34": 3124626944.0, + "35": 3124626944.0, + "36": 3124626944.0, + "37": 3124626944.0, + "38": 3124626944.0, + "39": 3124626944.0, + "40": 3124626944.0, + "41": 3124626944.0, + "42": 3124626944.0, + "43": 3124626944.0, + "44": 3124626944.0, + "45": 3124626944.0, + "46": 3124626944.0, + "47": 3124626944.0, + "48": 3124626944.0, + "49": 3124626944.0, + "50": 3124626944.0, + "51": 3124626944.0, + "52": 3124626944.0, + "53": 3124626944.0, + "54": 3124626944.0, + "55": 3124626944.0, + "56": 3124626944.0, + "57": 3124626944.0, + "58": 3124626944.0, + "59": 3124626944.0, + "60": 3124626944.0, + "61": 3124626944.0, + "62": 3124626944.0, + "63": 3124626944.0, + "64": 3124626944.0, + "65": 3124626944.0, + "66": 3124626944.0, + "67": 3124626944.0, + "68": 3124626944.0, + "69": 3124626944.0, + "70": 3124626944.0, + "71": 3124626944.0, + "72": 3124626944.0, + "73": 3124626944.0, + "74": 3124626944.0, + "75": 3124626944.0, + "76": 3124626944.0, + "77": 3124626944.0, + "78": 3124626944.0, + "79": 3124626944.0, + "80": 3124626944.0, + "81": 3124626944.0, + "82": 3124626944.0, + "83": 3124626944.0, + "84": 3124626944.0, + "85": 3124626944.0, + "86": 3124626944.0, + "87": 3124626944.0, + "88": 3124626944.0, + "89": 3124626944.0, + "90": 3124626944.0, + "91": 3124626944.0, + "92": 3124626944.0, + "93": 3124626944.0, + "94": 3124626944.0, + "95": 3124626944.0, + "96": 3124626944.0, + "97": 3124626944.0, + "98": 3124626944.0, + "99": 3124626944.0, + "100": 3124626944.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 11.18542, - "2": 0.99156, - "3": 0.93327, - "4": 0.90681, - "5": 0.90504, - "6": 0.90415, - "7": 0.90281, - "8": 1.14692, - "9": 1.44306, - "10": 0.89873, - "11": 0.90113, - "12": 0.89984, - "13": 1.24688, - "14": 0.90399, - "15": 0.90327, - "16": 0.89945, - "17": 0.90194, - "18": 0.89984, - "19": 0.89878, - "20": 0.89865, - "21": 0.90167, - "22": 0.90176, - "23": 0.90423, - "24": 2.02738, - "25": 0.90411, - "26": 0.90354, - "27": 0.90203, - "28": 1.26668, - "29": 0.89854, - "30": 1.45828, - "31": 0.90574, - "32": 0.90137, - "33": 1.70784, - "34": 0.89924, - "35": 0.90059, - "36": 0.90525, - "37": 0.90801, - "38": 0.90691, - "39": 0.9048, - "40": 1.47233, - "41": 0.91116, - "42": 1.22468, - "43": 1.0011, - "44": 1.22804, - "45": 1.12037, - "46": 1.00115, - "47": 0.91003, - "48": 0.91208, - "49": 0.91545, - "50": 0.91, - "51": 0.91471, - "52": 0.91238, - "53": 0.90865, - "54": 0.91588, - "55": 0.91889, - "56": 0.91882, - "57": 0.92072, - "58": 0.9202, - "59": 0.92355, - "60": 0.92097, - "61": 0.91924, - "62": 0.91496, - "63": 0.91648, - "64": 0.91615, - "65": 0.91333, - "66": 0.91743, - "67": 0.9094, - "68": 0.91122, - "69": 0.90894, - "70": 0.91968, - "71": 0.92199, - "72": 0.91976, - "73": 0.92156, - "74": 0.91995, - "75": 0.90852, - "76": 0.90983, - "77": 1.19595, - "78": 0.9092, - "79": 1.16564, - "80": 1.06882, - "81": 0.90637, - "82": 0.90812, - "83": 0.91, - "84": 0.90847, - "85": 0.88526, - "86": 0.87691, - "87": 0.88881, - "88": 0.87995, - "89": 0.9042, - "90": 0.90269, - "91": 0.90587, - "92": 0.90035, - "93": 0.89985, - "94": 0.90093, - "95": 0.90088, - "96": 0.89612, - "97": 0.89401, - "98": 0.89773, - "99": 0.90081, - "100": 0.8988 + "1": "nan", + "2": 6.77679, + "3": 0.96083, + "4": 0.93727, + "5": 0.88655, + "6": 0.88656, + "7": 0.88571, + "8": 0.8873, + "9": 0.8882, + "10": 0.8882, + "11": 0.88472, + "12": 0.88772, + "13": 0.88421, + "14": 0.88527, + "15": 0.88246, + "16": 0.88256, + "17": 0.88373, + "18": 0.88359, + "19": 0.8828, + "20": 1.45042, + "21": 1.16338, + "22": 1.44778, + "23": 0.88114, + "24": 0.88173, + "25": 0.88445, + "26": 0.8792, + "27": 0.8816, + "28": 0.87992, + "29": 0.88178, + "30": 0.88128, + "31": 0.88303, + "32": 0.88483, + "33": 0.88377, + "34": 0.88155, + "35": 0.88366, + "36": 0.89127, + "37": 0.88175, + "38": 0.88225, + "39": 0.88197, + "40": 0.88138, + "41": 0.88142, + "42": 0.88687, + "43": 0.88349, + "44": 0.88194, + "45": 0.88206, + "46": 0.88445, + "47": 1.08645, + "48": 0.8826, + "49": 1.27411, + "50": 1.07698, + "51": 1.09616, + "52": 1.26661, + "53": 0.88344, + "54": 0.91118, + "55": 0.88351, + "56": 0.88478, + "57": 0.88696, + "58": 0.8847, + "59": 0.88986, + "60": 0.88289, + "61": 0.88661, + "62": 0.88371, + "63": 0.8837, + "64": 0.88432, + "65": 1.76327, + "66": 1.32625, + "67": 0.88423, + "68": 0.88521, + "69": 0.88368, + "70": 1.47401, + "71": 0.8851, + "72": 0.88573, + "73": 0.88815, + "74": 0.89154, + "75": 0.99337, + "76": 1.04308, + "77": 0.8854, + "78": 1.08033, + "79": 1.08794, + "80": 1.03415, + "81": 1.02193, + "82": 0.99549, + "83": 0.88423, + "84": 0.89046, + "85": 0.8859, + "86": 0.8846, + "87": 0.88492, + "88": 0.8837, + "89": 0.88611, + "90": 0.88537, + "91": 0.8864, + "92": 0.92431, + "93": 1.42975, + "94": 1.42328, + "95": 1.38667, + "96": 0.88689, + "97": 0.88623, + "98": 0.88695, + "99": 0.88564, + "100": 0.88402 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json index fe766022589..cbc6ad4a652 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.4837, - "2": 10.48435, - "3": 10.48251, - "4": 10.48303, - "5": 10.47647, - "6": 10.48423, - "7": 10.48457, - "8": 10.48837, - "9": 10.49003, - "10": 10.47255, - "11": 10.47245, - "12": 10.4828, - "13": 10.47855, - "14": 10.45162, - "15": 10.47936, - "16": 10.45364, - "17": 10.45143, - "18": 10.46239, - "19": 10.44136, - "20": 10.45438, - "21": 10.43469, - "22": 10.40587, - "23": 10.39982, - "24": 10.37585, - "25": 10.38173, - "26": 10.35154, - "27": 10.35401, - "28": 10.3497, - "29": 10.28714, - "30": 10.21194, - "31": 10.17274, - "32": 10.13439, - "33": 10.14753, - "34": 10.10759, - "35": 10.10592, - "36": 10.08756, - "37": 10.08177, - "38": 10.07257, - "39": 10.0013, - "40": 9.9816, - "41": 9.92551, - "42": 9.87537, - "43": 9.88725, - "44": 9.80659, - "45": 9.82349, - "46": 9.73821, - "47": 9.74829, - "48": 9.71628, - "49": 9.74489, - "50": 9.73004, - "51": 9.71501, - "52": 9.66488, - "53": 9.60917, - "54": 9.62733, - "55": 9.61022, - "56": 9.61723, - "57": 9.56794, - "58": 9.52733, - "59": 9.51677, - "60": 9.5188, - "61": 9.53149, - "62": 9.45031, - "63": 9.45717, - "64": 9.43441, - "65": 9.45812, - "66": 9.43672, - "67": 9.39687, - "68": 9.36469, - "69": 9.40964, - "70": 9.37606, - "71": 9.41737, - "72": 9.42585, - "73": 9.37601, - "74": 9.4154, - "75": 9.37896, - "76": 9.28004, - "77": 9.32212, - "78": 9.35755, - "79": 9.3216, - "80": 9.31491, - "81": 9.26783, - "82": 9.342, - "83": 9.32159, - "84": 9.24786, - "85": 9.35018, - "86": 9.22384, - "87": 9.30618, - "88": 9.29905, - "89": 9.22708, - "90": 9.28498, - "91": 9.23123, - "92": 9.27487, - "93": 9.19233, - "94": 9.23985, - "95": 9.28002, - "96": 9.17532, - "97": 9.21898, - "98": 9.17203, - "99": 9.16444, - "100": 9.14821 + "1": 10.52631, + "2": 10.54437, + "3": 10.53254, + "4": 10.54074, + "5": 10.5359, + "6": 10.54329, + "7": 10.54765, + "8": 10.54167, + "9": 10.53321, + "10": 10.52933, + "11": 10.53062, + "12": 10.53814, + "13": 10.53802, + "14": 10.52489, + "15": 10.52257, + "16": 10.50286, + "17": 10.51143, + "18": 10.5081, + "19": 10.50518, + "20": 10.51059, + "21": 10.51051, + "22": 10.44691, + "23": 10.43219, + "24": 10.44067, + "25": 10.40702, + "26": 10.41509, + "27": 10.39929, + "28": 10.41147, + "29": 10.36654, + "30": 10.28105, + "31": 10.23151, + "32": 10.2049, + "33": 10.21579, + "34": 10.17143, + "35": 10.14594, + "36": 10.12636, + "37": 10.11518, + "38": 10.11834, + "39": 10.08081, + "40": 10.0113, + "41": 9.96736, + "42": 9.92723, + "43": 9.92086, + "44": 9.86387, + "45": 9.83849, + "46": 9.77899, + "47": 9.77275, + "48": 9.74926, + "49": 9.77905, + "50": 9.75337, + "51": 9.75957, + "52": 9.71049, + "53": 9.67204, + "54": 9.69247, + "55": 9.68095, + "56": 9.67223, + "57": 9.60238, + "58": 9.61977, + "59": 9.54652, + "60": 9.61145, + "61": 9.54865, + "62": 9.53743, + "63": 9.52268, + "64": 9.51137, + "65": 9.52253, + "66": 9.49069, + "67": 9.45779, + "68": 9.44155, + "69": 9.44406, + "70": 9.4415, + "71": 9.47005, + "72": 9.4581, + "73": 9.40624, + "74": 9.45654, + "75": 9.40466, + "76": 9.37369, + "77": 9.34406, + "78": 9.37846, + "79": 9.41118, + "80": 9.34482, + "81": 9.33075, + "82": 9.34654, + "83": 9.31619, + "84": 9.29945, + "85": 9.33659, + "86": 9.26918, + "87": 9.31391, + "88": 9.29854, + "89": 9.26934, + "90": 9.34147, + "91": 9.25663, + "92": 9.29671, + "93": 9.2992, + "94": 9.27519, + "95": 9.28018, + "96": 9.18148, + "97": 9.2644, + "98": 9.19676, + "99": 9.21954, + "100": 9.22959 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2554.0, - "2": 1919.0, - "3": 1521.0, - "4": 2330.0, - "5": 2010.0, - "6": 1725.0, - "7": 2803.0, - "8": 2435.0, - "9": 2286.0, - "10": 2570.0, - "11": 2438.0, - "12": 1829.0, - "13": 2332.0, - "14": 2832.0, - "15": 2008.0, - "16": 2659.0, - "17": 2454.0, - "18": 2500.0, - "19": 2588.0, - "20": 2834.0, - "21": 2042.0, - "22": 3037.0, - "23": 2702.0, - "24": 2700.0, - "25": 2568.0, - "26": 2896.0, - "27": 2735.0, - "28": 2699.0, - "29": 2548.0, - "30": 2843.0, - "31": 2160.0, - "32": 2458.0, - "33": 2130.0, - "34": 2517.0, - "35": 2597.0, - "36": 3001.0, - "37": 3305.0, - "38": 2682.0, - "39": 2805.0, - "40": 3430.0, - "41": 1767.0, - "42": 1516.0, - "43": 1798.0, - "44": 2790.0, - "45": 3578.0, - "46": 3016.0, - "47": 2890.0, - "48": 3065.0, - "49": 2914.0, - "50": 2208.0, - "51": 1900.0, - "52": 2483.0, - "53": 3763.0, - "54": 3478.0, - "55": 3412.0, - "56": 4400.0, - "57": 4019.0, - "58": 4253.0, - "59": 1805.0, - "60": 2457.0, - "61": 2045.0, - "62": 3994.0, - "63": 3650.0, - "64": 4466.0, - "65": 2968.0, - "66": 1837.0, - "67": 1961.0, - "68": 4347.0, - "69": 4441.0, - "70": 4452.0, - "71": 2131.0, - "72": 4523.0, - "73": 4105.0, - "74": 3300.0, - "75": 4651.0, - "76": 2216.0, - "77": 4932.0, - "78": 4218.0, - "79": 2784.0, - "80": 3824.0, - "81": 3472.0, - "82": 2976.0, - "83": 5282.0, - "84": 4464.0, - "85": 4344.0, - "86": 3460.0, - "87": 4774.0, - "88": 3426.0, - "89": 4600.0, - "90": 4360.0, - "91": 4283.0, - "92": 3362.0, - "93": 5633.0, - "94": 3676.0, - "95": 4610.0, - "96": 3449.0, - "97": 3751.0, - "98": 4524.0, - "99": 4399.0, - "100": 3295.0 + "1": 2617.0, + "2": 2094.0, + "3": 1798.0, + "4": 2505.0, + "5": 2297.0, + "6": 2078.0, + "7": 2301.0, + "8": 2625.0, + "9": 2497.0, + "10": 2504.0, + "11": 2784.0, + "12": 1904.0, + "13": 2303.0, + "14": 2875.0, + "15": 2193.0, + "16": 2770.0, + "17": 2665.0, + "18": 2573.0, + "19": 2627.0, + "20": 2816.0, + "21": 2300.0, + "22": 2823.0, + "23": 2599.0, + "24": 2828.0, + "25": 2674.0, + "26": 2747.0, + "27": 2770.0, + "28": 2854.0, + "29": 2523.0, + "30": 2875.0, + "31": 2490.0, + "32": 2860.0, + "33": 2319.0, + "34": 2527.0, + "35": 2726.0, + "36": 3054.0, + "37": 3300.0, + "38": 2754.0, + "39": 2733.0, + "40": 3533.0, + "41": 1743.0, + "42": 1529.0, + "43": 1772.0, + "44": 2961.0, + "45": 3611.0, + "46": 3413.0, + "47": 3127.0, + "48": 2770.0, + "49": 2539.0, + "50": 2141.0, + "51": 1761.0, + "52": 2660.0, + "53": 3915.0, + "54": 3652.0, + "55": 3296.0, + "56": 4245.0, + "57": 4057.0, + "58": 4098.0, + "59": 1783.0, + "60": 2705.0, + "61": 2237.0, + "62": 3914.0, + "63": 3917.0, + "64": 4487.0, + "65": 3080.0, + "66": 1921.0, + "67": 2164.0, + "68": 4161.0, + "69": 4434.0, + "70": 4023.0, + "71": 2111.0, + "72": 4044.0, + "73": 3510.0, + "74": 2619.0, + "75": 5231.0, + "76": 2626.0, + "77": 4109.0, + "78": 4337.0, + "79": 2321.0, + "80": 3502.0, + "81": 3952.0, + "82": 3644.0, + "83": 4827.0, + "84": 5477.0, + "85": 4396.0, + "86": 3953.0, + "87": 3499.0, + "88": 4439.0, + "89": 3874.0, + "90": 4637.0, + "91": 4734.0, + "92": 3999.0, + "93": 3762.0, + "94": 3075.0, + "95": 4043.0, + "96": 3804.0, + "97": 3339.0, + "98": 4824.0, + "99": 3730.0, + "100": 3312.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1768285696.0, - "2": 1768285696.0, - "3": 1768285696.0, - "4": 1768285696.0, - "5": 1768285696.0, - "6": 1768285696.0, - "7": 1768285696.0, - "8": 1768285696.0, - "9": 1768285696.0, - "10": 1768285696.0, - "11": 1768285696.0, - "12": 1768285696.0, - "13": 1768285696.0, - "14": 1768285696.0, - "15": 1768285696.0, - "16": 1768285696.0, - "17": 1768285696.0, - "18": 1768285696.0, - "19": 1768285696.0, - "20": 1768285696.0, - "21": 1768285696.0, - "22": 1768285696.0, - "23": 1768285696.0, - "24": 1768285696.0, - "25": 1768285696.0, - "26": 1768285696.0, - "27": 1768285696.0, - "28": 1768285696.0, - "29": 1768285696.0, - "30": 1768285696.0, - "31": 1768285696.0, - "32": 1768285696.0, - "33": 1768285696.0, - "34": 1768285696.0, - "35": 1768285696.0, - "36": 1768285696.0, - "37": 1768285696.0, - "38": 1768285696.0, - "39": 1768285696.0, - "40": 1768285696.0, - "41": 1768285696.0, - "42": 1768285696.0, - "43": 1768285696.0, - "44": 1768285696.0, - "45": 1768285696.0, - "46": 1768285696.0, - "47": 1768285696.0, - "48": 1768285696.0, - "49": 1768285696.0, - "50": 1768285696.0, - "51": 1768285696.0, - "52": 1768285696.0, - "53": 1768285696.0, - "54": 1768285696.0, - "55": 1768285696.0, - "56": 1768285696.0, - "57": 1768285696.0, - "58": 1768285696.0, - "59": 1768285696.0, - "60": 1768285696.0, - "61": 1768285696.0, - "62": 1768285696.0, - "63": 1768285696.0, - "64": 1768285696.0, - "65": 1768285696.0, - "66": 1768285696.0, - "67": 1768285696.0, - "68": 1768285696.0, - "69": 1768285696.0, - "70": 1768285696.0, - "71": 1768285696.0, - "72": 1768285696.0, - "73": 1768285696.0, - "74": 1769334272.0, - "75": 1768285696.0, - "76": 1768285696.0, - "77": 1768285696.0, - "78": 1768285696.0, - "79": 1768285696.0, - "80": 1768285696.0, - "81": 1768285696.0, - "82": 1768285696.0, - "83": 1768285696.0, - "84": 1768285696.0, - "85": 1768285696.0, - "86": 1768285696.0, - "87": 1768285696.0, - "88": 1768285696.0, - "89": 1768285696.0, - "90": 1768285696.0, - "91": 1768285696.0, - "92": 1768285696.0, - "93": 1768285696.0, - "94": 1768285696.0, - "95": 1768285696.0, - "96": 1768285696.0, - "97": 1768285696.0, - "98": 1768285696.0, - "99": 1768285696.0, - "100": 1768285696.0 + "1": 1779868160.0, + "2": 1779868160.0, + "3": 1779868160.0, + "4": 1779868160.0, + "5": 1779868160.0, + "6": 1779868160.0, + "7": 1779868160.0, + "8": 1779868160.0, + "9": 1779868160.0, + "10": 1779868160.0, + "11": 1779868160.0, + "12": 1779868160.0, + "13": 1779868160.0, + "14": 1779868160.0, + "15": 1779868160.0, + "16": 1779868160.0, + "17": 1779868160.0, + "18": 1779868160.0, + "19": 1779868160.0, + "20": 1779868160.0, + "21": 1779868160.0, + "22": 1779868160.0, + "23": 1779868160.0, + "24": 1779868160.0, + "25": 1779868160.0, + "26": 1779868160.0, + "27": 1779868160.0, + "28": 1779868160.0, + "29": 1779868160.0, + "30": 1779868160.0, + "31": 1779868160.0, + "32": 1779868160.0, + "33": 1779868160.0, + "34": 1779868160.0, + "35": 1779868160.0, + "36": 1779868160.0, + "37": 1779868160.0, + "38": 1779868160.0, + "39": 1779868160.0, + "40": 1779868160.0, + "41": 1779868160.0, + "42": 1779868160.0, + "43": 1779868160.0, + "44": 1779868160.0, + "45": 1779868160.0, + "46": 1779868160.0, + "47": 1779868160.0, + "48": 1779868160.0, + "49": 1779868160.0, + "50": 1779868160.0, + "51": 1779868160.0, + "52": 1779868160.0, + "53": 1779868160.0, + "54": 1779868160.0, + "55": 1779868160.0, + "56": 1779868160.0, + "57": 1779868160.0, + "58": 1779868160.0, + "59": 1779868160.0, + "60": 1779868160.0, + "61": 1779868160.0, + "62": 1779868160.0, + "63": 1779868160.0, + "64": 1779868160.0, + "65": 1779868160.0, + "66": 1779868160.0, + "67": 1779868160.0, + "68": 1779868160.0, + "69": 1779868160.0, + "70": 1779868160.0, + "71": 1779868160.0, + "72": 1779868160.0, + "73": 1779868160.0, + "74": 1779868160.0, + "75": 1779868160.0, + "76": 1779868160.0, + "77": 1779868160.0, + "78": 1779868160.0, + "79": 1779868160.0, + "80": 1779868160.0, + "81": 1779868160.0, + "82": 1779868160.0, + "83": 1779868160.0, + "84": 1779868160.0, + "85": 1779868160.0, + "86": 1779868160.0, + "87": 1779868160.0, + "88": 1779868160.0, + "89": 1779868160.0, + "90": 1779868160.0, + "91": 1779868160.0, + "92": 1779868160.0, + "93": 1779868160.0, + "94": 1779868160.0, + "95": 1779868160.0, + "96": 1779868160.0, + "97": 1779868160.0, + "98": 1779868160.0, + "99": 1779868160.0, + "100": 1779868160.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2337549312.0, - "2": 3080536064.0, - "3": 3080536064.0, - "4": 3080536064.0, - "5": 3080536064.0, - "6": 3080536064.0, - "7": 3080536064.0, - "8": 3080536064.0, - "9": 3080536064.0, - "10": 3080536064.0, - "11": 3080536064.0, - "12": 3080536064.0, - "13": 3080536064.0, - "14": 3080536064.0, - "15": 3080536064.0, - "16": 3080536064.0, - "17": 3080536064.0, - "18": 3080536064.0, - "19": 3080536064.0, - "20": 3080536064.0, - "21": 3080536064.0, - "22": 3080536064.0, - "23": 3082107392.0, - "24": 3082107392.0, - "25": 3082107392.0, - "26": 3082107392.0, - "27": 3082107392.0, - "28": 3082107392.0, - "29": 3082107392.0, - "30": 3082107392.0, - "31": 3082107392.0, - "32": 3082107392.0, - "33": 3082107392.0, - "34": 3082107392.0, - "35": 3082107392.0, - "36": 3082107392.0, - "37": 3082107392.0, - "38": 3082107392.0, - "39": 3082107392.0, - "40": 3082107392.0, - "41": 3082107392.0, - "42": 3082107392.0, - "43": 3082107392.0, - "44": 3082107392.0, - "45": 3082107392.0, - "46": 3082107392.0, - "47": 3082107392.0, - "48": 3082107392.0, - "49": 3082107392.0, - "50": 3082107392.0, - "51": 3082107392.0, - "52": 3082107392.0, - "53": 3082107392.0, - "54": 3082107392.0, - "55": 3082107392.0, - "56": 3082107392.0, - "57": 3082107392.0, - "58": 3082107392.0, - "59": 3082107392.0, - "60": 3082107392.0, - "61": 3082107392.0, - "62": 3082107392.0, - "63": 3082107392.0, - "64": 3082107392.0, - "65": 3082107392.0, - "66": 3082107392.0, - "67": 3082107392.0, - "68": 3082107392.0, - "69": 3082107392.0, - "70": 3082107392.0, - "71": 3082107392.0, - "72": 3082107392.0, - "73": 3082107392.0, - "74": 3082108928.0, - "75": 3082108928.0, - "76": 3082108928.0, - "77": 3082108928.0, - "78": 3082108928.0, - "79": 3082108928.0, - "80": 3082108928.0, - "81": 3082108928.0, - "82": 3082108928.0, - "83": 3082108928.0, - "84": 3082108928.0, - "85": 3082108928.0, - "86": 3082108928.0, - "87": 3082108928.0, - "88": 3082108928.0, - "89": 3082108928.0, - "90": 3082108928.0, - "91": 3082108928.0, - "92": 3082108928.0, - "93": 3082108928.0, - "94": 3082108928.0, - "95": 3082108928.0, - "96": 3082108928.0, - "97": 3082108928.0, - "98": 3082108928.0, - "99": 3082108928.0, - "100": 3082108928.0 + "1": 2347554304.0, + "2": 3097362432.0, + "3": 3097362944.0, + "4": 3097362944.0, + "5": 3097362944.0, + "6": 3097362944.0, + "7": 3097362944.0, + "8": 3097362944.0, + "9": 3097362944.0, + "10": 3097362944.0, + "11": 3097362944.0, + "12": 3097362944.0, + "13": 3097362944.0, + "14": 3097362944.0, + "15": 3097362944.0, + "16": 3097362944.0, + "17": 3097362944.0, + "18": 3097362944.0, + "19": 3097362944.0, + "20": 3097362944.0, + "21": 3097362944.0, + "22": 3097362944.0, + "23": 3097362944.0, + "24": 3097362944.0, + "25": 3097362944.0, + "26": 3097362944.0, + "27": 3097362944.0, + "28": 3097362944.0, + "29": 3097362944.0, + "30": 3097362944.0, + "31": 3097362944.0, + "32": 3097362944.0, + "33": 3097362944.0, + "34": 3097362944.0, + "35": 3097362944.0, + "36": 3097362944.0, + "37": 3097362944.0, + "38": 3097362944.0, + "39": 3097362944.0, + "40": 3097362944.0, + "41": 3097362944.0, + "42": 3097362944.0, + "43": 3097362944.0, + "44": 3097362944.0, + "45": 3097362944.0, + "46": 3097362944.0, + "47": 3097362944.0, + "48": 3097362944.0, + "49": 3097362944.0, + "50": 3097362944.0, + "51": 3097362944.0, + "52": 3097362944.0, + "53": 3097362944.0, + "54": 3097362944.0, + "55": 3097362944.0, + "56": 3097362944.0, + "57": 3097362944.0, + "58": 3097362944.0, + "59": 3097362944.0, + "60": 3097362944.0, + "61": 3097362944.0, + "62": 3097362944.0, + "63": 3097362944.0, + "64": 3097362944.0, + "65": 3097362944.0, + "66": 3097362944.0, + "67": 3097362944.0, + "68": 3097362944.0, + "69": 3097362944.0, + "70": 3097362944.0, + "71": 3097362944.0, + "72": 3097362944.0, + "73": 3097362944.0, + "74": 3097362944.0, + "75": 3097362944.0, + "76": 3097362944.0, + "77": 3097362944.0, + "78": 3097362944.0, + "79": 3097362944.0, + "80": 3097362944.0, + "81": 3097362944.0, + "82": 3097362944.0, + "83": 3097362944.0, + "84": 3097362944.0, + "85": 3097362944.0, + "86": 3097362944.0, + "87": 3097362944.0, + "88": 3097362944.0, + "89": 3097362944.0, + "90": 3097362944.0, + "91": 3097362944.0, + "92": 3097362944.0, + "93": 3097362944.0, + "94": 3097362944.0, + "95": 3097362944.0, + "96": 3097362944.0, + "97": 3097362944.0, + "98": 3097362944.0, + "99": 3097362944.0, + "100": 3097362944.0 } }, "iteration-time": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.24286, - "2": 0.82679, - "3": 0.79409, - "4": 0.76435, - "5": 0.77118, - "6": 0.74558, - "7": 0.74667, - "8": 0.77701, - "9": 1.97605, - "10": 0.75455, - "11": 0.74398, - "12": 0.74114, - "13": 0.7501, - "14": 0.74704, - "15": 0.74029, - "16": 1.1307, - "17": 0.73862, - "18": 0.73445, - "19": 0.73384, - "20": 0.73927, - "21": 0.74153, - "22": 0.73755, - "23": 0.76958, - "24": 0.7377, - "25": 0.73987, - "26": 0.77483, - "27": 1.30185, - "28": 0.76, - "29": 0.75644, - "30": 0.77716, - "31": 0.83125, - "32": 0.80226, - "33": 0.74041, - "34": 0.74334, - "35": 1.17386, - "36": 1.53868, - "37": 0.77003, - "38": 0.76358, - "39": 0.77015, - "40": 0.77216, - "41": 0.76865, - "42": 1.214, - "43": 1.04802, - "44": 0.758, - "45": 1.27424, - "46": 1.12734, - "47": 0.7573, - "48": 0.74875, - "49": 0.74989, - "50": 0.75416, - "51": 0.75904, - "52": 0.75338, - "53": 0.75124, - "54": 0.73937, - "55": 0.74096, - "56": 0.75129, - "57": 0.75097, - "58": 0.74724, - "59": 0.74661, - "60": 0.74245, - "61": 0.74378, - "62": 0.74491, - "63": 0.74147, - "64": 0.74756, - "65": 0.74511, - "66": 0.74967, - "67": 0.7462, - "68": 0.74176, - "69": 0.74258, - "70": 0.74323, - "71": 0.74412, - "72": 0.74522, - "73": 0.74053, - "74": 0.74312, - "75": 0.74157, - "76": 1.12862, - "77": 0.74522, - "78": 1.08987, - "79": 0.94746, - "80": 0.877, - "81": 0.74472, - "82": 0.74142, - "83": 0.74342, - "84": 0.7418, - "85": 0.74017, - "86": 0.7399, - "87": 0.73594, - "88": 0.73916, - "89": 0.73537, - "90": 0.75037, - "91": 0.7341, - "92": 0.73469, - "93": 0.7333, - "94": 0.73221, - "95": 0.73055, - "96": 0.73133, - "97": 0.73591, - "98": 0.74108, - "99": 0.74467, - "100": 0.73711 + "1": "nan", + "2": 6.64764, + "3": 0.79084, + "4": 0.75471, + "5": 0.75484, + "6": 0.75223, + "7": 0.75145, + "8": 0.75475, + "9": 0.75533, + "10": 0.75399, + "11": 0.75254, + "12": 0.77237, + "13": 0.76941, + "14": 0.75371, + "15": 0.74878, + "16": 0.75748, + "17": 0.75013, + "18": 0.75393, + "19": 0.75308, + "20": 0.82461, + "21": 1.14815, + "22": 1.40873, + "23": 0.78134, + "24": 1.13678, + "25": 0.77065, + "26": 0.77109, + "27": 0.75236, + "28": 0.7541, + "29": 0.97476, + "30": 0.98559, + "31": 0.75096, + "32": 0.7479, + "33": 0.74863, + "34": 0.74963, + "35": 0.74885, + "36": 0.82711, + "37": 0.85378, + "38": 0.76888, + "39": 0.81485, + "40": 0.75679, + "41": 0.74893, + "42": 0.75104, + "43": 0.7494, + "44": 0.75881, + "45": 0.75257, + "46": 0.75827, + "47": 0.7504, + "48": 0.98004, + "49": 0.91266, + "50": 0.75782, + "51": 1.21882, + "52": 0.75137, + "53": 0.98393, + "54": 0.74921, + "55": 0.7528, + "56": 0.75403, + "57": 0.75214, + "58": 0.99844, + "59": 0.75039, + "60": 0.91137, + "61": 0.75353, + "62": 0.75465, + "63": 0.76022, + "64": 0.74973, + "65": 0.7503, + "66": 0.75508, + "67": 0.75386, + "68": 0.7506, + "69": 0.75959, + "70": 0.75257, + "71": 0.75584, + "72": 0.75851, + "73": 0.75296, + "74": 0.74943, + "75": 0.755, + "76": 0.99487, + "77": 0.75879, + "78": 0.92637, + "79": 0.75765, + "80": 0.91589, + "81": 1.0084, + "82": 0.75509, + "83": 1.18005, + "84": 0.75533, + "85": 0.76431, + "86": 0.81253, + "87": 1.14347, + "88": 0.75555, + "89": 0.75535, + "90": 1.13878, + "91": 0.75648, + "92": 0.75589, + "93": 0.75482, + "94": 0.75398, + "95": 0.75489, + "96": 0.75471, + "97": 0.7583, + "98": 0.75293, + "99": 0.752, + "100": 0.74962 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index bc0ee3bcb1e..15e0ee3f6e7 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.42626, - "2": 10.41171, - "3": 10.41885, - "4": 10.42153, - "5": 10.42192, - "6": 10.41563, - "7": 10.42859, - "8": 10.42079, - "9": 10.43013, - "10": 10.4087, - "11": 10.43493, - "12": 10.40244, - "13": 10.42282, - "14": 10.41239, - "15": 10.40952, - "16": 10.40789, - "17": 10.38944, - "18": 10.38859, - "19": 10.37154, - "20": 10.40445, - "21": 10.36609, - "22": 10.34962, - "23": 10.354, - "24": 10.30131, - "25": 10.3111, - "26": 10.30252, - "27": 10.28202, - "28": 10.27924, - "29": 10.23941, - "30": 10.14739, - "31": 10.10547, - "32": 10.09424, - "33": 10.09034, - "34": 10.0645, - "35": 10.04644, - "36": 10.03308, - "37": 10.00522, - "38": 10.00297, - "39": 9.91428, - "40": 9.91112, - "41": 9.86566, - "42": 9.78083, - "43": 9.79476, - "44": 9.73084, - "45": 9.74269, - "46": 9.63796, - "47": 9.68694, - "48": 9.63705, - "49": 9.65524, - "50": 9.65788 + "1": 10.51806, + "2": 10.49565, + "3": 10.51267, + "4": 10.51141, + "5": 10.49369, + "6": 10.48608, + "7": 10.49117, + "8": 10.50241, + "9": 10.50043, + "10": 10.49509, + "11": 10.49799, + "12": 10.51384, + "13": 10.49269, + "14": 10.48489, + "15": 10.502, + "16": 10.48076, + "17": 10.47767, + "18": 10.4852, + "19": 10.47833, + "20": 10.47379, + "21": 10.47299, + "22": 10.42889, + "23": 10.41776, + "24": 10.41531, + "25": 10.40697, + "26": 10.38423, + "27": 10.37494, + "28": 10.37653, + "29": 10.32604, + "30": 10.24422, + "31": 10.23408, + "32": 10.19383, + "33": 10.20522, + "34": 10.16587, + "35": 10.15852, + "36": 10.12269, + "37": 10.11497, + "38": 10.10705, + "39": 10.06641, + "40": 10.02632, + "41": 9.98556, + "42": 9.92428, + "43": 9.90962, + "44": 9.88341, + "45": 9.84923, + "46": 9.81212, + "47": 9.79588, + "48": 9.76846, + "49": 9.82614, + "50": 9.78525 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3452.0, - "2": 2890.0, - "3": 1856.0, - "4": 3256.0, - "5": 3333.0, - "6": 2985.0, - "7": 3208.0, - "8": 3314.0, - "9": 3210.0, - "10": 3297.0, - "11": 2833.0, - "12": 2982.0, - "13": 3178.0, - "14": 3705.0, - "15": 3252.0, - "16": 3615.0, - "17": 3789.0, - "18": 3620.0, - "19": 3327.0, - "20": 3539.0, - "21": 3129.0, - "22": 3597.0, - "23": 3595.0, - "24": 2781.0, - "25": 3585.0, - "26": 3607.0, - "27": 4015.0, - "28": 3836.0, - "29": 3716.0, - "30": 4150.0, - "31": 3472.0, - "32": 3024.0, - "33": 3553.0, - "34": 3793.0, - "35": 3757.0, - "36": 4205.0, - "37": 4221.0, - "38": 3819.0, - "39": 3866.0, - "40": 3554.0, - "41": 2883.0, - "42": 2592.0, - "43": 2856.0, - "44": 3173.0, - "45": 4948.0, - "46": 4572.0, - "47": 4077.0, - "48": 4355.0, - "49": 3885.0, - "50": 3266.0 + "1": 3606.0, + "2": 3061.0, + "3": 2908.0, + "4": 3502.0, + "5": 3205.0, + "6": 2982.0, + "7": 3777.0, + "8": 3489.0, + "9": 3413.0, + "10": 2253.0, + "11": 3940.0, + "12": 2933.0, + "13": 3430.0, + "14": 4218.0, + "15": 3298.0, + "16": 4000.0, + "17": 3603.0, + "18": 3495.0, + "19": 3808.0, + "20": 3820.0, + "21": 3255.0, + "22": 3972.0, + "23": 3917.0, + "24": 2882.0, + "25": 2803.0, + "26": 4001.0, + "27": 3883.0, + "28": 3910.0, + "29": 2547.0, + "30": 3964.0, + "31": 3511.0, + "32": 3717.0, + "33": 3377.0, + "34": 3548.0, + "35": 3571.0, + "36": 4212.0, + "37": 4317.0, + "38": 4005.0, + "39": 4202.0, + "40": 4472.0, + "41": 2706.0, + "42": 2542.0, + "43": 2678.0, + "44": 3338.0, + "45": 4117.0, + "46": 3984.0, + "47": 3750.0, + "48": 4219.0, + "49": 3967.0, + "50": 3325.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1662815232.0, - "2": 1662815232.0, - "3": 1662815232.0, - "4": 1662815232.0, - "5": 1662815232.0, - "6": 1662815232.0, - "7": 1662815232.0, - "8": 1662815232.0, - "9": 1662815232.0, - "10": 1662815232.0, - "11": 1662815232.0, - "12": 1662815232.0, - "13": 1662815232.0, - "14": 1662815232.0, - "15": 1662815232.0, - "16": 1662815232.0, - "17": 1662815232.0, - "18": 1662815232.0, - "19": 1662815232.0, - "20": 1662815232.0, - "21": 1662815232.0, - "22": 1662815232.0, - "23": 1662815232.0, - "24": 1662815232.0, - "25": 1662815232.0, - "26": 1662815232.0, - "27": 1662815232.0, - "28": 1662815232.0, - "29": 1662815232.0, - "30": 1662815232.0, - "31": 1662815232.0, - "32": 1662815232.0, - "33": 1662815232.0, - "34": 1662815232.0, - "35": 1662815232.0, - "36": 1662815232.0, - "37": 1662815232.0, - "38": 1662815232.0, - "39": 1662815232.0, - "40": 1662815232.0, - "41": 1662815232.0, - "42": 1662815232.0, - "43": 1662815232.0, - "44": 1662815232.0, - "45": 1662815232.0, - "46": 1662815232.0, - "47": 1662815232.0, - "48": 1662815232.0, - "49": 1662815232.0, - "50": 1662815232.0 + "1": 1670994432.0, + "2": 1670994432.0, + "3": 1670994432.0, + "4": 1670994432.0, + "5": 1670994432.0, + "6": 1670994432.0, + "7": 1670994432.0, + "8": 1670994432.0, + "9": 1670994432.0, + "10": 1670994432.0, + "11": 1670994432.0, + "12": 1670994432.0, + "13": 1670994432.0, + "14": 1670994432.0, + "15": 1670994432.0, + "16": 1670994432.0, + "17": 1670994432.0, + "18": 1670994432.0, + "19": 1670994432.0, + "20": 1670994432.0, + "21": 1670994432.0, + "22": 1670994432.0, + "23": 1670994432.0, + "24": 1670994432.0, + "25": 1670994432.0, + "26": 1670994432.0, + "27": 1670994432.0, + "28": 1670994432.0, + "29": 1670994432.0, + "30": 1670994432.0, + "31": 1670994432.0, + "32": 1670994432.0, + "33": 1670994432.0, + "34": 1670994432.0, + "35": 1670994432.0, + "36": 1670994432.0, + "37": 1670994432.0, + "38": 1670994432.0, + "39": 1670994432.0, + "40": 1670994432.0, + "41": 1670994432.0, + "42": 1670994432.0, + "43": 1670994432.0, + "44": 1670994432.0, + "45": 1670994432.0, + "46": 1670994432.0, + "47": 1670994432.0, + "48": 1670994432.0, + "49": 1670994432.0, + "50": 1670994432.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2507528704.0, - "2": 3206498816.0, - "3": 3206498816.0, - "4": 3206498816.0, - "5": 3206498816.0, - "6": 3206498816.0, - "7": 3206498816.0, - "8": 3206498816.0, - "9": 3206498816.0, - "10": 3206498816.0, - "11": 3206498816.0, - "12": 3206498816.0, - "13": 3206498816.0, - "14": 3206498816.0, - "15": 3206498816.0, - "16": 3206498816.0, - "17": 3206498816.0, - "18": 3206498816.0, - "19": 3206498816.0, - "20": 3206498816.0, - "21": 3206498816.0, - "22": 3206498816.0, - "23": 3206498816.0, - "24": 3206498816.0, - "25": 3206498816.0, - "26": 3206498816.0, - "27": 3206498816.0, - "28": 3206498816.0, - "29": 3206498816.0, - "30": 3206498816.0, - "31": 3206498816.0, - "32": 3206498816.0, - "33": 3206498816.0, - "34": 3206498816.0, - "35": 3206498816.0, - "36": 3206498816.0, - "37": 3206498816.0, - "38": 3206498816.0, - "39": 3206498816.0, - "40": 3206498816.0, - "41": 3206498816.0, - "42": 3206498816.0, - "43": 3206498816.0, - "44": 3206498816.0, - "45": 3206498816.0, - "46": 3206498816.0, - "47": 3206498816.0, - "48": 3206498816.0, - "49": 3206498816.0, - "50": 3206498816.0 + "1": 2521210368.0, + "2": 3221232128.0, + "3": 3221232128.0, + "4": 3221232128.0, + "5": 3221232128.0, + "6": 3221232128.0, + "7": 3221232128.0, + "8": 3221232128.0, + "9": 3221232128.0, + "10": 3221232128.0, + "11": 3221232128.0, + "12": 3221232128.0, + "13": 3221232128.0, + "14": 3221232128.0, + "15": 3221232128.0, + "16": 3221232128.0, + "17": 3221232128.0, + "18": 3221232128.0, + "19": 3221232128.0, + "20": 3221232128.0, + "21": 3221232128.0, + "22": 3221232128.0, + "23": 3221232128.0, + "24": 3221232128.0, + "25": 3221232128.0, + "26": 3221232128.0, + "27": 3221232128.0, + "28": 3221232128.0, + "29": 3221232128.0, + "30": 3221232128.0, + "31": 3221232128.0, + "32": 3221232128.0, + "33": 3221232128.0, + "34": 3221232128.0, + "35": 3221232128.0, + "36": 3221232128.0, + "37": 3221232128.0, + "38": 3221232128.0, + "39": 3221232128.0, + "40": 3221232128.0, + "41": 3221232128.0, + "42": 3221232128.0, + "43": 3221232128.0, + "44": 3221232128.0, + "45": 3221232128.0, + "46": 3221232128.0, + "47": 3221232128.0, + "48": 3221232128.0, + "49": 3221232128.0, + "50": 3221232128.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.8403, - "2": 1.75656, - "3": 1.70317, - "4": 1.66346, - "5": 1.6703, - "6": 1.66753, - "7": 2.21547, - "8": 1.68918, - "9": 1.77005, - "10": 1.75261, - "11": 1.77153, - "12": 1.65933, - "13": 1.65337, - "14": 2.37845, - "15": 2.04839, - "16": 2.07092, - "17": 1.67053, - "18": 1.6729, - "19": 1.65463, - "20": 1.67298, - "21": 1.66273, - "22": 1.64743, - "23": 1.64351, - "24": 1.63695, - "25": 1.66076, - "26": 1.66885, - "27": 1.64423, - "28": 1.64773, - "29": 1.64565, - "30": 1.64171, - "31": 1.63705, - "32": 1.64216, - "33": 1.64504, - "34": 1.64255, - "35": 1.64762, - "36": 1.64913, - "37": 1.63831, - "38": 1.65213, - "39": 1.66065, - "40": 1.63954, - "41": 1.63964, - "42": 1.64408, - "43": 1.64113, - "44": 1.65016, - "45": 1.63618, - "46": 1.65229, - "47": 1.64761, - "48": 1.76963, - "49": 1.62535, - "50": 1.63142 + "1": "nan", + "2": 5.99604, + "3": 1.63944, + "4": 1.61469, + "5": 1.62145, + "6": 1.62065, + "7": 1.65639, + "8": 1.62144, + "9": 1.61901, + "10": 1.61769, + "11": 1.61883, + "12": 1.66045, + "13": 1.68762, + "14": 1.61513, + "15": 1.61681, + "16": 1.61577, + "17": 1.61337, + "18": 1.60921, + "19": 1.63599, + "20": 1.6134, + "21": 1.6127, + "22": 1.74027, + "23": 1.6175, + "24": 1.82766, + "25": 2.13333, + "26": 1.60883, + "27": 1.61034, + "28": 1.61171, + "29": 1.61548, + "30": 2.01691, + "31": 2.01164, + "32": 1.61379, + "33": 2.00557, + "34": 1.61478, + "35": 2.00811, + "36": 1.619, + "37": 1.62046, + "38": 2.05026, + "39": 2.03159, + "40": 1.61554, + "41": 1.6173, + "42": 1.99606, + "43": 1.61893, + "44": 1.61472, + "45": 1.61889, + "46": 1.6138, + "47": 1.62029, + "48": 1.61753, + "49": 1.61676, + "50": 1.6175 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json index 0174aaf4684..21b63e8effd 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp1_pp4_memory_speed/golden_values_dev_dgx_gb200.json @@ -6,29 +6,29 @@ "values": { "1": 12.59654, "2": 12.60484, - "3": 12.59799, - "4": 12.59687, - "5": 12.59285, - "6": 12.59259, - "7": 12.58011, - "8": 12.54308, - "9": 12.51049, - "10": 12.49679, - "11": 12.32875, - "12": 12.29944, - "13": 12.2346, + "3": 12.59797, + "4": 12.5969, + "5": 12.59289, + "6": 12.59265, + "7": 12.58015, + "8": 12.54318, + "9": 12.5105, + "10": 12.49672, + "11": 12.32881, + "12": 12.29939, + "13": 12.23473, "14": 12.23325, - "15": 11.81699, - "16": 11.80131, - "17": 11.76433, - "18": 11.73986, - "19": 11.6089, - "20": 11.50642, - "21": 11.26938, - "22": 11.37967, - "23": 11.288, + "15": 11.817, + "16": 11.80134, + "17": 11.76435, + "18": 11.73993, + "19": 11.60889, + "20": 11.50648, + "21": 11.26946, + "22": 11.37969, + "23": 11.28801, "24": 11.16331, - "25": 10.99891 + "25": 10.9989 } }, "num-zeros": { @@ -36,31 +36,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 521037632.0, - "2": 521666368.0, - "3": 520934816.0, - "4": 521227264.0, - "5": 520996064.0, - "6": 521371840.0, - "7": 521420352.0, - "8": 521057344.0, - "9": 521461504.0, - "10": 521178624.0, - "11": 522279104.0, - "12": 521439616.0, - "13": 521475712.0, - "14": 522445376.0, - "15": 521592960.0, - "16": 521416448.0, - "17": 521026496.0, - "18": 521277760.0, - "19": 521154656.0, - "20": 521134784.0, - "21": 522907648.0, - "22": 521590304.0, - "23": 521352384.0, - "24": 521424640.0, - "25": 523543808.0 + "1": 521038208.0, + "2": 521665504.0, + "3": 520934784.0, + "4": 521226912.0, + "5": 520995584.0, + "6": 521371136.0, + "7": 521420160.0, + "8": 521056672.0, + "9": 521461088.0, + "10": 521178048.0, + "11": 522280576.0, + "12": 521439168.0, + "13": 521475200.0, + "14": 522446240.0, + "15": 521590592.0, + "16": 521416064.0, + "17": 521027200.0, + "18": 521279360.0, + "19": 521153088.0, + "20": 521134144.0, + "21": 522908352.0, + "22": 521591200.0, + "23": 521351488.0, + "24": 521424000.0, + "25": 523543424.0 } }, "mem-allocated-bytes": { @@ -133,29 +133,29 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.93489, + "2": 10.37368, "3": "nan", - "4": 0.83885, + "4": 0.82471, "5": "nan", - "6": 0.86101, + "6": 0.81418, "7": "nan", - "8": 0.82617, + "8": 0.81547, "9": "nan", - "10": 0.8264, + "10": 0.82718, "11": "nan", - "12": 0.82456, + "12": 0.82851, "13": "nan", - "14": 0.82414, + "14": 0.81363, "15": "nan", - "16": 0.82604, + "16": 0.81374, "17": "nan", - "18": 0.83002, + "18": 0.81396, "19": "nan", - "20": 0.8234, + "20": 0.81346, "21": "nan", - "22": 0.82298, + "22": 0.81255, "23": "nan", - "24": 0.82311, + "24": 0.81353, "25": "nan" } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json index ca51cd1bcb3..dc762c9b513 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_7b_tp4_pp1_memory_speed/golden_values_dev_dgx_gb200.json @@ -6,29 +6,29 @@ "values": { "1": 12.61164, "2": 12.60596, - "3": 12.60278, + "3": 12.60284, "4": 12.59692, - "5": 12.5956, - "6": 12.59777, - "7": 12.58051, - "8": 12.53845, - "9": 12.51222, - "10": 12.49859, - "11": 12.32384, - "12": 12.29418, - "13": 12.23141, - "14": 12.22824, - "15": 11.82221, - "16": 11.80412, - "17": 11.76119, - "18": 11.73708, - "19": 11.61309, + "5": 12.59563, + "6": 12.59765, + "7": 12.58048, + "8": 12.53848, + "9": 12.51216, + "10": 12.4986, + "11": 12.32362, + "12": 12.29423, + "13": 12.23125, + "14": 12.22834, + "15": 11.82216, + "16": 11.80406, + "17": 11.76114, + "18": 11.7371, + "19": 11.61304, "20": 11.50147, - "21": 11.26475, - "22": 11.37638, - "23": 11.28398, - "24": 11.1565, - "25": 10.99865 + "21": 11.26477, + "22": 11.37633, + "23": 11.28391, + "24": 11.15655, + "25": 10.99866 } }, "num-zeros": { @@ -36,31 +36,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 523049152.0, - "2": 523677792.0, - "3": 522947712.0, - "4": 523241632.0, - "5": 523021120.0, - "6": 523374368.0, - "7": 523437888.0, - "8": 523083584.0, - "9": 523470432.0, - "10": 523196128.0, - "11": 524297728.0, - "12": 523455584.0, - "13": 523501312.0, - "14": 524479392.0, - "15": 523634048.0, - "16": 523462624.0, - "17": 523079392.0, - "18": 523360448.0, - "19": 523209952.0, - "20": 523228480.0, - "21": 524938432.0, - "22": 523660512.0, - "23": 523415872.0, - "24": 523485056.0, - "25": 525638592.0 + "1": 523050144.0, + "2": 523678816.0, + "3": 522945600.0, + "4": 523240640.0, + "5": 523021472.0, + "6": 523373120.0, + "7": 523437344.0, + "8": 523085504.0, + "9": 523469120.0, + "10": 523195520.0, + "11": 524297440.0, + "12": 523455616.0, + "13": 523497152.0, + "14": 524479520.0, + "15": 523635392.0, + "16": 523462432.0, + "17": 523079680.0, + "18": 523362816.0, + "19": 523208736.0, + "20": 523229056.0, + "21": 524937344.0, + "22": 523659200.0, + "23": 523415552.0, + "24": 523485568.0, + "25": 525640512.0 } }, "mem-allocated-bytes": { @@ -133,29 +133,29 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.88247, + "2": 5.70576, "3": "nan", - "4": 0.98359, + "4": 0.89304, "5": "nan", - "6": 0.91373, + "6": 0.89085, "7": "nan", - "8": 1.07044, + "8": 0.89054, "9": "nan", - "10": 0.91309, + "10": 0.88818, "11": "nan", - "12": 0.91579, + "12": 0.88741, "13": "nan", - "14": 0.90609, + "14": 0.88829, "15": "nan", - "16": 0.90906, + "16": 0.89204, "17": "nan", - "18": 0.91134, + "18": 0.8886, "19": "nan", - "20": 0.90623, + "20": 0.88626, "21": "nan", - "22": 0.91236, + "22": 0.8871, "23": "nan", - "24": 0.9145, + "24": 0.88991, "25": "nan" } } diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json index bac18297ae6..1597a12fc00 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset/golden_values_dev_dgx_gb200.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.77536, + "1": 10.77535, "2": 10.78444, - "3": 10.78593, - "4": 10.7484, - "5": 10.81554, - "6": 10.82691, - "7": 10.78469, - "8": 10.77764, + "3": 10.78594, + "4": 10.74844, + "5": 10.81549, + "6": 10.82694, + "7": 10.78465, + "8": 10.77766, "9": 10.78351, "10": 10.74241, - "11": 10.83031, - "12": 10.80335, - "13": 10.81653, - "14": 10.82186, - "15": 10.74223, - "16": 10.75087, - "17": 10.71888, - "18": 10.74308, - "19": 10.7407, - "20": 10.63713, - "21": 10.6277, - "22": 10.48435, - "23": 10.65701, - "24": 10.52682, - "25": 10.47546, - "26": 10.54091, - "27": 10.55554, - "28": 10.52147, + "11": 10.8303, + "12": 10.80334, + "13": 10.81651, + "14": 10.82185, + "15": 10.7422, + "16": 10.75086, + "17": 10.71886, + "18": 10.74306, + "19": 10.74073, + "20": 10.63717, + "21": 10.62764, + "22": 10.48433, + "23": 10.657, + "24": 10.52681, + "25": 10.47547, + "26": 10.54093, + "27": 10.55549, + "28": 10.52151, "29": 10.53465, - "30": 10.30892, - "31": 10.06663, + "30": 10.30894, + "31": 10.06666, "32": 10.41746, - "33": 10.42487, - "34": 10.1739, - "35": 10.22475, - "36": 10.18282, - "37": 10.29689, + "33": 10.42488, + "34": 10.17386, + "35": 10.2248, + "36": 10.18284, + "37": 10.29686, "38": 10.14801, "39": 10.36934, - "40": 10.04004, - "41": 10.10752, - "42": 10.18198, + "40": 10.04006, + "41": 10.10749, + "42": 10.18199, "43": 9.79649, - "44": 9.91071, - "45": 9.79715, + "44": 9.91069, + "45": 9.79712, "46": 9.79411, - "47": 10.11365, - "48": 9.82516, - "49": 9.50416, - "50": 9.88698 + "47": 10.11362, + "48": 9.82518, + "49": 9.50417, + "50": 9.887 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1625.0, - "2": 1666.0, - "3": 1695.0, - "4": 1746.0, - "5": 1977.0, - "6": 1839.0, - "7": 1894.0, - "8": 1665.0, - "9": 1929.0, - "10": 1436.0, - "11": 1794.0, - "12": 1845.0, - "13": 1976.0, - "14": 1931.0, - "15": 1971.0, - "16": 2095.0, - "17": 1805.0, - "18": 1764.0, - "19": 1753.0, - "20": 1693.0, - "21": 1872.0, - "22": 1669.0, - "23": 2113.0, - "24": 1589.0, - "25": 1679.0, - "26": 1667.0, - "27": 1779.0, - "28": 2025.0, - "29": 1940.0, - "30": 1885.0, - "31": 1623.0, - "32": 1978.0, - "33": 2203.0, - "34": 1947.0, - "35": 2040.0, - "36": 2002.0, - "37": 2346.0, - "38": 2100.0, - "39": 2479.0, - "40": 2258.0, - "41": 2347.0, - "42": 2331.0, - "43": 2125.0, - "44": 2126.0, - "45": 2130.0, - "46": 2342.0, - "47": 2550.0, - "48": 2401.0, - "49": 2216.0, - "50": 2456.0 + "1": 1597.0, + "2": 1627.0, + "3": 1645.0, + "4": 1760.0, + "5": 1863.0, + "6": 1829.0, + "7": 1908.0, + "8": 1642.0, + "9": 1885.0, + "10": 1444.0, + "11": 1844.0, + "12": 1833.0, + "13": 1903.0, + "14": 1906.0, + "15": 1984.0, + "16": 2015.0, + "17": 1820.0, + "18": 1763.0, + "19": 1723.0, + "20": 1681.0, + "21": 1875.0, + "22": 1677.0, + "23": 1981.0, + "24": 1571.0, + "25": 1588.0, + "26": 1659.0, + "27": 1732.0, + "28": 2035.0, + "29": 1977.0, + "30": 1932.0, + "31": 1579.0, + "32": 1890.0, + "33": 2186.0, + "34": 1984.0, + "35": 2000.0, + "36": 1941.0, + "37": 2334.0, + "38": 2132.0, + "39": 2533.0, + "40": 2156.0, + "41": 2318.0, + "42": 2339.0, + "43": 1998.0, + "44": 2097.0, + "45": 2178.0, + "46": 2287.0, + "47": 2409.0, + "48": 2319.0, + "49": 2104.0, + "50": 2433.0 } }, "mem-allocated-bytes": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.46491, - "3": 0.11014, - "4": 0.09722, - "5": 0.09703, - "6": 0.09705, - "7": 0.09593, - "8": 0.09584, - "9": 0.09505, - "10": 0.0949, - "11": 0.09504, - "12": 0.09589, - "13": 0.09506, - "14": 0.09425, - "15": 0.09404, - "16": 0.09465, - "17": 0.09237, - "18": 0.09201, - "19": 0.09159, - "20": 0.09124, - "21": 0.09092, - "22": 0.09028, - "23": 0.08966, - "24": 0.08893, - "25": 0.09042, - "26": 0.09055, - "27": 0.08889, - "28": 0.08857, - "29": 0.0884, - "30": 0.08807, - "31": 0.08777, - "32": 0.08747, - "33": 0.0876, - "34": 0.08733, - "35": 0.0886, - "36": 0.08828, - "37": 0.08789, - "38": 0.08768, - "39": 0.08819, - "40": 0.08922, - "41": 0.08797, - "42": 0.0876, - "43": 0.0868, - "44": 0.08693, - "45": 0.08661, - "46": 0.08657, - "47": 0.08769, - "48": 0.08644, - "49": 0.08681, - "50": 0.08702 + "2": 3.85031, + "3": 0.10579, + "4": 0.08811, + "5": 0.08828, + "6": 0.09023, + "7": 0.09011, + "8": 0.09009, + "9": 0.08883, + "10": 0.09032, + "11": 0.08909, + "12": 0.09028, + "13": 0.08981, + "14": 0.0908, + "15": 0.09035, + "16": 0.08825, + "17": 0.09092, + "18": 0.09041, + "19": 0.09012, + "20": 0.09006, + "21": 0.08995, + "22": 0.09051, + "23": 0.09078, + "24": 0.09133, + "25": 0.0906, + "26": 0.09043, + "27": 0.08991, + "28": 0.08972, + "29": 0.09046, + "30": 0.08921, + "31": 0.09085, + "32": 0.09076, + "33": 0.0898, + "34": 0.08988, + "35": 0.09085, + "36": 0.08951, + "37": 0.09036, + "38": 0.08966, + "39": 0.08995, + "40": 0.0898, + "41": 0.09082, + "42": 0.09019, + "43": 0.09295, + "44": 0.09078, + "45": 0.0912, + "46": 0.09208, + "47": 0.09077, + "48": 0.09093, + "49": 0.09052, + "50": 0.08959 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json index 8bcd3aa91d5..045c3d3b47a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files/golden_values_dev_dgx_gb200.json @@ -7,52 +7,52 @@ "1": 10.77472, "2": 10.7834, "3": 10.783, - "4": 10.74952, - "5": 10.8207, - "6": 10.8234, - "7": 10.79076, - "8": 10.78002, - "9": 10.78621, - "10": 10.74365, + "4": 10.74953, + "5": 10.82069, + "6": 10.82339, + "7": 10.79075, + "8": 10.78, + "9": 10.78617, + "10": 10.74367, "11": 10.8322, "12": 10.80441, - "13": 10.8213, - "14": 10.82574, - "15": 10.74146, - "16": 10.75035, - "17": 10.72535, - "18": 10.74231, - "19": 10.7445, - "20": 10.63706, - "21": 10.63104, - "22": 10.48032, - "23": 10.65993, - "24": 10.5253, - "25": 10.47539, - "26": 10.54133, - "27": 10.5547, - "28": 10.521, + "13": 10.82131, + "14": 10.82576, + "15": 10.74148, + "16": 10.75034, + "17": 10.72538, + "18": 10.74232, + "19": 10.74454, + "20": 10.63704, + "21": 10.63099, + "22": 10.48029, + "23": 10.65995, + "24": 10.52537, + "25": 10.47538, + "26": 10.54137, + "27": 10.55474, + "28": 10.52102, "29": 10.53614, - "30": 10.30519, - "31": 10.06487, - "32": 10.41559, - "33": 10.42241, - "34": 10.1741, - "35": 10.22337, - "36": 10.18522, + "30": 10.30518, + "31": 10.06489, + "32": 10.41554, + "33": 10.42245, + "34": 10.17407, + "35": 10.22339, + "36": 10.18526, "37": 10.30398, - "38": 10.14967, + "38": 10.14971, "39": 10.37031, - "40": 10.04015, - "41": 10.10913, - "42": 10.17951, - "43": 9.79734, + "40": 10.04014, + "41": 10.10916, + "42": 10.17947, + "43": 9.79735, "44": 9.90801, - "45": 9.79837, - "46": 9.79661, - "47": 10.12063, - "48": 9.82076, - "49": 9.50507, + "45": 9.79833, + "46": 9.79662, + "47": 10.1206, + "48": 9.82074, + "49": 9.50511, "50": 9.88047 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1603.0, - "2": 1689.0, - "3": 1616.0, - "4": 1774.0, - "5": 2059.0, - "6": 1983.0, - "7": 2102.0, - "8": 1640.0, - "9": 1877.0, - "10": 1435.0, - "11": 1981.0, - "12": 1898.0, - "13": 1949.0, - "14": 1797.0, - "15": 1923.0, - "16": 1993.0, - "17": 1804.0, - "18": 1793.0, - "19": 1808.0, - "20": 1658.0, - "21": 1881.0, - "22": 1744.0, - "23": 2029.0, - "24": 1621.0, - "25": 1550.0, - "26": 1686.0, - "27": 1794.0, - "28": 1927.0, - "29": 1974.0, - "30": 1884.0, - "31": 1610.0, - "32": 1934.0, - "33": 2098.0, - "34": 1840.0, - "35": 2033.0, - "36": 2052.0, - "37": 2302.0, - "38": 2119.0, - "39": 2421.0, - "40": 2242.0, - "41": 2339.0, - "42": 2362.0, - "43": 2065.0, - "44": 2186.0, - "45": 2266.0, - "46": 2378.0, - "47": 2504.0, - "48": 2503.0, - "49": 2303.0, - "50": 2494.0 + "1": 1542.0, + "2": 1772.0, + "3": 1677.0, + "4": 1763.0, + "5": 1987.0, + "6": 1880.0, + "7": 1948.0, + "8": 1686.0, + "9": 1930.0, + "10": 1437.0, + "11": 1928.0, + "12": 1829.0, + "13": 1980.0, + "14": 1810.0, + "15": 2006.0, + "16": 1885.0, + "17": 1765.0, + "18": 1742.0, + "19": 1788.0, + "20": 1717.0, + "21": 1880.0, + "22": 1707.0, + "23": 2116.0, + "24": 1644.0, + "25": 1581.0, + "26": 1664.0, + "27": 1810.0, + "28": 2052.0, + "29": 1954.0, + "30": 1943.0, + "31": 1590.0, + "32": 1906.0, + "33": 2122.0, + "34": 1865.0, + "35": 1994.0, + "36": 1902.0, + "37": 2380.0, + "38": 2161.0, + "39": 2414.0, + "40": 2260.0, + "41": 2308.0, + "42": 2275.0, + "43": 2109.0, + "44": 2189.0, + "45": 2236.0, + "46": 2437.0, + "47": 2581.0, + "48": 2351.0, + "49": 2345.0, + "50": 2524.0 } }, "mem-allocated-bytes": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.34829, - "3": 0.10511, - "4": 0.09797, - "5": 0.09705, - "6": 0.09665, - "7": 0.09616, - "8": 0.09616, - "9": 0.0968, - "10": 0.09708, - "11": 0.09598, - "12": 0.09533, - "13": 0.09549, - "14": 0.09665, - "15": 0.09303, - "16": 0.0963, - "17": 0.10058, - "18": 0.09955, - "19": 0.10067, - "20": 0.10221, - "21": 0.09941, - "22": 0.09872, - "23": 0.09975, - "24": 0.10322, - "25": 0.09837, - "26": 0.09834, - "27": 0.09843, - "28": 0.09692, - "29": 0.09907, - "30": 0.09889, - "31": 0.10064, - "32": 0.09748, - "33": 0.09927, - "34": 0.09831, - "35": 0.09862, - "36": 0.09852, - "37": 0.09869, - "38": 0.09941, - "39": 0.09945, - "40": 0.10014, - "41": 0.09934, - "42": 0.10081, - "43": 0.10148, - "44": 0.09766, - "45": 0.09746, - "46": 0.09842, - "47": 0.09924, - "48": 0.09864, - "49": 0.09829, - "50": 0.09685 + "2": 4.06847, + "3": 0.10126, + "4": 0.08879, + "5": 0.0895, + "6": 0.09253, + "7": 0.09257, + "8": 0.09092, + "9": 0.0912, + "10": 0.09343, + "11": 0.09132, + "12": 0.09098, + "13": 0.08924, + "14": 0.08868, + "15": 0.0917, + "16": 0.09022, + "17": 0.09175, + "18": 0.08931, + "19": 0.0903, + "20": 0.08975, + "21": 0.08914, + "22": 0.09136, + "23": 0.09031, + "24": 0.08986, + "25": 0.08928, + "26": 0.08905, + "27": 0.0893, + "28": 0.08978, + "29": 0.08991, + "30": 0.08929, + "31": 0.09073, + "32": 0.08895, + "33": 0.08888, + "34": 0.0889, + "35": 0.08867, + "36": 0.08814, + "37": 0.08834, + "38": 0.08834, + "39": 0.08804, + "40": 0.08849, + "41": 0.08911, + "42": 0.08844, + "43": 0.0897, + "44": 0.08853, + "45": 0.09005, + "46": 0.09453, + "47": 0.09155, + "48": 0.08894, + "49": 0.09025, + "50": 0.08973 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json index 63c74381364..947616883e3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.83582, "2": 10.83571, - "3": 10.83523, + "3": 10.83524, "4": 10.79949, "5": 10.84909, - "6": 10.86563, - "7": 10.82789, + "6": 10.86567, + "7": 10.82786, "8": 10.8363, "9": 10.83997, - "10": 10.79865, - "11": 10.8677, - "12": 10.84994, - "13": 10.85915, - "14": 10.86874, - "15": 10.80173, - "16": 10.79183, - "17": 10.77353, - "18": 10.78739, - "19": 10.78983, - "20": 10.68446, - "21": 10.6784, - "22": 10.5257, - "23": 10.70726, - "24": 10.56551, - "25": 10.51602, - "26": 10.58017, - "27": 10.58981, + "10": 10.79867, + "11": 10.86763, + "12": 10.84992, + "13": 10.85912, + "14": 10.8687, + "15": 10.80171, + "16": 10.79189, + "17": 10.77351, + "18": 10.78742, + "19": 10.78986, + "20": 10.68447, + "21": 10.67839, + "22": 10.52572, + "23": 10.70729, + "24": 10.56549, + "25": 10.51603, + "26": 10.58018, + "27": 10.58977, "28": 10.54551, - "29": 10.57726, - "30": 10.34051, - "31": 10.07051, - "32": 10.44503, - "33": 10.44293, - "34": 10.19391, - "35": 10.24261, - "36": 10.19236, - "37": 10.32969, + "29": 10.57724, + "30": 10.34049, + "31": 10.07056, + "32": 10.44508, + "33": 10.44289, + "34": 10.19394, + "35": 10.2426, + "36": 10.19235, + "37": 10.32972, "38": 10.16551, "39": 10.38729, "40": 10.05174, - "41": 10.12191, - "42": 10.19259, - "43": 9.8069, - "44": 9.92475, - "45": 9.80639, - "46": 9.80145, - "47": 10.12104, - "48": 9.83127, - "49": 9.50404, - "50": 9.87954, + "41": 10.12185, + "42": 10.19258, + "43": 9.80694, + "44": 9.92474, + "45": 9.80636, + "46": 9.80144, + "47": 10.12106, + "48": 9.83126, + "49": 9.50406, + "50": 9.87955, "51": 9.83807, - "52": 9.72058, - "53": 10.0568, - "54": 9.95032, - "55": 9.88328, - "56": 9.60431, + "52": 9.72057, + "53": 10.05682, + "54": 9.95031, + "55": 9.88332, + "56": 9.60428, "57": 9.45518, - "58": 9.81927, - "59": 9.58262, + "58": 9.81923, + "59": 9.58266, "60": 9.48844, - "61": 9.68577, - "62": 9.97779, + "61": 9.68574, + "62": 9.9778, "63": 9.36765, - "64": 9.75913, - "65": 8.9376, - "66": 9.69257, - "67": 9.36621, - "68": 9.78303, - "69": 9.79318, - "70": 9.72699, + "64": 9.75912, + "65": 8.93762, + "66": 9.6926, + "67": 9.36619, + "68": 9.78309, + "69": 9.79315, + "70": 9.72695, "71": 9.62875, - "72": 9.58004, + "72": 9.58006, "73": 9.487, - "74": 8.92041, - "75": 9.41128, - "76": 9.07564, - "77": 10.05848, - "78": 9.72184, - "79": 9.3732, - "80": 9.40079, - "81": 9.4792, - "82": 9.69754, - "83": 9.31037, - "84": 9.41777, - "85": 9.61194, - "86": 9.07155, - "87": 9.59661, - "88": 9.74709, - "89": 9.59667, - "90": 9.82915, - "91": 9.33725, - "92": 9.3564, - "93": 9.08552, - "94": 8.82807, - "95": 9.52842, - "96": 9.52611, - "97": 9.30632, - "98": 9.66808, - "99": 8.89461, - "100": 9.40666 + "74": 8.92045, + "75": 9.41127, + "76": 9.0757, + "77": 10.05849, + "78": 9.72185, + "79": 9.37321, + "80": 9.40082, + "81": 9.47926, + "82": 9.69753, + "83": 9.31033, + "84": 9.41773, + "85": 9.61195, + "86": 9.07158, + "87": 9.59659, + "88": 9.74711, + "89": 9.59669, + "90": 9.82914, + "91": 9.33728, + "92": 9.35642, + "93": 9.08554, + "94": 8.82803, + "95": 9.52843, + "96": 9.52607, + "97": 9.30634, + "98": 9.66809, + "99": 8.89459, + "100": 9.40668 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1536.0, - "2": 1592.0, - "3": 1551.0, - "4": 1769.0, - "5": 1824.0, - "6": 1800.0, - "7": 1734.0, - "8": 1619.0, - "9": 1829.0, - "10": 1355.0, - "11": 1911.0, - "12": 1721.0, - "13": 1913.0, - "14": 1708.0, - "15": 1919.0, - "16": 1938.0, - "17": 1740.0, - "18": 1676.0, - "19": 1743.0, - "20": 1535.0, - "21": 1797.0, - "22": 1661.0, - "23": 1887.0, - "24": 1666.0, - "25": 1633.0, - "26": 1676.0, - "27": 1740.0, - "28": 1991.0, - "29": 1918.0, - "30": 1806.0, - "31": 1588.0, - "32": 1863.0, - "33": 2126.0, - "34": 1812.0, - "35": 1976.0, - "36": 1875.0, - "37": 2301.0, - "38": 2131.0, - "39": 2351.0, - "40": 2130.0, - "41": 2391.0, - "42": 2255.0, - "43": 1975.0, - "44": 2138.0, - "45": 2208.0, - "46": 2364.0, - "47": 2564.0, - "48": 2337.0, - "49": 2142.0, - "50": 2423.0, - "51": 2546.0, - "52": 2590.0, - "53": 2879.0, - "54": 2697.0, - "55": 2316.0, - "56": 2549.0, - "57": 2261.0, - "58": 2904.0, - "59": 2740.0, - "60": 2434.0, - "61": 2801.0, - "62": 2663.0, - "63": 2502.0, - "64": 2948.0, - "65": 2644.0, - "66": 2961.0, - "67": 2813.0, - "68": 2686.0, - "69": 2912.0, - "70": 3096.0, - "71": 2854.0, - "72": 2454.0, - "73": 3081.0, - "74": 1933.0, - "75": 2465.0, - "76": 3012.0, - "77": 3163.0, - "78": 2997.0, - "79": 3089.0, - "80": 3187.0, - "81": 3500.0, - "82": 3339.0, - "83": 2705.0, - "84": 3205.0, - "85": 3033.0, - "86": 2818.0, - "87": 3671.0, - "88": 3190.0, - "89": 3336.0, - "90": 3320.0, - "91": 2698.0, - "92": 3072.0, - "93": 2750.0, - "94": 3397.0, - "95": 3317.0, - "96": 3290.0, - "97": 3116.0, - "98": 3732.0, - "99": 3049.0, - "100": 2974.0 + "1": 1501.0, + "2": 1576.0, + "3": 1604.0, + "4": 1778.0, + "5": 1880.0, + "6": 1803.0, + "7": 1789.0, + "8": 1669.0, + "9": 1811.0, + "10": 1392.0, + "11": 1835.0, + "12": 1663.0, + "13": 1855.0, + "14": 1841.0, + "15": 1874.0, + "16": 1844.0, + "17": 1738.0, + "18": 1724.0, + "19": 1742.0, + "20": 1612.0, + "21": 1751.0, + "22": 1713.0, + "23": 1964.0, + "24": 1632.0, + "25": 1570.0, + "26": 1675.0, + "27": 1752.0, + "28": 2026.0, + "29": 1938.0, + "30": 1848.0, + "31": 1557.0, + "32": 1926.0, + "33": 2052.0, + "34": 1880.0, + "35": 2022.0, + "36": 1926.0, + "37": 2344.0, + "38": 2202.0, + "39": 2285.0, + "40": 2225.0, + "41": 2328.0, + "42": 2200.0, + "43": 1984.0, + "44": 2142.0, + "45": 2173.0, + "46": 2308.0, + "47": 2592.0, + "48": 2460.0, + "49": 2242.0, + "50": 2383.0, + "51": 2489.0, + "52": 2497.0, + "53": 2875.0, + "54": 2654.0, + "55": 2317.0, + "56": 2599.0, + "57": 2299.0, + "58": 2830.0, + "59": 2784.0, + "60": 2437.0, + "61": 2916.0, + "62": 2599.0, + "63": 2388.0, + "64": 2785.0, + "65": 2677.0, + "66": 2972.0, + "67": 2797.0, + "68": 2752.0, + "69": 3049.0, + "70": 3087.0, + "71": 2952.0, + "72": 2411.0, + "73": 3099.0, + "74": 1975.0, + "75": 2614.0, + "76": 2941.0, + "77": 3166.0, + "78": 3123.0, + "79": 3085.0, + "80": 3198.0, + "81": 3350.0, + "82": 3322.0, + "83": 2858.0, + "84": 3125.0, + "85": 3194.0, + "86": 2777.0, + "87": 3602.0, + "88": 3006.0, + "89": 3267.0, + "90": 3133.0, + "91": 2753.0, + "92": 3113.0, + "93": 2714.0, + "94": 3364.0, + "95": 3273.0, + "96": 3202.0, + "97": 3124.0, + "98": 3716.0, + "99": 3121.0, + "100": 3131.0 } }, "mem-allocated-bytes": { @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1939785728.0, + "1": 1938737152.0, "2": 2222434304.0, "3": 2222434304.0, "4": 2222434304.0, @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.20838, - "3": 0.13042, - "4": 0.11826, - "5": 0.11718, - "6": 0.11797, - "7": 0.1177, - "8": 0.11717, - "9": 0.11846, - "10": 0.11778, - "11": 0.11712, - "12": 0.11866, - "13": 0.12004, - "14": 0.11788, - "15": 0.11787, - "16": 0.1181, - "17": 0.11903, - "18": 0.11843, - "19": 0.11754, - "20": 0.11834, - "21": 0.11897, - "22": 0.12726, - "23": 0.13834, - "24": 0.15039, - "25": 0.14107, - "26": 0.14586, - "27": 0.16343, - "28": 0.2297, - "29": 0.26681, - "30": 0.19748, - "31": 0.2586, - "32": 0.12068, - "33": 0.11944, - "34": 0.11896, - "35": 0.11984, - "36": 0.11823, - "37": 0.11997, - "38": 0.11949, - "39": 0.11877, - "40": 0.11898, - "41": 0.11996, - "42": 0.11893, - "43": 0.12547, - "44": 0.13195, - "45": 0.12144, - "46": 0.11997, - "47": 0.12005, - "48": 0.11855, - "49": 0.11944, - "50": 0.11842, - "51": 0.14635, - "52": 0.12016, - "53": 0.11762, - "54": 0.11802, - "55": 0.1184, - "56": 0.11774, - "57": 0.12181, - "58": 0.11784, - "59": 0.11936, - "60": 0.11831, - "61": 0.11819, - "62": 0.11807, - "63": 0.11828, - "64": 0.11663, - "65": 0.11901, - "66": 0.1168, - "67": 0.1167, - "68": 0.12002, - "69": 0.12016, - "70": 0.1186, - "71": 0.11772, - "72": 0.1189, - "73": 0.11915, - "74": 0.11908, - "75": 0.11898, - "76": 0.11863, - "77": 0.11869, - "78": 0.11971, - "79": 0.11843, - "80": 0.1198, - "81": 0.12003, - "82": 0.11885, - "83": 0.11905, - "84": 0.12002, - "85": 0.1192, - "86": 0.11872, - "87": 0.11777, - "88": 0.11801, - "89": 0.11864, - "90": 0.11769, - "91": 0.11692, - "92": 0.12015, - "93": 0.12072, - "94": 0.11802, - "95": 0.11798, - "96": 0.12278, - "97": 0.11941, - "98": 0.1174, - "99": 0.11816, - "100": 0.12102 + "2": 5.86605, + "3": 0.13527, + "4": 0.12296, + "5": 0.12443, + "6": 0.12222, + "7": 0.12431, + "8": 0.12301, + "9": 0.12262, + "10": 0.12615, + "11": 0.12479, + "12": 0.12612, + "13": 0.12396, + "14": 0.12727, + "15": 0.1273, + "16": 0.12644, + "17": 0.1273, + "18": 0.12789, + "19": 0.12832, + "20": 0.12576, + "21": 0.12724, + "22": 0.12793, + "23": 0.1258, + "24": 0.12564, + "25": 0.12595, + "26": 0.12455, + "27": 0.12657, + "28": 0.12588, + "29": 0.12712, + "30": 0.12691, + "31": 0.126, + "32": 0.12515, + "33": 0.12441, + "34": 0.12458, + "35": 0.12577, + "36": 0.12386, + "37": 0.12673, + "38": 0.1247, + "39": 0.12614, + "40": 0.12388, + "41": 0.12934, + "42": 0.12674, + "43": 0.12687, + "44": 0.1272, + "45": 0.1238, + "46": 0.12514, + "47": 0.12467, + "48": 0.12579, + "49": 0.12624, + "50": 0.12487, + "51": 0.16156, + "52": 0.1308, + "53": 0.12721, + "54": 0.12451, + "55": 0.12517, + "56": 0.12436, + "57": 0.12767, + "58": 0.12444, + "59": 0.12475, + "60": 0.12331, + "61": 0.12518, + "62": 0.12457, + "63": 0.12132, + "64": 0.12553, + "65": 0.12416, + "66": 0.12219, + "67": 0.12402, + "68": 0.12407, + "69": 0.12423, + "70": 0.12433, + "71": 0.12449, + "72": 0.12308, + "73": 0.12596, + "74": 0.12432, + "75": 0.12395, + "76": 0.12485, + "77": 0.12332, + "78": 0.12531, + "79": 0.1263, + "80": 0.12438, + "81": 0.1277, + "82": 0.12699, + "83": 0.12503, + "84": 0.12566, + "85": 0.12342, + "86": 0.12385, + "87": 0.12328, + "88": 0.12366, + "89": 0.12501, + "90": 0.1245, + "91": 0.12538, + "92": 0.12418, + "93": 0.12242, + "94": 0.12316, + "95": 0.12244, + "96": 0.12316, + "97": 0.12448, + "98": 0.12205, + "99": 0.12459, + "100": 0.12444 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json index a9a12874e97..463885b1cba 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.83582, "2": 10.83571, - "3": 10.83523, + "3": 10.83524, "4": 10.79949, "5": 10.84909, - "6": 10.86563, - "7": 10.82789, + "6": 10.86567, + "7": 10.82786, "8": 10.8363, "9": 10.83997, - "10": 10.79865, - "11": 10.8677, - "12": 10.84994, - "13": 10.85915, - "14": 10.86874, - "15": 10.80173, - "16": 10.79183, - "17": 10.77353, - "18": 10.78739, - "19": 10.78983, - "20": 10.68446, - "21": 10.6784, - "22": 10.5257, - "23": 10.70726, - "24": 10.56551, - "25": 10.51602, - "26": 10.58017, - "27": 10.58981, + "10": 10.79867, + "11": 10.86763, + "12": 10.84992, + "13": 10.85912, + "14": 10.8687, + "15": 10.80171, + "16": 10.79189, + "17": 10.77351, + "18": 10.78742, + "19": 10.78986, + "20": 10.68447, + "21": 10.67839, + "22": 10.52572, + "23": 10.70729, + "24": 10.56549, + "25": 10.51603, + "26": 10.58018, + "27": 10.58977, "28": 10.54551, - "29": 10.57726, - "30": 10.34051, - "31": 10.07051, - "32": 10.44503, - "33": 10.44293, - "34": 10.19391, - "35": 10.24261, - "36": 10.19236, - "37": 10.32969, + "29": 10.57724, + "30": 10.34049, + "31": 10.07056, + "32": 10.44508, + "33": 10.44289, + "34": 10.19394, + "35": 10.2426, + "36": 10.19235, + "37": 10.32972, "38": 10.16551, "39": 10.38729, "40": 10.05174, - "41": 10.12191, - "42": 10.19259, - "43": 9.8069, - "44": 9.92475, - "45": 9.80639, - "46": 9.80145, - "47": 10.12104, - "48": 9.83127, - "49": 9.50404, - "50": 9.87954, + "41": 10.12185, + "42": 10.19258, + "43": 9.80694, + "44": 9.92474, + "45": 9.80636, + "46": 9.80144, + "47": 10.12106, + "48": 9.83126, + "49": 9.50406, + "50": 9.87955, "51": 9.83807, - "52": 9.72058, - "53": 10.0568, - "54": 9.95032, - "55": 9.88328, - "56": 9.60431, + "52": 9.72057, + "53": 10.05682, + "54": 9.95031, + "55": 9.88332, + "56": 9.60428, "57": 9.45518, - "58": 9.81927, - "59": 9.58262, + "58": 9.81923, + "59": 9.58266, "60": 9.48844, - "61": 9.68577, - "62": 9.97779, + "61": 9.68574, + "62": 9.9778, "63": 9.36765, - "64": 9.75913, - "65": 8.9376, - "66": 9.69257, - "67": 9.36621, - "68": 9.78303, - "69": 9.79318, - "70": 9.72699, + "64": 9.75912, + "65": 8.93762, + "66": 9.6926, + "67": 9.36619, + "68": 9.78309, + "69": 9.79315, + "70": 9.72695, "71": 9.62875, - "72": 9.58004, + "72": 9.58006, "73": 9.487, - "74": 8.92041, - "75": 9.41128, - "76": 9.07564, - "77": 10.05848, - "78": 9.72184, - "79": 9.3732, - "80": 9.40079, - "81": 9.4792, - "82": 9.69754, - "83": 9.31037, - "84": 9.41777, - "85": 9.61194, - "86": 9.07155, - "87": 9.59661, - "88": 9.74709, - "89": 9.59667, - "90": 9.82915, - "91": 9.33725, - "92": 9.3564, - "93": 9.08552, - "94": 8.82807, - "95": 9.52842, - "96": 9.52611, - "97": 9.30632, - "98": 9.66808, - "99": 8.89461, - "100": 9.40666 + "74": 8.92045, + "75": 9.41127, + "76": 9.0757, + "77": 10.05849, + "78": 9.72185, + "79": 9.37321, + "80": 9.40082, + "81": 9.47926, + "82": 9.69753, + "83": 9.31033, + "84": 9.41773, + "85": 9.61195, + "86": 9.07158, + "87": 9.59659, + "88": 9.74711, + "89": 9.59669, + "90": 9.82914, + "91": 9.33728, + "92": 9.35642, + "93": 9.08554, + "94": 8.82803, + "95": 9.52843, + "96": 9.52607, + "97": 9.30634, + "98": 9.66809, + "99": 8.89459, + "100": 9.40668 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1536.0, - "2": 1592.0, - "3": 1551.0, - "4": 1769.0, - "5": 1824.0, - "6": 1800.0, - "7": 1734.0, - "8": 1619.0, - "9": 1829.0, - "10": 1355.0, - "11": 1911.0, - "12": 1721.0, - "13": 1913.0, - "14": 1708.0, - "15": 1919.0, - "16": 1938.0, - "17": 1740.0, - "18": 1676.0, - "19": 1743.0, - "20": 1535.0, - "21": 1797.0, - "22": 1661.0, - "23": 1887.0, - "24": 1666.0, - "25": 1633.0, - "26": 1676.0, - "27": 1740.0, - "28": 1991.0, - "29": 1918.0, - "30": 1806.0, - "31": 1588.0, - "32": 1863.0, - "33": 2126.0, - "34": 1812.0, - "35": 1976.0, - "36": 1875.0, - "37": 2301.0, - "38": 2131.0, - "39": 2351.0, - "40": 2130.0, - "41": 2391.0, - "42": 2255.0, - "43": 1975.0, - "44": 2138.0, - "45": 2208.0, - "46": 2364.0, - "47": 2564.0, - "48": 2337.0, - "49": 2142.0, - "50": 2423.0, - "51": 2546.0, - "52": 2590.0, - "53": 2879.0, - "54": 2697.0, - "55": 2316.0, - "56": 2549.0, - "57": 2261.0, - "58": 2904.0, - "59": 2740.0, - "60": 2434.0, - "61": 2801.0, - "62": 2663.0, - "63": 2502.0, - "64": 2948.0, - "65": 2644.0, - "66": 2961.0, - "67": 2813.0, - "68": 2686.0, - "69": 2912.0, - "70": 3096.0, - "71": 2854.0, - "72": 2454.0, - "73": 3081.0, - "74": 1933.0, - "75": 2465.0, - "76": 3012.0, - "77": 3163.0, - "78": 2997.0, - "79": 3089.0, - "80": 3187.0, - "81": 3500.0, - "82": 3339.0, - "83": 2705.0, - "84": 3205.0, - "85": 3033.0, - "86": 2818.0, - "87": 3671.0, - "88": 3190.0, - "89": 3336.0, - "90": 3320.0, - "91": 2698.0, - "92": 3072.0, - "93": 2750.0, - "94": 3397.0, - "95": 3317.0, - "96": 3290.0, - "97": 3116.0, - "98": 3732.0, - "99": 3049.0, - "100": 2974.0 + "1": 1501.0, + "2": 1576.0, + "3": 1604.0, + "4": 1778.0, + "5": 1880.0, + "6": 1803.0, + "7": 1789.0, + "8": 1669.0, + "9": 1811.0, + "10": 1392.0, + "11": 1835.0, + "12": 1663.0, + "13": 1855.0, + "14": 1841.0, + "15": 1874.0, + "16": 1844.0, + "17": 1738.0, + "18": 1724.0, + "19": 1742.0, + "20": 1612.0, + "21": 1751.0, + "22": 1713.0, + "23": 1964.0, + "24": 1632.0, + "25": 1570.0, + "26": 1675.0, + "27": 1752.0, + "28": 2026.0, + "29": 1938.0, + "30": 1848.0, + "31": 1557.0, + "32": 1926.0, + "33": 2052.0, + "34": 1880.0, + "35": 2022.0, + "36": 1926.0, + "37": 2344.0, + "38": 2202.0, + "39": 2285.0, + "40": 2225.0, + "41": 2328.0, + "42": 2200.0, + "43": 1984.0, + "44": 2142.0, + "45": 2173.0, + "46": 2308.0, + "47": 2592.0, + "48": 2460.0, + "49": 2242.0, + "50": 2383.0, + "51": 2489.0, + "52": 2497.0, + "53": 2875.0, + "54": 2654.0, + "55": 2317.0, + "56": 2599.0, + "57": 2299.0, + "58": 2830.0, + "59": 2784.0, + "60": 2437.0, + "61": 2916.0, + "62": 2599.0, + "63": 2388.0, + "64": 2785.0, + "65": 2677.0, + "66": 2972.0, + "67": 2797.0, + "68": 2752.0, + "69": 3049.0, + "70": 3087.0, + "71": 2952.0, + "72": 2411.0, + "73": 3099.0, + "74": 1975.0, + "75": 2614.0, + "76": 2941.0, + "77": 3166.0, + "78": 3123.0, + "79": 3085.0, + "80": 3198.0, + "81": 3350.0, + "82": 3322.0, + "83": 2858.0, + "84": 3125.0, + "85": 3194.0, + "86": 2777.0, + "87": 3602.0, + "88": 3006.0, + "89": 3267.0, + "90": 3133.0, + "91": 2753.0, + "92": 3113.0, + "93": 2714.0, + "94": 3364.0, + "95": 3273.0, + "96": 3202.0, + "97": 3124.0, + "98": 3716.0, + "99": 3121.0, + "100": 3131.0 } }, "mem-allocated-bytes": { @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1939785728.0, + "1": 1938737152.0, "2": 2222434304.0, "3": 2222434304.0, "4": 2222434304.0, @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.22807, - "3": 0.13601, - "4": 0.12128, - "5": 0.1198, - "6": 0.1228, - "7": 0.12056, - "8": 0.11886, - "9": 0.11944, - "10": 0.11995, - "11": 0.11935, - "12": 0.11905, - "13": 0.11975, - "14": 0.12242, - "15": 0.12061, - "16": 0.12046, - "17": 0.1208, - "18": 0.12205, - "19": 0.12427, - "20": 0.12315, - "21": 0.11965, - "22": 0.12231, - "23": 0.12286, - "24": 0.12394, - "25": 0.12377, - "26": 0.12221, - "27": 0.11936, - "28": 0.11894, - "29": 0.11945, - "30": 0.12192, - "31": 0.12571, - "32": 0.12346, - "33": 0.12413, - "34": 0.12225, - "35": 0.12328, - "36": 0.12241, - "37": 0.12432, - "38": 0.12195, - "39": 0.12262, - "40": 0.12198, - "41": 0.12396, - "42": 0.12194, - "43": 0.12435, - "44": 0.12108, - "45": 0.12326, - "46": 0.1218, - "47": 0.12308, - "48": 0.12384, - "49": 0.12795, - "50": 0.12572, - "51": 0.13502, - "52": 0.13106, - "53": 0.14515, - "54": 0.12597, - "55": 0.1249, - "56": 0.12535, - "57": 0.12569, - "58": 0.12489, - "59": 0.12862, - "60": 0.12778, - "61": 0.12731, - "62": 0.12786, - "63": 0.13022, - "64": 0.12789, - "65": 0.12838, - "66": 0.12571, - "67": 0.12651, - "68": 0.12592, - "69": 0.12663, - "70": 0.12691, - "71": 0.12636, - "72": 0.12638, - "73": 0.12671, - "74": 0.12637, - "75": 0.12602, - "76": 0.12598, - "77": 0.12554, - "78": 0.12553, - "79": 0.12501, - "80": 0.13898, - "81": 0.14589, - "82": 0.14718, - "83": 0.14665, - "84": 0.16017, - "85": 0.14231, - "86": 0.15628, - "87": 0.14055, - "88": 0.13961, - "89": 0.14878, - "90": 0.14486, - "91": 0.1432, - "92": 0.14946, - "93": 0.14581, - "94": 0.1623, - "95": 0.15638, - "96": 0.12895, - "97": 0.12907, - "98": 0.12824, - "99": 0.12741, - "100": 0.12543 + "2": 5.94954, + "3": 1.50291, + "4": 0.53566, + "5": 0.40704, + "6": 0.34894, + "7": 0.22372, + "8": 0.53898, + "9": 0.12291, + "10": 0.16074, + "11": 0.35135, + "12": 0.12732, + "13": 0.12357, + "14": 0.12383, + "15": 0.12737, + "16": 0.12362, + "17": 0.12343, + "18": 0.12473, + "19": 0.12595, + "20": 0.12604, + "21": 0.12429, + "22": 0.12556, + "23": 0.1253, + "24": 0.1263, + "25": 0.12594, + "26": 0.12525, + "27": 0.127, + "28": 0.12526, + "29": 0.12711, + "30": 0.12734, + "31": 0.12949, + "32": 0.12815, + "33": 0.12674, + "34": 0.12825, + "35": 0.12995, + "36": 0.12713, + "37": 0.12917, + "38": 0.12617, + "39": 0.12706, + "40": 0.1252, + "41": 0.12715, + "42": 0.12676, + "43": 0.12603, + "44": 0.1271, + "45": 0.12586, + "46": 0.1272, + "47": 0.1267, + "48": 0.12718, + "49": 0.12784, + "50": 0.1242, + "51": 0.1345, + "52": 0.1299, + "53": 0.14634, + "54": 0.12463, + "55": 0.12725, + "56": 0.12725, + "57": 0.12837, + "58": 0.12517, + "59": 0.12774, + "60": 0.1254, + "61": 0.12644, + "62": 0.12626, + "63": 0.12406, + "64": 0.12571, + "65": 0.12665, + "66": 0.12645, + "67": 0.12761, + "68": 0.12692, + "69": 0.12838, + "70": 0.12887, + "71": 0.1276, + "72": 0.12853, + "73": 0.12876, + "74": 0.12752, + "75": 0.12802, + "76": 0.12753, + "77": 0.12582, + "78": 0.12904, + "79": 0.12901, + "80": 0.12789, + "81": 0.12964, + "82": 0.1301, + "83": 0.1264, + "84": 0.12524, + "85": 0.1274, + "86": 0.12805, + "87": 0.12637, + "88": 0.12675, + "89": 0.12674, + "90": 0.12851, + "91": 0.12781, + "92": 0.12833, + "93": 0.1262, + "94": 0.1255, + "95": 0.12578, + "96": 0.12506, + "97": 0.12696, + "98": 0.12507, + "99": 0.12893, + "100": 0.12679 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json index 1fc5ef869c5..fe86682f4bc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.74992, "2": 10.77613, - "3": 10.75714, + "3": 10.75715, "4": 10.72305, - "5": 10.80036, - "6": 10.821, + "5": 10.80038, + "6": 10.82103, "7": 10.77176, - "8": 10.7988, - "9": 10.77447, - "10": 10.70645, - "11": 10.8328, + "8": 10.79877, + "9": 10.77445, + "10": 10.70642, + "11": 10.83286, "12": 10.81872, - "13": 10.83078, - "14": 10.83381, - "15": 10.76396, - "16": 10.76573, - "17": 10.71925, - "18": 10.76797, - "19": 10.75316, - "20": 10.70911, - "21": 10.69217, - "22": 10.56534, - "23": 10.70907, - "24": 10.6159, - "25": 10.55058, - "26": 10.62591, - "27": 10.64705, - "28": 10.63623, - "29": 10.65641, - "30": 10.43675, - "31": 10.21912, - "32": 10.5512, - "33": 10.53381, - "34": 10.31821, + "13": 10.83075, + "14": 10.8338, + "15": 10.76397, + "16": 10.76575, + "17": 10.71928, + "18": 10.768, + "19": 10.75318, + "20": 10.70918, + "21": 10.69214, + "22": 10.56531, + "23": 10.7091, + "24": 10.61591, + "25": 10.55061, + "26": 10.6259, + "27": 10.64706, + "28": 10.6362, + "29": 10.65644, + "30": 10.43679, + "31": 10.21909, + "32": 10.55114, + "33": 10.5338, + "34": 10.31817, "35": 10.36833, - "36": 10.3562, - "37": 10.46302, - "38": 10.33833, - "39": 10.50306, - "40": 10.23446, - "41": 10.27335, - "42": 10.3295, - "43": 9.97414, - "44": 10.1075, - "45": 9.98853, - "46": 9.95474, - "47": 10.2514, - "48": 10.01228, - "49": 9.70796, - "50": 10.05505, + "36": 10.35618, + "37": 10.46296, + "38": 10.33836, + "39": 10.50307, + "40": 10.23444, + "41": 10.2734, + "42": 10.32945, + "43": 9.97415, + "44": 10.10754, + "45": 9.9885, + "46": 9.95475, + "47": 10.25141, + "48": 10.01227, + "49": 9.70793, + "50": 10.05501, "51": 9.9812, - "52": 9.89198, - "53": 10.19208, + "52": 9.89199, + "53": 10.19201, "54": 10.09574, - "55": 10.00506, - "56": 9.78714, - "57": 9.64607, - "58": 9.9862, - "59": 9.72684, + "55": 10.00502, + "56": 9.78719, + "57": 9.6461, + "58": 9.98626, + "59": 9.72683, "60": 9.67172, - "61": 9.80984, + "61": 9.80986, "62": 10.11126, - "63": 9.54877, - "64": 9.90929, - "65": 9.08735, - "66": 9.84659, - "67": 9.48264, - "68": 9.89439, - "69": 9.87695, - "70": 9.82469, - "71": 9.72751, - "72": 9.72911, - "73": 9.62051, - "74": 9.11601, - "75": 9.55057, + "63": 9.54873, + "64": 9.90931, + "65": 9.08736, + "66": 9.84658, + "67": 9.48259, + "68": 9.89433, + "69": 9.87692, + "70": 9.82465, + "71": 9.72749, + "72": 9.7291, + "73": 9.62049, + "74": 9.11605, + "75": 9.55059, "76": 9.21504, "77": 10.14893, "78": 9.8138, - "79": 9.47515, - "80": 9.51582, - "81": 9.58685, - "82": 9.79026, - "83": 9.45587, + "79": 9.4751, + "80": 9.51583, + "81": 9.58687, + "82": 9.79025, + "83": 9.45586, "84": 9.50503, "85": 9.71387, - "86": 9.17463, - "87": 9.66601, - "88": 9.84354, - "89": 9.70734, - "90": 9.8955, - "91": 9.48652, - "92": 9.47023, - "93": 9.21481, - "94": 8.94327, - "95": 9.6154, - "96": 9.63634, - "97": 9.37644, - "98": 9.74975, - "99": 9.01753, - "100": 9.50515 + "86": 9.17462, + "87": 9.666, + "88": 9.84355, + "89": 9.70736, + "90": 9.89548, + "91": 9.48655, + "92": 9.47022, + "93": 9.2148, + "94": 8.94328, + "95": 9.61538, + "96": 9.63633, + "97": 9.37646, + "98": 9.74974, + "99": 9.01759, + "100": 9.50514 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2656.0, - "2": 2663.0, - "3": 2673.0, - "4": 2426.0, - "5": 2931.0, - "6": 3062.0, - "7": 2591.0, - "8": 2693.0, - "9": 2713.0, - "10": 2502.0, - "11": 2904.0, - "12": 2792.0, - "13": 2979.0, - "14": 3000.0, - "15": 2952.0, - "16": 2860.0, - "17": 2717.0, - "18": 2802.0, - "19": 2868.0, - "20": 2620.0, - "21": 2792.0, - "22": 2532.0, - "23": 2701.0, - "24": 2580.0, - "25": 2466.0, - "26": 2839.0, - "27": 2703.0, - "28": 2719.0, - "29": 2971.0, - "30": 2755.0, - "31": 2448.0, - "32": 2670.0, - "33": 2791.0, - "34": 2439.0, - "35": 2662.0, - "36": 2496.0, - "37": 2806.0, - "38": 2697.0, - "39": 2786.0, - "40": 2539.0, - "41": 2605.0, - "42": 2640.0, - "43": 2324.0, - "44": 2548.0, - "45": 2291.0, - "46": 2437.0, - "47": 2605.0, - "48": 2395.0, - "49": 2478.0, - "50": 2633.0, - "51": 2676.0, - "52": 2581.0, - "53": 2898.0, - "54": 2849.0, - "55": 2548.0, - "56": 2661.0, - "57": 2510.0, - "58": 2758.0, - "59": 2650.0, - "60": 2242.0, - "61": 2628.0, - "62": 2899.0, - "63": 2605.0, - "64": 2939.0, - "65": 2572.0, - "66": 2896.0, - "67": 2640.0, - "68": 2709.0, - "69": 2889.0, - "70": 3012.0, - "71": 2978.0, - "72": 2536.0, - "73": 2964.0, - "74": 2163.0, - "75": 2603.0, - "76": 2974.0, - "77": 3007.0, - "78": 3138.0, - "79": 3197.0, - "80": 2984.0, - "81": 3280.0, - "82": 3341.0, - "83": 2757.0, - "84": 3399.0, - "85": 3320.0, - "86": 2882.0, - "87": 3407.0, - "88": 3278.0, - "89": 3336.0, - "90": 3322.0, - "91": 2472.0, - "92": 3061.0, - "93": 2911.0, - "94": 3005.0, - "95": 2984.0, - "96": 2991.0, - "97": 3178.0, - "98": 3343.0, - "99": 2929.0, - "100": 2588.0 + "1": 2677.0, + "2": 2700.0, + "3": 2731.0, + "4": 2503.0, + "5": 2843.0, + "6": 2937.0, + "7": 2619.0, + "8": 2649.0, + "9": 2579.0, + "10": 2466.0, + "11": 2864.0, + "12": 2732.0, + "13": 2935.0, + "14": 2829.0, + "15": 2919.0, + "16": 2924.0, + "17": 2683.0, + "18": 2796.0, + "19": 2828.0, + "20": 2631.0, + "21": 2797.0, + "22": 2631.0, + "23": 2797.0, + "24": 2668.0, + "25": 2526.0, + "26": 2856.0, + "27": 2658.0, + "28": 2939.0, + "29": 3084.0, + "30": 2744.0, + "31": 2420.0, + "32": 2634.0, + "33": 2750.0, + "34": 2458.0, + "35": 2614.0, + "36": 2570.0, + "37": 2879.0, + "38": 2662.0, + "39": 2815.0, + "40": 2558.0, + "41": 2587.0, + "42": 2691.0, + "43": 2442.0, + "44": 2537.0, + "45": 2368.0, + "46": 2456.0, + "47": 2525.0, + "48": 2378.0, + "49": 2264.0, + "50": 2670.0, + "51": 2668.0, + "52": 2560.0, + "53": 2793.0, + "54": 2927.0, + "55": 2495.0, + "56": 2665.0, + "57": 2574.0, + "58": 2851.0, + "59": 2766.0, + "60": 2219.0, + "61": 2640.0, + "62": 2855.0, + "63": 2733.0, + "64": 3001.0, + "65": 2651.0, + "66": 2794.0, + "67": 2786.0, + "68": 2802.0, + "69": 2823.0, + "70": 2942.0, + "71": 2946.0, + "72": 2538.0, + "73": 2930.0, + "74": 2132.0, + "75": 2613.0, + "76": 2961.0, + "77": 2992.0, + "78": 3034.0, + "79": 3106.0, + "80": 3002.0, + "81": 3244.0, + "82": 3292.0, + "83": 2665.0, + "84": 3380.0, + "85": 3218.0, + "86": 2747.0, + "87": 3363.0, + "88": 3272.0, + "89": 3369.0, + "90": 3343.0, + "91": 2487.0, + "92": 2967.0, + "93": 2844.0, + "94": 2936.0, + "95": 3080.0, + "96": 3070.0, + "97": 3045.0, + "98": 3285.0, + "99": 2834.0, + "100": 2448.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 745732608.0, - "2": 745732608.0, - "3": 745732608.0, - "4": 745732608.0, - "5": 745732608.0, - "6": 745732608.0, - "7": 745732608.0, - "8": 745732608.0, - "9": 745732608.0, - "10": 745732608.0, - "11": 745732608.0, - "12": 745732608.0, - "13": 745732608.0, - "14": 745732608.0, - "15": 745732608.0, - "16": 745732608.0, - "17": 745732608.0, - "18": 745732608.0, - "19": 745732608.0, - "20": 745732608.0, - "21": 745732608.0, - "22": 745732608.0, - "23": 745732608.0, - "24": 745732608.0, - "25": 745732608.0, - "26": 745732608.0, - "27": 745732608.0, - "28": 745732608.0, - "29": 745732608.0, - "30": 745732608.0, - "31": 745732608.0, - "32": 745732608.0, - "33": 745732608.0, - "34": 745732608.0, - "35": 745732608.0, - "36": 745732608.0, - "37": 745732608.0, - "38": 745732608.0, - "39": 745732608.0, - "40": 745732608.0, - "41": 745732608.0, - "42": 745732608.0, - "43": 745732608.0, - "44": 745732608.0, - "45": 745732608.0, - "46": 745732608.0, - "47": 745732608.0, - "48": 745732608.0, - "49": 745732608.0, - "50": 745732608.0, - "51": 745732608.0, - "52": 745732608.0, - "53": 745732608.0, - "54": 745732608.0, - "55": 745732608.0, - "56": 745732608.0, - "57": 745732608.0, - "58": 745732608.0, - "59": 745732608.0, - "60": 745732608.0, - "61": 745732608.0, - "62": 745732608.0, - "63": 745732608.0, - "64": 745732608.0, - "65": 745732608.0, - "66": 745732608.0, - "67": 745732608.0, - "68": 745732608.0, - "69": 745732608.0, - "70": 745732608.0, - "71": 745732608.0, - "72": 745732608.0, - "73": 745732608.0, - "74": 745732608.0, - "75": 745732608.0, - "76": 745732608.0, - "77": 745732608.0, - "78": 745732608.0, - "79": 745732608.0, - "80": 745732608.0, - "81": 745732608.0, - "82": 745732608.0, - "83": 745732608.0, - "84": 745732608.0, - "85": 745732608.0, - "86": 745732608.0, - "87": 745732608.0, - "88": 745732608.0, - "89": 745732608.0, - "90": 745732608.0, - "91": 745732608.0, - "92": 745732608.0, - "93": 745732608.0, - "94": 745732608.0, - "95": 745732608.0, - "96": 745732608.0, - "97": 745732608.0, - "98": 745732608.0, - "99": 745732608.0, - "100": 745732608.0 + "1": 745077248.0, + "2": 745077248.0, + "3": 745077248.0, + "4": 745077248.0, + "5": 745077248.0, + "6": 745077248.0, + "7": 745077248.0, + "8": 745077248.0, + "9": 745077248.0, + "10": 745077248.0, + "11": 745077248.0, + "12": 745077248.0, + "13": 745077248.0, + "14": 745077248.0, + "15": 745077248.0, + "16": 745077248.0, + "17": 745077248.0, + "18": 745077248.0, + "19": 745077248.0, + "20": 745077248.0, + "21": 745077248.0, + "22": 745077248.0, + "23": 745077248.0, + "24": 745077248.0, + "25": 745077248.0, + "26": 745077248.0, + "27": 745077248.0, + "28": 745077248.0, + "29": 745077248.0, + "30": 745077248.0, + "31": 745077248.0, + "32": 745077248.0, + "33": 745077248.0, + "34": 745077248.0, + "35": 745077248.0, + "36": 745077248.0, + "37": 745077248.0, + "38": 745077248.0, + "39": 745077248.0, + "40": 745077248.0, + "41": 745077248.0, + "42": 745077248.0, + "43": 745077248.0, + "44": 745077248.0, + "45": 745077248.0, + "46": 745077248.0, + "47": 745077248.0, + "48": 745077248.0, + "49": 745077248.0, + "50": 745077248.0, + "51": 745077248.0, + "52": 745077248.0, + "53": 745077248.0, + "54": 745077248.0, + "55": 745077248.0, + "56": 745077248.0, + "57": 745077248.0, + "58": 745077248.0, + "59": 745077248.0, + "60": 745077248.0, + "61": 745077248.0, + "62": 745077248.0, + "63": 745077248.0, + "64": 745077248.0, + "65": 745077248.0, + "66": 745077248.0, + "67": 745077248.0, + "68": 745077248.0, + "69": 745077248.0, + "70": 745077248.0, + "71": 745077248.0, + "72": 745077248.0, + "73": 745077248.0, + "74": 745077248.0, + "75": 745077248.0, + "76": 745077248.0, + "77": 745077248.0, + "78": 745077248.0, + "79": 745077248.0, + "80": 745077248.0, + "81": 745077248.0, + "82": 745077248.0, + "83": 745077248.0, + "84": 745077248.0, + "85": 745077248.0, + "86": 745077248.0, + "87": 745077248.0, + "88": 745077248.0, + "89": 745077248.0, + "90": 745077248.0, + "91": 745077248.0, + "92": 745077248.0, + "93": 745077248.0, + "94": 745077248.0, + "95": 745077248.0, + "96": 745077248.0, + "97": 745077248.0, + "98": 745077248.0, + "99": 745077248.0, + "100": 745077248.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1940442112.0, - "2": 2223151104.0, - "3": 2223151104.0, - "4": 2223151104.0, - "5": 2223151104.0, - "6": 2223151104.0, - "7": 2223151104.0, - "8": 2223151104.0, - "9": 2223151104.0, - "10": 2223151104.0, - "11": 2223151104.0, - "12": 2223151104.0, - "13": 2223151104.0, - "14": 2223151104.0, - "15": 2223151104.0, - "16": 2223151104.0, - "17": 2223151104.0, - "18": 2223151104.0, - "19": 2223151104.0, - "20": 2223151104.0, - "21": 2223151104.0, - "22": 2223151104.0, - "23": 2223151104.0, - "24": 2223151104.0, - "25": 2223151104.0, - "26": 2223151104.0, - "27": 2223151104.0, - "28": 2223151104.0, - "29": 2223151104.0, - "30": 2223151104.0, - "31": 2223151104.0, - "32": 2223151104.0, - "33": 2223151104.0, - "34": 2223151104.0, - "35": 2223151104.0, - "36": 2223151104.0, - "37": 2223151104.0, - "38": 2223151104.0, - "39": 2223151104.0, - "40": 2223151104.0, - "41": 2223151104.0, - "42": 2223151104.0, - "43": 2223151104.0, - "44": 2223151104.0, - "45": 2223151104.0, - "46": 2223151104.0, - "47": 2223151104.0, - "48": 2223151104.0, - "49": 2223151104.0, - "50": 2223151104.0, - "51": 2223151104.0, - "52": 2223151104.0, - "53": 2223151104.0, - "54": 2223151104.0, - "55": 2223151104.0, - "56": 2223151104.0, - "57": 2223151104.0, - "58": 2223151104.0, - "59": 2223151104.0, - "60": 2223151104.0, - "61": 2223151104.0, - "62": 2223151104.0, - "63": 2223151104.0, - "64": 2223151104.0, - "65": 2223151104.0, - "66": 2223151104.0, - "67": 2223151104.0, - "68": 2223151104.0, - "69": 2223151104.0, - "70": 2223151104.0, - "71": 2223151104.0, - "72": 2223151104.0, - "73": 2223151104.0, - "74": 2223151104.0, - "75": 2223151104.0, - "76": 2223151104.0, - "77": 2223151104.0, - "78": 2223151104.0, - "79": 2223151104.0, - "80": 2223151104.0, - "81": 2223151104.0, - "82": 2223151104.0, - "83": 2223151104.0, - "84": 2223151104.0, - "85": 2223151104.0, - "86": 2223151104.0, - "87": 2223151104.0, - "88": 2223151104.0, - "89": 2223151104.0, - "90": 2223151104.0, - "91": 2223151104.0, - "92": 2223151104.0, - "93": 2223151104.0, - "94": 2223151104.0, - "95": 2223151104.0, - "96": 2223151104.0, - "97": 2223151104.0, - "98": 2223151104.0, - "99": 2223151104.0, - "100": 2223151104.0 + "1": 1939393536.0, + "2": 2220398592.0, + "3": 2220398592.0, + "4": 2220398592.0, + "5": 2220398592.0, + "6": 2220398592.0, + "7": 2220398592.0, + "8": 2220398592.0, + "9": 2220398592.0, + "10": 2220398592.0, + "11": 2220398592.0, + "12": 2220398592.0, + "13": 2220398592.0, + "14": 2220398592.0, + "15": 2220398592.0, + "16": 2220398592.0, + "17": 2220398592.0, + "18": 2220398592.0, + "19": 2220398592.0, + "20": 2220398592.0, + "21": 2220398592.0, + "22": 2220398592.0, + "23": 2220398592.0, + "24": 2220398592.0, + "25": 2220398592.0, + "26": 2220398592.0, + "27": 2220398592.0, + "28": 2220398592.0, + "29": 2220398592.0, + "30": 2220398592.0, + "31": 2220398592.0, + "32": 2220398592.0, + "33": 2220398592.0, + "34": 2220398592.0, + "35": 2220398592.0, + "36": 2220398592.0, + "37": 2220398592.0, + "38": 2220398592.0, + "39": 2220398592.0, + "40": 2220398592.0, + "41": 2220398592.0, + "42": 2220398592.0, + "43": 2220398592.0, + "44": 2220398592.0, + "45": 2220398592.0, + "46": 2220398592.0, + "47": 2220398592.0, + "48": 2220398592.0, + "49": 2220398592.0, + "50": 2220398592.0, + "51": 2220398592.0, + "52": 2220398592.0, + "53": 2220398592.0, + "54": 2220398592.0, + "55": 2220398592.0, + "56": 2220398592.0, + "57": 2220398592.0, + "58": 2220398592.0, + "59": 2220398592.0, + "60": 2220398592.0, + "61": 2220398592.0, + "62": 2220398592.0, + "63": 2220398592.0, + "64": 2220398592.0, + "65": 2220398592.0, + "66": 2220398592.0, + "67": 2220398592.0, + "68": 2220398592.0, + "69": 2220398592.0, + "70": 2220398592.0, + "71": 2220398592.0, + "72": 2220398592.0, + "73": 2220398592.0, + "74": 2220398592.0, + "75": 2220398592.0, + "76": 2220398592.0, + "77": 2220398592.0, + "78": 2220398592.0, + "79": 2220398592.0, + "80": 2220398592.0, + "81": 2220398592.0, + "82": 2220398592.0, + "83": 2220398592.0, + "84": 2220398592.0, + "85": 2220398592.0, + "86": 2220398592.0, + "87": 2220398592.0, + "88": 2220398592.0, + "89": 2220398592.0, + "90": 2220398592.0, + "91": 2220398592.0, + "92": 2220398592.0, + "93": 2220398592.0, + "94": 2220398592.0, + "95": 2220398592.0, + "96": 2220398592.0, + "97": 2220398592.0, + "98": 2220398592.0, + "99": 2220398592.0, + "100": 2220398592.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.5568, - "3": 0.14788, - "4": 0.13602, - "5": 0.13596, - "6": 0.136, - "7": 0.13621, - "8": 0.13502, - "9": 0.13408, - "10": 0.23083, - "11": 0.14377, - "12": 0.14332, - "13": 0.15453, - "14": 0.15537, - "15": 0.15549, - "16": 0.15444, - "17": 0.15453, - "18": 0.15178, - "19": 0.21432, - "20": 0.15336, - "21": 0.1534, - "22": 0.15483, - "23": 0.15395, - "24": 0.15469, - "25": 0.15447, - "26": 0.15509, - "27": 0.1545, - "28": 0.15527, - "29": 0.15593, - "30": 0.15688, - "31": 0.15659, - "32": 0.15629, - "33": 0.15533, - "34": 0.155, - "35": 0.15519, - "36": 0.15784, - "37": 0.15943, - "38": 0.15552, - "39": 0.15486, - "40": 0.15539, - "41": 0.15618, - "42": 0.15569, - "43": 0.15725, - "44": 0.15522, - "45": 0.1553, - "46": 0.15719, - "47": 0.15571, - "48": 0.15568, - "49": 0.15362, - "50": 0.15495, - "51": 0.18287, - "52": 0.16115, - "53": 0.15739, - "54": 0.15665, - "55": 0.15684, - "56": 0.15658, - "57": 0.15631, - "58": 0.22153, - "59": 0.15604, - "60": 0.15313, - "61": 0.15485, - "62": 0.15518, - "63": 0.15719, - "64": 0.15757, - "65": 0.15904, - "66": 0.15846, - "67": 0.15846, - "68": 0.15754, - "69": 0.15779, - "70": 0.1589, - "71": 0.16037, - "72": 0.15778, - "73": 0.15771, - "74": 0.155, - "75": 0.15611, - "76": 0.15702, - "77": 0.15564, - "78": 0.15892, - "79": 0.15669, - "80": 0.15768, - "81": 0.15805, - "82": 0.15778, - "83": 0.15674, - "84": 0.15715, - "85": 0.15834, - "86": 0.15763, - "87": 0.15855, - "88": 0.15589, - "89": 0.15616, - "90": 0.15639, - "91": 0.15722, - "92": 0.15788, - "93": 0.15597, - "94": 0.15817, - "95": 0.15819, - "96": 0.15869, - "97": 0.15875, - "98": 0.15993, - "99": 0.16297, - "100": 0.16682 + "2": 6.8425, + "3": 0.15357, + "4": 0.1409, + "5": 0.13808, + "6": 0.1382, + "7": 0.24407, + "8": 0.13904, + "9": 0.13868, + "10": 0.13899, + "11": 0.13745, + "12": 0.13793, + "13": 0.13808, + "14": 0.1368, + "15": 0.13736, + "16": 0.13801, + "17": 0.13947, + "18": 0.13945, + "19": 0.13791, + "20": 0.13947, + "21": 0.13849, + "22": 0.13877, + "23": 0.13852, + "24": 0.13794, + "25": 0.13904, + "26": 0.14025, + "27": 0.13916, + "28": 0.13997, + "29": 0.1407, + "30": 0.13911, + "31": 0.13955, + "32": 0.1446, + "33": 0.24847, + "34": 0.14784, + "35": 0.14131, + "36": 0.13933, + "37": 0.13988, + "38": 0.19634, + "39": 0.14058, + "40": 0.14008, + "41": 0.14147, + "42": 0.14265, + "43": 0.1426, + "44": 0.14006, + "45": 0.14114, + "46": 0.14113, + "47": 0.1398, + "48": 0.14109, + "49": 0.14027, + "50": 0.13929, + "51": 0.16842, + "52": 0.14006, + "53": 0.13988, + "54": 0.13768, + "55": 0.13634, + "56": 0.13659, + "57": 0.13814, + "58": 0.13574, + "59": 0.13686, + "60": 0.1366, + "61": 0.13869, + "62": 0.13965, + "63": 0.13601, + "64": 0.13824, + "65": 0.13849, + "66": 0.13984, + "67": 0.13968, + "68": 0.13808, + "69": 0.13877, + "70": 0.14016, + "71": 0.13855, + "72": 0.13617, + "73": 0.13908, + "74": 0.13686, + "75": 0.13824, + "76": 0.13807, + "77": 0.13594, + "78": 0.13948, + "79": 0.13994, + "80": 0.13781, + "81": 0.13907, + "82": 0.14148, + "83": 0.13897, + "84": 0.13796, + "85": 0.13672, + "86": 0.13869, + "87": 0.13816, + "88": 0.13546, + "89": 0.13874, + "90": 0.13865, + "91": 0.21702, + "92": 0.13637, + "93": 0.13856, + "94": 0.14121, + "95": 0.1418, + "96": 0.16074, + "97": 0.14631, + "98": 0.13758, + "99": 0.13713, + "100": 0.13749 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json index 2610b7fe2f4..65904c58aca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1/golden_values_dev_dgx_gb200.json @@ -8,52 +8,52 @@ "2": 10.83322, "3": 10.82737, "4": 10.79588, - "5": 10.85708, - "6": 10.86392, - "7": 10.8269, - "8": 10.82589, - "9": 10.83705, - "10": 10.79716, + "5": 10.85705, + "6": 10.8639, + "7": 10.82692, + "8": 10.82592, + "9": 10.83704, + "10": 10.79717, "11": 10.87851, - "12": 10.85794, - "13": 10.8537, - "14": 10.87547, - "15": 10.79179, - "16": 10.80303, - "17": 10.7745, - "18": 10.804, - "19": 10.79363, - "20": 10.69591, + "12": 10.85796, + "13": 10.85375, + "14": 10.8755, + "15": 10.79176, + "16": 10.80297, + "17": 10.77451, + "18": 10.80401, + "19": 10.79366, + "20": 10.69587, "21": 10.68551, - "22": 10.53149, - "23": 10.70658, - "24": 10.57317, - "25": 10.51546, - "26": 10.59072, - "27": 10.60736, + "22": 10.53152, + "23": 10.70657, + "24": 10.57319, + "25": 10.51544, + "26": 10.59074, + "27": 10.60737, "28": 10.57024, - "29": 10.58904, - "30": 10.34679, - "31": 10.07734, - "32": 10.46319, - "33": 10.45704, - "34": 10.19923, - "35": 10.25593, + "29": 10.58907, + "30": 10.34675, + "31": 10.07735, + "32": 10.46316, + "33": 10.45702, + "34": 10.19922, + "35": 10.25588, "36": 10.21246, - "37": 10.34688, - "38": 10.18009, + "37": 10.34692, + "38": 10.18008, "39": 10.408, - "40": 10.07603, - "41": 10.12932, - "42": 10.21134, + "40": 10.07601, + "41": 10.12933, + "42": 10.21132, "43": 9.81692, - "44": 9.94028, - "45": 9.81699, - "46": 9.80606, - "47": 10.12475, - "48": 9.8405, - "49": 9.50971, - "50": 9.88934 + "44": 9.94031, + "45": 9.81697, + "46": 9.80607, + "47": 10.12474, + "48": 9.84052, + "49": 9.50972, + "50": 9.88931 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1691.0, - "2": 1553.0, - "3": 1673.0, - "4": 1760.0, - "5": 1852.0, - "6": 1861.0, - "7": 1907.0, - "8": 1712.0, - "9": 1919.0, - "10": 1427.0, - "11": 1965.0, - "12": 1742.0, - "13": 1946.0, - "14": 1903.0, - "15": 1851.0, - "16": 1804.0, - "17": 1778.0, - "18": 1702.0, - "19": 1703.0, - "20": 1706.0, - "21": 1916.0, - "22": 1698.0, - "23": 2009.0, - "24": 1606.0, - "25": 1625.0, - "26": 1722.0, - "27": 1784.0, - "28": 1981.0, - "29": 1919.0, - "30": 1948.0, - "31": 1503.0, - "32": 1904.0, - "33": 2058.0, - "34": 1737.0, - "35": 1916.0, - "36": 1980.0, - "37": 2263.0, - "38": 2121.0, - "39": 2277.0, - "40": 2021.0, - "41": 2202.0, - "42": 2340.0, - "43": 1973.0, - "44": 2006.0, - "45": 2128.0, - "46": 2132.0, - "47": 2438.0, - "48": 2286.0, - "49": 2215.0, - "50": 2337.0 + "1": 1692.0, + "2": 1562.0, + "3": 1659.0, + "4": 1661.0, + "5": 1890.0, + "6": 1885.0, + "7": 1867.0, + "8": 1651.0, + "9": 1883.0, + "10": 1424.0, + "11": 1819.0, + "12": 1770.0, + "13": 1985.0, + "14": 1835.0, + "15": 1964.0, + "16": 1829.0, + "17": 1826.0, + "18": 1644.0, + "19": 1752.0, + "20": 1709.0, + "21": 1984.0, + "22": 1705.0, + "23": 1997.0, + "24": 1643.0, + "25": 1621.0, + "26": 1650.0, + "27": 1750.0, + "28": 1867.0, + "29": 1959.0, + "30": 2054.0, + "31": 1547.0, + "32": 1858.0, + "33": 2093.0, + "34": 1860.0, + "35": 1961.0, + "36": 1983.0, + "37": 2362.0, + "38": 2143.0, + "39": 2286.0, + "40": 2090.0, + "41": 2199.0, + "42": 2313.0, + "43": 1992.0, + "44": 2026.0, + "45": 2082.0, + "46": 2197.0, + "47": 2446.0, + "48": 2296.0, + "49": 2232.0, + "50": 2425.0 } }, "mem-allocated-bytes": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.94258, - "3": 0.12978, - "4": 0.11688, - "5": 0.11937, - "6": 0.12093, - "7": 0.12307, - "8": 0.13062, - "9": 0.12926, - "10": 0.1228, - "11": 0.12859, - "12": 0.12404, - "13": 0.12912, - "14": 0.12318, - "15": 0.12609, - "16": 0.13327, - "17": 0.12859, - "18": 0.12957, - "19": 0.12658, - "20": 0.12929, - "21": 0.12937, - "22": 0.1298, - "23": 0.12888, - "24": 0.12917, - "25": 0.1285, - "26": 0.12864, - "27": 0.13061, - "28": 0.1272, - "29": 0.12953, - "30": 0.12693, - "31": 0.13141, - "32": 0.12786, - "33": 0.12815, - "34": 0.12937, - "35": 0.12957, - "36": 0.12737, - "37": 0.1313, - "38": 0.12977, - "39": 0.12805, - "40": 0.1298, - "41": 0.1296, - "42": 0.13074, - "43": 0.12955, - "44": 0.13171, - "45": 0.13055, - "46": 0.13271, - "47": 0.13004, - "48": 0.12873, - "49": 0.13129, - "50": 0.12858 + "2": 9.03804, + "3": 0.13858, + "4": 0.12507, + "5": 0.12463, + "6": 0.12425, + "7": 0.12393, + "8": 0.12365, + "9": 0.12427, + "10": 0.12648, + "11": 0.12263, + "12": 0.12575, + "13": 0.12379, + "14": 0.12295, + "15": 0.12869, + "16": 0.12461, + "17": 0.12438, + "18": 0.12268, + "19": 0.12324, + "20": 0.12324, + "21": 0.12291, + "22": 0.12582, + "23": 0.12767, + "24": 0.12691, + "25": 0.12504, + "26": 0.12483, + "27": 0.12358, + "28": 0.1246, + "29": 0.12998, + "30": 0.1346, + "31": 0.12439, + "32": 0.12524, + "33": 0.12436, + "34": 0.12347, + "35": 0.12901, + "36": 0.12928, + "37": 0.13039, + "38": 0.12726, + "39": 0.1253, + "40": 0.12465, + "41": 0.12644, + "42": 0.12361, + "43": 0.12563, + "44": 0.12445, + "45": 0.12536, + "46": 0.12648, + "47": 0.12433, + "48": 0.12535, + "49": 0.12492, + "50": 0.12369 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json index a34edb3389a..4ce33e9f3b9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr/golden_values_dev_dgx_gb200.json @@ -7,103 +7,103 @@ "1": 10.82555, "2": 10.83286, "3": 10.82762, - "4": 10.79573, - "5": 10.85695, - "6": 10.86391, - "7": 10.82616, - "8": 10.82544, - "9": 10.83584, - "10": 10.79629, - "11": 10.8782, - "12": 10.85821, + "4": 10.7957, + "5": 10.85697, + "6": 10.86388, + "7": 10.82617, + "8": 10.82543, + "9": 10.83586, + "10": 10.7963, + "11": 10.87822, + "12": 10.85823, "13": 10.85418, - "14": 10.87518, - "15": 10.79205, + "14": 10.87517, + "15": 10.79204, "16": 10.80305, - "17": 10.77428, - "18": 10.8046, - "19": 10.79338, - "20": 10.69563, - "21": 10.68645, - "22": 10.53149, - "23": 10.70629, - "24": 10.57273, - "25": 10.5144, - "26": 10.58993, - "27": 10.60707, + "17": 10.77433, + "18": 10.80462, + "19": 10.79337, + "20": 10.69556, + "21": 10.68641, + "22": 10.53147, + "23": 10.70631, + "24": 10.57272, + "25": 10.51439, + "26": 10.58989, + "27": 10.60708, "28": 10.57003, - "29": 10.58929, - "30": 10.34675, - "31": 10.07709, - "32": 10.46194, - "33": 10.45484, - "34": 10.19662, - "35": 10.25291, + "29": 10.5893, + "30": 10.34669, + "31": 10.07712, + "32": 10.46192, + "33": 10.4548, + "34": 10.1966, + "35": 10.2529, "36": 10.20971, - "37": 10.34492, - "38": 10.17789, - "39": 10.4061, - "40": 10.07414, - "41": 10.12736, - "42": 10.20823, - "43": 9.81194, - "44": 9.93354, + "37": 10.34489, + "38": 10.1779, + "39": 10.40615, + "40": 10.07413, + "41": 10.12733, + "42": 10.2082, + "43": 9.81191, + "44": 9.93355, "45": 9.80953, - "46": 9.79773, - "47": 10.11569, - "48": 9.83234, - "49": 9.50281, - "50": 9.88181, - "51": 9.83458, - "52": 9.71756, - "53": 10.05126, - "54": 9.94371, - "55": 9.87457, - "56": 9.6029, + "46": 9.79775, + "47": 10.11572, + "48": 9.83237, + "49": 9.50279, + "50": 9.8818, + "51": 9.8346, + "52": 9.71755, + "53": 10.05121, + "54": 9.94375, + "55": 9.87452, + "56": 9.60291, "57": 9.45086, - "58": 9.811, + "58": 9.81098, "59": 9.56395, - "60": 9.47155, - "61": 9.66553, - "62": 9.96353, - "63": 9.34709, - "64": 9.743, - "65": 8.92136, - "66": 9.67858, - "67": 9.35222, - "68": 9.76563, - "69": 9.7774, - "70": 9.70407, - "71": 9.60099, - "72": 9.5498, + "60": 9.47154, + "61": 9.66555, + "62": 9.96351, + "63": 9.34708, + "64": 9.74296, + "65": 8.92132, + "66": 9.67854, + "67": 9.3522, + "68": 9.76559, + "69": 9.77742, + "70": 9.70406, + "71": 9.601, + "72": 9.54984, "73": 9.46046, - "74": 8.89068, - "75": 9.3874, - "76": 9.04469, - "77": 10.03647, - "78": 9.6996, - "79": 9.34722, + "74": 8.89067, + "75": 9.38738, + "76": 9.04468, + "77": 10.03651, + "78": 9.69957, + "79": 9.34723, "80": 9.37822, - "81": 9.45421, - "82": 9.67529, - "83": 9.28446, - "84": 9.39113, - "85": 9.58663, - "86": 9.04694, - "87": 9.56972, - "88": 9.72085, - "89": 9.5673, + "81": 9.4542, + "82": 9.67532, + "83": 9.28445, + "84": 9.39112, + "85": 9.58662, + "86": 9.04692, + "87": 9.5697, + "88": 9.72082, + "89": 9.56729, "90": 9.79474, - "91": 9.30448, - "92": 9.32183, - "93": 9.0517, - "94": 8.79005, - "95": 9.4918, - "96": 9.48711, - "97": 9.26589, - "98": 9.62592, + "91": 9.30452, + "92": 9.32188, + "93": 9.05169, + "94": 8.79001, + "95": 9.49179, + "96": 9.48712, + "97": 9.2659, + "98": 9.62594, "99": 8.85252, - "100": 9.35907 + "100": 9.35905 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1651.0, - "2": 1716.0, - "3": 1772.0, - "4": 1774.0, - "5": 1920.0, - "6": 1864.0, - "7": 1830.0, - "8": 1695.0, - "9": 1858.0, - "10": 1367.0, - "11": 1915.0, - "12": 1797.0, - "13": 1899.0, - "14": 1769.0, - "15": 1880.0, - "16": 1806.0, - "17": 1822.0, - "18": 1686.0, - "19": 1728.0, - "20": 1667.0, - "21": 1897.0, - "22": 1703.0, - "23": 1967.0, - "24": 1595.0, - "25": 1583.0, - "26": 1684.0, - "27": 1911.0, - "28": 1969.0, - "29": 1864.0, - "30": 1943.0, - "31": 1535.0, - "32": 1895.0, - "33": 2078.0, - "34": 1739.0, - "35": 1940.0, - "36": 1919.0, - "37": 2460.0, - "38": 2107.0, - "39": 2261.0, - "40": 2059.0, - "41": 2183.0, - "42": 2269.0, - "43": 1972.0, - "44": 2040.0, - "45": 2093.0, - "46": 2140.0, - "47": 2476.0, - "48": 2311.0, - "49": 2165.0, - "50": 2411.0, - "51": 2471.0, - "52": 2670.0, - "53": 2883.0, - "54": 2589.0, - "55": 2427.0, - "56": 2774.0, - "57": 2246.0, - "58": 2994.0, - "59": 2922.0, - "60": 2416.0, - "61": 2960.0, - "62": 2646.0, - "63": 2488.0, - "64": 2956.0, - "65": 2746.0, - "66": 2864.0, - "67": 2794.0, - "68": 2703.0, - "69": 2990.0, - "70": 3012.0, - "71": 2884.0, - "72": 2536.0, - "73": 3054.0, - "74": 2100.0, - "75": 2573.0, - "76": 3076.0, - "77": 3025.0, - "78": 3014.0, - "79": 3083.0, - "80": 2989.0, - "81": 3452.0, - "82": 3253.0, - "83": 2759.0, - "84": 3186.0, - "85": 3247.0, - "86": 2624.0, - "87": 3594.0, - "88": 3009.0, - "89": 3286.0, - "90": 3354.0, - "91": 2869.0, - "92": 3156.0, - "93": 2809.0, - "94": 3350.0, - "95": 3033.0, - "96": 3323.0, - "97": 3091.0, - "98": 3356.0, - "99": 3326.0, - "100": 3144.0 + "1": 1622.0, + "2": 1729.0, + "3": 1639.0, + "4": 1678.0, + "5": 1914.0, + "6": 1900.0, + "7": 1843.0, + "8": 1671.0, + "9": 1818.0, + "10": 1434.0, + "11": 1893.0, + "12": 1772.0, + "13": 1950.0, + "14": 1863.0, + "15": 1885.0, + "16": 1738.0, + "17": 1743.0, + "18": 1679.0, + "19": 1702.0, + "20": 1729.0, + "21": 1914.0, + "22": 1696.0, + "23": 1958.0, + "24": 1574.0, + "25": 1531.0, + "26": 1707.0, + "27": 1804.0, + "28": 1939.0, + "29": 1973.0, + "30": 2024.0, + "31": 1494.0, + "32": 1960.0, + "33": 1971.0, + "34": 1813.0, + "35": 1950.0, + "36": 2051.0, + "37": 2382.0, + "38": 2098.0, + "39": 2262.0, + "40": 2137.0, + "41": 2191.0, + "42": 2258.0, + "43": 2023.0, + "44": 2104.0, + "45": 2062.0, + "46": 2219.0, + "47": 2490.0, + "48": 2393.0, + "49": 2210.0, + "50": 2478.0, + "51": 2565.0, + "52": 2533.0, + "53": 2848.0, + "54": 2623.0, + "55": 2487.0, + "56": 2760.0, + "57": 2384.0, + "58": 2929.0, + "59": 2814.0, + "60": 2418.0, + "61": 2943.0, + "62": 2620.0, + "63": 2470.0, + "64": 2875.0, + "65": 2652.0, + "66": 3070.0, + "67": 2805.0, + "68": 2612.0, + "69": 3021.0, + "70": 3054.0, + "71": 2976.0, + "72": 2577.0, + "73": 2989.0, + "74": 2019.0, + "75": 2649.0, + "76": 3096.0, + "77": 3057.0, + "78": 2960.0, + "79": 3045.0, + "80": 3030.0, + "81": 3357.0, + "82": 3310.0, + "83": 2751.0, + "84": 3176.0, + "85": 3342.0, + "86": 2659.0, + "87": 3504.0, + "88": 3082.0, + "89": 3296.0, + "90": 3304.0, + "91": 2910.0, + "92": 3195.0, + "93": 2812.0, + "94": 3284.0, + "95": 3090.0, + "96": 3330.0, + "97": 3095.0, + "98": 3486.0, + "99": 3148.0, + "100": 3169.0 } }, "mem-allocated-bytes": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.73603, - "3": 0.12344, - "4": 0.10783, - "5": 0.10595, - "6": 0.10649, - "7": 0.10691, - "8": 0.10679, - "9": 0.10607, - "10": 0.10675, - "11": 0.10687, - "12": 0.10636, - "13": 0.10663, - "14": 0.10668, - "15": 0.10696, - "16": 0.10672, - "17": 0.10678, - "18": 0.10603, - "19": 0.10659, - "20": 0.10684, - "21": 0.10766, - "22": 0.10849, - "23": 0.10853, - "24": 0.10805, - "25": 0.10776, - "26": 0.1069, - "27": 0.10818, - "28": 0.10669, - "29": 0.10643, - "30": 0.10634, - "31": 0.10766, - "32": 0.1076, - "33": 0.10583, - "34": 0.10631, - "35": 0.10587, - "36": 0.1054, - "37": 0.10589, - "38": 0.10633, - "39": 0.10593, - "40": 0.10674, - "41": 0.10812, - "42": 0.11127, - "43": 0.11494, - "44": 0.11409, - "45": 0.11538, - "46": 0.11702, - "47": 0.1155, - "48": 0.11481, - "49": 0.11507, - "50": 0.11401, - "51": 0.11655, - "52": 0.11513, - "53": 0.11379, - "54": 0.11378, - "55": 0.11658, - "56": 0.11792, - "57": 0.11792, - "58": 0.11715, - "59": 0.11915, - "60": 0.11642, - "61": 0.11578, - "62": 0.1171, - "63": 0.11758, - "64": 0.11517, - "65": 0.11624, - "66": 0.11434, - "67": 0.11609, - "68": 0.11506, - "69": 0.11568, - "70": 0.11661, - "71": 0.11647, - "72": 0.1166, - "73": 0.11795, - "74": 0.11661, - "75": 0.11785, - "76": 0.11659, - "77": 0.11531, - "78": 0.11705, - "79": 0.11662, - "80": 0.11765, - "81": 0.11829, - "82": 0.11742, - "83": 0.11529, - "84": 0.11678, - "85": 0.11581, - "86": 0.11703, - "87": 0.11699, - "88": 0.11641, - "89": 0.11638, - "90": 0.11586, - "91": 0.11853, - "92": 0.11725, - "93": 0.1178, - "94": 0.11647, - "95": 0.11672, - "96": 0.11702, - "97": 0.11754, - "98": 0.11614, - "99": 0.11757, - "100": 0.11708 + "2": 8.98992, + "3": 0.1294, + "4": 0.11595, + "5": 0.11241, + "6": 0.11556, + "7": 0.11737, + "8": 0.11364, + "9": 0.11507, + "10": 0.117, + "11": 0.11605, + "12": 0.11662, + "13": 0.11519, + "14": 0.11521, + "15": 0.11573, + "16": 0.11538, + "17": 0.11465, + "18": 0.1166, + "19": 0.11515, + "20": 0.11437, + "21": 0.11558, + "22": 0.11522, + "23": 0.1153, + "24": 0.11438, + "25": 0.11808, + "26": 0.11687, + "27": 0.11525, + "28": 0.11383, + "29": 0.11673, + "30": 0.11524, + "31": 0.1166, + "32": 0.11702, + "33": 0.11405, + "34": 0.11492, + "35": 0.11579, + "36": 0.11598, + "37": 0.11831, + "38": 0.117, + "39": 0.11673, + "40": 0.11174, + "41": 0.11645, + "42": 0.11308, + "43": 0.11563, + "44": 0.11397, + "45": 0.11626, + "46": 0.11355, + "47": 0.11499, + "48": 0.11524, + "49": 0.11557, + "50": 0.11265, + "51": 0.11887, + "52": 0.11543, + "53": 0.1134, + "54": 0.11629, + "55": 0.11697, + "56": 0.11712, + "57": 0.11885, + "58": 0.11734, + "59": 0.11534, + "60": 0.11888, + "61": 0.11756, + "62": 0.11757, + "63": 0.11525, + "64": 0.11676, + "65": 0.1176, + "66": 0.11477, + "67": 0.11557, + "68": 0.116, + "69": 0.11786, + "70": 0.11593, + "71": 0.11704, + "72": 0.11671, + "73": 0.11593, + "74": 0.11743, + "75": 0.11579, + "76": 0.11805, + "77": 0.11433, + "78": 0.11717, + "79": 0.11771, + "80": 0.11637, + "81": 0.11676, + "82": 0.11803, + "83": 0.11703, + "84": 0.11777, + "85": 0.11644, + "86": 0.11704, + "87": 0.11621, + "88": 0.11725, + "89": 0.11643, + "90": 0.1164, + "91": 0.11606, + "92": 0.1162, + "93": 0.11808, + "94": 0.11939, + "95": 0.11748, + "96": 0.11697, + "97": 0.11629, + "98": 0.11519, + "99": 0.11719, + "100": 0.11973 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json index 0758fd3a8cf..64f3b7e5f89 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.82555, "2": 10.83286, - "3": 10.82763, - "4": 10.79573, - "5": 10.85699, - "6": 10.8639, + "3": 10.82762, + "4": 10.79569, + "5": 10.85695, + "6": 10.86388, "7": 10.82612, "8": 10.82543, - "9": 10.8359, - "10": 10.79633, - "11": 10.87819, - "12": 10.85823, + "9": 10.83589, + "10": 10.79632, + "11": 10.8782, + "12": 10.85826, "13": 10.85425, - "14": 10.87526, - "15": 10.79206, - "16": 10.80309, - "17": 10.77438, - "18": 10.80484, - "19": 10.79368, - "20": 10.69574, - "21": 10.68657, - "22": 10.53162, - "23": 10.70642, - "24": 10.57336, - "25": 10.51534, - "26": 10.59088, - "27": 10.60779, - "28": 10.57051, - "29": 10.58978, - "30": 10.34722, - "31": 10.07772, + "14": 10.87525, + "15": 10.79207, + "16": 10.80307, + "17": 10.7744, + "18": 10.80487, + "19": 10.7937, + "20": 10.69578, + "21": 10.68658, + "22": 10.53159, + "23": 10.70644, + "24": 10.57339, + "25": 10.51533, + "26": 10.59086, + "27": 10.60781, + "28": 10.57047, + "29": 10.58977, + "30": 10.34723, + "31": 10.07764, "32": 10.46349, - "33": 10.45726, - "34": 10.19975, - "35": 10.25642, + "33": 10.45724, + "34": 10.19974, + "35": 10.25638, "36": 10.21264, - "37": 10.34717, - "38": 10.18011, + "37": 10.34716, + "38": 10.1801, "39": 10.40833, - "40": 10.07628, - "41": 10.1297, + "40": 10.07629, + "41": 10.12976, "42": 10.21174, - "43": 9.8171, + "43": 9.81705, "44": 9.94032, - "45": 9.81748, + "45": 9.81746, "46": 9.8063, - "47": 10.12475, - "48": 9.84049, - "49": 9.51015, - "50": 9.88941, - "51": 9.8426, - "52": 9.72578, - "53": 10.05977, - "54": 9.95226, - "55": 9.88321, - "56": 9.61276, + "47": 10.12473, + "48": 9.84048, + "49": 9.51014, + "50": 9.88943, + "51": 9.84256, + "52": 9.72576, + "53": 10.05974, + "54": 9.95227, + "55": 9.88316, + "56": 9.61277, "57": 9.46222, "58": 9.82313, - "59": 9.57665, + "59": 9.57668, "60": 9.48518, - "61": 9.6788, + "61": 9.67879, "62": 9.97777, - "63": 9.36212, - "64": 9.75714, - "65": 8.93499, - "66": 9.69281, - "67": 9.36709, - "68": 9.78179, - "69": 9.79451, - "70": 9.72295, - "71": 9.62027, - "72": 9.56974, - "73": 9.481, - "74": 8.91241, - "75": 9.40906, - "76": 9.06623, - "77": 10.05808, - "78": 9.72188, - "79": 9.36927, - "80": 9.40027, - "81": 9.47702, - "82": 9.69788, - "83": 9.30742, - "84": 9.41496, - "85": 9.61115, - "86": 9.07104, + "63": 9.36211, + "64": 9.75715, + "65": 8.93497, + "66": 9.6928, + "67": 9.3671, + "68": 9.78177, + "69": 9.79452, + "70": 9.72299, + "71": 9.62026, + "72": 9.56976, + "73": 9.48099, + "74": 8.91243, + "75": 9.40905, + "76": 9.06619, + "77": 10.0581, + "78": 9.72189, + "79": 9.36926, + "80": 9.40028, + "81": 9.47705, + "82": 9.69791, + "83": 9.30741, + "84": 9.41491, + "85": 9.61114, + "86": 9.07103, "87": 9.59609, - "88": 9.74908, - "89": 9.5961, - "90": 9.82722, - "91": 9.3366, - "92": 9.3558, - "93": 9.08695, - "94": 8.82752, - "95": 9.53066, - "96": 9.52759, - "97": 9.30671, - "98": 9.66909, - "99": 8.89637, - "100": 9.4052 + "88": 9.74911, + "89": 9.59604, + "90": 9.82721, + "91": 9.33657, + "92": 9.35581, + "93": 9.08692, + "94": 8.82753, + "95": 9.53064, + "96": 9.52758, + "97": 9.30666, + "98": 9.66906, + "99": 8.89638, + "100": 9.40523 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1651.0, - "2": 1716.0, - "3": 1760.0, - "4": 1771.0, - "5": 1899.0, - "6": 1905.0, - "7": 1842.0, - "8": 1706.0, - "9": 1891.0, - "10": 1543.0, - "11": 1937.0, - "12": 1794.0, - "13": 1982.0, - "14": 1727.0, - "15": 1890.0, - "16": 1746.0, - "17": 1818.0, - "18": 1651.0, - "19": 1782.0, - "20": 1698.0, - "21": 1950.0, - "22": 1702.0, - "23": 1972.0, - "24": 1551.0, - "25": 1587.0, - "26": 1773.0, - "27": 1791.0, - "28": 1858.0, - "29": 1950.0, - "30": 1951.0, - "31": 1499.0, - "32": 1823.0, - "33": 2055.0, - "34": 1788.0, - "35": 1877.0, - "36": 1933.0, - "37": 2302.0, - "38": 2181.0, - "39": 2223.0, - "40": 2009.0, - "41": 2178.0, - "42": 2185.0, - "43": 2041.0, - "44": 2069.0, - "45": 2004.0, - "46": 2212.0, - "47": 2446.0, - "48": 2290.0, - "49": 2183.0, - "50": 2323.0, - "51": 2587.0, - "52": 2574.0, - "53": 2831.0, - "54": 2602.0, - "55": 2403.0, - "56": 2822.0, - "57": 2223.0, - "58": 2954.0, - "59": 2871.0, - "60": 2518.0, - "61": 2922.0, - "62": 2677.0, - "63": 2533.0, - "64": 3023.0, - "65": 2609.0, - "66": 2960.0, - "67": 2867.0, - "68": 2652.0, - "69": 3053.0, - "70": 3011.0, - "71": 2870.0, - "72": 2460.0, - "73": 3114.0, - "74": 2017.0, - "75": 2527.0, - "76": 2954.0, - "77": 2955.0, - "78": 3055.0, - "79": 3098.0, - "80": 3047.0, - "81": 3362.0, - "82": 3296.0, - "83": 2825.0, - "84": 3113.0, - "85": 3196.0, - "86": 2666.0, - "87": 3583.0, - "88": 2985.0, - "89": 3259.0, - "90": 3220.0, - "91": 2781.0, - "92": 3090.0, - "93": 2686.0, - "94": 3474.0, - "95": 3147.0, - "96": 3418.0, - "97": 3036.0, - "98": 3411.0, - "99": 3152.0, - "100": 3098.0 + "1": 1622.0, + "2": 1729.0, + "3": 1764.0, + "4": 1727.0, + "5": 1879.0, + "6": 1863.0, + "7": 1896.0, + "8": 1661.0, + "9": 1798.0, + "10": 1465.0, + "11": 1851.0, + "12": 1790.0, + "13": 1974.0, + "14": 1776.0, + "15": 1923.0, + "16": 1869.0, + "17": 1814.0, + "18": 1673.0, + "19": 1744.0, + "20": 1680.0, + "21": 1843.0, + "22": 1710.0, + "23": 2112.0, + "24": 1600.0, + "25": 1567.0, + "26": 1625.0, + "27": 1804.0, + "28": 1901.0, + "29": 1938.0, + "30": 1963.0, + "31": 1473.0, + "32": 1891.0, + "33": 2173.0, + "34": 1831.0, + "35": 1908.0, + "36": 1909.0, + "37": 2422.0, + "38": 2084.0, + "39": 2280.0, + "40": 2103.0, + "41": 2182.0, + "42": 2326.0, + "43": 1949.0, + "44": 2128.0, + "45": 2031.0, + "46": 2217.0, + "47": 2517.0, + "48": 2333.0, + "49": 2180.0, + "50": 2334.0, + "51": 2524.0, + "52": 2584.0, + "53": 2836.0, + "54": 2611.0, + "55": 2414.0, + "56": 2748.0, + "57": 2319.0, + "58": 2938.0, + "59": 2855.0, + "60": 2471.0, + "61": 2934.0, + "62": 2575.0, + "63": 2539.0, + "64": 2873.0, + "65": 2772.0, + "66": 2864.0, + "67": 2833.0, + "68": 2686.0, + "69": 3058.0, + "70": 3005.0, + "71": 2876.0, + "72": 2524.0, + "73": 2982.0, + "74": 2040.0, + "75": 2608.0, + "76": 2865.0, + "77": 3054.0, + "78": 2925.0, + "79": 3052.0, + "80": 2997.0, + "81": 3408.0, + "82": 3305.0, + "83": 2772.0, + "84": 3103.0, + "85": 3193.0, + "86": 2633.0, + "87": 3586.0, + "88": 3066.0, + "89": 3297.0, + "90": 3232.0, + "91": 2904.0, + "92": 3076.0, + "93": 2716.0, + "94": 3276.0, + "95": 3120.0, + "96": 3367.0, + "97": 3095.0, + "98": 3430.0, + "99": 3274.0, + "100": 3079.0 } }, "mem-allocated-bytes": { @@ -325,7 +325,7 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2398288896.0, + "1": 2398288384.0, "2": 2681049088.0, "3": 2681049088.0, "4": 2681049088.0, @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.0922, - "3": 0.12788, - "4": 0.11451, - "5": 0.11407, - "6": 0.11304, - "7": 0.11565, - "8": 0.11482, - "9": 0.11841, - "10": 0.11916, - "11": 0.11884, - "12": 0.11911, - "13": 0.1155, - "14": 0.12253, - "15": 0.11369, - "16": 0.11887, - "17": 0.11433, - "18": 0.12243, - "19": 0.11544, - "20": 0.11344, - "21": 0.1254, - "22": 0.11712, - "23": 0.12494, - "24": 0.12239, - "25": 0.12344, - "26": 0.11952, - "27": 0.12117, - "28": 0.11916, - "29": 0.11974, - "30": 0.11517, - "31": 0.1219, - "32": 0.12112, - "33": 0.11997, - "34": 0.1133, - "35": 0.12245, - "36": 0.12118, - "37": 0.11239, - "38": 0.12174, - "39": 0.11964, - "40": 0.11993, - "41": 0.12013, - "42": 0.12614, - "43": 0.11697, - "44": 0.11669, - "45": 0.11781, - "46": 0.11776, - "47": 0.11182, - "48": 0.1196, - "49": 0.11814, - "50": 0.11736, - "51": 0.12093, - "52": 0.1107, - "53": 0.11502, - "54": 0.11571, - "55": 0.11493, - "56": 0.11712, - "57": 0.11663, - "58": 0.11203, - "59": 0.11604, - "60": 0.11649, - "61": 0.11616, - "62": 0.11641, - "63": 0.11603, - "64": 0.11613, - "65": 0.11708, - "66": 0.11292, - "67": 0.11356, - "68": 0.11416, - "69": 0.11305, - "70": 0.11582, - "71": 0.11552, - "72": 0.11318, - "73": 0.11798, - "74": 0.11632, - "75": 0.11624, - "76": 0.11602, - "77": 0.11547, - "78": 0.11457, - "79": 0.11402, - "80": 0.11415, - "81": 0.11627, - "82": 0.11295, - "83": 0.11397, - "84": 0.11221, - "85": 0.11326, - "86": 0.11792, - "87": 0.11391, - "88": 0.11365, - "89": 0.11478, - "90": 0.11346, - "91": 0.11213, - "92": 0.11712, - "93": 0.11574, - "94": 0.11724, - "95": 0.11254, - "96": 0.11871, - "97": 0.11957, - "98": 0.11759, - "99": 0.11864, - "100": 0.11833 + "2": 9.0179, + "3": 0.12425, + "4": 0.11171, + "5": 0.1113, + "6": 0.11136, + "7": 0.11152, + "8": 0.1128, + "9": 0.11283, + "10": 0.11178, + "11": 0.1114, + "12": 0.10919, + "13": 0.11062, + "14": 0.11052, + "15": 0.11204, + "16": 0.11101, + "17": 0.10801, + "18": 0.10961, + "19": 0.10805, + "20": 0.10908, + "21": 0.11181, + "22": 0.11019, + "23": 0.10842, + "24": 0.1101, + "25": 0.11377, + "26": 0.10767, + "27": 0.10828, + "28": 0.10775, + "29": 0.1111, + "30": 0.11204, + "31": 0.11206, + "32": 0.11166, + "33": 0.11093, + "34": 0.1093, + "35": 0.11115, + "36": 0.11082, + "37": 0.11056, + "38": 0.11028, + "39": 0.10835, + "40": 0.10939, + "41": 0.11079, + "42": 0.10956, + "43": 0.10806, + "44": 0.10932, + "45": 0.10799, + "46": 0.10768, + "47": 0.10905, + "48": 0.11353, + "49": 0.10994, + "50": 0.10866, + "51": 0.12844, + "52": 0.11643, + "53": 0.10864, + "54": 0.11347, + "55": 0.11478, + "56": 0.11082, + "57": 0.1112, + "58": 0.11325, + "59": 0.1109, + "60": 0.10898, + "61": 0.11011, + "62": 0.11005, + "63": 0.1101, + "64": 0.11097, + "65": 0.11278, + "66": 0.11156, + "67": 0.11122, + "68": 0.11125, + "69": 0.11008, + "70": 0.10837, + "71": 0.10856, + "72": 0.11244, + "73": 0.11132, + "74": 0.11053, + "75": 0.11064, + "76": 0.11148, + "77": 0.11101, + "78": 0.11284, + "79": 0.1119, + "80": 0.11215, + "81": 0.11292, + "82": 0.11191, + "83": 0.1108, + "84": 0.11285, + "85": 0.11227, + "86": 0.11126, + "87": 0.11007, + "88": 0.11445, + "89": 0.11142, + "90": 0.11159, + "91": 0.11062, + "92": 0.11088, + "93": 0.11224, + "94": 0.11204, + "95": 0.11256, + "96": 0.11345, + "97": 0.11247, + "98": 0.11327, + "99": 0.11177, + "100": 0.11086 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json index 28843c12217..a3165b8856a 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.82555, "2": 10.83286, - "3": 10.82763, - "4": 10.79573, - "5": 10.85699, - "6": 10.8639, + "3": 10.82762, + "4": 10.79569, + "5": 10.85695, + "6": 10.86388, "7": 10.82612, - "8": 10.82542, - "9": 10.83587, - "10": 10.79627, - "11": 10.87822, - "12": 10.85824, - "13": 10.85426, - "14": 10.87526, - "15": 10.79208, + "8": 10.82543, + "9": 10.83589, + "10": 10.79632, + "11": 10.8782, + "12": 10.85826, + "13": 10.85425, + "14": 10.87525, + "15": 10.79207, "16": 10.80307, - "17": 10.77438, + "17": 10.7744, "18": 10.80487, - "19": 10.79369, - "20": 10.69576, - "21": 10.68654, - "22": 10.53161, - "23": 10.70646, - "24": 10.57337, - "25": 10.51533, + "19": 10.7937, + "20": 10.69579, + "21": 10.68655, + "22": 10.53162, + "23": 10.70645, + "24": 10.5734, + "25": 10.51531, "26": 10.5909, - "27": 10.60777, - "28": 10.57049, - "29": 10.58979, - "30": 10.34722, - "31": 10.07771, - "32": 10.46349, + "27": 10.60778, + "28": 10.57052, + "29": 10.58974, + "30": 10.34723, + "31": 10.07765, + "32": 10.4635, "33": 10.45722, - "34": 10.19974, - "35": 10.25643, - "36": 10.21263, - "37": 10.34718, + "34": 10.19977, + "35": 10.25639, + "36": 10.21261, + "37": 10.34715, "38": 10.18009, - "39": 10.40838, - "40": 10.07629, - "41": 10.1297, - "42": 10.2117, - "43": 9.81708, - "44": 9.94034, - "45": 9.81748, - "46": 9.80633, - "47": 10.12473, - "48": 9.84047, - "49": 9.51012, - "50": 9.88943, - "51": 9.84256, - "52": 9.72573, + "39": 10.40837, + "40": 10.07626, + "41": 10.12969, + "42": 10.21172, + "43": 9.81709, + "44": 9.94035, + "45": 9.81749, + "46": 9.80632, + "47": 10.12471, + "48": 9.84046, + "49": 9.51013, + "50": 9.88941, + "51": 9.84258, + "52": 9.72577, "53": 10.05974, - "54": 9.95226, - "55": 9.88318, - "56": 9.61275, - "57": 9.46219, - "58": 9.8231, - "59": 9.57666, - "60": 9.48516, - "61": 9.67876, - "62": 9.97782, - "63": 9.36212, - "64": 9.75714, - "65": 8.93494, + "54": 9.95228, + "55": 9.88317, + "56": 9.61277, + "57": 9.46223, + "58": 9.82312, + "59": 9.57667, + "60": 9.48518, + "61": 9.67881, + "62": 9.97778, + "63": 9.36213, + "64": 9.75717, + "65": 8.93497, "66": 9.69283, - "67": 9.36708, + "67": 9.36709, "68": 9.78178, - "69": 9.79452, - "70": 9.72296, - "71": 9.62031, - "72": 9.56974, - "73": 9.48101, - "74": 8.91241, - "75": 9.40905, - "76": 9.06617, + "69": 9.79453, + "70": 9.72298, + "71": 9.62028, + "72": 9.56979, + "73": 9.48099, + "74": 8.91237, + "75": 9.40908, + "76": 9.06623, "77": 10.05809, - "78": 9.72194, - "79": 9.36927, - "80": 9.40029, - "81": 9.47702, - "82": 9.69787, - "83": 9.30742, - "84": 9.41492, + "78": 9.72192, + "79": 9.36926, + "80": 9.40026, + "81": 9.477, + "82": 9.69791, + "83": 9.30743, + "84": 9.41493, "85": 9.61113, - "86": 9.07103, - "87": 9.5961, - "88": 9.74909, - "89": 9.59604, + "86": 9.07104, + "87": 9.59611, + "88": 9.74908, + "89": 9.5961, "90": 9.82722, - "91": 9.33657, - "92": 9.35582, - "93": 9.08689, - "94": 8.82754, + "91": 9.33658, + "92": 9.3558, + "93": 9.08695, + "94": 8.82753, "95": 9.53065, - "96": 9.5276, - "97": 9.30672, - "98": 9.66905, - "99": 8.89635, - "100": 9.40525 + "96": 9.52762, + "97": 9.30668, + "98": 9.66908, + "99": 8.89636, + "100": 9.40522 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1651.0, - "2": 1716.0, - "3": 1760.0, - "4": 1771.0, - "5": 1899.0, - "6": 1905.0, - "7": 1842.0, - "8": 1667.0, - "9": 1822.0, - "10": 1434.0, - "11": 1852.0, - "12": 1741.0, - "13": 1905.0, - "14": 1841.0, - "15": 1857.0, - "16": 1841.0, - "17": 1800.0, - "18": 1666.0, - "19": 1803.0, - "20": 1800.0, - "21": 1836.0, - "22": 1688.0, - "23": 1994.0, - "24": 1641.0, - "25": 1577.0, - "26": 1676.0, - "27": 1876.0, - "28": 1970.0, - "29": 1945.0, - "30": 1916.0, - "31": 1494.0, - "32": 1868.0, - "33": 2135.0, - "34": 1740.0, - "35": 1924.0, - "36": 1854.0, - "37": 2363.0, - "38": 2164.0, - "39": 2262.0, - "40": 2081.0, - "41": 2168.0, - "42": 2247.0, - "43": 2055.0, - "44": 2070.0, - "45": 1988.0, - "46": 2208.0, - "47": 2559.0, - "48": 2287.0, - "49": 2194.0, - "50": 2303.0, - "51": 2552.0, - "52": 2565.0, - "53": 2883.0, - "54": 2710.0, - "55": 2301.0, - "56": 2798.0, - "57": 2334.0, - "58": 2979.0, - "59": 2960.0, - "60": 2451.0, - "61": 2841.0, - "62": 2577.0, - "63": 2516.0, - "64": 2907.0, - "65": 2567.0, - "66": 2862.0, - "67": 2809.0, - "68": 2609.0, - "69": 2965.0, - "70": 2985.0, - "71": 2864.0, - "72": 2613.0, - "73": 3108.0, - "74": 2048.0, - "75": 2563.0, - "76": 3046.0, - "77": 3127.0, - "78": 2959.0, - "79": 3082.0, - "80": 3025.0, - "81": 3400.0, - "82": 3223.0, - "83": 2786.0, - "84": 3180.0, - "85": 3233.0, - "86": 2611.0, - "87": 3542.0, - "88": 3084.0, - "89": 3210.0, - "90": 3271.0, - "91": 2770.0, - "92": 3220.0, - "93": 2662.0, - "94": 3405.0, - "95": 3085.0, - "96": 3336.0, - "97": 3050.0, - "98": 3421.0, - "99": 3271.0, - "100": 3079.0 + "1": 1622.0, + "2": 1729.0, + "3": 1764.0, + "4": 1727.0, + "5": 1879.0, + "6": 1863.0, + "7": 1896.0, + "8": 1661.0, + "9": 1798.0, + "10": 1465.0, + "11": 1851.0, + "12": 1790.0, + "13": 1974.0, + "14": 1776.0, + "15": 1923.0, + "16": 1869.0, + "17": 1814.0, + "18": 1673.0, + "19": 1720.0, + "20": 1699.0, + "21": 1893.0, + "22": 1735.0, + "23": 1986.0, + "24": 1634.0, + "25": 1621.0, + "26": 1685.0, + "27": 1759.0, + "28": 1882.0, + "29": 1990.0, + "30": 2038.0, + "31": 1536.0, + "32": 1900.0, + "33": 2086.0, + "34": 1761.0, + "35": 1979.0, + "36": 1971.0, + "37": 2325.0, + "38": 2104.0, + "39": 2266.0, + "40": 2078.0, + "41": 2184.0, + "42": 2202.0, + "43": 2015.0, + "44": 2078.0, + "45": 2015.0, + "46": 2066.0, + "47": 2458.0, + "48": 2363.0, + "49": 2175.0, + "50": 2409.0, + "51": 2558.0, + "52": 2588.0, + "53": 2829.0, + "54": 2591.0, + "55": 2369.0, + "56": 2739.0, + "57": 2309.0, + "58": 2883.0, + "59": 2848.0, + "60": 2493.0, + "61": 2888.0, + "62": 2602.0, + "63": 2545.0, + "64": 2899.0, + "65": 2710.0, + "66": 3002.0, + "67": 2804.0, + "68": 2632.0, + "69": 2956.0, + "70": 2966.0, + "71": 2906.0, + "72": 2514.0, + "73": 3054.0, + "74": 2000.0, + "75": 2549.0, + "76": 2973.0, + "77": 3043.0, + "78": 3007.0, + "79": 3087.0, + "80": 2994.0, + "81": 3419.0, + "82": 3217.0, + "83": 2800.0, + "84": 3251.0, + "85": 3154.0, + "86": 2553.0, + "87": 3551.0, + "88": 3114.0, + "89": 3200.0, + "90": 3219.0, + "91": 2907.0, + "92": 3034.0, + "93": 2797.0, + "94": 3431.0, + "95": 3018.0, + "96": 3330.0, + "97": 3017.0, + "98": 3495.0, + "99": 3308.0, + "100": 3089.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 552325632.0, - "2": 552325632.0, - "3": 552325632.0, - "4": 552325632.0, - "5": 552325632.0, - "6": 552325632.0, - "7": 552325632.0, - "8": 552325632.0, - "9": 552325632.0, - "10": 552325632.0, - "11": 552325632.0, - "12": 552325632.0, - "13": 552325632.0, - "14": 552325632.0, - "15": 552325632.0, - "16": 552325632.0, - "17": 552325632.0, - "18": 552325632.0, - "19": 552325632.0, - "20": 552325632.0, - "21": 552325632.0, - "22": 552325632.0, - "23": 552325632.0, - "24": 552325632.0, - "25": 552325632.0, - "26": 552325632.0, - "27": 552325632.0, - "28": 552325632.0, - "29": 552325632.0, - "30": 552325632.0, - "31": 552325632.0, - "32": 552325632.0, - "33": 552325632.0, - "34": 552325632.0, - "35": 552325632.0, - "36": 552325632.0, - "37": 552325632.0, - "38": 552325632.0, - "39": 552325632.0, - "40": 552325632.0, - "41": 552325632.0, - "42": 552325632.0, - "43": 552325632.0, - "44": 552325632.0, - "45": 553374208.0, - "46": 552325632.0, - "47": 552325632.0, - "48": 553374208.0, - "49": 552325632.0, - "50": 552325632.0, - "51": 552325632.0, - "52": 552325632.0, - "53": 552325632.0, - "54": 552325632.0, - "55": 552325632.0, - "56": 552325632.0, - "57": 552325632.0, - "58": 552325632.0, - "59": 552325632.0, - "60": 552325632.0, - "61": 552325632.0, - "62": 552325632.0, - "63": 552325632.0, - "64": 552325632.0, - "65": 552325632.0, - "66": 552325632.0, - "67": 552325632.0, - "68": 552325632.0, - "69": 552325632.0, - "70": 552325632.0, - "71": 552325632.0, - "72": 552325632.0, - "73": 552325632.0, - "74": 552325632.0, - "75": 552325632.0, - "76": 552325632.0, - "77": 552325632.0, - "78": 552325632.0, - "79": 552325632.0, - "80": 552325632.0, - "81": 552325632.0, - "82": 552325632.0, - "83": 552325632.0, - "84": 552325632.0, - "85": 552325632.0, - "86": 552325632.0, - "87": 552325632.0, - "88": 552325632.0, - "89": 552325632.0, - "90": 552325632.0, - "91": 552325632.0, - "92": 552325632.0, - "93": 552325632.0, - "94": 552325632.0, - "95": 552325632.0, - "96": 552325632.0, - "97": 552325632.0, - "98": 552325632.0, - "99": 552325632.0, - "100": 552325632.0 + "1": 551273984.0, + "2": 551273984.0, + "3": 551273984.0, + "4": 551273984.0, + "5": 551273984.0, + "6": 551273984.0, + "7": 551273984.0, + "8": 551273984.0, + "9": 551273984.0, + "10": 551273984.0, + "11": 551273984.0, + "12": 551273984.0, + "13": 551273984.0, + "14": 551273984.0, + "15": 551273984.0, + "16": 551273984.0, + "17": 551273984.0, + "18": 551273984.0, + "19": 551273984.0, + "20": 551273984.0, + "21": 551273984.0, + "22": 551273984.0, + "23": 551273984.0, + "24": 551273984.0, + "25": 551273984.0, + "26": 551273984.0, + "27": 551273984.0, + "28": 551273984.0, + "29": 551273984.0, + "30": 551273984.0, + "31": 551273984.0, + "32": 551273984.0, + "33": 551273984.0, + "34": 551273984.0, + "35": 551273984.0, + "36": 551273984.0, + "37": 551273984.0, + "38": 551273984.0, + "39": 551273984.0, + "40": 551273984.0, + "41": 551273984.0, + "42": 551273984.0, + "43": 551273984.0, + "44": 551273984.0, + "45": 551273984.0, + "46": 551273984.0, + "47": 551273984.0, + "48": 551273984.0, + "49": 551273984.0, + "50": 551273984.0, + "51": 551273984.0, + "52": 551273984.0, + "53": 551273984.0, + "54": 551273984.0, + "55": 551273984.0, + "56": 551273984.0, + "57": 551273984.0, + "58": 551273984.0, + "59": 551273984.0, + "60": 551273984.0, + "61": 551273984.0, + "62": 551273984.0, + "63": 551273984.0, + "64": 551273984.0, + "65": 551273984.0, + "66": 551273984.0, + "67": 551273984.0, + "68": 551273984.0, + "69": 551273984.0, + "70": 551273984.0, + "71": 551273984.0, + "72": 551273984.0, + "73": 551273984.0, + "74": 551273984.0, + "75": 551273984.0, + "76": 551273984.0, + "77": 551273984.0, + "78": 551273984.0, + "79": 551273984.0, + "80": 551273984.0, + "81": 551273984.0, + "82": 551273984.0, + "83": 551273984.0, + "84": 551273984.0, + "85": 551273984.0, + "86": 551273984.0, + "87": 551273984.0, + "88": 551273984.0, + "89": 551273984.0, + "90": 551273984.0, + "91": 551273984.0, + "92": 551273984.0, + "93": 551273984.0, + "94": 551273984.0, + "95": 551273984.0, + "96": 551273984.0, + "97": 551273984.0, + "98": 551273984.0, + "99": 551273984.0, + "100": 551273984.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2328239104.0, - "2": 2471201792.0, - "3": 2471201792.0, - "4": 2471201792.0, - "5": 2471201792.0, - "6": 2471201792.0, - "7": 2471201792.0, - "8": 2471201792.0, - "9": 2471201792.0, - "10": 2471201792.0, - "11": 2471201792.0, - "12": 2471201792.0, - "13": 2471201792.0, - "14": 2471201792.0, - "15": 2471201792.0, - "16": 2471201792.0, - "17": 2471201792.0, - "18": 2471201792.0, - "19": 2471201792.0, - "20": 2471201792.0, - "21": 2471201792.0, - "22": 2471201792.0, - "23": 2471201792.0, - "24": 2471201792.0, - "25": 2471201792.0, - "26": 2471201792.0, - "27": 2471201792.0, - "28": 2471201792.0, - "29": 2471201792.0, - "30": 2471201792.0, - "31": 2471201792.0, - "32": 2471201792.0, - "33": 2471201792.0, - "34": 2471201792.0, - "35": 2471201792.0, - "36": 2471201792.0, - "37": 2471201792.0, - "38": 2471201792.0, - "39": 2471201792.0, - "40": 2471201792.0, - "41": 2471201792.0, - "42": 2471201792.0, - "43": 2471201792.0, - "44": 2471201792.0, - "45": 2471201792.0, - "46": 2471201792.0, - "47": 2471201792.0, - "48": 2471201792.0, - "49": 2471201792.0, - "50": 2471201792.0, - "51": 2471201792.0, - "52": 2471201792.0, - "53": 2471201792.0, - "54": 2471201792.0, - "55": 2471201792.0, - "56": 2471201792.0, - "57": 2471201792.0, - "58": 2471201792.0, - "59": 2471201792.0, - "60": 2471201792.0, - "61": 2471201792.0, - "62": 2471201792.0, - "63": 2471201792.0, - "64": 2471201792.0, - "65": 2471201792.0, - "66": 2471201792.0, - "67": 2471201792.0, - "68": 2471201792.0, - "69": 2471201792.0, - "70": 2471201792.0, - "71": 2471201792.0, - "72": 2471201792.0, - "73": 2471201792.0, - "74": 2471201792.0, - "75": 2471201792.0, - "76": 2471201792.0, - "77": 2471201792.0, - "78": 2471201792.0, - "79": 2471201792.0, - "80": 2471201792.0, - "81": 2471201792.0, - "82": 2471201792.0, - "83": 2471201792.0, - "84": 2471201792.0, - "85": 2471201792.0, - "86": 2471201792.0, - "87": 2471201792.0, - "88": 2471201792.0, - "89": 2471201792.0, - "90": 2471201792.0, - "91": 2471201792.0, - "92": 2471201792.0, - "93": 2471201792.0, - "94": 2471201792.0, - "95": 2471201792.0, - "96": 2471201792.0, - "97": 2471201792.0, - "98": 2471201792.0, - "99": 2471201792.0, - "100": 2471201792.0 + "1": 2328238592.0, + "2": 2470149120.0, + "3": 2470149120.0, + "4": 2470149120.0, + "5": 2470149120.0, + "6": 2470149120.0, + "7": 2470149120.0, + "8": 2470149120.0, + "9": 2470149120.0, + "10": 2470149120.0, + "11": 2470149120.0, + "12": 2470149120.0, + "13": 2470149120.0, + "14": 2470149120.0, + "15": 2470149120.0, + "16": 2470149120.0, + "17": 2470149120.0, + "18": 2470149120.0, + "19": 2470149120.0, + "20": 2470149120.0, + "21": 2470149120.0, + "22": 2470149120.0, + "23": 2470149120.0, + "24": 2470149120.0, + "25": 2470149120.0, + "26": 2470149120.0, + "27": 2470149120.0, + "28": 2470149120.0, + "29": 2470149120.0, + "30": 2470149120.0, + "31": 2470149120.0, + "32": 2470149120.0, + "33": 2470149120.0, + "34": 2470149120.0, + "35": 2470149120.0, + "36": 2470149120.0, + "37": 2470149120.0, + "38": 2470149120.0, + "39": 2470149120.0, + "40": 2470149120.0, + "41": 2470149120.0, + "42": 2470149120.0, + "43": 2470149120.0, + "44": 2470149120.0, + "45": 2470149120.0, + "46": 2470149120.0, + "47": 2470149120.0, + "48": 2470149120.0, + "49": 2470149120.0, + "50": 2470149120.0, + "51": 2470149120.0, + "52": 2470149120.0, + "53": 2470149120.0, + "54": 2470149120.0, + "55": 2470149120.0, + "56": 2470149120.0, + "57": 2470149120.0, + "58": 2470149120.0, + "59": 2470149120.0, + "60": 2470149120.0, + "61": 2470149120.0, + "62": 2470149120.0, + "63": 2470149120.0, + "64": 2470149120.0, + "65": 2470149120.0, + "66": 2470149120.0, + "67": 2470149120.0, + "68": 2470149120.0, + "69": 2470149120.0, + "70": 2470149120.0, + "71": 2470149120.0, + "72": 2470149120.0, + "73": 2470149120.0, + "74": 2470149120.0, + "75": 2470149120.0, + "76": 2470149120.0, + "77": 2470149120.0, + "78": 2470149120.0, + "79": 2470149120.0, + "80": 2470149120.0, + "81": 2470149120.0, + "82": 2470149120.0, + "83": 2470149120.0, + "84": 2470149120.0, + "85": 2470149120.0, + "86": 2470149120.0, + "87": 2470149120.0, + "88": 2470149120.0, + "89": 2470149120.0, + "90": 2470149120.0, + "91": 2470149120.0, + "92": 2470149120.0, + "93": 2470149120.0, + "94": 2470149120.0, + "95": 2470149120.0, + "96": 2470149120.0, + "97": 2470149120.0, + "98": 2470149120.0, + "99": 2470149120.0, + "100": 2470149120.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.33855, - "3": 0.12562, - "4": 0.10973, - "5": 0.10864, - "6": 0.10778, - "7": 0.10885, - "8": 0.10884, - "9": 0.10877, - "10": 0.10868, - "11": 0.10997, - "12": 0.10853, - "13": 0.1086, - "14": 0.10927, - "15": 0.10879, - "16": 0.10908, - "17": 0.10873, - "18": 0.10883, - "19": 0.11028, - "20": 0.11031, - "21": 0.11086, - "22": 0.10971, - "23": 0.10987, - "24": 0.1089, - "25": 0.11118, - "26": 0.10952, - "27": 0.1165, - "28": 0.11961, - "29": 0.11977, - "30": 0.11657, - "31": 0.11728, - "32": 0.11689, - "33": 0.11642, - "34": 0.11739, - "35": 0.11665, - "36": 0.11537, - "37": 0.11552, - "38": 0.11544, - "39": 0.11538, - "40": 0.11584, - "41": 0.11597, - "42": 0.11635, - "43": 0.11593, - "44": 0.11678, - "45": 0.11608, - "46": 0.11637, - "47": 0.11572, - "48": 0.11577, - "49": 0.11481, - "50": 0.11561, - "51": 0.1213, - "52": 0.10892, - "53": 0.10742, - "54": 0.10842, - "55": 0.10806, - "56": 0.10869, - "57": 0.11057, - "58": 0.108, - "59": 0.10875, - "60": 0.10969, - "61": 0.1087, - "62": 0.10795, - "63": 0.1094, - "64": 0.10922, - "65": 0.11102, - "66": 0.11016, - "67": 0.10977, - "68": 0.10988, - "69": 0.11029, - "70": 0.11078, - "71": 0.11019, - "72": 0.11727, - "73": 0.11024, - "74": 0.11054, - "75": 0.10949, - "76": 0.11384, - "77": 0.11011, - "78": 0.1101, - "79": 0.10943, - "80": 0.11059, - "81": 0.11173, - "82": 0.10987, - "83": 0.1094, - "84": 0.10956, - "85": 0.11029, - "86": 0.11179, - "87": 0.10953, - "88": 0.11045, - "89": 0.1102, - "90": 0.10897, - "91": 0.11022, - "92": 0.10965, - "93": 0.11042, - "94": 0.11158, - "95": 0.11059, - "96": 0.11046, - "97": 0.11123, - "98": 0.11055, - "99": 0.11178, - "100": 0.11266 + "2": 9.34621, + "3": 0.13335, + "4": 0.11754, + "5": 0.11536, + "6": 0.11367, + "7": 0.11663, + "8": 0.11385, + "9": 0.11574, + "10": 0.11631, + "11": 0.11616, + "12": 0.11786, + "13": 0.11675, + "14": 0.12155, + "15": 0.11663, + "16": 0.11781, + "17": 0.11932, + "18": 0.11766, + "19": 0.11708, + "20": 0.11635, + "21": 0.11736, + "22": 0.11806, + "23": 0.11804, + "24": 0.1169, + "25": 0.11774, + "26": 0.11641, + "27": 0.11674, + "28": 0.1177, + "29": 0.11804, + "30": 0.11805, + "31": 0.11916, + "32": 0.11895, + "33": 0.11909, + "34": 0.1191, + "35": 0.11894, + "36": 0.11897, + "37": 0.11622, + "38": 0.11982, + "39": 0.1177, + "40": 0.1197, + "41": 0.11987, + "42": 0.11911, + "43": 0.11866, + "44": 0.11971, + "45": 0.11825, + "46": 0.1203, + "47": 0.11863, + "48": 0.1192, + "49": 0.11951, + "50": 0.11918, + "51": 0.13867, + "52": 0.12177, + "53": 0.12036, + "54": 0.1191, + "55": 0.11745, + "56": 0.11631, + "57": 0.11887, + "58": 0.11974, + "59": 0.12067, + "60": 0.1174, + "61": 0.12254, + "62": 0.11811, + "63": 0.11737, + "64": 0.11568, + "65": 0.11917, + "66": 0.1185, + "67": 0.1199, + "68": 0.11894, + "69": 0.11777, + "70": 0.11816, + "71": 0.11833, + "72": 0.11801, + "73": 0.11849, + "74": 0.11806, + "75": 0.11903, + "76": 0.11812, + "77": 0.11852, + "78": 0.11938, + "79": 0.1176, + "80": 0.11841, + "81": 0.11882, + "82": 0.11877, + "83": 0.11747, + "84": 0.11992, + "85": 0.12148, + "86": 0.11854, + "87": 0.1177, + "88": 0.1201, + "89": 0.11888, + "90": 0.1191, + "91": 0.11815, + "92": 0.1186, + "93": 0.1183, + "94": 0.11918, + "95": 0.13193, + "96": 0.13346, + "97": 0.12725, + "98": 0.13104, + "99": 0.12509, + "100": 0.12079 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json index 8a90b6fb7df..5e655485d52 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.81848, "2": 10.8198, - "3": 10.83668, - "4": 10.83525, - "5": 10.84996, - "6": 10.83445, - "7": 10.82529, - "8": 10.81514, - "9": 10.87713, + "3": 10.83667, + "4": 10.8353, + "5": 10.85001, + "6": 10.83446, + "7": 10.8253, + "8": 10.81516, + "9": 10.87716, "10": 10.88261, - "11": 10.87195, - "12": 10.8249, - "13": 10.84823, - "14": 10.81959, - "15": 10.80596, - "16": 10.80141, - "17": 10.77143, - "18": 10.78633, - "19": 10.74566, - "20": 10.62432, - "21": 10.68067, + "11": 10.87191, + "12": 10.82486, + "13": 10.84827, + "14": 10.8196, + "15": 10.80595, + "16": 10.8014, + "17": 10.77144, + "18": 10.78637, + "19": 10.74572, + "20": 10.62431, + "21": 10.68065, "22": 10.65086, - "23": 10.76421, - "24": 10.61849, - "25": 10.46057, - "26": 10.59622, - "27": 10.54041, + "23": 10.76414, + "24": 10.61844, + "25": 10.46056, + "26": 10.59625, + "27": 10.54045, "28": 10.44496, - "29": 10.39552, - "30": 10.40391, - "31": 10.51272, - "32": 10.32089, - "33": 10.26353, - "34": 10.46902, - "35": 9.95972, - "36": 10.11517, - "37": 10.01099, - "38": 10.38317, - "39": 9.78588, - "40": 10.10413, - "41": 10.13151, - "42": 10.02832, - "43": 10.2098, - "44": 10.07339, - "45": 9.69361, - "46": 9.99604, - "47": 9.93464, - "48": 9.67414, - "49": 9.91775, - "50": 9.93121 + "29": 10.3955, + "30": 10.40393, + "31": 10.51269, + "32": 10.32093, + "33": 10.26351, + "34": 10.46901, + "35": 9.95973, + "36": 10.11518, + "37": 10.01096, + "38": 10.38318, + "39": 9.78592, + "40": 10.10416, + "41": 10.13149, + "42": 10.02829, + "43": 10.20978, + "44": 10.07337, + "45": 9.6936, + "46": 9.99607, + "47": 9.93467, + "48": 9.67415, + "49": 9.91776, + "50": 9.93114 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1118.0, - "2": 1206.0, - "3": 1308.0, - "4": 1243.0, - "5": 1256.0, - "6": 1296.0, - "7": 1259.0, - "8": 1023.0, - "9": 1295.0, - "10": 1319.0, - "11": 1282.0, - "12": 1361.0, - "13": 1336.0, - "14": 1176.0, - "15": 1188.0, - "16": 1255.0, - "17": 1182.0, - "18": 1341.0, - "19": 1043.0, - "20": 1099.0, - "21": 1248.0, - "22": 1233.0, - "23": 1369.0, - "24": 1365.0, - "25": 1073.0, - "26": 1245.0, - "27": 1211.0, - "28": 1306.0, - "29": 1317.0, - "30": 1426.0, - "31": 1476.0, - "32": 1399.0, - "33": 1444.0, - "34": 1483.0, - "35": 1242.0, - "36": 1326.0, - "37": 1447.0, - "38": 1542.0, - "39": 1342.0, - "40": 1560.0, - "41": 1611.0, - "42": 1607.0, - "43": 1651.0, - "44": 1594.0, - "45": 1499.0, - "46": 1744.0, - "47": 1571.0, - "48": 1523.0, - "49": 1629.0, - "50": 1747.0 + "1": 1076.0, + "2": 1208.0, + "3": 1258.0, + "4": 1277.0, + "5": 1246.0, + "6": 1250.0, + "7": 1209.0, + "8": 1001.0, + "9": 1360.0, + "10": 1325.0, + "11": 1245.0, + "12": 1358.0, + "13": 1303.0, + "14": 1131.0, + "15": 1174.0, + "16": 1254.0, + "17": 1207.0, + "18": 1343.0, + "19": 1091.0, + "20": 1062.0, + "21": 1233.0, + "22": 1257.0, + "23": 1376.0, + "24": 1275.0, + "25": 1134.0, + "26": 1179.0, + "27": 1217.0, + "28": 1307.0, + "29": 1321.0, + "30": 1372.0, + "31": 1475.0, + "32": 1394.0, + "33": 1486.0, + "34": 1522.0, + "35": 1129.0, + "36": 1305.0, + "37": 1428.0, + "38": 1593.0, + "39": 1392.0, + "40": 1482.0, + "41": 1626.0, + "42": 1594.0, + "43": 1737.0, + "44": 1511.0, + "45": 1489.0, + "46": 1774.0, + "47": 1605.0, + "48": 1596.0, + "49": 1712.0, + "50": 1740.0 } }, "mem-allocated-bytes": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.1265, - "3": 0.15779, - "4": 0.14192, - "5": 0.14446, - "6": 0.14251, - "7": 0.14375, - "8": 0.1446, - "9": 0.14351, - "10": 0.14568, - "11": 0.14477, - "12": 0.14491, - "13": 0.1447, - "14": 0.14656, - "15": 0.14652, - "16": 0.14521, - "17": 0.14638, - "18": 0.14483, - "19": 0.14549, - "20": 0.14457, - "21": 0.14306, - "22": 0.14559, - "23": 0.14596, - "24": 0.14513, - "25": 0.14367, - "26": 0.14368, - "27": 0.14398, - "28": 0.14369, - "29": 0.14435, - "30": 0.14415, - "31": 0.1433, - "32": 0.14342, - "33": 0.1441, - "34": 0.14372, - "35": 0.14431, - "36": 0.1454, - "37": 0.14634, - "38": 0.14514, - "39": 0.14529, - "40": 0.14504, - "41": 0.14496, - "42": 0.14436, - "43": 0.14492, - "44": 0.14452, - "45": 0.14629, - "46": 0.14514, - "47": 0.14578, - "48": 0.1442, - "49": 0.14396, - "50": 0.14376 + "2": 9.61562, + "3": 0.16844, + "4": 0.15723, + "5": 0.1525, + "6": 0.15368, + "7": 0.1564, + "8": 0.15395, + "9": 0.15394, + "10": 0.15187, + "11": 0.1507, + "12": 0.15595, + "13": 0.15908, + "14": 0.15006, + "15": 0.1512, + "16": 0.15414, + "17": 0.16741, + "18": 0.1517, + "19": 0.15049, + "20": 0.15743, + "21": 0.16405, + "22": 0.1515, + "23": 0.15504, + "24": 0.14877, + "25": 0.15039, + "26": 0.15018, + "27": 0.14901, + "28": 0.15124, + "29": 0.1496, + "30": 0.15239, + "31": 0.14986, + "32": 0.15144, + "33": 0.15057, + "34": 0.14938, + "35": 0.14965, + "36": 0.15083, + "37": 0.15057, + "38": 0.15276, + "39": 0.15348, + "40": 0.1583, + "41": 0.15028, + "42": 0.14947, + "43": 0.14843, + "44": 0.14972, + "45": 0.1515, + "46": 0.15337, + "47": 0.14819, + "48": 0.15122, + "49": 0.15663, + "50": 0.14862 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json index da72109d85d..b59f2eab3b3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.93757, "2": 10.92393, - "3": 10.94318, - "4": 10.93348, + "3": 10.94316, + "4": 10.93346, "5": 10.93027, - "6": 10.92214, - "7": 10.9129, - "8": 10.92494, - "9": 10.94302, + "6": 10.92207, + "7": 10.91292, + "8": 10.92493, + "9": 10.94303, "10": 10.92589, - "11": 10.89715, - "12": 10.91085, - "13": 10.91359, - "14": 10.90092, - "15": 10.87211, - "16": 10.86524, - "17": 10.869, - "18": 10.85374, + "11": 10.89718, + "12": 10.91088, + "13": 10.91361, + "14": 10.90096, + "15": 10.87207, + "16": 10.86525, + "17": 10.86902, + "18": 10.85373, "19": 10.84295, - "20": 10.76663, - "21": 10.74374, - "22": 10.67695, - "23": 10.72701, - "24": 10.66494, - "25": 10.62546, + "20": 10.76659, + "21": 10.74373, + "22": 10.67694, + "23": 10.72697, + "24": 10.66498, + "25": 10.6255, "26": 10.654, - "27": 10.62035, - "28": 10.56813, + "27": 10.62029, + "28": 10.56817, "29": 10.56412, - "30": 10.41005, - "31": 10.21717, + "30": 10.41003, + "31": 10.21718, "32": 10.46613, - "33": 10.47136, - "34": 10.26038, - "35": 10.30272, - "36": 10.264, - "37": 10.35196, - "38": 10.22183, - "39": 10.38981, - "40": 10.11089, - "41": 10.13597, - "42": 10.21619, - "43": 9.89444, - "44": 9.985, - "45": 9.87317, - "46": 9.86222, + "33": 10.47129, + "34": 10.26041, + "35": 10.30274, + "36": 10.26404, + "37": 10.35194, + "38": 10.22181, + "39": 10.38983, + "40": 10.11085, + "41": 10.13592, + "42": 10.21622, + "43": 9.89443, + "44": 9.98496, + "45": 9.8732, + "46": 9.86221, "47": 10.13614, - "48": 9.86196, + "48": 9.86198, "49": 9.56912, - "50": 9.91564 + "50": 9.91565 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 22727242.0, - "2": 22924896.0, - "3": 22597216.0, - "4": 23219716.0, - "5": 22714012.0, - "6": 23021178.0, - "7": 22770808.0, - "8": 22926716.0, - "9": 22842500.0, - "10": 22918960.0, - "11": 22500834.0, - "12": 22460340.0, - "13": 22917536.0, - "14": 22388990.0, - "15": 22821224.0, - "16": 22831266.0, - "17": 22819108.0, - "18": 22582264.0, - "19": 22617384.0, - "20": 22693436.0, - "21": 22739352.0, - "22": 22800104.0, - "23": 22539998.0, - "24": 22771512.0, - "25": 22819132.0, - "26": 22547588.0, - "27": 22468844.0, - "28": 22453516.0, - "29": 22529320.0, - "30": 22630996.0, - "31": 22955520.0, - "32": 22585756.0, - "33": 22557744.0, - "34": 22835696.0, - "35": 22787828.0, - "36": 22588412.0, - "37": 22498040.0, - "38": 22896082.0, - "39": 22801992.0, - "40": 22657536.0, - "41": 22659220.0, - "42": 22667844.0, - "43": 22975904.0, - "44": 22745960.0, - "45": 22675400.0, - "46": 22884844.0, - "47": 22633716.0, - "48": 22928608.0, - "49": 22727282.0, - "50": 22904808.0 + "1": 22727352.0, + "2": 22924862.0, + "3": 22597228.0, + "4": 23219764.0, + "5": 22714064.0, + "6": 23021164.0, + "7": 22770838.0, + "8": 22926708.0, + "9": 22842636.0, + "10": 22918932.0, + "11": 22500860.0, + "12": 22460278.0, + "13": 22917508.0, + "14": 22388992.0, + "15": 22821332.0, + "16": 22831360.0, + "17": 22818988.0, + "18": 22582260.0, + "19": 22617512.0, + "20": 22693482.0, + "21": 22739376.0, + "22": 22800208.0, + "23": 22540044.0, + "24": 22771516.0, + "25": 22819082.0, + "26": 22547594.0, + "27": 22468792.0, + "28": 22453650.0, + "29": 22529422.0, + "30": 22631024.0, + "31": 22955468.0, + "32": 22585716.0, + "33": 22557726.0, + "34": 22835712.0, + "35": 22787748.0, + "36": 22588464.0, + "37": 22498104.0, + "38": 22895980.0, + "39": 22801960.0, + "40": 22657588.0, + "41": 22659166.0, + "42": 22667978.0, + "43": 22975850.0, + "44": 22745892.0, + "45": 22675412.0, + "46": 22884788.0, + "47": 22633592.0, + "48": 22928708.0, + "49": 22727260.0, + "50": 22904912.0 } }, "mem-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 2158025216.0, - "2": 2416613888.0, - "3": 2416613888.0, - "4": 2416613888.0, - "5": 2416613888.0, - "6": 2416613888.0, - "7": 2416613888.0, - "8": 2416613888.0, - "9": 2416613888.0, - "10": 2416613888.0, - "11": 2416613888.0, - "12": 2416613888.0, - "13": 2416613888.0, - "14": 2416613888.0, - "15": 2416613888.0, - "16": 2416613888.0, - "17": 2416613888.0, - "18": 2416613888.0, - "19": 2416613888.0, - "20": 2416613888.0, - "21": 2416613888.0, - "22": 2416613888.0, - "23": 2416613888.0, - "24": 2416613888.0, - "25": 2416613888.0, - "26": 2416613888.0, - "27": 2416613888.0, - "28": 2416613888.0, - "29": 2416613888.0, - "30": 2416613888.0, - "31": 2416613888.0, - "32": 2416613888.0, - "33": 2416613888.0, - "34": 2416613888.0, - "35": 2416613888.0, - "36": 2416613888.0, - "37": 2416613888.0, - "38": 2416613888.0, - "39": 2416613888.0, - "40": 2416613888.0, - "41": 2416613888.0, - "42": 2416613888.0, - "43": 2416613888.0, - "44": 2416613888.0, - "45": 2416613888.0, - "46": 2416613888.0, - "47": 2416613888.0, - "48": 2416613888.0, - "49": 2416613888.0, - "50": 2416613888.0 + "2": 2415566336.0, + "3": 2415566336.0, + "4": 2415566336.0, + "5": 2415566336.0, + "6": 2415566336.0, + "7": 2415566336.0, + "8": 2415566336.0, + "9": 2415566336.0, + "10": 2415566336.0, + "11": 2415566336.0, + "12": 2415566336.0, + "13": 2415566336.0, + "14": 2415566336.0, + "15": 2415566336.0, + "16": 2415566336.0, + "17": 2415566336.0, + "18": 2415566336.0, + "19": 2415566336.0, + "20": 2415566336.0, + "21": 2415566336.0, + "22": 2415566336.0, + "23": 2415566336.0, + "24": 2415566336.0, + "25": 2415566336.0, + "26": 2415566336.0, + "27": 2415566336.0, + "28": 2415566336.0, + "29": 2415566336.0, + "30": 2415566336.0, + "31": 2415566336.0, + "32": 2415566336.0, + "33": 2415566336.0, + "34": 2415566336.0, + "35": 2415566336.0, + "36": 2415566336.0, + "37": 2415566336.0, + "38": 2415566336.0, + "39": 2415566336.0, + "40": 2415566336.0, + "41": 2415566336.0, + "42": 2415566336.0, + "43": 2415566336.0, + "44": 2415566336.0, + "45": 2415566336.0, + "46": 2415566336.0, + "47": 2415566336.0, + "48": 2415566336.0, + "49": 2415566336.0, + "50": 2415566336.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.44061, - "3": 0.15375, - "4": 0.14111, - "5": 0.14053, - "6": 0.14023, - "7": 0.14152, - "8": 0.14128, - "9": 0.1417, - "10": 0.14155, - "11": 0.14076, - "12": 0.1405, - "13": 0.14129, - "14": 0.14106, - "15": 0.14101, - "16": 0.14178, - "17": 0.14173, - "18": 0.14103, - "19": 0.14094, - "20": 0.14012, - "21": 0.14153, - "22": 0.14228, - "23": 0.14118, - "24": 0.14079, - "25": 0.14034, - "26": 0.14027, - "27": 0.13947, - "28": 0.13928, - "29": 0.1398, - "30": 0.14085, - "31": 0.14179, - "32": 0.13944, - "33": 0.14174, - "34": 0.1436, - "35": 0.13902, - "36": 0.13933, - "37": 0.13922, - "38": 0.13997, - "39": 0.13881, - "40": 0.13924, - "41": 0.1392, - "42": 0.14092, - "43": 0.14136, - "44": 0.14035, - "45": 0.13841, - "46": 0.1411, - "47": 0.13878, - "48": 0.14005, - "49": 0.13925, - "50": 0.13845 + "2": 7.55862, + "3": 0.15424, + "4": 0.14447, + "5": 0.14487, + "6": 0.14649, + "7": 0.14905, + "8": 0.14737, + "9": 0.14546, + "10": 0.14639, + "11": 0.14694, + "12": 0.14764, + "13": 0.14783, + "14": 0.15046, + "15": 0.14727, + "16": 0.14715, + "17": 0.14912, + "18": 0.15267, + "19": 0.14754, + "20": 0.14944, + "21": 0.14739, + "22": 0.14765, + "23": 0.14985, + "24": 0.14828, + "25": 0.14802, + "26": 0.15122, + "27": 0.14967, + "28": 0.1501, + "29": 0.15013, + "30": 0.15048, + "31": 0.15048, + "32": 0.15018, + "33": 0.14712, + "34": 0.15007, + "35": 0.14904, + "36": 0.14828, + "37": 0.14709, + "38": 0.14853, + "39": 0.14847, + "40": 0.14939, + "41": 0.14997, + "42": 0.15, + "43": 0.15082, + "44": 0.15106, + "45": 0.14817, + "46": 0.14957, + "47": 0.14882, + "48": 0.22849, + "49": 0.14824, + "50": 0.14598 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json index 31729dd5fe5..047db9b77c3 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_mla/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.94863, "2": 10.95748, - "3": 10.95458, - "4": 10.95314, - "5": 10.94301, - "6": 10.93709, - "7": 10.94818, - "8": 10.94698, + "3": 10.95457, + "4": 10.95313, + "5": 10.94299, + "6": 10.9371, + "7": 10.9482, + "8": 10.94697, "9": 10.94866, - "10": 10.95119, + "10": 10.95117, "11": 10.9406, - "12": 10.94105, - "13": 10.94375, + "12": 10.94108, + "13": 10.94377, "14": 10.94739, - "15": 10.9429, - "16": 10.93682, - "17": 10.94182, - "18": 10.93022, + "15": 10.94292, + "16": 10.93684, + "17": 10.94187, + "18": 10.93023, "19": 10.93614, - "20": 10.92135, - "21": 10.91434, - "22": 10.92114, - "23": 10.92039, - "24": 10.91062, - "25": 10.91171, - "26": 10.9101, - "27": 10.90559, + "20": 10.92134, + "21": 10.91432, + "22": 10.92113, + "23": 10.92038, + "24": 10.91059, + "25": 10.9117, + "26": 10.91012, + "27": 10.90561, "28": 10.87901, - "29": 10.87862, + "29": 10.87865, "30": 10.82431, "31": 10.7917, "32": 10.85763, - "33": 10.85278, - "34": 10.80465, + "33": 10.85279, + "34": 10.80469, "35": 10.81124, - "36": 10.79299, - "37": 10.82161, + "36": 10.79298, + "37": 10.82159, "38": 10.74654, - "39": 10.79066, - "40": 10.67639, - "41": 10.71189, - "42": 10.72663, - "43": 10.58635, - "44": 10.63487, - "45": 10.59555, - "46": 10.58202, - "47": 10.67878, - "48": 10.55683, + "39": 10.79069, + "40": 10.67643, + "41": 10.71186, + "42": 10.72666, + "43": 10.58633, + "44": 10.63486, + "45": 10.59559, + "46": 10.58203, + "47": 10.67874, + "48": 10.55684, "49": 10.43321, - "50": 10.57623 + "50": 10.57624 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 22792076.0, - "2": 22989660.0, - "3": 22661158.0, - "4": 23283080.0, - "5": 22778860.0, - "6": 23085232.0, - "7": 22834892.0, - "8": 22990502.0, - "9": 22906480.0, - "10": 22983488.0, - "11": 22563552.0, - "12": 22523694.0, - "13": 22980968.0, - "14": 22453154.0, - "15": 22885546.0, - "16": 22895028.0, - "17": 22882956.0, - "18": 22647168.0, - "19": 22682056.0, - "20": 22757444.0, - "21": 22803808.0, - "22": 22864026.0, - "23": 22603204.0, - "24": 22835232.0, - "25": 22883270.0, - "26": 22611998.0, - "27": 22532132.0, - "28": 22516960.0, - "29": 22593572.0, - "30": 22695024.0, - "31": 23019244.0, - "32": 22648204.0, - "33": 22623192.0, - "34": 22899922.0, - "35": 22852560.0, - "36": 22652964.0, - "37": 22559866.0, - "38": 22960222.0, - "39": 22864432.0, - "40": 22721420.0, - "41": 22722086.0, - "42": 22730128.0, - "43": 23040178.0, - "44": 22809816.0, - "45": 22738252.0, - "46": 22947510.0, - "47": 22697018.0, - "48": 22992168.0, - "49": 22790946.0, - "50": 22969044.0 + "1": 22792008.0, + "2": 22989688.0, + "3": 22661140.0, + "4": 23283044.0, + "5": 22778806.0, + "6": 23085212.0, + "7": 22834896.0, + "8": 22990508.0, + "9": 22906460.0, + "10": 22983366.0, + "11": 22563548.0, + "12": 22523680.0, + "13": 22980988.0, + "14": 22453176.0, + "15": 22885472.0, + "16": 22894992.0, + "17": 22882968.0, + "18": 22647152.0, + "19": 22682046.0, + "20": 22757300.0, + "21": 22803894.0, + "22": 22864052.0, + "23": 22603212.0, + "24": 22835274.0, + "25": 22883332.0, + "26": 22612002.0, + "27": 22532080.0, + "28": 22516992.0, + "29": 22593624.0, + "30": 22694994.0, + "31": 23019280.0, + "32": 22648240.0, + "33": 22623122.0, + "34": 22900010.0, + "35": 22852524.0, + "36": 22652920.0, + "37": 22559892.0, + "38": 22960296.0, + "39": 22864440.0, + "40": 22721274.0, + "41": 22721990.0, + "42": 22730036.0, + "43": 23040186.0, + "44": 22809892.0, + "45": 22738188.0, + "46": 22947462.0, + "47": 22696952.0, + "48": 22992052.0, + "49": 22791010.0, + "50": 22969004.0 } }, "mem-allocated-bytes": { @@ -175,7 +175,7 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1122385408.0, + "1": 1123433984.0, "2": 1245635072.0, "3": 1245635072.0, "4": 1245635072.0, @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.75152, - "3": 0.1678, - "4": 0.1543, - "5": 0.15772, - "6": 0.15798, - "7": 0.15886, - "8": 0.16038, - "9": 0.15983, - "10": 0.16009, - "11": 0.15881, - "12": 0.16004, - "13": 0.15648, - "14": 0.15396, - "15": 0.15394, - "16": 0.1544, - "17": 0.15329, - "18": 0.1539, - "19": 0.15442, - "20": 0.1521, - "21": 0.15368, - "22": 0.15287, - "23": 0.15397, - "24": 0.15553, - "25": 0.15617, - "26": 0.15925, - "27": 0.145, - "28": 0.14456, - "29": 0.14869, - "30": 0.15407, - "31": 0.15556, - "32": 0.15651, - "33": 0.15726, - "34": 0.1574, - "35": 0.15981, - "36": 0.16037, - "37": 0.16044, - "38": 0.15744, - "39": 0.15875, - "40": 0.15964, - "41": 0.15984, - "42": 0.1605, - "43": 0.15901, - "44": 0.16037, - "45": 0.1616, - "46": 0.16046, - "47": 0.16125, - "48": 0.16168, - "49": 0.1611, - "50": 0.15977 + "2": 5.70034, + "3": 0.16043, + "4": 0.14515, + "5": 0.14665, + "6": 0.14743, + "7": 0.14773, + "8": 0.14897, + "9": 0.14739, + "10": 0.15002, + "11": 0.14783, + "12": 0.14932, + "13": 0.14731, + "14": 0.14893, + "15": 0.14704, + "16": 0.14861, + "17": 0.14821, + "18": 0.14808, + "19": 0.14853, + "20": 0.1507, + "21": 0.14953, + "22": 0.15066, + "23": 0.14924, + "24": 0.15017, + "25": 0.15085, + "26": 0.18249, + "27": 0.15492, + "28": 0.15306, + "29": 0.14968, + "30": 0.15095, + "31": 0.15033, + "32": 0.15059, + "33": 0.1505, + "34": 0.1502, + "35": 0.14853, + "36": 0.15099, + "37": 0.1478, + "38": 0.1494, + "39": 0.14947, + "40": 0.15074, + "41": 0.14956, + "42": 0.1512, + "43": 0.14912, + "44": 0.15691, + "45": 0.14931, + "46": 0.15116, + "47": 0.14871, + "48": 0.14949, + "49": 0.15211, + "50": 0.15069 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json index caa1e54ee64..b7ced9b3ff9 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.86897, "2": 10.88544, - "3": 10.86473, - "4": 10.86826, - "5": 10.87436, + "3": 10.86477, + "4": 10.86825, + "5": 10.87437, "6": 10.89005, - "7": 10.87769, + "7": 10.87768, "8": 10.86364, - "9": 10.88282, - "10": 10.84687, - "11": 10.87102, - "12": 10.87345, + "9": 10.8828, + "10": 10.84683, + "11": 10.87103, + "12": 10.87356, "13": 10.8814, - "14": 10.8877, - "15": 10.83869, + "14": 10.88767, + "15": 10.83862, "16": 10.8239, - "17": 10.80197, - "18": 10.81094, - "19": 10.82192, - "20": 10.71791, - "21": 10.68914, - "22": 10.57271, - "23": 10.7081, + "17": 10.80195, + "18": 10.81091, + "19": 10.82189, + "20": 10.71788, + "21": 10.6892, + "22": 10.57272, + "23": 10.70811, "24": 10.59543, - "25": 10.55292, - "26": 10.61257, - "27": 10.60051, - "28": 10.56173, - "29": 10.58089, - "30": 10.35595, - "31": 10.1182, - "32": 10.44815, - "33": 10.4542, - "34": 10.21553, - "35": 10.26124, - "36": 10.20776, - "37": 10.33673, + "25": 10.55295, + "26": 10.61259, + "27": 10.60052, + "28": 10.56168, + "29": 10.58085, + "30": 10.35597, + "31": 10.11819, + "32": 10.44814, + "33": 10.45418, + "34": 10.21548, + "35": 10.26125, + "36": 10.20779, + "37": 10.33671, "38": 10.17741, - "39": 10.39297, - "40": 10.06349, - "41": 10.13887, - "42": 10.2056, - "43": 9.82809, - "44": 9.94547, - "45": 9.82561, - "46": 9.80186, - "47": 10.14049, - "48": 9.84276, - "49": 9.52016, - "50": 9.88454, - "51": 9.84743, - "52": 9.74209, - "53": 10.05697, - "54": 9.9505, - "55": 9.88145, - "56": 9.61274, + "39": 10.39294, + "40": 10.06346, + "41": 10.13891, + "42": 10.20558, + "43": 9.82807, + "44": 9.94542, + "45": 9.82557, + "46": 9.80182, + "47": 10.14053, + "48": 9.8428, + "49": 9.52012, + "50": 9.88456, + "51": 9.84742, + "52": 9.74207, + "53": 10.05695, + "54": 9.95053, + "55": 9.8814, + "56": 9.61275, "57": 9.4687, - "58": 9.82193, + "58": 9.82189, "59": 9.57642, - "60": 9.49762, - "61": 9.69189, + "60": 9.4976, + "61": 9.69188, "62": 9.9867, - "63": 9.37512, - "64": 9.76679, - "65": 8.94648, - "66": 9.7023, - "67": 9.36326, - "68": 9.7831, - "69": 9.7986, - "70": 9.7317, - "71": 9.62571, - "72": 9.58488, - "73": 9.48967, - "74": 8.9286, - "75": 9.40862, - "76": 9.07925, - "77": 10.0594, - "78": 9.72288, - "79": 9.37784, - "80": 9.40429, - "81": 9.48309, - "82": 9.7004, - "83": 9.31595, - "84": 9.41838, - "85": 9.61685, - "86": 9.07533, - "87": 9.59616, - "88": 9.75215, - "89": 9.60184, - "90": 9.82281, - "91": 9.34037, + "63": 9.37509, + "64": 9.76682, + "65": 8.94649, + "66": 9.70228, + "67": 9.36325, + "68": 9.78311, + "69": 9.79862, + "70": 9.73172, + "71": 9.62574, + "72": 9.58486, + "73": 9.48964, + "74": 8.92859, + "75": 9.40865, + "76": 9.07927, + "77": 10.05938, + "78": 9.72283, + "79": 9.37782, + "80": 9.40428, + "81": 9.48312, + "82": 9.70038, + "83": 9.3159, + "84": 9.41834, + "85": 9.61684, + "86": 9.07537, + "87": 9.59617, + "88": 9.75218, + "89": 9.60187, + "90": 9.82286, + "91": 9.34039, "92": 9.35854, - "93": 9.08805, - "94": 8.83037, + "93": 9.08806, + "94": 8.83038, "95": 9.5266, - "96": 9.53049, - "97": 9.30389, - "98": 9.67196, - "99": 8.89637, - "100": 9.40644 + "96": 9.53051, + "97": 9.30391, + "98": 9.67199, + "99": 8.8964, + "100": 9.40645 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1621.0, - "2": 1657.0, - "3": 1580.0, - "4": 1839.0, - "5": 1862.0, - "6": 1724.0, - "7": 1714.0, - "8": 1670.0, - "9": 1762.0, - "10": 1358.0, - "11": 1734.0, - "12": 1682.0, - "13": 1761.0, - "14": 1731.0, - "15": 1788.0, - "16": 1801.0, - "17": 1866.0, - "18": 1636.0, - "19": 1709.0, - "20": 1607.0, - "21": 1821.0, - "22": 1666.0, - "23": 1991.0, - "24": 1585.0, - "25": 1587.0, - "26": 1631.0, - "27": 1714.0, - "28": 1966.0, - "29": 1997.0, - "30": 1851.0, - "31": 1581.0, - "32": 1864.0, - "33": 2107.0, - "34": 1846.0, - "35": 1982.0, - "36": 1904.0, - "37": 2373.0, - "38": 2172.0, - "39": 2343.0, - "40": 2149.0, - "41": 2331.0, - "42": 2199.0, - "43": 1914.0, - "44": 2065.0, - "45": 2081.0, - "46": 2352.0, - "47": 2497.0, - "48": 2303.0, - "49": 2346.0, - "50": 2411.0, - "51": 2491.0, - "52": 2552.0, - "53": 2980.0, - "54": 2680.0, - "55": 2274.0, - "56": 2734.0, - "57": 2319.0, - "58": 2907.0, - "59": 2886.0, - "60": 2566.0, - "61": 2855.0, - "62": 2704.0, - "63": 2370.0, - "64": 2998.0, - "65": 2563.0, - "66": 2868.0, - "67": 2762.0, - "68": 2739.0, - "69": 2730.0, - "70": 3156.0, - "71": 2803.0, - "72": 2506.0, - "73": 2896.0, - "74": 1937.0, - "75": 2450.0, - "76": 2794.0, - "77": 3047.0, - "78": 3104.0, - "79": 3069.0, - "80": 3286.0, - "81": 3543.0, - "82": 3192.0, - "83": 2614.0, - "84": 3273.0, - "85": 3111.0, - "86": 2680.0, - "87": 3654.0, - "88": 3117.0, - "89": 3351.0, - "90": 3086.0, - "91": 2721.0, - "92": 3045.0, - "93": 2672.0, - "94": 3326.0, - "95": 3125.0, - "96": 3309.0, - "97": 3208.0, - "98": 3572.0, - "99": 2980.0, - "100": 3355.0 + "1": 1592.0, + "2": 1638.0, + "3": 1658.0, + "4": 1737.0, + "5": 1836.0, + "6": 1740.0, + "7": 1780.0, + "8": 1628.0, + "9": 1785.0, + "10": 1372.0, + "11": 1738.0, + "12": 1745.0, + "13": 1808.0, + "14": 1709.0, + "15": 1798.0, + "16": 1765.0, + "17": 1875.0, + "18": 1580.0, + "19": 1744.0, + "20": 1573.0, + "21": 1835.0, + "22": 1649.0, + "23": 1993.0, + "24": 1600.0, + "25": 1527.0, + "26": 1592.0, + "27": 1818.0, + "28": 1889.0, + "29": 1909.0, + "30": 1889.0, + "31": 1635.0, + "32": 1847.0, + "33": 2066.0, + "34": 1761.0, + "35": 1861.0, + "36": 1916.0, + "37": 2298.0, + "38": 2162.0, + "39": 2226.0, + "40": 2127.0, + "41": 2226.0, + "42": 2204.0, + "43": 1844.0, + "44": 2106.0, + "45": 2056.0, + "46": 2238.0, + "47": 2476.0, + "48": 2289.0, + "49": 2209.0, + "50": 2363.0, + "51": 2520.0, + "52": 2558.0, + "53": 2942.0, + "54": 2681.0, + "55": 2327.0, + "56": 2615.0, + "57": 2270.0, + "58": 2908.0, + "59": 2839.0, + "60": 2480.0, + "61": 2826.0, + "62": 2734.0, + "63": 2456.0, + "64": 2959.0, + "65": 2545.0, + "66": 2920.0, + "67": 2728.0, + "68": 2711.0, + "69": 2809.0, + "70": 3161.0, + "71": 2910.0, + "72": 2486.0, + "73": 2907.0, + "74": 1911.0, + "75": 2411.0, + "76": 2887.0, + "77": 3233.0, + "78": 3181.0, + "79": 3168.0, + "80": 3234.0, + "81": 3589.0, + "82": 3171.0, + "83": 2616.0, + "84": 3228.0, + "85": 3154.0, + "86": 2732.0, + "87": 3792.0, + "88": 3096.0, + "89": 3401.0, + "90": 3040.0, + "91": 2717.0, + "92": 3139.0, + "93": 2742.0, + "94": 3101.0, + "95": 3195.0, + "96": 3298.0, + "97": 3343.0, + "98": 3542.0, + "99": 3040.0, + "100": 3293.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 514359808.0, - "2": 514359808.0, - "3": 514359808.0, - "4": 514359808.0, - "5": 514359808.0, - "6": 514359808.0, - "7": 514359808.0, - "8": 514359808.0, - "9": 514359808.0, - "10": 514359808.0, - "11": 514359808.0, - "12": 514359808.0, - "13": 514359808.0, - "14": 514359808.0, - "15": 514359808.0, - "16": 514359808.0, - "17": 514359808.0, - "18": 514359808.0, - "19": 514359808.0, - "20": 514359808.0, - "21": 514359808.0, - "22": 514359808.0, - "23": 514359808.0, - "24": 514359808.0, - "25": 514359808.0, - "26": 514359808.0, - "27": 514359808.0, - "28": 514359808.0, - "29": 514359808.0, - "30": 514359808.0, - "31": 514359808.0, - "32": 514359808.0, - "33": 514359808.0, - "34": 514359808.0, - "35": 514359808.0, - "36": 514359808.0, - "37": 514359808.0, - "38": 514359808.0, - "39": 514359808.0, - "40": 514359808.0, - "41": 514359808.0, - "42": 514359808.0, - "43": 514359808.0, - "44": 514359808.0, - "45": 514359808.0, - "46": 514359808.0, - "47": 514359808.0, - "48": 514359808.0, - "49": 514359808.0, - "50": 514359808.0, - "51": 514359808.0, - "52": 514359808.0, - "53": 514359808.0, - "54": 514359808.0, - "55": 514359808.0, - "56": 514359808.0, - "57": 514359808.0, - "58": 514359808.0, - "59": 514359808.0, - "60": 514359808.0, - "61": 514359808.0, - "62": 514359808.0, - "63": 514359808.0, - "64": 514359808.0, - "65": 514359808.0, - "66": 514359808.0, - "67": 514359808.0, - "68": 514359808.0, - "69": 514359808.0, - "70": 514359808.0, - "71": 514359808.0, - "72": 514359808.0, - "73": 514359808.0, - "74": 514359808.0, - "75": 514359808.0, - "76": 514359808.0, - "77": 514359808.0, - "78": 514359808.0, - "79": 514359808.0, - "80": 514359808.0, - "81": 514359808.0, - "82": 514359808.0, - "83": 514359808.0, - "84": 514359808.0, - "85": 514359808.0, - "86": 514359808.0, - "87": 514359808.0, - "88": 514359808.0, - "89": 514359808.0, - "90": 514359808.0, - "91": 514359808.0, - "92": 514359808.0, - "93": 514359808.0, - "94": 514359808.0, - "95": 514359808.0, - "96": 514359808.0, - "97": 514359808.0, - "98": 514359808.0, - "99": 514359808.0, - "100": 514359808.0 + "1": 514884096.0, + "2": 514884096.0, + "3": 514884096.0, + "4": 514884096.0, + "5": 514884096.0, + "6": 514884096.0, + "7": 514884096.0, + "8": 514884096.0, + "9": 514884096.0, + "10": 514884096.0, + "11": 514884096.0, + "12": 514884096.0, + "13": 514884096.0, + "14": 514884096.0, + "15": 514884096.0, + "16": 514884096.0, + "17": 514884096.0, + "18": 514884096.0, + "19": 514884096.0, + "20": 514884096.0, + "21": 514884096.0, + "22": 514884096.0, + "23": 514884096.0, + "24": 514884096.0, + "25": 514884096.0, + "26": 514884096.0, + "27": 514884096.0, + "28": 514884096.0, + "29": 514884096.0, + "30": 514884096.0, + "31": 514884096.0, + "32": 514884096.0, + "33": 514884096.0, + "34": 514884096.0, + "35": 514884096.0, + "36": 514884096.0, + "37": 514884096.0, + "38": 514884096.0, + "39": 514884096.0, + "40": 514884096.0, + "41": 514884096.0, + "42": 514884096.0, + "43": 514884096.0, + "44": 514884096.0, + "45": 514884096.0, + "46": 514884096.0, + "47": 514884096.0, + "48": 514884096.0, + "49": 514884096.0, + "50": 514884096.0, + "51": 514884096.0, + "52": 514884096.0, + "53": 514884096.0, + "54": 514884096.0, + "55": 514884096.0, + "56": 514884096.0, + "57": 514884096.0, + "58": 514884096.0, + "59": 514884096.0, + "60": 514884096.0, + "61": 514884096.0, + "62": 514884096.0, + "63": 514884096.0, + "64": 514884096.0, + "65": 514884096.0, + "66": 514884096.0, + "67": 514884096.0, + "68": 514884096.0, + "69": 514884096.0, + "70": 514884096.0, + "71": 514884096.0, + "72": 514884096.0, + "73": 514884096.0, + "74": 514884096.0, + "75": 514884096.0, + "76": 514884096.0, + "77": 514884096.0, + "78": 514884096.0, + "79": 514884096.0, + "80": 514884096.0, + "81": 514884096.0, + "82": 514884096.0, + "83": 514884096.0, + "84": 514884096.0, + "85": 514884096.0, + "86": 514884096.0, + "87": 514884096.0, + "88": 514884096.0, + "89": 514884096.0, + "90": 514884096.0, + "91": 514884096.0, + "92": 514884096.0, + "93": 514884096.0, + "94": 514884096.0, + "95": 514884096.0, + "96": 514884096.0, + "97": 514884096.0, + "98": 514884096.0, + "99": 514884096.0, + "100": 514884096.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1258060288.0, - "2": 1437084160.0, - "3": 1437084160.0, - "4": 1437084160.0, - "5": 1437084160.0, - "6": 1437084160.0, - "7": 1437084160.0, - "8": 1437084160.0, - "9": 1437084160.0, - "10": 1437084160.0, - "11": 1437084160.0, - "12": 1437084160.0, - "13": 1437084160.0, - "14": 1437084160.0, - "15": 1437084160.0, - "16": 1437084160.0, - "17": 1437084160.0, - "18": 1437084160.0, - "19": 1437084160.0, - "20": 1437084160.0, - "21": 1437084160.0, - "22": 1437084160.0, - "23": 1437084160.0, - "24": 1437084160.0, - "25": 1437084160.0, - "26": 1437084160.0, - "27": 1437084160.0, - "28": 1437084160.0, - "29": 1437084160.0, - "30": 1437084160.0, - "31": 1437084160.0, - "32": 1437084160.0, - "33": 1437084160.0, - "34": 1437084160.0, - "35": 1437084160.0, - "36": 1437084160.0, - "37": 1437084160.0, - "38": 1437084160.0, - "39": 1437084160.0, - "40": 1437084160.0, - "41": 1437084160.0, - "42": 1437084160.0, - "43": 1437084160.0, - "44": 1437084160.0, - "45": 1437084160.0, - "46": 1437084160.0, - "47": 1437084160.0, - "48": 1437084160.0, - "49": 1437084160.0, - "50": 1437084160.0, - "51": 1437084160.0, - "52": 1437084160.0, - "53": 1437084160.0, - "54": 1437084160.0, - "55": 1437084160.0, - "56": 1437084160.0, - "57": 1437084160.0, - "58": 1437084160.0, - "59": 1437084160.0, - "60": 1437084160.0, - "61": 1437084160.0, - "62": 1437084160.0, - "63": 1437084160.0, - "64": 1437084160.0, - "65": 1437084160.0, - "66": 1437084160.0, - "67": 1437084160.0, - "68": 1437084160.0, - "69": 1437084160.0, - "70": 1437084160.0, - "71": 1437084160.0, - "72": 1437084160.0, - "73": 1437084160.0, - "74": 1437084160.0, - "75": 1437084160.0, - "76": 1437084160.0, - "77": 1437084160.0, - "78": 1437084160.0, - "79": 1437084160.0, - "80": 1437084160.0, - "81": 1437084160.0, - "82": 1437084160.0, - "83": 1437084160.0, - "84": 1437084160.0, - "85": 1437084160.0, - "86": 1437084160.0, - "87": 1437084160.0, - "88": 1437084160.0, - "89": 1437084160.0, - "90": 1437084160.0, - "91": 1437084160.0, - "92": 1437084160.0, - "93": 1437084160.0, - "94": 1437084160.0, - "95": 1437084160.0, - "96": 1437084160.0, - "97": 1437084160.0, - "98": 1437084160.0, - "99": 1437084160.0, - "100": 1437084160.0 + "1": 1259108864.0, + "2": 1438394880.0, + "3": 1438394880.0, + "4": 1438394880.0, + "5": 1438394880.0, + "6": 1438394880.0, + "7": 1438394880.0, + "8": 1438394880.0, + "9": 1438394880.0, + "10": 1438394880.0, + "11": 1438394880.0, + "12": 1438394880.0, + "13": 1438394880.0, + "14": 1438394880.0, + "15": 1438394880.0, + "16": 1438394880.0, + "17": 1438394880.0, + "18": 1438394880.0, + "19": 1438394880.0, + "20": 1438394880.0, + "21": 1438394880.0, + "22": 1438394880.0, + "23": 1438394880.0, + "24": 1438394880.0, + "25": 1438394880.0, + "26": 1438394880.0, + "27": 1438394880.0, + "28": 1438394880.0, + "29": 1438394880.0, + "30": 1438394880.0, + "31": 1438394880.0, + "32": 1438394880.0, + "33": 1438394880.0, + "34": 1438394880.0, + "35": 1438394880.0, + "36": 1438394880.0, + "37": 1438394880.0, + "38": 1438394880.0, + "39": 1438394880.0, + "40": 1438394880.0, + "41": 1438394880.0, + "42": 1438394880.0, + "43": 1438394880.0, + "44": 1438394880.0, + "45": 1438394880.0, + "46": 1438394880.0, + "47": 1438394880.0, + "48": 1438394880.0, + "49": 1438394880.0, + "50": 1438394880.0, + "51": 1438394880.0, + "52": 1438394880.0, + "53": 1438394880.0, + "54": 1438394880.0, + "55": 1438394880.0, + "56": 1438394880.0, + "57": 1438394880.0, + "58": 1438394880.0, + "59": 1438394880.0, + "60": 1438394880.0, + "61": 1438394880.0, + "62": 1438394880.0, + "63": 1438394880.0, + "64": 1438394880.0, + "65": 1438394880.0, + "66": 1438394880.0, + "67": 1438394880.0, + "68": 1438394880.0, + "69": 1438394880.0, + "70": 1438394880.0, + "71": 1438394880.0, + "72": 1438394880.0, + "73": 1438394880.0, + "74": 1438394880.0, + "75": 1438394880.0, + "76": 1438394880.0, + "77": 1438394880.0, + "78": 1438394880.0, + "79": 1438394880.0, + "80": 1438394880.0, + "81": 1438394880.0, + "82": 1438394880.0, + "83": 1438394880.0, + "84": 1438394880.0, + "85": 1438394880.0, + "86": 1438394880.0, + "87": 1438394880.0, + "88": 1438394880.0, + "89": 1438394880.0, + "90": 1438394880.0, + "91": 1438394880.0, + "92": 1438394880.0, + "93": 1438394880.0, + "94": 1438394880.0, + "95": 1438394880.0, + "96": 1438394880.0, + "97": 1438394880.0, + "98": 1438394880.0, + "99": 1438394880.0, + "100": 1438394880.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.48366, - "3": 0.20961, - "4": 0.19355, - "5": 0.19146, - "6": 0.19108, - "7": 0.19236, - "8": 0.19259, - "9": 0.19267, - "10": 0.19436, - "11": 0.19257, - "12": 0.19432, - "13": 0.19332, - "14": 0.19442, - "15": 0.19393, - "16": 0.19417, - "17": 0.19555, - "18": 0.19451, - "19": 0.19452, - "20": 0.19555, - "21": 0.19375, - "22": 0.19402, - "23": 0.19539, - "24": 0.19475, - "25": 0.19576, - "26": 0.19424, - "27": 0.19514, - "28": 0.19519, - "29": 0.19578, - "30": 0.19503, - "31": 0.19394, - "32": 0.19582, - "33": 0.19444, - "34": 0.19405, - "35": 0.19498, - "36": 0.19463, - "37": 0.19572, - "38": 0.19362, - "39": 0.19492, - "40": 0.19487, - "41": 0.19497, - "42": 0.19617, - "43": 0.19571, - "44": 0.19661, - "45": 0.19634, - "46": 0.19537, - "47": 0.19646, - "48": 0.19658, - "49": 0.19727, - "50": 0.19567, - "51": 0.21203, - "52": 0.19551, - "53": 0.19415, - "54": 0.19434, - "55": 0.19584, - "56": 0.19437, - "57": 0.19536, - "58": 0.20364, - "59": 0.20029, - "60": 0.1929, - "61": 0.19274, - "62": 0.19364, - "63": 0.19667, - "64": 0.19406, - "65": 0.19781, - "66": 0.19435, - "67": 0.19308, - "68": 0.1932, - "69": 0.19478, - "70": 0.19591, - "71": 0.19922, - "72": 0.19646, - "73": 0.19646, - "74": 0.19739, - "75": 0.19817, - "76": 0.20056, - "77": 0.19655, - "78": 0.19459, - "79": 0.19478, - "80": 0.19638, - "81": 0.19329, - "82": 0.19254, - "83": 0.19379, - "84": 0.19435, - "85": 0.19517, - "86": 0.19446, - "87": 0.19464, - "88": 0.19501, - "89": 0.19544, - "90": 0.19268, - "91": 0.19425, - "92": 0.1933, - "93": 0.19366, - "94": 0.19328, - "95": 0.19408, - "96": 0.19474, - "97": 0.19719, - "98": 0.19535, - "99": 0.19604, - "100": 0.19554 + "2": 7.02396, + "3": 1.72471, + "4": 4.59112, + "5": 0.64605, + "6": 0.19769, + "7": 0.19652, + "8": 0.19968, + "9": 0.19671, + "10": 0.19901, + "11": 0.19969, + "12": 0.1998, + "13": 0.19841, + "14": 0.19825, + "15": 0.19777, + "16": 0.19822, + "17": 0.19797, + "18": 0.1996, + "19": 0.19784, + "20": 0.19764, + "21": 0.19694, + "22": 0.19945, + "23": 0.2371, + "24": 0.29995, + "25": 0.23957, + "26": 0.29727, + "27": 0.19838, + "28": 0.19795, + "29": 0.28338, + "30": 0.22699, + "31": 0.24258, + "32": 0.29474, + "33": 0.25539, + "34": 0.19968, + "35": 0.19808, + "36": 0.19822, + "37": 0.19897, + "38": 0.19895, + "39": 0.20247, + "40": 0.20143, + "41": 0.2013, + "42": 0.20244, + "43": 0.23533, + "44": 0.20223, + "45": 0.20033, + "46": 0.204, + "47": 0.20426, + "48": 0.23129, + "49": 0.22153, + "50": 0.2051, + "51": 0.22226, + "52": 0.199, + "53": 0.22428, + "54": 0.19788, + "55": 0.20032, + "56": 0.20098, + "57": 0.19989, + "58": 0.19967, + "59": 0.19858, + "60": 0.22128, + "61": 0.19988, + "62": 0.19702, + "63": 0.19878, + "64": 0.20043, + "65": 0.19873, + "66": 0.19929, + "67": 0.19902, + "68": 0.19908, + "69": 0.20073, + "70": 0.19874, + "71": 0.20014, + "72": 0.19866, + "73": 0.19911, + "74": 0.19875, + "75": 0.19788, + "76": 0.19803, + "77": 0.19842, + "78": 0.19894, + "79": 0.19804, + "80": 0.19881, + "81": 0.19809, + "82": 0.19856, + "83": 0.19924, + "84": 0.19721, + "85": 0.19922, + "86": 0.19726, + "87": 0.19809, + "88": 0.19884, + "89": 0.20014, + "90": 0.19844, + "91": 0.19909, + "92": 0.19796, + "93": 0.20176, + "94": 0.19878, + "95": 0.19855, + "96": 0.1997, + "97": 0.19858, + "98": 0.19901, + "99": 0.19906, + "100": 0.1993 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json index 27eb21de0f0..09e346c2bca 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.86897, "2": 10.88544, - "3": 10.86473, - "4": 10.86826, - "5": 10.87436, + "3": 10.86477, + "4": 10.86825, + "5": 10.87437, "6": 10.89005, - "7": 10.87769, + "7": 10.87768, "8": 10.86364, - "9": 10.88282, - "10": 10.84687, - "11": 10.87102, - "12": 10.87345, + "9": 10.8828, + "10": 10.84683, + "11": 10.87103, + "12": 10.87356, "13": 10.8814, - "14": 10.8877, - "15": 10.83869, + "14": 10.88767, + "15": 10.83862, "16": 10.8239, - "17": 10.80197, - "18": 10.81094, - "19": 10.82192, - "20": 10.71791, - "21": 10.68914, - "22": 10.57271, - "23": 10.7081, + "17": 10.80195, + "18": 10.81091, + "19": 10.82189, + "20": 10.71788, + "21": 10.6892, + "22": 10.57272, + "23": 10.70811, "24": 10.59543, - "25": 10.55292, - "26": 10.61257, - "27": 10.60051, - "28": 10.56173, - "29": 10.58089, - "30": 10.35595, - "31": 10.1182, - "32": 10.44815, - "33": 10.4542, - "34": 10.21553, - "35": 10.26124, - "36": 10.20776, - "37": 10.33673, + "25": 10.55295, + "26": 10.61259, + "27": 10.60052, + "28": 10.56168, + "29": 10.58085, + "30": 10.35597, + "31": 10.11819, + "32": 10.44814, + "33": 10.45418, + "34": 10.21548, + "35": 10.26125, + "36": 10.20779, + "37": 10.33671, "38": 10.17741, - "39": 10.39297, - "40": 10.06349, - "41": 10.13887, - "42": 10.2056, - "43": 9.82809, - "44": 9.94547, - "45": 9.82561, - "46": 9.80186, - "47": 10.14049, - "48": 9.84276, - "49": 9.52016, - "50": 9.88454, - "51": 9.84743, - "52": 9.74209, - "53": 10.05697, - "54": 9.9505, - "55": 9.88145, - "56": 9.61274, + "39": 10.39294, + "40": 10.06346, + "41": 10.13891, + "42": 10.20558, + "43": 9.82807, + "44": 9.94542, + "45": 9.82557, + "46": 9.80182, + "47": 10.14053, + "48": 9.8428, + "49": 9.52012, + "50": 9.88456, + "51": 9.84742, + "52": 9.74207, + "53": 10.05695, + "54": 9.95053, + "55": 9.8814, + "56": 9.61275, "57": 9.4687, - "58": 9.82193, + "58": 9.82189, "59": 9.57642, - "60": 9.49762, - "61": 9.69189, + "60": 9.4976, + "61": 9.69188, "62": 9.9867, - "63": 9.37512, - "64": 9.76679, - "65": 8.94648, - "66": 9.7023, - "67": 9.36326, - "68": 9.7831, - "69": 9.7986, - "70": 9.7317, - "71": 9.62571, - "72": 9.58488, - "73": 9.48967, - "74": 8.9286, - "75": 9.40862, - "76": 9.07925, - "77": 10.0594, - "78": 9.72288, - "79": 9.37784, - "80": 9.40429, - "81": 9.48309, - "82": 9.7004, - "83": 9.31595, - "84": 9.41838, - "85": 9.61685, - "86": 9.07533, - "87": 9.59616, - "88": 9.75215, - "89": 9.60184, - "90": 9.82281, - "91": 9.34037, + "63": 9.37509, + "64": 9.76682, + "65": 8.94649, + "66": 9.70228, + "67": 9.36325, + "68": 9.78311, + "69": 9.79862, + "70": 9.73172, + "71": 9.62574, + "72": 9.58486, + "73": 9.48964, + "74": 8.92859, + "75": 9.40865, + "76": 9.07927, + "77": 10.05938, + "78": 9.72283, + "79": 9.37782, + "80": 9.40428, + "81": 9.48312, + "82": 9.70038, + "83": 9.3159, + "84": 9.41834, + "85": 9.61684, + "86": 9.07537, + "87": 9.59617, + "88": 9.75218, + "89": 9.60187, + "90": 9.82286, + "91": 9.34039, "92": 9.35854, - "93": 9.08805, - "94": 8.83037, + "93": 9.08806, + "94": 8.83038, "95": 9.5266, - "96": 9.53049, - "97": 9.30389, - "98": 9.67196, - "99": 8.89637, - "100": 9.40644 + "96": 9.53051, + "97": 9.30391, + "98": 9.67199, + "99": 8.8964, + "100": 9.40645 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1621.0, - "2": 1657.0, - "3": 1580.0, - "4": 1839.0, - "5": 1862.0, - "6": 1724.0, - "7": 1714.0, - "8": 1670.0, - "9": 1762.0, - "10": 1358.0, - "11": 1734.0, - "12": 1682.0, - "13": 1761.0, - "14": 1731.0, - "15": 1788.0, - "16": 1801.0, - "17": 1866.0, - "18": 1636.0, - "19": 1709.0, - "20": 1607.0, - "21": 1821.0, - "22": 1666.0, - "23": 1991.0, - "24": 1585.0, - "25": 1587.0, - "26": 1631.0, - "27": 1714.0, - "28": 1966.0, - "29": 1997.0, - "30": 1851.0, - "31": 1581.0, - "32": 1864.0, - "33": 2107.0, - "34": 1846.0, - "35": 1982.0, - "36": 1904.0, - "37": 2373.0, - "38": 2172.0, - "39": 2343.0, - "40": 2149.0, - "41": 2331.0, - "42": 2199.0, - "43": 1914.0, - "44": 2065.0, - "45": 2081.0, - "46": 2352.0, - "47": 2497.0, - "48": 2303.0, - "49": 2346.0, - "50": 2411.0, - "51": 2491.0, - "52": 2552.0, - "53": 2980.0, - "54": 2680.0, - "55": 2274.0, - "56": 2734.0, - "57": 2319.0, - "58": 2907.0, - "59": 2886.0, - "60": 2566.0, - "61": 2855.0, - "62": 2704.0, - "63": 2370.0, - "64": 2998.0, - "65": 2563.0, - "66": 2868.0, - "67": 2762.0, - "68": 2739.0, - "69": 2730.0, - "70": 3156.0, - "71": 2803.0, - "72": 2506.0, - "73": 2896.0, - "74": 1937.0, - "75": 2450.0, - "76": 2794.0, - "77": 3047.0, - "78": 3104.0, - "79": 3069.0, - "80": 3286.0, - "81": 3543.0, - "82": 3192.0, - "83": 2614.0, - "84": 3273.0, - "85": 3111.0, - "86": 2680.0, - "87": 3654.0, - "88": 3117.0, - "89": 3351.0, - "90": 3086.0, - "91": 2721.0, - "92": 3045.0, - "93": 2672.0, - "94": 3326.0, - "95": 3125.0, - "96": 3309.0, - "97": 3208.0, - "98": 3572.0, - "99": 2980.0, - "100": 3355.0 + "1": 1592.0, + "2": 1638.0, + "3": 1658.0, + "4": 1737.0, + "5": 1836.0, + "6": 1740.0, + "7": 1780.0, + "8": 1628.0, + "9": 1785.0, + "10": 1372.0, + "11": 1738.0, + "12": 1745.0, + "13": 1808.0, + "14": 1709.0, + "15": 1798.0, + "16": 1765.0, + "17": 1875.0, + "18": 1580.0, + "19": 1744.0, + "20": 1573.0, + "21": 1835.0, + "22": 1649.0, + "23": 1993.0, + "24": 1600.0, + "25": 1527.0, + "26": 1592.0, + "27": 1818.0, + "28": 1889.0, + "29": 1909.0, + "30": 1889.0, + "31": 1635.0, + "32": 1847.0, + "33": 2066.0, + "34": 1761.0, + "35": 1861.0, + "36": 1916.0, + "37": 2298.0, + "38": 2162.0, + "39": 2226.0, + "40": 2127.0, + "41": 2226.0, + "42": 2204.0, + "43": 1844.0, + "44": 2106.0, + "45": 2056.0, + "46": 2238.0, + "47": 2476.0, + "48": 2289.0, + "49": 2209.0, + "50": 2363.0, + "51": 2520.0, + "52": 2558.0, + "53": 2942.0, + "54": 2681.0, + "55": 2327.0, + "56": 2615.0, + "57": 2270.0, + "58": 2908.0, + "59": 2839.0, + "60": 2480.0, + "61": 2826.0, + "62": 2734.0, + "63": 2456.0, + "64": 2959.0, + "65": 2545.0, + "66": 2920.0, + "67": 2728.0, + "68": 2711.0, + "69": 2809.0, + "70": 3161.0, + "71": 2910.0, + "72": 2486.0, + "73": 2907.0, + "74": 1911.0, + "75": 2411.0, + "76": 2887.0, + "77": 3233.0, + "78": 3181.0, + "79": 3168.0, + "80": 3234.0, + "81": 3589.0, + "82": 3171.0, + "83": 2616.0, + "84": 3228.0, + "85": 3154.0, + "86": 2732.0, + "87": 3792.0, + "88": 3096.0, + "89": 3401.0, + "90": 3040.0, + "91": 2717.0, + "92": 3139.0, + "93": 2742.0, + "94": 3101.0, + "95": 3195.0, + "96": 3298.0, + "97": 3343.0, + "98": 3542.0, + "99": 3040.0, + "100": 3293.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 514359808.0, - "2": 514359808.0, - "3": 514359808.0, - "4": 514359808.0, - "5": 514359808.0, - "6": 514359808.0, - "7": 514359808.0, - "8": 514359808.0, - "9": 514359808.0, - "10": 514359808.0, - "11": 514359808.0, - "12": 514359808.0, - "13": 514359808.0, - "14": 514359808.0, - "15": 514359808.0, - "16": 514359808.0, - "17": 514359808.0, - "18": 514359808.0, - "19": 514359808.0, - "20": 514359808.0, - "21": 514359808.0, - "22": 514359808.0, - "23": 514359808.0, - "24": 514359808.0, - "25": 514359808.0, - "26": 514359808.0, - "27": 514359808.0, - "28": 514359808.0, - "29": 514359808.0, - "30": 514359808.0, - "31": 514359808.0, - "32": 514359808.0, - "33": 514359808.0, - "34": 514359808.0, - "35": 514359808.0, - "36": 514359808.0, - "37": 514359808.0, - "38": 514359808.0, - "39": 514359808.0, - "40": 514359808.0, - "41": 514359808.0, - "42": 514359808.0, - "43": 514359808.0, - "44": 514359808.0, - "45": 514359808.0, - "46": 514359808.0, - "47": 514359808.0, - "48": 514359808.0, - "49": 514359808.0, - "50": 514359808.0, - "51": 514359808.0, - "52": 514359808.0, - "53": 514359808.0, - "54": 514359808.0, - "55": 514359808.0, - "56": 514359808.0, - "57": 514359808.0, - "58": 514359808.0, - "59": 514359808.0, - "60": 514359808.0, - "61": 514359808.0, - "62": 514359808.0, - "63": 514359808.0, - "64": 514359808.0, - "65": 514359808.0, - "66": 514359808.0, - "67": 514359808.0, - "68": 514359808.0, - "69": 514359808.0, - "70": 514359808.0, - "71": 514359808.0, - "72": 514359808.0, - "73": 514359808.0, - "74": 514359808.0, - "75": 514359808.0, - "76": 514359808.0, - "77": 514359808.0, - "78": 514359808.0, - "79": 514359808.0, - "80": 514359808.0, - "81": 514359808.0, - "82": 514359808.0, - "83": 514359808.0, - "84": 514359808.0, - "85": 514359808.0, - "86": 514359808.0, - "87": 514359808.0, - "88": 514359808.0, - "89": 514359808.0, - "90": 514359808.0, - "91": 514359808.0, - "92": 514359808.0, - "93": 514359808.0, - "94": 514359808.0, - "95": 514359808.0, - "96": 514359808.0, - "97": 514359808.0, - "98": 514359808.0, - "99": 514359808.0, - "100": 514359808.0 + "1": 514884096.0, + "2": 514884096.0, + "3": 514884096.0, + "4": 514884096.0, + "5": 514884096.0, + "6": 514884096.0, + "7": 514884096.0, + "8": 514884096.0, + "9": 514884096.0, + "10": 514884096.0, + "11": 514884096.0, + "12": 514884096.0, + "13": 514884096.0, + "14": 514884096.0, + "15": 514884096.0, + "16": 514884096.0, + "17": 514884096.0, + "18": 514884096.0, + "19": 514884096.0, + "20": 514884096.0, + "21": 514884096.0, + "22": 514884096.0, + "23": 514884096.0, + "24": 514884096.0, + "25": 514884096.0, + "26": 514884096.0, + "27": 514884096.0, + "28": 514884096.0, + "29": 514884096.0, + "30": 514884096.0, + "31": 514884096.0, + "32": 514884096.0, + "33": 514884096.0, + "34": 514884096.0, + "35": 514884096.0, + "36": 514884096.0, + "37": 514884096.0, + "38": 514884096.0, + "39": 514884096.0, + "40": 514884096.0, + "41": 514884096.0, + "42": 514884096.0, + "43": 514884096.0, + "44": 514884096.0, + "45": 514884096.0, + "46": 514884096.0, + "47": 514884096.0, + "48": 514884096.0, + "49": 514884096.0, + "50": 514884096.0, + "51": 514884096.0, + "52": 514884096.0, + "53": 514884096.0, + "54": 514884096.0, + "55": 514884096.0, + "56": 514884096.0, + "57": 514884096.0, + "58": 514884096.0, + "59": 514884096.0, + "60": 514884096.0, + "61": 514884096.0, + "62": 514884096.0, + "63": 514884096.0, + "64": 514884096.0, + "65": 514884096.0, + "66": 514884096.0, + "67": 514884096.0, + "68": 514884096.0, + "69": 514884096.0, + "70": 514884096.0, + "71": 514884096.0, + "72": 514884096.0, + "73": 514884096.0, + "74": 514884096.0, + "75": 514884096.0, + "76": 514884096.0, + "77": 514884096.0, + "78": 514884096.0, + "79": 514884096.0, + "80": 514884096.0, + "81": 514884096.0, + "82": 514884096.0, + "83": 514884096.0, + "84": 514884096.0, + "85": 514884096.0, + "86": 514884096.0, + "87": 514884096.0, + "88": 514884096.0, + "89": 514884096.0, + "90": 514884096.0, + "91": 514884096.0, + "92": 514884096.0, + "93": 514884096.0, + "94": 514884096.0, + "95": 514884096.0, + "96": 514884096.0, + "97": 514884096.0, + "98": 514884096.0, + "99": 514884096.0, + "100": 514884096.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1258060288.0, - "2": 1437084160.0, - "3": 1437084160.0, - "4": 1437084160.0, - "5": 1437084160.0, - "6": 1437084160.0, - "7": 1437084160.0, - "8": 1437084160.0, - "9": 1437084160.0, - "10": 1437084160.0, - "11": 1437084160.0, - "12": 1437084160.0, - "13": 1437084160.0, - "14": 1437084160.0, - "15": 1437084160.0, - "16": 1437084160.0, - "17": 1437084160.0, - "18": 1437084160.0, - "19": 1437084160.0, - "20": 1437084160.0, - "21": 1437084160.0, - "22": 1437084160.0, - "23": 1437084160.0, - "24": 1437084160.0, - "25": 1437084160.0, - "26": 1437084160.0, - "27": 1437084160.0, - "28": 1437084160.0, - "29": 1437084160.0, - "30": 1437084160.0, - "31": 1437084160.0, - "32": 1437084160.0, - "33": 1437084160.0, - "34": 1437084160.0, - "35": 1437084160.0, - "36": 1437084160.0, - "37": 1437084160.0, - "38": 1437084160.0, - "39": 1437084160.0, - "40": 1437084160.0, - "41": 1437084160.0, - "42": 1437084160.0, - "43": 1437084160.0, - "44": 1437084160.0, - "45": 1437084160.0, - "46": 1437084160.0, - "47": 1437084160.0, - "48": 1437084160.0, - "49": 1437084160.0, - "50": 1437084160.0, - "51": 1437084160.0, - "52": 1437084160.0, - "53": 1437084160.0, - "54": 1437084160.0, - "55": 1437084160.0, - "56": 1437084160.0, - "57": 1437084160.0, - "58": 1437084160.0, - "59": 1437084160.0, - "60": 1437084160.0, - "61": 1437084160.0, - "62": 1437084160.0, - "63": 1437084160.0, - "64": 1437084160.0, - "65": 1437084160.0, - "66": 1437084160.0, - "67": 1437084160.0, - "68": 1437084160.0, - "69": 1437084160.0, - "70": 1437084160.0, - "71": 1437084160.0, - "72": 1437084160.0, - "73": 1437084160.0, - "74": 1437084160.0, - "75": 1437084160.0, - "76": 1437084160.0, - "77": 1437084160.0, - "78": 1437084160.0, - "79": 1437084160.0, - "80": 1437084160.0, - "81": 1437084160.0, - "82": 1437084160.0, - "83": 1437084160.0, - "84": 1437084160.0, - "85": 1437084160.0, - "86": 1437084160.0, - "87": 1437084160.0, - "88": 1437084160.0, - "89": 1437084160.0, - "90": 1437084160.0, - "91": 1437084160.0, - "92": 1437084160.0, - "93": 1437084160.0, - "94": 1437084160.0, - "95": 1437084160.0, - "96": 1437084160.0, - "97": 1437084160.0, - "98": 1437084160.0, - "99": 1437084160.0, - "100": 1437084160.0 + "1": 1259108864.0, + "2": 1438394880.0, + "3": 1438394880.0, + "4": 1438394880.0, + "5": 1438394880.0, + "6": 1438394880.0, + "7": 1438394880.0, + "8": 1438394880.0, + "9": 1438394880.0, + "10": 1438394880.0, + "11": 1438394880.0, + "12": 1438394880.0, + "13": 1438394880.0, + "14": 1438394880.0, + "15": 1438394880.0, + "16": 1438394880.0, + "17": 1438394880.0, + "18": 1438394880.0, + "19": 1438394880.0, + "20": 1438394880.0, + "21": 1438394880.0, + "22": 1438394880.0, + "23": 1438394880.0, + "24": 1438394880.0, + "25": 1438394880.0, + "26": 1438394880.0, + "27": 1438394880.0, + "28": 1438394880.0, + "29": 1438394880.0, + "30": 1438394880.0, + "31": 1438394880.0, + "32": 1438394880.0, + "33": 1438394880.0, + "34": 1438394880.0, + "35": 1438394880.0, + "36": 1438394880.0, + "37": 1438394880.0, + "38": 1438394880.0, + "39": 1438394880.0, + "40": 1438394880.0, + "41": 1438394880.0, + "42": 1438394880.0, + "43": 1438394880.0, + "44": 1438394880.0, + "45": 1438394880.0, + "46": 1438394880.0, + "47": 1438394880.0, + "48": 1438394880.0, + "49": 1438394880.0, + "50": 1438394880.0, + "51": 1438394880.0, + "52": 1438394880.0, + "53": 1438394880.0, + "54": 1438394880.0, + "55": 1438394880.0, + "56": 1438394880.0, + "57": 1438394880.0, + "58": 1438394880.0, + "59": 1438394880.0, + "60": 1438394880.0, + "61": 1438394880.0, + "62": 1438394880.0, + "63": 1438394880.0, + "64": 1438394880.0, + "65": 1438394880.0, + "66": 1438394880.0, + "67": 1438394880.0, + "68": 1438394880.0, + "69": 1438394880.0, + "70": 1438394880.0, + "71": 1438394880.0, + "72": 1438394880.0, + "73": 1438394880.0, + "74": 1438394880.0, + "75": 1438394880.0, + "76": 1438394880.0, + "77": 1438394880.0, + "78": 1438394880.0, + "79": 1438394880.0, + "80": 1438394880.0, + "81": 1438394880.0, + "82": 1438394880.0, + "83": 1438394880.0, + "84": 1438394880.0, + "85": 1438394880.0, + "86": 1438394880.0, + "87": 1438394880.0, + "88": 1438394880.0, + "89": 1438394880.0, + "90": 1438394880.0, + "91": 1438394880.0, + "92": 1438394880.0, + "93": 1438394880.0, + "94": 1438394880.0, + "95": 1438394880.0, + "96": 1438394880.0, + "97": 1438394880.0, + "98": 1438394880.0, + "99": 1438394880.0, + "100": 1438394880.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.6815, - "3": 0.23582, - "4": 0.21969, - "5": 0.22399, - "6": 0.21848, - "7": 0.21944, - "8": 0.21989, - "9": 0.22542, - "10": 0.22685, - "11": 0.22859, - "12": 0.22734, - "13": 0.22735, - "14": 0.22682, - "15": 0.22731, - "16": 0.22724, - "17": 0.22774, - "18": 0.2253, - "19": 0.21338, - "20": 0.21612, - "21": 0.22487, - "22": 0.2609, - "23": 0.34495, - "24": 0.40538, - "25": 0.27265, - "26": 0.22852, - "27": 0.23498, - "28": 0.23458, - "29": 0.2356, - "30": 0.23223, - "31": 0.23427, - "32": 0.23193, - "33": 0.23007, - "34": 0.22762, - "35": 0.22604, - "36": 0.22153, - "37": 0.21923, - "38": 0.21718, - "39": 0.2162, - "40": 0.21653, - "41": 0.21673, - "42": 0.21416, - "43": 0.21439, - "44": 0.2141, - "45": 0.21364, - "46": 0.21263, - "47": 0.2139, - "48": 0.21445, - "49": 0.21424, - "50": 0.21381, - "51": 0.21544, - "52": 0.21075, - "53": 0.21292, - "54": 0.21407, - "55": 0.2167, - "56": 0.21877, - "57": 0.21861, - "58": 0.22087, - "59": 0.21999, - "60": 0.21884, - "61": 0.21841, - "62": 0.21988, - "63": 0.21876, - "64": 0.21811, - "65": 0.21795, - "66": 0.2197, - "67": 0.22005, - "68": 0.21994, - "69": 0.21937, - "70": 0.21964, - "71": 0.22007, - "72": 0.221, - "73": 0.22145, - "74": 0.22069, - "75": 0.22126, - "76": 0.21984, - "77": 0.22096, - "78": 0.2231, - "79": 0.22168, - "80": 0.21932, - "81": 0.21748, - "82": 0.21971, - "83": 0.22113, - "84": 0.22096, - "85": 0.22316, - "86": 0.22043, - "87": 0.22198, - "88": 0.2247, - "89": 0.2219, - "90": 0.22258, - "91": 0.22224, - "92": 0.22132, - "93": 0.22182, - "94": 0.22397, - "95": 0.22547, - "96": 0.22177, - "97": 0.22282, - "98": 0.22255, - "99": 0.22417, - "100": 0.22334 + "2": 5.78191, + "3": 0.21543, + "4": 0.20431, + "5": 0.2048, + "6": 0.20252, + "7": 0.20042, + "8": 0.19829, + "9": 0.1981, + "10": 0.19894, + "11": 0.19941, + "12": 0.19791, + "13": 0.19665, + "14": 0.19775, + "15": 0.19771, + "16": 0.19844, + "17": 0.1973, + "18": 0.19675, + "19": 0.19775, + "20": 0.19752, + "21": 0.19736, + "22": 0.19856, + "23": 0.19791, + "24": 0.19728, + "25": 0.19919, + "26": 0.20211, + "27": 0.20996, + "28": 0.20609, + "29": 0.20893, + "30": 0.29002, + "31": 0.30297, + "32": 0.2644, + "33": 0.24762, + "34": 0.2283, + "35": 0.19894, + "36": 0.19904, + "37": 0.19825, + "38": 0.19956, + "39": 0.19723, + "40": 0.23122, + "41": 0.19962, + "42": 0.19911, + "43": 0.20105, + "44": 0.1992, + "45": 0.19775, + "46": 0.19912, + "47": 0.19899, + "48": 0.2034, + "49": 0.20022, + "50": 0.19798, + "51": 0.22615, + "52": 0.19944, + "53": 0.19734, + "54": 0.19576, + "55": 0.19493, + "56": 0.19695, + "57": 0.19635, + "58": 0.19686, + "59": 0.19616, + "60": 0.19825, + "61": 0.19728, + "62": 0.19652, + "63": 0.1972, + "64": 0.19799, + "65": 0.19902, + "66": 0.19916, + "67": 0.19906, + "68": 0.20142, + "69": 0.20147, + "70": 0.20041, + "71": 0.20146, + "72": 0.19828, + "73": 0.19865, + "74": 0.19876, + "75": 0.20114, + "76": 0.19884, + "77": 0.19884, + "78": 0.19712, + "79": 0.19797, + "80": 0.19724, + "81": 0.19755, + "82": 0.19734, + "83": 0.19936, + "84": 0.19723, + "85": 0.19891, + "86": 0.198, + "87": 0.19759, + "88": 0.19644, + "89": 0.1987, + "90": 0.19753, + "91": 0.20302, + "92": 0.19865, + "93": 0.19958, + "94": 0.19868, + "95": 0.1982, + "96": 0.19927, + "97": 0.19927, + "98": 0.19757, + "99": 0.19871, + "100": 0.201 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json index 7dd5b31f34f..2471c5117cc 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor/golden_values_dev_dgx_gb200.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.87163, + "1": 10.87162, "2": 10.87238, - "3": 10.86215, - "4": 10.84334, + "3": 10.86217, + "4": 10.84331, "5": 10.8781, - "6": 10.8937, - "7": 10.87187, - "8": 10.87789, - "9": 10.86815, - "10": 10.83758, - "11": 10.87595, - "12": 10.87605, - "13": 10.89163, + "6": 10.89369, + "7": 10.87188, + "8": 10.87792, + "9": 10.86811, + "10": 10.83761, + "11": 10.87596, + "12": 10.87602, + "13": 10.89168, "14": 10.89707, - "15": 10.83373, - "16": 10.82462, - "17": 10.80227, - "18": 10.82965, - "19": 10.82299, - "20": 10.73839, - "21": 10.70969, - "22": 10.5649, - "23": 10.73038, + "15": 10.83377, + "16": 10.82464, + "17": 10.80226, + "18": 10.82961, + "19": 10.82301, + "20": 10.73842, + "21": 10.7097, + "22": 10.56485, + "23": 10.73039, "24": 10.6062, - "25": 10.55515, - "26": 10.62333, - "27": 10.61393, + "25": 10.55511, + "26": 10.62334, + "27": 10.61392, "28": 10.57726, - "29": 10.60204, - "30": 10.38732, - "31": 10.12791, - "32": 10.4758, - "33": 10.47238, - "34": 10.22665, - "35": 10.28584, - "36": 10.23138, - "37": 10.35035, - "38": 10.19674, + "29": 10.60206, + "30": 10.38733, + "31": 10.12792, + "32": 10.47586, + "33": 10.47237, + "34": 10.22667, + "35": 10.2859, + "36": 10.23137, + "37": 10.35036, + "38": 10.19669, "39": 10.40798, - "40": 10.09496, - "41": 10.13593, - "42": 10.21728, - "43": 9.84575, - "44": 9.94965, - "45": 9.83809, - "46": 9.821, + "40": 10.095, + "41": 10.1359, + "42": 10.21731, + "43": 9.84574, + "44": 9.94964, + "45": 9.83808, + "46": 9.82101, "47": 10.13316, - "48": 9.85047, - "49": 9.53, - "50": 9.90689, - "51": 9.85498, - "52": 9.74731, - "53": 10.06267, - "54": 9.95301, - "55": 9.88728, + "48": 9.85044, + "49": 9.53002, + "50": 9.90686, + "51": 9.85497, + "52": 9.7473, + "53": 10.06268, + "54": 9.95304, + "55": 9.88726, "56": 9.6211, "57": 9.47571, - "58": 9.83152, - "59": 9.58168, - "60": 9.49439, - "61": 9.68902, - "62": 9.9857, - "63": 9.37411, + "58": 9.83147, + "59": 9.5817, + "60": 9.49436, + "61": 9.68906, + "62": 9.98568, + "63": 9.3741, "64": 9.7651, - "65": 8.94171, - "66": 9.69872, - "67": 9.36899, + "65": 8.94174, + "66": 9.69878, + "67": 9.369, "68": 9.78075, "69": 9.79729, - "70": 9.72884, - "71": 9.62546, - "72": 9.58193, - "73": 9.48195, - "74": 8.92206, - "75": 9.4096, - "76": 9.07711, + "70": 9.72882, + "71": 9.62545, + "72": 9.58195, + "73": 9.48193, + "74": 8.92205, + "75": 9.40961, + "76": 9.07704, "77": 10.05905, - "78": 9.7196, - "79": 9.37915, - "80": 9.39953, - "81": 9.4826, + "78": 9.71961, + "79": 9.37914, + "80": 9.39952, + "81": 9.48265, "82": 9.70045, "83": 9.31347, - "84": 9.41605, - "85": 9.61616, - "86": 9.07519, - "87": 9.59811, - "88": 9.75175, - "89": 9.60152, - "90": 9.82639, - "91": 9.33477, - "92": 9.3587, - "93": 9.08591, - "94": 8.82888, + "84": 9.41608, + "85": 9.61613, + "86": 9.07521, + "87": 9.59813, + "88": 9.75181, + "89": 9.60155, + "90": 9.82643, + "91": 9.33475, + "92": 9.35867, + "93": 9.08589, + "94": 8.82892, "95": 9.52816, - "96": 9.52866, - "97": 9.30468, - "98": 9.67128, - "99": 8.89752, - "100": 9.40653 + "96": 9.52865, + "97": 9.30465, + "98": 9.67127, + "99": 8.89754, + "100": 9.40655 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1731.0, - "2": 1804.0, - "3": 1704.0, - "4": 1768.0, - "5": 2006.0, - "6": 1918.0, - "7": 1815.0, - "8": 1654.0, - "9": 1919.0, - "10": 1481.0, - "11": 1876.0, - "12": 1795.0, - "13": 1915.0, - "14": 1830.0, - "15": 2029.0, - "16": 1948.0, - "17": 1838.0, - "18": 1747.0, - "19": 1789.0, - "20": 1771.0, - "21": 1876.0, - "22": 1854.0, - "23": 2069.0, - "24": 1684.0, - "25": 1732.0, - "26": 1803.0, - "27": 1919.0, - "28": 2095.0, - "29": 2041.0, - "30": 1919.0, - "31": 1704.0, - "32": 1869.0, - "33": 2184.0, - "34": 1846.0, - "35": 1923.0, - "36": 2071.0, - "37": 2407.0, - "38": 2209.0, - "39": 2462.0, - "40": 2275.0, - "41": 2369.0, - "42": 2305.0, - "43": 2048.0, - "44": 2171.0, - "45": 2119.0, - "46": 2287.0, - "47": 2499.0, - "48": 2361.0, - "49": 2398.0, - "50": 2321.0, - "51": 2604.0, - "52": 2579.0, - "53": 3020.0, - "54": 2705.0, - "55": 2369.0, - "56": 2752.0, - "57": 2351.0, - "58": 2902.0, - "59": 2786.0, - "60": 2511.0, - "61": 2861.0, - "62": 2715.0, - "63": 2476.0, - "64": 2944.0, - "65": 2791.0, - "66": 3095.0, - "67": 2945.0, - "68": 2853.0, - "69": 2919.0, - "70": 3113.0, - "71": 2898.0, - "72": 2554.0, - "73": 3029.0, - "74": 2044.0, - "75": 2601.0, - "76": 2957.0, - "77": 3204.0, - "78": 3197.0, - "79": 3123.0, - "80": 3255.0, - "81": 3582.0, - "82": 3338.0, - "83": 2799.0, - "84": 3225.0, - "85": 3372.0, - "86": 2818.0, - "87": 3881.0, - "88": 3040.0, - "89": 3335.0, - "90": 3256.0, - "91": 2903.0, - "92": 3202.0, - "93": 2806.0, - "94": 3422.0, - "95": 3348.0, - "96": 3594.0, - "97": 3290.0, - "98": 3746.0, - "99": 3085.0, - "100": 3366.0 + "1": 1635.0, + "2": 1768.0, + "3": 1703.0, + "4": 1810.0, + "5": 2003.0, + "6": 1892.0, + "7": 1931.0, + "8": 1674.0, + "9": 1847.0, + "10": 1444.0, + "11": 1908.0, + "12": 1837.0, + "13": 1949.0, + "14": 1763.0, + "15": 2001.0, + "16": 1932.0, + "17": 1778.0, + "18": 1660.0, + "19": 1682.0, + "20": 1759.0, + "21": 1946.0, + "22": 1897.0, + "23": 2008.0, + "24": 1724.0, + "25": 1718.0, + "26": 1829.0, + "27": 1924.0, + "28": 2025.0, + "29": 2092.0, + "30": 2054.0, + "31": 1734.0, + "32": 2014.0, + "33": 2138.0, + "34": 1910.0, + "35": 2052.0, + "36": 2015.0, + "37": 2410.0, + "38": 2166.0, + "39": 2371.0, + "40": 2250.0, + "41": 2474.0, + "42": 2322.0, + "43": 2052.0, + "44": 2160.0, + "45": 2143.0, + "46": 2297.0, + "47": 2569.0, + "48": 2370.0, + "49": 2448.0, + "50": 2361.0, + "51": 2511.0, + "52": 2531.0, + "53": 3056.0, + "54": 2732.0, + "55": 2357.0, + "56": 2840.0, + "57": 2362.0, + "58": 2824.0, + "59": 2835.0, + "60": 2513.0, + "61": 3049.0, + "62": 2701.0, + "63": 2568.0, + "64": 3048.0, + "65": 2684.0, + "66": 3050.0, + "67": 2851.0, + "68": 2854.0, + "69": 2948.0, + "70": 3211.0, + "71": 2926.0, + "72": 2524.0, + "73": 3033.0, + "74": 2043.0, + "75": 2593.0, + "76": 2905.0, + "77": 3297.0, + "78": 3228.0, + "79": 3232.0, + "80": 3299.0, + "81": 3568.0, + "82": 3366.0, + "83": 2687.0, + "84": 3268.0, + "85": 3336.0, + "86": 2690.0, + "87": 3826.0, + "88": 3033.0, + "89": 3481.0, + "90": 3125.0, + "91": 2909.0, + "92": 3151.0, + "93": 2860.0, + "94": 3427.0, + "95": 3386.0, + "96": 3575.0, + "97": 3297.0, + "98": 3706.0, + "99": 3178.0, + "100": 3420.0 } }, "mem-allocated-bytes": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 1.71105, - "3": 0.22879, - "4": 0.22169, - "5": 0.21979, - "6": 0.21933, - "7": 0.21836, - "8": 0.22054, - "9": 0.22096, - "10": 0.22079, - "11": 0.22255, - "12": 0.21905, - "13": 0.22266, - "14": 0.22261, - "15": 0.22192, - "16": 0.21928, - "17": 0.22014, - "18": 0.2213, - "19": 0.22242, - "20": 0.22097, - "21": 0.21936, - "22": 0.22091, - "23": 0.22071, - "24": 0.22306, - "25": 0.22073, - "26": 0.22028, - "27": 0.22198, - "28": 0.22294, - "29": 0.22204, - "30": 0.21896, - "31": 0.22144, - "32": 0.22279, - "33": 0.22428, - "34": 0.22247, - "35": 0.22192, - "36": 0.22242, - "37": 0.22321, - "38": 0.22186, - "39": 0.22242, - "40": 0.22098, - "41": 0.22254, - "42": 0.55234, - "43": 0.22432, - "44": 0.22103, - "45": 0.22202, - "46": 0.2216, - "47": 0.22107, - "48": 0.21878, - "49": 0.22338, - "50": 0.22181, - "51": 0.22588, - "52": 0.22221, - "53": 0.22214, - "54": 0.22059, - "55": 0.22088, - "56": 0.22231, - "57": 0.2231, - "58": 0.22228, - "59": 0.22136, - "60": 0.22087, - "61": 0.22171, - "62": 0.22165, - "63": 0.22149, - "64": 0.22165, - "65": 0.22916, - "66": 0.25667, - "67": 0.22585, - "68": 0.2212, - "69": 0.22322, - "70": 0.22332, - "71": 0.22291, - "72": 0.22074, - "73": 0.2214, - "74": 0.22287, - "75": 0.21929, - "76": 0.22246, - "77": 0.22148, - "78": 0.22442, - "79": 0.22465, - "80": 0.22859, - "81": 0.22464, - "82": 0.22391, - "83": 0.22417, - "84": 0.22202, - "85": 0.22369, - "86": 0.22224, - "87": 0.22245, - "88": 0.22255, - "89": 0.22379, - "90": 0.22356, - "91": 0.22229, - "92": 0.22297, - "93": 0.22525, - "94": 0.21956, - "95": 0.22318, - "96": 0.22361, - "97": 0.22246, - "98": 0.22326, - "99": 0.22121, - "100": 0.22214 + "2": 3.47519, + "3": 0.23464, + "4": 0.22551, + "5": 0.22589, + "6": 0.22563, + "7": 0.22745, + "8": 0.22511, + "9": 0.22598, + "10": 0.2276, + "11": 0.22627, + "12": 0.2248, + "13": 0.22685, + "14": 0.22689, + "15": 0.22796, + "16": 0.22747, + "17": 0.22636, + "18": 0.2276, + "19": 0.22859, + "20": 0.22879, + "21": 0.22663, + "22": 0.22705, + "23": 0.22996, + "24": 0.22863, + "25": 0.22596, + "26": 0.22764, + "27": 0.2288, + "28": 0.22753, + "29": 0.23036, + "30": 0.2272, + "31": 0.22832, + "32": 0.23015, + "33": 0.22925, + "34": 0.2264, + "35": 0.22962, + "36": 0.228, + "37": 0.22671, + "38": 0.22839, + "39": 0.2276, + "40": 0.22772, + "41": 0.22842, + "42": 0.22878, + "43": 0.22806, + "44": 0.22909, + "45": 0.22908, + "46": 0.22891, + "47": 0.22584, + "48": 0.22899, + "49": 0.22909, + "50": 0.23058, + "51": 0.2353, + "52": 0.22737, + "53": 0.22819, + "54": 0.22792, + "55": 0.22942, + "56": 0.22934, + "57": 0.23061, + "58": 0.22975, + "59": 0.2291, + "60": 0.2292, + "61": 0.22916, + "62": 0.22792, + "63": 0.22698, + "64": 0.22881, + "65": 0.22606, + "66": 0.22804, + "67": 0.22786, + "68": 0.22928, + "69": 0.23139, + "70": 0.22847, + "71": 0.22889, + "72": 0.22839, + "73": 0.22809, + "74": 0.22741, + "75": 0.22906, + "76": 0.23698, + "77": 0.22921, + "78": 0.22663, + "79": 0.23187, + "80": 0.23018, + "81": 0.22995, + "82": 0.22897, + "83": 0.22956, + "84": 0.22742, + "85": 0.22817, + "86": 0.23042, + "87": 0.22896, + "88": 0.2286, + "89": 0.23053, + "90": 0.2294, + "91": 0.22555, + "92": 0.22838, + "93": 0.22865, + "94": 0.22866, + "95": 0.2295, + "96": 0.22867, + "97": 0.23017, + "98": 0.23013, + "99": 0.22868, + "100": 0.22733 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json index 85eca8a168b..5662faa717f 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather/golden_values_dev_dgx_gb200.json @@ -6,53 +6,53 @@ "values": { "1": 10.84059, "2": 10.85204, - "3": 10.84133, - "4": 10.84535, - "5": 10.85551, - "6": 10.86422, - "7": 10.85246, - "8": 10.84439, - "9": 10.84792, - "10": 10.81313, - "11": 10.8561, - "12": 10.84243, - "13": 10.86076, - "14": 10.8495, - "15": 10.81525, - "16": 10.80923, - "17": 10.78383, - "18": 10.79178, - "19": 10.79409, - "20": 10.70535, - "21": 10.69778, - "22": 10.58348, + "3": 10.84138, + "4": 10.84536, + "5": 10.85554, + "6": 10.86423, + "7": 10.8525, + "8": 10.84446, + "9": 10.84795, + "10": 10.81311, + "11": 10.85614, + "12": 10.84248, + "13": 10.86077, + "14": 10.84952, + "15": 10.81528, + "16": 10.80924, + "17": 10.78389, + "18": 10.79175, + "19": 10.79412, + "20": 10.70534, + "21": 10.69776, + "22": 10.58346, "23": 10.69235, - "24": 10.60608, - "25": 10.56718, - "26": 10.61425, - "27": 10.60614, - "28": 10.55901, - "29": 10.56486, - "30": 10.37865, - "31": 10.16183, - "32": 10.45519, - "33": 10.45018, - "34": 10.23984, - "35": 10.27323, - "36": 10.24226, - "37": 10.34516, - "38": 10.21732, - "39": 10.39456, - "40": 10.09506, - "41": 10.15057, - "42": 10.21211, - "43": 9.87993, - "44": 9.97831, - "45": 9.85574, - "46": 9.83355, - "47": 10.14081, - "48": 9.86387, - "49": 9.55497, + "24": 10.6061, + "25": 10.5672, + "26": 10.61423, + "27": 10.60612, + "28": 10.55902, + "29": 10.56482, + "30": 10.37866, + "31": 10.16187, + "32": 10.4552, + "33": 10.45019, + "34": 10.23987, + "35": 10.27321, + "36": 10.24221, + "37": 10.34514, + "38": 10.21728, + "39": 10.39455, + "40": 10.09505, + "41": 10.15052, + "42": 10.21204, + "43": 9.87994, + "44": 9.97828, + "45": 9.85569, + "46": 9.83356, + "47": 10.1408, + "48": 9.86384, + "49": 9.55498, "50": 9.91604 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1653.0, - "2": 1752.0, - "3": 1624.0, - "4": 1773.0, - "5": 2051.0, - "6": 1884.0, - "7": 1841.0, - "8": 1684.0, - "9": 1859.0, - "10": 1545.0, - "11": 1863.0, - "12": 1746.0, - "13": 2004.0, - "14": 1896.0, - "15": 1934.0, - "16": 2001.0, - "17": 1933.0, - "18": 1793.0, - "19": 1900.0, - "20": 1792.0, - "21": 2062.0, - "22": 1795.0, - "23": 1997.0, - "24": 1666.0, - "25": 1607.0, - "26": 1745.0, - "27": 1880.0, - "28": 1887.0, - "29": 2023.0, - "30": 1964.0, - "31": 1609.0, - "32": 1793.0, - "33": 2102.0, - "34": 1891.0, - "35": 1869.0, - "36": 1984.0, - "37": 2446.0, - "38": 2088.0, - "39": 2394.0, - "40": 2182.0, - "41": 2110.0, - "42": 2180.0, - "43": 1931.0, - "44": 2082.0, - "45": 2079.0, - "46": 2189.0, - "47": 2510.0, - "48": 2197.0, - "49": 2282.0, - "50": 2160.0 + "1": 1636.0, + "2": 1800.0, + "3": 1658.0, + "4": 1779.0, + "5": 2034.0, + "6": 1824.0, + "7": 1839.0, + "8": 1699.0, + "9": 1879.0, + "10": 1507.0, + "11": 1927.0, + "12": 1767.0, + "13": 2057.0, + "14": 1821.0, + "15": 1970.0, + "16": 2034.0, + "17": 1909.0, + "18": 1751.0, + "19": 1885.0, + "20": 1756.0, + "21": 2017.0, + "22": 1709.0, + "23": 2079.0, + "24": 1728.0, + "25": 1634.0, + "26": 1781.0, + "27": 1786.0, + "28": 1888.0, + "29": 1942.0, + "30": 1884.0, + "31": 1611.0, + "32": 1879.0, + "33": 2056.0, + "34": 1834.0, + "35": 1950.0, + "36": 2062.0, + "37": 2372.0, + "38": 2155.0, + "39": 2438.0, + "40": 2115.0, + "41": 2178.0, + "42": 2225.0, + "43": 1904.0, + "44": 2060.0, + "45": 1972.0, + "46": 2141.0, + "47": 2505.0, + "48": 2257.0, + "49": 2330.0, + "50": 2227.0 } }, "mem-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1480436736.0, - "2": 1542892032.0, - "3": 1542892032.0, - "4": 1542892032.0, - "5": 1542892032.0, - "6": 1542892032.0, - "7": 1542892032.0, - "8": 1542892032.0, - "9": 1542892032.0, - "10": 1542892032.0, - "11": 1542892032.0, - "12": 1542892032.0, - "13": 1542892032.0, - "14": 1542892032.0, - "15": 1542892032.0, - "16": 1542892032.0, - "17": 1542892032.0, - "18": 1542892032.0, - "19": 1542892032.0, - "20": 1542892032.0, - "21": 1542892032.0, - "22": 1542892032.0, - "23": 1542892032.0, - "24": 1542892032.0, - "25": 1542892032.0, - "26": 1542892032.0, - "27": 1542892032.0, - "28": 1542892032.0, - "29": 1542892032.0, - "30": 1542892032.0, - "31": 1542892032.0, - "32": 1542892032.0, - "33": 1542892032.0, - "34": 1542892032.0, - "35": 1542892032.0, - "36": 1542892032.0, - "37": 1542892032.0, - "38": 1542892032.0, - "39": 1542892032.0, - "40": 1542892032.0, - "41": 1542892032.0, - "42": 1542892032.0, - "43": 1542892032.0, - "44": 1542892032.0, - "45": 1542892032.0, - "46": 1542892032.0, - "47": 1542892032.0, - "48": 1542892032.0, - "49": 1542892032.0, - "50": 1542892032.0 + "1": 1476898304.0, + "2": 1543416320.0, + "3": 1543416320.0, + "4": 1543416320.0, + "5": 1543416320.0, + "6": 1543416320.0, + "7": 1543416320.0, + "8": 1543416320.0, + "9": 1543416320.0, + "10": 1543416320.0, + "11": 1543416320.0, + "12": 1543416320.0, + "13": 1543416320.0, + "14": 1543416320.0, + "15": 1543416320.0, + "16": 1543416320.0, + "17": 1543416320.0, + "18": 1543416320.0, + "19": 1543416320.0, + "20": 1543416320.0, + "21": 1543416320.0, + "22": 1543416320.0, + "23": 1543416320.0, + "24": 1543416320.0, + "25": 1543416320.0, + "26": 1543416320.0, + "27": 1543416320.0, + "28": 1543416320.0, + "29": 1543416320.0, + "30": 1543416320.0, + "31": 1543416320.0, + "32": 1543416320.0, + "33": 1543416320.0, + "34": 1543416320.0, + "35": 1543416320.0, + "36": 1543416320.0, + "37": 1543416320.0, + "38": 1543416320.0, + "39": 1543416320.0, + "40": 1543416320.0, + "41": 1543416320.0, + "42": 1543416320.0, + "43": 1543416320.0, + "44": 1543416320.0, + "45": 1543416320.0, + "46": 1543416320.0, + "47": 1543416320.0, + "48": 1543416320.0, + "49": 1543416320.0, + "50": 1543416320.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.48029, - "3": 0.33127, - "4": 0.31594, - "5": 0.32143, - "6": 0.31919, - "7": 0.31884, - "8": 0.32129, - "9": 0.31988, - "10": 0.32069, - "11": 0.31907, - "12": 0.31959, - "13": 0.32211, - "14": 0.31964, - "15": 0.31855, - "16": 0.32013, - "17": 0.32004, - "18": 0.31786, - "19": 0.31755, - "20": 0.31944, - "21": 0.31998, - "22": 0.32066, - "23": 0.32079, - "24": 0.31728, - "25": 0.31689, - "26": 0.31831, - "27": 0.31727, - "28": 0.31999, - "29": 0.31997, - "30": 0.31824, - "31": 0.31724, - "32": 0.33433, - "33": 0.43748, - "34": 0.63551, - "35": 0.35878, - "36": 0.31703, - "37": 0.31709, - "38": 0.32151, - "39": 0.31762, - "40": 0.3204, - "41": 0.3741, - "42": 0.37991, - "43": 0.3738, - "44": 0.38277, - "45": 0.38, - "46": 0.37409, - "47": 0.36543, - "48": 0.37113, - "49": 0.36281, - "50": 0.36274 + "2": 4.53277, + "3": 0.36433, + "4": 0.35198, + "5": 0.35417, + "6": 0.35299, + "7": 0.35017, + "8": 0.34811, + "9": 0.35176, + "10": 0.35678, + "11": 0.35725, + "12": 0.35523, + "13": 0.35433, + "14": 0.35695, + "15": 0.35635, + "16": 0.35872, + "17": 0.35652, + "18": 0.35715, + "19": 0.35655, + "20": 0.35652, + "21": 0.35665, + "22": 0.35374, + "23": 0.35277, + "24": 0.35546, + "25": 0.35569, + "26": 0.35624, + "27": 0.35292, + "28": 0.34852, + "29": 0.35459, + "30": 0.35544, + "31": 0.35605, + "32": 0.3578, + "33": 0.35801, + "34": 0.35703, + "35": 0.35817, + "36": 0.35646, + "37": 0.35838, + "38": 0.36939, + "39": 0.36652, + "40": 0.36016, + "41": 0.35705, + "42": 0.35598, + "43": 0.35642, + "44": 0.35673, + "45": 0.36044, + "46": 0.35719, + "47": 0.35851, + "48": 0.35957, + "49": 0.35843, + "50": 0.35815 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json index 063c93b8168..f6f7f91fb03 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp2/golden_values_dev_dgx_gb200.json @@ -7,52 +7,52 @@ "1": 10.7702, "2": 10.78031, "3": 10.77782, - "4": 10.73861, - "5": 10.81197, + "4": 10.7386, + "5": 10.81196, "6": 10.81962, - "7": 10.79512, - "8": 10.78158, - "9": 10.79081, - "10": 10.71741, - "11": 10.85173, - "12": 10.80653, - "13": 10.82058, - "14": 10.84404, - "15": 10.74918, - "16": 10.752, - "17": 10.70902, - "18": 10.752, - "19": 10.74635, + "7": 10.79511, + "8": 10.7816, + "9": 10.79083, + "10": 10.71745, + "11": 10.85175, + "12": 10.80655, + "13": 10.82059, + "14": 10.84403, + "15": 10.74914, + "16": 10.75203, + "17": 10.70906, + "18": 10.75193, + "19": 10.74634, "20": 10.63769, - "21": 10.61672, - "22": 10.44317, - "23": 10.6675, - "24": 10.50949, - "25": 10.45557, - "26": 10.53435, - "27": 10.54753, - "28": 10.51646, + "21": 10.61671, + "22": 10.4432, + "23": 10.66744, + "24": 10.50948, + "25": 10.45559, + "26": 10.53431, + "27": 10.54757, + "28": 10.51645, "29": 10.55435, - "30": 10.28785, - "31": 10.00156, + "30": 10.28787, + "31": 10.00155, "32": 10.40963, - "33": 10.40243, - "34": 10.13341, - "35": 10.19694, - "36": 10.14213, - "37": 10.2869, - "38": 10.10508, - "39": 10.35217, - "40": 10.00199, - "41": 10.07363, - "42": 10.1522, - "43": 9.74558, - "44": 9.8738, - "45": 9.74764, - "46": 9.74951, + "33": 10.40242, + "34": 10.13338, + "35": 10.19695, + "36": 10.1421, + "37": 10.28683, + "38": 10.10505, + "39": 10.35216, + "40": 10.00202, + "41": 10.07367, + "42": 10.15221, + "43": 9.74561, + "44": 9.87383, + "45": 9.74763, + "46": 9.74947, "47": 10.09152, - "48": 9.77892, - "49": 9.44822, + "48": 9.77894, + "49": 9.44821, "50": 9.84214 } }, @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1564.0, - "2": 1610.0, - "3": 1608.0, - "4": 1854.0, - "5": 1873.0, - "6": 1812.0, - "7": 1744.0, - "8": 1614.0, - "9": 1857.0, - "10": 1358.0, - "11": 1844.0, - "12": 1788.0, - "13": 1826.0, - "14": 1801.0, - "15": 1892.0, - "16": 1892.0, - "17": 1758.0, - "18": 1714.0, - "19": 1677.0, - "20": 1582.0, - "21": 1824.0, - "22": 1579.0, - "23": 1987.0, - "24": 1533.0, - "25": 1602.0, - "26": 1651.0, - "27": 1901.0, - "28": 2044.0, - "29": 1911.0, - "30": 1823.0, - "31": 1583.0, - "32": 1926.0, - "33": 2108.0, - "34": 1914.0, - "35": 2058.0, - "36": 1946.0, - "37": 2325.0, - "38": 2268.0, - "39": 2376.0, - "40": 2208.0, - "41": 2448.0, - "42": 2209.0, - "43": 1977.0, - "44": 2049.0, - "45": 2266.0, - "46": 2481.0, - "47": 2583.0, - "48": 2450.0, - "49": 2255.0, - "50": 2453.0 + "1": 1545.0, + "2": 1614.0, + "3": 1597.0, + "4": 1771.0, + "5": 1838.0, + "6": 1827.0, + "7": 1729.0, + "8": 1579.0, + "9": 1729.0, + "10": 1299.0, + "11": 1846.0, + "12": 1784.0, + "13": 1946.0, + "14": 1729.0, + "15": 1964.0, + "16": 1854.0, + "17": 1865.0, + "18": 1643.0, + "19": 1764.0, + "20": 1552.0, + "21": 1822.0, + "22": 1686.0, + "23": 2112.0, + "24": 1614.0, + "25": 1483.0, + "26": 1688.0, + "27": 1832.0, + "28": 1991.0, + "29": 2058.0, + "30": 1858.0, + "31": 1588.0, + "32": 2046.0, + "33": 2046.0, + "34": 1853.0, + "35": 1930.0, + "36": 1917.0, + "37": 2401.0, + "38": 2114.0, + "39": 2439.0, + "40": 2222.0, + "41": 2363.0, + "42": 2266.0, + "43": 1970.0, + "44": 2099.0, + "45": 2180.0, + "46": 2464.0, + "47": 2567.0, + "48": 2445.0, + "49": 2257.0, + "50": 2379.0 } }, "mem-allocated-bytes": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 2.42188, - "3": 0.13001, - "4": 0.11595, - "5": 0.1162, - "6": 0.11616, - "7": 0.11716, - "8": 0.11796, - "9": 0.1186, - "10": 0.11774, - "11": 0.11769, - "12": 0.12298, - "13": 0.11717, - "14": 0.11738, - "15": 0.11771, - "16": 0.11772, - "17": 0.11737, - "18": 0.11751, - "19": 0.11697, - "20": 0.11822, - "21": 0.11647, - "22": 0.11724, - "23": 0.11721, - "24": 0.11677, - "25": 0.1171, - "26": 0.11775, - "27": 0.11748, - "28": 0.11705, - "29": 0.11727, - "30": 0.11693, - "31": 0.11818, - "32": 0.11738, - "33": 0.11726, - "34": 0.11675, - "35": 0.11722, - "36": 0.11753, - "37": 0.11779, - "38": 0.11683, - "39": 0.11725, - "40": 0.11779, - "41": 0.11794, - "42": 0.11724, - "43": 0.11807, - "44": 0.11744, - "45": 0.12537, - "46": 0.11752, - "47": 0.11739, - "48": 0.11765, - "49": 0.11763, - "50": 0.11812 + "2": 5.51649, + "3": 0.13651, + "4": 0.12558, + "5": 0.12425, + "6": 0.12377, + "7": 0.12426, + "8": 0.12448, + "9": 0.12506, + "10": 0.1238, + "11": 0.12386, + "12": 0.12399, + "13": 0.12491, + "14": 0.12453, + "15": 0.12395, + "16": 0.12277, + "17": 0.12394, + "18": 0.12323, + "19": 0.12604, + "20": 0.12596, + "21": 0.12437, + "22": 0.12727, + "23": 0.12659, + "24": 0.12448, + "25": 0.12564, + "26": 0.12482, + "27": 0.12443, + "28": 0.12433, + "29": 0.12788, + "30": 0.12624, + "31": 0.12539, + "32": 0.12574, + "33": 0.12581, + "34": 0.1247, + "35": 0.12598, + "36": 0.12382, + "37": 0.1255, + "38": 0.12347, + "39": 0.12604, + "40": 0.1256, + "41": 0.1264, + "42": 0.12518, + "43": 0.12661, + "44": 0.12629, + "45": 0.12632, + "46": 0.12683, + "47": 0.12404, + "48": 0.12424, + "49": 0.12655, + "50": 0.12582 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json index 4aa2800617e..b12d4c68133 100644 --- a/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/gpt/gpt3_mcore_tp1_pp4/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.85678, "2": 10.86405, - "3": 10.86854, - "4": 10.85128, - "5": 10.88398, + "3": 10.86853, + "4": 10.8513, + "5": 10.884, "6": 10.89024, - "7": 10.86645, - "8": 10.86924, - "9": 10.87305, - "10": 10.84079, - "11": 10.87928, - "12": 10.8729, - "13": 10.8779, - "14": 10.89011, - "15": 10.82504, + "7": 10.86648, + "8": 10.86922, + "9": 10.87306, + "10": 10.84082, + "11": 10.87926, + "12": 10.87287, + "13": 10.87791, + "14": 10.89008, + "15": 10.82506, "16": 10.82957, - "17": 10.80875, - "18": 10.81163, - "19": 10.81545, - "20": 10.71913, - "21": 10.70404, - "22": 10.56646, - "23": 10.71861, - "24": 10.60988, - "25": 10.55482, - "26": 10.60879, - "27": 10.62303, - "28": 10.56953, - "29": 10.57966, - "30": 10.35999, - "31": 10.11305, - "32": 10.46585, - "33": 10.45153, - "34": 10.20832, - "35": 10.26936, - "36": 10.21924, - "37": 10.33851, - "38": 10.18603, - "39": 10.39977, - "40": 10.08397, - "41": 10.13423, + "17": 10.80876, + "18": 10.8116, + "19": 10.81547, + "20": 10.71915, + "21": 10.70406, + "22": 10.56647, + "23": 10.71857, + "24": 10.60989, + "25": 10.5548, + "26": 10.60875, + "27": 10.62301, + "28": 10.56951, + "29": 10.57968, + "30": 10.36, + "31": 10.1131, + "32": 10.46586, + "33": 10.45154, + "34": 10.20828, + "35": 10.26935, + "36": 10.21928, + "37": 10.3385, + "38": 10.18604, + "39": 10.39979, + "40": 10.08399, + "41": 10.13419, "42": 10.20889, "43": 9.82535, - "44": 9.95909, - "45": 9.82563, - "46": 9.8062, - "47": 10.135, - "48": 9.84004, - "49": 9.52485, - "50": 9.90723 + "44": 9.95908, + "45": 9.82559, + "46": 9.80623, + "47": 10.13498, + "48": 9.84003, + "49": 9.52483, + "50": 9.90726 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1654.0, - "2": 1646.0, - "3": 1565.0, - "4": 1759.0, - "5": 1860.0, - "6": 1741.0, - "7": 1752.0, - "8": 1579.0, - "9": 1849.0, - "10": 1317.0, - "11": 1901.0, - "12": 1765.0, - "13": 1910.0, - "14": 1773.0, - "15": 1864.0, - "16": 1759.0, - "17": 1794.0, - "18": 1805.0, - "19": 1846.0, - "20": 1770.0, - "21": 1963.0, - "22": 1706.0, - "23": 1983.0, - "24": 1609.0, - "25": 1593.0, - "26": 1643.0, - "27": 1696.0, - "28": 1882.0, - "29": 1946.0, - "30": 1925.0, - "31": 1574.0, - "32": 1863.0, - "33": 2024.0, - "34": 1878.0, - "35": 1941.0, - "36": 1887.0, - "37": 2294.0, - "38": 2142.0, - "39": 2288.0, - "40": 2053.0, - "41": 2189.0, - "42": 2331.0, - "43": 1933.0, - "44": 2042.0, - "45": 1956.0, - "46": 2285.0, - "47": 2470.0, - "48": 2437.0, - "49": 2238.0, - "50": 2004.0 + "1": 1608.0, + "2": 1684.0, + "3": 1618.0, + "4": 1745.0, + "5": 1847.0, + "6": 1758.0, + "7": 1838.0, + "8": 1585.0, + "9": 1818.0, + "10": 1346.0, + "11": 1847.0, + "12": 1686.0, + "13": 1839.0, + "14": 1742.0, + "15": 1780.0, + "16": 1799.0, + "17": 1771.0, + "18": 1747.0, + "19": 1831.0, + "20": 1749.0, + "21": 1887.0, + "22": 1714.0, + "23": 1971.0, + "24": 1688.0, + "25": 1579.0, + "26": 1640.0, + "27": 1836.0, + "28": 1879.0, + "29": 1995.0, + "30": 1862.0, + "31": 1500.0, + "32": 1775.0, + "33": 2088.0, + "34": 1804.0, + "35": 1967.0, + "36": 1862.0, + "37": 2231.0, + "38": 2082.0, + "39": 2290.0, + "40": 2123.0, + "41": 2206.0, + "42": 2166.0, + "43": 1985.0, + "44": 2111.0, + "45": 1937.0, + "46": 2111.0, + "47": 2429.0, + "48": 2301.0, + "49": 2278.0, + "50": 2040.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 777900032.0, - "2": 777900032.0, - "3": 777900032.0, - "4": 777900032.0, - "5": 777900032.0, - "6": 777900032.0, - "7": 777900032.0, - "8": 777900032.0, - "9": 777900032.0, - "10": 777900032.0, - "11": 777900032.0, - "12": 777900032.0, - "13": 777900032.0, - "14": 777900032.0, - "15": 777900032.0, - "16": 777900032.0, - "17": 777900032.0, - "18": 777900032.0, - "19": 777900032.0, - "20": 777900032.0, - "21": 777900032.0, - "22": 777900032.0, - "23": 777900032.0, - "24": 777900032.0, - "25": 777900032.0, - "26": 777900032.0, - "27": 777900032.0, - "28": 777900032.0, - "29": 777900032.0, - "30": 777900032.0, - "31": 777900032.0, - "32": 777900032.0, - "33": 777900032.0, - "34": 777900032.0, - "35": 777900032.0, - "36": 777900032.0, - "37": 777900032.0, - "38": 777900032.0, - "39": 777900032.0, - "40": 777900032.0, - "41": 777900032.0, - "42": 777900032.0, - "43": 777900032.0, - "44": 777900032.0, - "45": 777900032.0, - "46": 777900032.0, - "47": 777900032.0, - "48": 777900032.0, - "49": 777900032.0, - "50": 777900032.0 + "1": 778948608.0, + "2": 778948608.0, + "3": 778948608.0, + "4": 778948608.0, + "5": 778948608.0, + "6": 778948608.0, + "7": 778948608.0, + "8": 778948608.0, + "9": 778948608.0, + "10": 778948608.0, + "11": 778948608.0, + "12": 778948608.0, + "13": 778948608.0, + "14": 778948608.0, + "15": 778948608.0, + "16": 778948608.0, + "17": 778948608.0, + "18": 778948608.0, + "19": 778948608.0, + "20": 778948608.0, + "21": 778948608.0, + "22": 778948608.0, + "23": 778948608.0, + "24": 778948608.0, + "25": 778948608.0, + "26": 778948608.0, + "27": 778948608.0, + "28": 778948608.0, + "29": 778948608.0, + "30": 778948608.0, + "31": 778948608.0, + "32": 778948608.0, + "33": 778948608.0, + "34": 778948608.0, + "35": 778948608.0, + "36": 778948608.0, + "37": 778948608.0, + "38": 778948608.0, + "39": 778948608.0, + "40": 778948608.0, + "41": 778948608.0, + "42": 778948608.0, + "43": 778948608.0, + "44": 778948608.0, + "45": 778948608.0, + "46": 778948608.0, + "47": 778948608.0, + "48": 778948608.0, + "49": 778948608.0, + "50": 778948608.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2463815680.0, - "2": 2744478720.0, - "3": 2744478720.0, - "4": 2744478720.0, - "5": 2744478720.0, - "6": 2744478720.0, - "7": 2744478720.0, - "8": 2744478720.0, - "9": 2744478720.0, - "10": 2744478720.0, - "11": 2744478720.0, - "12": 2744478720.0, - "13": 2744478720.0, - "14": 2744478720.0, - "15": 2744478720.0, - "16": 2744478720.0, - "17": 2744478720.0, - "18": 2744478720.0, - "19": 2744478720.0, - "20": 2744478720.0, - "21": 2744478720.0, - "22": 2744478720.0, - "23": 2744478720.0, - "24": 2744478720.0, - "25": 2744478720.0, - "26": 2744478720.0, - "27": 2744478720.0, - "28": 2744478720.0, - "29": 2744478720.0, - "30": 2744478720.0, - "31": 2744478720.0, - "32": 2744478720.0, - "33": 2744478720.0, - "34": 2744478720.0, - "35": 2744478720.0, - "36": 2744478720.0, - "37": 2744478720.0, - "38": 2744478720.0, - "39": 2744478720.0, - "40": 2744478720.0, - "41": 2744478720.0, - "42": 2744478720.0, - "43": 2744478720.0, - "44": 2744478720.0, - "45": 2744478720.0, - "46": 2744478720.0, - "47": 2744478720.0, - "48": 2744478720.0, - "49": 2744478720.0, - "50": 2744478720.0 + "1": 2462767104.0, + "2": 2746575872.0, + "3": 2746575872.0, + "4": 2746575872.0, + "5": 2746575872.0, + "6": 2746575872.0, + "7": 2746575872.0, + "8": 2746575872.0, + "9": 2746575872.0, + "10": 2746575872.0, + "11": 2746575872.0, + "12": 2746575872.0, + "13": 2746575872.0, + "14": 2746575872.0, + "15": 2746575872.0, + "16": 2746575872.0, + "17": 2746575872.0, + "18": 2746575872.0, + "19": 2746575872.0, + "20": 2746575872.0, + "21": 2746575872.0, + "22": 2746575872.0, + "23": 2746575872.0, + "24": 2746575872.0, + "25": 2746575872.0, + "26": 2746575872.0, + "27": 2746575872.0, + "28": 2746575872.0, + "29": 2746575872.0, + "30": 2746575872.0, + "31": 2746575872.0, + "32": 2746575872.0, + "33": 2746575872.0, + "34": 2746575872.0, + "35": 2746575872.0, + "36": 2746575872.0, + "37": 2746575872.0, + "38": 2746575872.0, + "39": 2746575872.0, + "40": 2746575872.0, + "41": 2746575872.0, + "42": 2746575872.0, + "43": 2746575872.0, + "44": 2746575872.0, + "45": 2746575872.0, + "46": 2746575872.0, + "47": 2746575872.0, + "48": 2746575872.0, + "49": 2746575872.0, + "50": 2746575872.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 7.818, - "2": 0.14182, - "3": 0.12081, - "4": 0.09954, - "5": 0.09861, - "6": 0.10039, - "7": 0.09846, - "8": 0.09916, - "9": 0.10232, - "10": 0.10158, - "11": 0.09888, - "12": 0.09744, - "13": 0.09991, - "14": 0.09707, - "15": 0.09748, - "16": 0.09761, - "17": 0.09792, - "18": 0.09795, - "19": 0.09792, - "20": 0.09738, - "21": 0.10014, - "22": 0.09781, - "23": 0.09834, - "24": 0.09956, - "25": 0.09768, - "26": 0.09722, - "27": 0.09836, - "28": 0.09714, - "29": 0.09695, - "30": 0.09751, - "31": 0.09809, - "32": 0.09759, - "33": 0.09764, - "34": 0.09711, - "35": 0.09791, - "36": 0.09751, - "37": 0.09778, - "38": 0.09695, - "39": 0.09907, - "40": 0.09654, - "41": 0.09746, - "42": 0.09685, - "43": 0.09736, - "44": 0.09954, - "45": 0.09768, - "46": 0.09735, - "47": 0.09905, - "48": 0.09815, - "49": 0.09684, - "50": 0.09793 + "1": "nan", + "2": 6.11405, + "3": 0.12147, + "4": 0.10402, + "5": 0.09689, + "6": 0.09621, + "7": 0.09601, + "8": 0.09683, + "9": 0.09798, + "10": 0.09721, + "11": 0.09607, + "12": 0.09586, + "13": 0.09658, + "14": 0.09806, + "15": 0.09742, + "16": 0.09593, + "17": 0.09607, + "18": 0.0965, + "19": 0.09647, + "20": 0.09635, + "21": 0.09631, + "22": 0.09646, + "23": 0.09779, + "24": 0.09709, + "25": 0.09739, + "26": 0.09629, + "27": 0.09658, + "28": 0.0966, + "29": 0.09641, + "30": 0.09681, + "31": 0.09671, + "32": 0.09692, + "33": 0.09737, + "34": 0.09716, + "35": 0.09694, + "36": 0.09598, + "37": 0.09623, + "38": 0.09571, + "39": 0.0999, + "40": 0.10205, + "41": 0.10186, + "42": 0.09612, + "43": 0.09678, + "44": 0.09781, + "45": 0.09572, + "46": 0.09747, + "47": 0.09574, + "48": 0.09602, + "49": 0.09759, + "50": 0.09631 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_gb200.json index 3dd007cc9ec..4c4c8c0c0cf 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.7915, "2": 10.8072, - "3": 10.79214, - "4": 10.77571, - "5": 10.82495, - "6": 10.83191, - "7": 10.82037, - "8": 10.81565, - "9": 10.81976, - "10": 10.7695, - "11": 10.8454, - "12": 10.83063, - "13": 10.83651, - "14": 10.85696, - "15": 10.80631, - "16": 10.78763, - "17": 10.75856, - "18": 10.79234, - "19": 10.78331, - "20": 10.73181, - "21": 10.71017, - "22": 10.57574, - "23": 10.71599, - "24": 10.62049, - "25": 10.58266, - "26": 10.61764, - "27": 10.65105, - "28": 10.63303, - "29": 10.63022, - "30": 10.44254, - "31": 10.20049, - "32": 10.52014, - "33": 10.50814, - "34": 10.29535, - "35": 10.33643, - "36": 10.30247, - "37": 10.41766, - "38": 10.28067, - "39": 10.46149, - "40": 10.18213, - "41": 10.21349, - "42": 10.28426, - "43": 9.9557, - "44": 10.05793, - "45": 9.9574, - "46": 9.93571, - "47": 10.22719, - "48": 9.96561, - "49": 9.66581, - "50": 10.00922, - "51": 9.94826, - "52": 9.84653, - "53": 10.14876, - "54": 10.03737, - "55": 9.97454, - "56": 9.71384, - "57": 9.5955, - "58": 9.92044, - "59": 9.67604, - "60": 9.61264, - "61": 9.79194, - "62": 10.05699, - "63": 9.47838, - "64": 9.84479, - "65": 9.03861, - "66": 9.78386, - "67": 9.43595, - "68": 9.85188, - "69": 9.84445, - "70": 9.79288, - "71": 9.69163, - "72": 9.64893, - "73": 9.55502, - "74": 9.04736, - "75": 9.49186, - "76": 9.17766, - "77": 10.11289, - "78": 9.7687, - "79": 9.43966, - "80": 9.45416, - "81": 9.53142, - "82": 9.7541, - "83": 9.38201, - "84": 9.46121, - "85": 9.66928, - "86": 9.13531, - "87": 9.63413, - "88": 9.8011, - "89": 9.66658, - "90": 9.86173, - "91": 9.39963, - "92": 9.41066, - "93": 9.14665, - "94": 8.8869, - "95": 9.56959, - "96": 9.57609, - "97": 9.34309, - "98": 9.72749, - "99": 8.96222, - "100": 9.44903 + "3": 10.79211, + "4": 10.77572, + "5": 10.82476, + "6": 10.83254, + "7": 10.81992, + "8": 10.81584, + "9": 10.81974, + "10": 10.76945, + "11": 10.84429, + "12": 10.83026, + "13": 10.83727, + "14": 10.857, + "15": 10.80627, + "16": 10.78742, + "17": 10.75904, + "18": 10.79229, + "19": 10.78343, + "20": 10.73204, + "21": 10.70924, + "22": 10.57637, + "23": 10.7162, + "24": 10.62002, + "25": 10.58213, + "26": 10.61692, + "27": 10.65114, + "28": 10.63239, + "29": 10.63059, + "30": 10.44345, + "31": 10.19924, + "32": 10.5204, + "33": 10.50791, + "34": 10.29555, + "35": 10.33583, + "36": 10.30347, + "37": 10.41745, + "38": 10.28027, + "39": 10.46232, + "40": 10.18155, + "41": 10.21302, + "42": 10.28416, + "43": 9.95622, + "44": 10.0575, + "45": 9.95696, + "46": 9.93598, + "47": 10.22691, + "48": 9.96494, + "49": 9.66599, + "50": 10.00894, + "51": 9.9482, + "52": 9.84637, + "53": 10.14872, + "54": 10.0373, + "55": 9.97437, + "56": 9.71411, + "57": 9.5952, + "58": 9.92061, + "59": 9.67629, + "60": 9.61278, + "61": 9.79184, + "62": 10.05672, + "63": 9.478, + "64": 9.84502, + "65": 9.03853, + "66": 9.78391, + "67": 9.43621, + "68": 9.85197, + "69": 9.84485, + "70": 9.79321, + "71": 9.6918, + "72": 9.64914, + "73": 9.5548, + "74": 9.04782, + "75": 9.49197, + "76": 9.17823, + "77": 10.11286, + "78": 9.76804, + "79": 9.43992, + "80": 9.45387, + "81": 9.53151, + "82": 9.7542, + "83": 9.38211, + "84": 9.46106, + "85": 9.66973, + "86": 9.13544, + "87": 9.63433, + "88": 9.80069, + "89": 9.66687, + "90": 9.86192, + "91": 9.39952, + "92": 9.41067, + "93": 9.1467, + "94": 8.88715, + "95": 9.56966, + "96": 9.57637, + "97": 9.34292, + "98": 9.72745, + "99": 8.96159, + "100": 9.44932 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 31217.0, - "2": 33106.0, - "3": 33596.0, - "4": 31946.0, - "5": 36783.0, - "6": 37252.0, - "7": 35314.0, - "8": 31970.0, - "9": 34937.0, - "10": 29900.0, - "11": 38039.0, - "12": 34886.0, - "13": 37108.0, - "14": 37755.0, - "15": 35069.0, - "16": 36687.0, - "17": 34887.0, - "18": 35219.0, - "19": 35710.0, - "20": 32682.0, - "21": 33456.0, - "22": 30216.0, - "23": 37780.0, - "24": 32298.0, - "25": 30789.0, - "26": 34549.0, - "27": 35611.0, - "28": 36806.0, - "29": 37955.0, - "30": 32950.0, - "31": 30468.0, - "32": 36291.0, - "33": 37916.0, - "34": 32820.0, - "35": 34371.0, - "36": 34957.0, - "37": 38282.0, - "38": 35878.0, - "39": 38974.0, - "40": 36048.0, - "41": 35988.0, - "42": 37320.0, - "43": 33909.0, - "44": 33889.0, - "45": 35577.0, - "46": 37076.0, - "47": 40966.0, - "48": 35327.0, - "49": 34682.0, - "50": 39871.0, - "51": 36802.0, - "52": 36445.0, - "53": 41968.0, - "54": 40797.0, - "55": 36920.0, - "56": 40345.0, - "57": 36961.0, - "58": 41622.0, - "59": 37988.0, - "60": 40534.0, - "61": 40456.0, - "62": 43543.0, - "63": 37438.0, - "64": 42659.0, - "65": 39924.0, - "66": 44122.0, - "67": 40136.0, - "68": 40005.0, - "69": 41675.0, - "70": 45011.0, - "71": 40746.0, - "72": 41647.0, - "73": 44080.0, - "74": 35412.0, - "75": 39478.0, - "76": 46254.0, - "77": 44764.0, - "78": 47985.0, - "79": 48646.0, - "80": 46686.0, - "81": 50102.0, - "82": 50188.0, - "83": 44717.0, - "84": 46114.0, - "85": 49347.0, - "86": 45770.0, - "87": 49671.0, - "88": 46449.0, - "89": 49666.0, - "90": 51087.0, - "91": 45827.0, - "92": 48163.0, - "93": 46547.0, - "94": 47562.0, - "95": 48540.0, - "96": 50182.0, - "97": 46055.0, - "98": 50271.0, - "99": 48494.0, - "100": 45373.0 + "1": 30880.0, + "2": 33134.0, + "3": 34050.0, + "4": 31735.0, + "5": 37029.0, + "6": 37301.0, + "7": 35264.0, + "8": 31446.0, + "9": 34979.0, + "10": 29776.0, + "11": 37948.0, + "12": 35277.0, + "13": 36889.0, + "14": 37873.0, + "15": 35129.0, + "16": 36685.0, + "17": 35245.0, + "18": 35040.0, + "19": 35795.0, + "20": 32424.0, + "21": 33245.0, + "22": 30312.0, + "23": 37765.0, + "24": 32307.0, + "25": 30615.0, + "26": 35162.0, + "27": 35279.0, + "28": 36753.0, + "29": 38210.0, + "30": 32956.0, + "31": 30176.0, + "32": 36478.0, + "33": 38169.0, + "34": 32816.0, + "35": 34299.0, + "36": 35094.0, + "37": 38301.0, + "38": 35465.0, + "39": 38725.0, + "40": 36179.0, + "41": 36169.0, + "42": 37201.0, + "43": 33594.0, + "44": 34260.0, + "45": 35612.0, + "46": 36862.0, + "47": 41077.0, + "48": 35724.0, + "49": 35419.0, + "50": 39856.0, + "51": 36420.0, + "52": 36293.0, + "53": 41745.0, + "54": 40909.0, + "55": 36730.0, + "56": 40013.0, + "57": 36880.0, + "58": 41605.0, + "59": 37882.0, + "60": 40354.0, + "61": 40374.0, + "62": 43810.0, + "63": 37847.0, + "64": 42754.0, + "65": 40712.0, + "66": 43936.0, + "67": 39899.0, + "68": 39751.0, + "69": 41842.0, + "70": 44867.0, + "71": 40732.0, + "72": 41487.0, + "73": 44646.0, + "74": 35559.0, + "75": 39104.0, + "76": 46425.0, + "77": 44861.0, + "78": 48216.0, + "79": 48320.0, + "80": 47159.0, + "81": 50341.0, + "82": 49881.0, + "83": 44680.0, + "84": 46232.0, + "85": 50001.0, + "86": 45505.0, + "87": 49100.0, + "88": 45978.0, + "89": 49672.0, + "90": 50663.0, + "91": 45690.0, + "92": 48221.0, + "93": 46168.0, + "94": 47472.0, + "95": 48600.0, + "96": 50290.0, + "97": 46454.0, + "98": 50024.0, + "99": 48124.0, + "100": 45275.0 } }, "mem-allocated-bytes": { @@ -220,104 +220,104 @@ "values": { "1": 892874752.0, "2": 892866560.0, - "3": 892869120.0, + "3": 892869632.0, "4": 892876800.0, - "5": 892869120.0, + "5": 892870656.0, "6": 892870656.0, - "7": 892874240.0, + "7": 892874752.0, "8": 892868608.0, "9": 892869632.0, - "10": 892868608.0, - "11": 892869632.0, - "12": 892867072.0, - "13": 892872192.0, - "14": 892873216.0, + "10": 892869632.0, + "11": 892868608.0, + "12": 892867584.0, + "13": 892870656.0, + "14": 892872192.0, "15": 892870656.0, - "16": 892868608.0, + "16": 892867584.0, "17": 892879360.0, - "18": 892867072.0, - "19": 892870656.0, + "18": 892867584.0, + "19": 892870144.0, "20": 892867072.0, "21": 892871168.0, - "22": 892874752.0, - "23": 892877824.0, + "22": 892875264.0, + "23": 892877312.0, "24": 892869120.0, - "25": 892877312.0, - "26": 892873216.0, - "27": 892865024.0, - "28": 892870144.0, - "29": 892869632.0, - "30": 892871680.0, + "25": 892876288.0, + "26": 892875264.0, + "27": 892866048.0, + "28": 892869632.0, + "29": 892869120.0, + "30": 892870656.0, "31": 892881920.0, "32": 892874752.0, - "33": 892870144.0, - "34": 892872192.0, - "35": 892874240.0, - "36": 892869632.0, + "33": 892869632.0, + "34": 892871168.0, + "35": 892873728.0, + "36": 892870656.0, "37": 892868096.0, - "38": 892867072.0, - "39": 892871168.0, - "40": 892869120.0, - "41": 892873728.0, - "42": 892868608.0, - "43": 892871168.0, - "44": 892871680.0, - "45": 892869632.0, - "46": 892876800.0, - "47": 892869632.0, + "38": 892869120.0, + "39": 892870144.0, + "40": 892868096.0, + "41": 892875776.0, + "42": 892868096.0, + "43": 892870144.0, + "44": 892872192.0, + "45": 892868608.0, + "46": 892877312.0, + "47": 892869120.0, "48": 892875264.0, - "49": 892872704.0, - "50": 892869120.0, - "51": 892872192.0, + "49": 892871168.0, + "50": 892868608.0, + "51": 892871168.0, "52": 892875776.0, - "53": 892868096.0, - "54": 892872192.0, - "55": 892867072.0, - "56": 892865024.0, - "57": 892876288.0, - "58": 892869120.0, - "59": 892871680.0, - "60": 892869120.0, + "53": 892869120.0, + "54": 892873216.0, + "55": 892868096.0, + "56": 892866048.0, + "57": 892876800.0, + "58": 892868608.0, + "59": 892872704.0, + "60": 892868096.0, "61": 892869120.0, - "62": 892869632.0, - "63": 892870656.0, + "62": 892870656.0, + "63": 892872192.0, "64": 892865536.0, - "65": 892872192.0, - "66": 892864512.0, + "65": 892872704.0, + "66": 892864000.0, "67": 892862464.0, - "68": 892867584.0, + "68": 892866560.0, "69": 892861952.0, - "70": 892867072.0, + "70": 892866048.0, "71": 892870656.0, - "72": 892862464.0, + "72": 892861952.0, "73": 892861440.0, - "74": 892849664.0, - "75": 892868096.0, + "74": 892850176.0, + "75": 892870144.0, "76": 892869632.0, - "77": 892868096.0, - "78": 892859392.0, - "79": 892865024.0, - "80": 892855296.0, - "81": 892856320.0, - "82": 892860416.0, - "83": 892869632.0, - "84": 892852736.0, - "85": 892871680.0, - "86": 892861952.0, + "77": 892867584.0, + "78": 892860928.0, + "79": 892864512.0, + "80": 892853760.0, + "81": 892854784.0, + "82": 892860928.0, + "83": 892869120.0, + "84": 892854784.0, + "85": 892872192.0, + "86": 892862464.0, "87": 892869120.0, "88": 892869632.0, - "89": 892859392.0, - "90": 892867072.0, - "91": 892865536.0, + "89": 892860416.0, + "90": 892868096.0, + "91": 892867584.0, "92": 892865536.0, "93": 892861440.0, - "94": 892860928.0, - "95": 892869120.0, - "96": 892866560.0, - "97": 892856320.0, - "98": 892869120.0, - "99": 892864512.0, - "100": 892864000.0 + "94": 892858880.0, + "95": 892869632.0, + "96": 892866048.0, + "97": 892855808.0, + "98": 892869632.0, + "99": 892861952.0, + "100": 892864512.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1948027904.0, - "2": 2183897088.0, - "3": 2184431104.0, - "4": 2190589952.0, - "5": 2190589952.0, - "6": 2190589952.0, - "7": 2190589952.0, - "8": 2190589952.0, - "9": 2190589952.0, - "10": 2190589952.0, - "11": 2190589952.0, - "12": 2190589952.0, - "13": 2190589952.0, - "14": 2190589952.0, - "15": 2190589952.0, - "16": 2190589952.0, - "17": 2194543104.0, - "18": 2194543104.0, - "19": 2194543104.0, - "20": 2194543104.0, - "21": 2194543104.0, - "22": 2194543104.0, - "23": 2194543104.0, - "24": 2194543104.0, - "25": 2194543104.0, - "26": 2194543104.0, - "27": 2194543104.0, - "28": 2194543104.0, - "29": 2194543104.0, - "30": 2194543104.0, - "31": 2195852288.0, - "32": 2195852288.0, - "33": 2195852288.0, - "34": 2195852288.0, - "35": 2195852288.0, - "36": 2195852288.0, - "37": 2195852288.0, - "38": 2195852288.0, - "39": 2195852288.0, - "40": 2195852288.0, - "41": 2195852288.0, - "42": 2195852288.0, - "43": 2195852288.0, - "44": 2195852288.0, - "45": 2195852288.0, - "46": 2195852288.0, - "47": 2195852288.0, - "48": 2195852288.0, - "49": 2195852288.0, - "50": 2195852288.0, - "51": 2195852288.0, - "52": 2195852288.0, - "53": 2195852288.0, - "54": 2195852288.0, - "55": 2195852288.0, - "56": 2195852288.0, - "57": 2195852288.0, - "58": 2195852288.0, - "59": 2195852288.0, - "60": 2195852288.0, - "61": 2195852288.0, - "62": 2195852288.0, - "63": 2195852288.0, - "64": 2195852288.0, - "65": 2195852288.0, - "66": 2195852288.0, - "67": 2195852288.0, - "68": 2195852288.0, - "69": 2195852288.0, - "70": 2195852288.0, - "71": 2195852288.0, - "72": 2195852288.0, - "73": 2195852288.0, - "74": 2195852288.0, - "75": 2195852288.0, - "76": 2195852288.0, - "77": 2195852288.0, - "78": 2195852288.0, - "79": 2195852288.0, - "80": 2195852288.0, - "81": 2195852288.0, - "82": 2195852288.0, - "83": 2195852288.0, - "84": 2195852288.0, - "85": 2195852288.0, - "86": 2195852288.0, - "87": 2195852288.0, - "88": 2195852288.0, - "89": 2195852288.0, - "90": 2195852288.0, - "91": 2195852288.0, - "92": 2195852288.0, - "93": 2195852288.0, - "94": 2195852288.0, - "95": 2195852288.0, - "96": 2195852288.0, - "97": 2195852288.0, - "98": 2195852288.0, - "99": 2195852288.0, - "100": 2195852288.0 + "1": 1946773504.0, + "2": 2182914048.0, + "3": 2184069120.0, + "4": 2188971008.0, + "5": 2188971008.0, + "6": 2188971008.0, + "7": 2188971008.0, + "8": 2188971008.0, + "9": 2188971008.0, + "10": 2188971008.0, + "11": 2188971008.0, + "12": 2188971008.0, + "13": 2188971008.0, + "14": 2188971008.0, + "15": 2188971008.0, + "16": 2188971008.0, + "17": 2191747584.0, + "18": 2191747584.0, + "19": 2191747584.0, + "20": 2191747584.0, + "21": 2191747584.0, + "22": 2191747584.0, + "23": 2191747584.0, + "24": 2191747584.0, + "25": 2191747584.0, + "26": 2191747584.0, + "27": 2191747584.0, + "28": 2191747584.0, + "29": 2191747584.0, + "30": 2191747584.0, + "31": 2193825792.0, + "32": 2193825792.0, + "33": 2193825792.0, + "34": 2193825792.0, + "35": 2193825792.0, + "36": 2193825792.0, + "37": 2193825792.0, + "38": 2193825792.0, + "39": 2193825792.0, + "40": 2193825792.0, + "41": 2193825792.0, + "42": 2193825792.0, + "43": 2193825792.0, + "44": 2193825792.0, + "45": 2193825792.0, + "46": 2193825792.0, + "47": 2193825792.0, + "48": 2193825792.0, + "49": 2193825792.0, + "50": 2193825792.0, + "51": 2193825792.0, + "52": 2193825792.0, + "53": 2193825792.0, + "54": 2193825792.0, + "55": 2193825792.0, + "56": 2193825792.0, + "57": 2194713600.0, + "58": 2194713600.0, + "59": 2194713600.0, + "60": 2194713600.0, + "61": 2194713600.0, + "62": 2194713600.0, + "63": 2194713600.0, + "64": 2194713600.0, + "65": 2194713600.0, + "66": 2194713600.0, + "67": 2194713600.0, + "68": 2194713600.0, + "69": 2194713600.0, + "70": 2194713600.0, + "71": 2194713600.0, + "72": 2194713600.0, + "73": 2194713600.0, + "74": 2194713600.0, + "75": 2194713600.0, + "76": 2194713600.0, + "77": 2194713600.0, + "78": 2194713600.0, + "79": 2194713600.0, + "80": 2194713600.0, + "81": 2194713600.0, + "82": 2194713600.0, + "83": 2194713600.0, + "84": 2194713600.0, + "85": 2194713600.0, + "86": 2194713600.0, + "87": 2194713600.0, + "88": 2194713600.0, + "89": 2194713600.0, + "90": 2194713600.0, + "91": 2194713600.0, + "92": 2194713600.0, + "93": 2194713600.0, + "94": 2194713600.0, + "95": 2194713600.0, + "96": 2194713600.0, + "97": 2194713600.0, + "98": 2194713600.0, + "99": 2194713600.0, + "100": 2194713600.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.86481, - "3": 0.3588, - "4": 0.35276, - "5": 0.33575, - "6": 0.3344, - "7": 0.3406, - "8": 0.33551, - "9": 0.33157, - "10": 0.32814, - "11": 0.32882, - "12": 0.3298, - "13": 0.32887, - "14": 0.32898, - "15": 0.33409, - "16": 0.32679, - "17": 0.34317, - "18": 0.33153, - "19": 0.32828, - "20": 0.33077, - "21": 0.32713, - "22": 0.32603, - "23": 0.32819, - "24": 0.33158, - "25": 0.32832, - "26": 0.32593, - "27": 0.33086, - "28": 0.32481, - "29": 0.32607, - "30": 0.33032, - "31": 0.33561, - "32": 0.33149, - "33": 0.32643, - "34": 0.34262, - "35": 0.32889, - "36": 0.32749, - "37": 0.32097, - "38": 0.33036, - "39": 0.69454, - "40": 0.33723, - "41": 0.3284, - "42": 0.32735, - "43": 0.33334, - "44": 0.3333, - "45": 0.33315, - "46": 0.33505, - "47": 0.32976, - "48": 0.32918, - "49": 0.34661, - "50": 0.32681, - "51": 0.3427, - "52": 0.3299, - "53": 0.32454, - "54": 0.3251, - "55": 0.32968, - "56": 0.34696, - "57": 0.33819, - "58": 0.32649, - "59": 0.3341, - "60": 0.33324, - "61": 0.33925, - "62": 0.33532, - "63": 0.34334, - "64": 0.34963, - "65": 0.38392, - "66": 0.33805, - "67": 0.3728, - "68": 0.33745, - "69": 0.33504, - "70": 0.33581, - "71": 0.35385, - "72": 0.34934, - "73": 0.34952, - "74": 0.35756, - "75": 0.35105, - "76": 0.34933, - "77": 0.33518, - "78": 0.34556, - "79": 0.34603, - "80": 0.36355, - "81": 0.34186, - "82": 0.34271, - "83": 0.39765, - "84": 0.36927, - "85": 0.33938, - "86": 0.35142, - "87": 0.34329, - "88": 0.33135, - "89": 0.34535, - "90": 0.33856, - "91": 0.3522, - "92": 0.33934, - "93": 0.38169, - "94": 0.36358, - "95": 0.33846, - "96": 0.33554, - "97": 0.34438, - "98": 0.32586, - "99": 0.43185, - "100": 0.33974 + "2": 6.67539, + "3": 0.407, + "4": 0.39073, + "5": 0.37543, + "6": 0.37537, + "7": 0.38454, + "8": 0.38441, + "9": 0.36994, + "10": 0.3708, + "11": 0.36361, + "12": 0.36417, + "13": 0.37453, + "14": 0.38004, + "15": 0.37984, + "16": 0.36878, + "17": 0.37576, + "18": 0.37765, + "19": 0.36829, + "20": 0.37498, + "21": 0.42033, + "22": 0.41011, + "23": 0.47059, + "24": 0.46929, + "25": 0.4628, + "26": 0.40125, + "27": 0.37004, + "28": 0.36512, + "29": 0.36666, + "30": 0.37229, + "31": 0.37489, + "32": 0.37701, + "33": 0.37213, + "34": 0.38959, + "35": 0.37309, + "36": 0.38121, + "37": 0.36506, + "38": 0.36892, + "39": 0.37227, + "40": 0.37401, + "41": 0.37309, + "42": 0.3734, + "43": 0.37679, + "44": 0.37378, + "45": 0.37577, + "46": 0.37224, + "47": 0.3656, + "48": 0.37878, + "49": 0.38841, + "50": 0.37117, + "51": 0.41362, + "52": 0.36969, + "53": 0.36731, + "54": 0.3675, + "55": 0.37619, + "56": 0.39642, + "57": 0.38389, + "58": 0.37131, + "59": 0.37528, + "60": 0.38109, + "61": 0.37573, + "62": 0.37706, + "63": 0.38387, + "64": 0.38217, + "65": 0.41628, + "66": 0.37827, + "67": 0.40668, + "68": 0.37316, + "69": 0.37861, + "70": 0.38107, + "71": 0.38377, + "72": 0.37354, + "73": 0.3807, + "74": 0.38445, + "75": 0.38628, + "76": 0.42439, + "77": 0.39532, + "78": 0.42914, + "79": 0.39185, + "80": 0.39213, + "81": 0.37941, + "82": 0.37259, + "83": 0.37258, + "84": 0.38481, + "85": 0.38022, + "86": 0.39917, + "87": 0.38057, + "88": 0.38203, + "89": 0.38656, + "90": 0.37227, + "91": 0.38274, + "92": 0.37855, + "93": 0.39076, + "94": 0.40202, + "95": 0.37695, + "96": 0.37707, + "97": 0.38216, + "98": 0.3776, + "99": 0.38682, + "100": 0.37803 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json index 31167be6de5..160ba7e60e2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.7915, "2": 10.8072, - "3": 10.79214, - "4": 10.77571, - "5": 10.82495, - "6": 10.83193, - "7": 10.82077, - "8": 10.81496, - "9": 10.81973, - "10": 10.7692, - "11": 10.84519, - "12": 10.83101, - "13": 10.83652, - "14": 10.85771, - "15": 10.80581, - "16": 10.78733, - "17": 10.75844, - "18": 10.79297, - "19": 10.78295, - "20": 10.73199, - "21": 10.70953, - "22": 10.57675, - "23": 10.71651, - "24": 10.61983, - "25": 10.58207, - "26": 10.61694, - "27": 10.6509, - "28": 10.63261, - "29": 10.63024, - "30": 10.4432, - "31": 10.19983, - "32": 10.52048, - "33": 10.5079, - "34": 10.29565, - "35": 10.33536, - "36": 10.30278, - "37": 10.41788, - "38": 10.28121, - "39": 10.46185, - "40": 10.18169, - "41": 10.21391, - "42": 10.28457, - "43": 9.95538, - "44": 10.05751, - "45": 9.95713, - "46": 9.93528, - "47": 10.22675, - "48": 9.96521, - "49": 9.66603, - "50": 10.009, - "51": 9.94789, - "52": 9.84665, - "53": 10.14887, - "54": 10.03772, - "55": 9.97445, - "56": 9.71378, - "57": 9.59509, - "58": 9.92081, - "59": 9.67609, - "60": 9.61253, - "61": 9.79221, - "62": 10.05653, - "63": 9.47849, - "64": 9.84455, - "65": 9.03889, - "66": 9.78399, - "67": 9.43609, - "68": 9.85203, - "69": 9.84438, - "70": 9.7933, - "71": 9.69163, - "72": 9.64909, - "73": 9.55528, - "74": 9.04743, - "75": 9.49185, - "76": 9.178, - "77": 10.11275, - "78": 9.76838, - "79": 9.4398, - "80": 9.45421, - "81": 9.53191, - "82": 9.75402, - "83": 9.38186, - "84": 9.46162, - "85": 9.66959, - "86": 9.1349, - "87": 9.6343, - "88": 9.80083, - "89": 9.66682, - "90": 9.86175, - "91": 9.39987, - "92": 9.41063, - "93": 9.14654, - "94": 8.88648, - "95": 9.56986, - "96": 9.57642, - "97": 9.34305, - "98": 9.72786, - "99": 8.96203, - "100": 9.44942 + "3": 10.79211, + "4": 10.77572, + "5": 10.82476, + "6": 10.83254, + "7": 10.81992, + "8": 10.81584, + "9": 10.81974, + "10": 10.76945, + "11": 10.84429, + "12": 10.83036, + "13": 10.83709, + "14": 10.85718, + "15": 10.80556, + "16": 10.78705, + "17": 10.75861, + "18": 10.79283, + "19": 10.78296, + "20": 10.73173, + "21": 10.71002, + "22": 10.5767, + "23": 10.71556, + "24": 10.6198, + "25": 10.58215, + "26": 10.61717, + "27": 10.651, + "28": 10.63244, + "29": 10.63052, + "30": 10.44314, + "31": 10.20041, + "32": 10.52007, + "33": 10.50862, + "34": 10.29649, + "35": 10.33562, + "36": 10.3035, + "37": 10.41783, + "38": 10.28036, + "39": 10.46158, + "40": 10.18138, + "41": 10.21356, + "42": 10.28463, + "43": 9.95586, + "44": 10.05773, + "45": 9.95666, + "46": 9.93559, + "47": 10.22716, + "48": 9.96512, + "49": 9.66568, + "50": 10.00925, + "51": 9.94787, + "52": 9.84659, + "53": 10.14898, + "54": 10.03733, + "55": 9.97466, + "56": 9.71366, + "57": 9.59511, + "58": 9.92053, + "59": 9.67602, + "60": 9.61281, + "61": 9.79168, + "62": 10.05651, + "63": 9.47825, + "64": 9.84472, + "65": 9.03871, + "66": 9.78383, + "67": 9.43591, + "68": 9.85174, + "69": 9.84474, + "70": 9.79281, + "71": 9.69147, + "72": 9.64923, + "73": 9.55514, + "74": 9.04748, + "75": 9.49223, + "76": 9.17797, + "77": 10.11311, + "78": 9.76827, + "79": 9.44002, + "80": 9.4543, + "81": 9.5315, + "82": 9.7541, + "83": 9.3821, + "84": 9.46112, + "85": 9.67014, + "86": 9.13473, + "87": 9.63424, + "88": 9.80046, + "89": 9.66691, + "90": 9.86202, + "91": 9.39937, + "92": 9.41036, + "93": 9.14676, + "94": 8.88677, + "95": 9.5697, + "96": 9.57651, + "97": 9.34308, + "98": 9.72802, + "99": 8.96192, + "100": 9.44958 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 31217.0, - "2": 33106.0, - "3": 33596.0, - "4": 31946.0, - "5": 36783.0, - "6": 37377.0, - "7": 35362.0, - "8": 31711.0, - "9": 34749.0, - "10": 29758.0, - "11": 38348.0, - "12": 35446.0, - "13": 37087.0, - "14": 37869.0, - "15": 35242.0, - "16": 36520.0, - "17": 35190.0, - "18": 35191.0, - "19": 35614.0, - "20": 32571.0, - "21": 33220.0, - "22": 30518.0, - "23": 37619.0, - "24": 32547.0, - "25": 30591.0, - "26": 34546.0, - "27": 35275.0, - "28": 36936.0, - "29": 37531.0, - "30": 33354.0, - "31": 30754.0, - "32": 36331.0, - "33": 38273.0, - "34": 32645.0, - "35": 34237.0, - "36": 35092.0, - "37": 37931.0, - "38": 35480.0, - "39": 39175.0, - "40": 36296.0, - "41": 35902.0, - "42": 37609.0, - "43": 33748.0, - "44": 34027.0, - "45": 35215.0, - "46": 37108.0, - "47": 41056.0, - "48": 35765.0, - "49": 35087.0, - "50": 39734.0, - "51": 36712.0, - "52": 36176.0, - "53": 41774.0, - "54": 40447.0, - "55": 37071.0, - "56": 39975.0, - "57": 36828.0, - "58": 41815.0, - "59": 37962.0, - "60": 40415.0, - "61": 39921.0, - "62": 43840.0, - "63": 37890.0, - "64": 42699.0, - "65": 40347.0, - "66": 44159.0, - "67": 40057.0, - "68": 39563.0, - "69": 42246.0, - "70": 44867.0, - "71": 40910.0, - "72": 40982.0, - "73": 44363.0, - "74": 35672.0, - "75": 39602.0, - "76": 46157.0, - "77": 44919.0, - "78": 48134.0, - "79": 48666.0, - "80": 46770.0, - "81": 50144.0, - "82": 49680.0, - "83": 44991.0, - "84": 45912.0, - "85": 49371.0, - "86": 45600.0, - "87": 49292.0, - "88": 46411.0, - "89": 49710.0, - "90": 51008.0, - "91": 45796.0, - "92": 47991.0, - "93": 46847.0, - "94": 47360.0, - "95": 48680.0, - "96": 50369.0, - "97": 46162.0, - "98": 49921.0, - "99": 48235.0, - "100": 45390.0 + "1": 30880.0, + "2": 33134.0, + "3": 34050.0, + "4": 31735.0, + "5": 37029.0, + "6": 37301.0, + "7": 35264.0, + "8": 31446.0, + "9": 34979.0, + "10": 29776.0, + "11": 37948.0, + "12": 35317.0, + "13": 37121.0, + "14": 38011.0, + "15": 34963.0, + "16": 36503.0, + "17": 35632.0, + "18": 35207.0, + "19": 35782.0, + "20": 32724.0, + "21": 33425.0, + "22": 30572.0, + "23": 37790.0, + "24": 32343.0, + "25": 30529.0, + "26": 34671.0, + "27": 35017.0, + "28": 36377.0, + "29": 37929.0, + "30": 33302.0, + "31": 30382.0, + "32": 35917.0, + "33": 38367.0, + "34": 33007.0, + "35": 34400.0, + "36": 35079.0, + "37": 38363.0, + "38": 35530.0, + "39": 39156.0, + "40": 36183.0, + "41": 36060.0, + "42": 38064.0, + "43": 33730.0, + "44": 34062.0, + "45": 35453.0, + "46": 37450.0, + "47": 41140.0, + "48": 35440.0, + "49": 35339.0, + "50": 40009.0, + "51": 37067.0, + "52": 36328.0, + "53": 41921.0, + "54": 40571.0, + "55": 36973.0, + "56": 39943.0, + "57": 37141.0, + "58": 41862.0, + "59": 38103.0, + "60": 40594.0, + "61": 40414.0, + "62": 43834.0, + "63": 37595.0, + "64": 42651.0, + "65": 40408.0, + "66": 43557.0, + "67": 40116.0, + "68": 40091.0, + "69": 41858.0, + "70": 45155.0, + "71": 40651.0, + "72": 41736.0, + "73": 44373.0, + "74": 35726.0, + "75": 39266.0, + "76": 46415.0, + "77": 44954.0, + "78": 47867.0, + "79": 48322.0, + "80": 46736.0, + "81": 50266.0, + "82": 50166.0, + "83": 44518.0, + "84": 46086.0, + "85": 49137.0, + "86": 45604.0, + "87": 48987.0, + "88": 46500.0, + "89": 49453.0, + "90": 50338.0, + "91": 45534.0, + "92": 47623.0, + "93": 46662.0, + "94": 47357.0, + "95": 48641.0, + "96": 50298.0, + "97": 46694.0, + "98": 50054.0, + "99": 48186.0, + "100": 44915.0 } }, "mem-allocated-bytes": { @@ -220,104 +220,104 @@ "values": { "1": 1254511616.0, "2": 1254503424.0, - "3": 1254505984.0, + "3": 1254506496.0, "4": 1254513664.0, - "5": 1254505984.0, + "5": 1254507520.0, "6": 1254507520.0, - "7": 1254511104.0, + "7": 1254511616.0, "8": 1254505472.0, - "9": 1254505984.0, + "9": 1254506496.0, "10": 1254506496.0, - "11": 1254507520.0, - "12": 1254503936.0, - "13": 1254509568.0, - "14": 1254510080.0, - "15": 1254506496.0, + "11": 1254505472.0, + "12": 1254504448.0, + "13": 1254507520.0, + "14": 1254509568.0, + "15": 1254508544.0, "16": 1254505984.0, "17": 1254516224.0, - "18": 1254503424.0, + "18": 1254504448.0, "19": 1254506496.0, - "20": 1254504960.0, - "21": 1254508032.0, - "22": 1254510592.0, - "23": 1254512640.0, + "20": 1254503936.0, + "21": 1254508544.0, + "22": 1254509056.0, + "23": 1254514176.0, "24": 1254505472.0, - "25": 1254513664.0, - "26": 1254512128.0, - "27": 1254501888.0, - "28": 1254509056.0, + "25": 1254513152.0, + "26": 1254510592.0, + "27": 1254502912.0, + "28": 1254508032.0, "29": 1254508032.0, - "30": 1254509056.0, - "31": 1254519296.0, - "32": 1254512128.0, + "30": 1254508032.0, + "31": 1254518784.0, + "32": 1254512640.0, "33": 1254507008.0, - "34": 1254509056.0, + "34": 1254508544.0, "35": 1254510080.0, "36": 1254507008.0, "37": 1254504448.0, - "38": 1254505472.0, - "39": 1254508032.0, - "40": 1254505984.0, - "41": 1254512128.0, - "42": 1254504960.0, - "43": 1254507008.0, - "44": 1254508032.0, - "45": 1254506496.0, + "38": 1254504448.0, + "39": 1254507008.0, + "40": 1254506496.0, + "41": 1254511616.0, + "42": 1254505472.0, + "43": 1254508544.0, + "44": 1254509056.0, + "45": 1254504960.0, "46": 1254513664.0, - "47": 1254507008.0, + "47": 1254505984.0, "48": 1254511616.0, - "49": 1254508032.0, - "50": 1254506496.0, - "51": 1254508032.0, - "52": 1254513152.0, - "53": 1254505984.0, - "54": 1254508544.0, - "55": 1254503936.0, - "56": 1254502912.0, - "57": 1254515200.0, - "58": 1254503936.0, + "49": 1254509056.0, + "50": 1254505984.0, + "51": 1254509056.0, + "52": 1254512640.0, + "53": 1254506496.0, + "54": 1254509056.0, + "55": 1254504448.0, + "56": 1254503424.0, + "57": 1254514176.0, + "58": 1254504448.0, "59": 1254508544.0, - "60": 1254503936.0, - "61": 1254507008.0, - "62": 1254508032.0, - "63": 1254507520.0, + "60": 1254505984.0, + "61": 1254505984.0, + "62": 1254507008.0, + "63": 1254508032.0, "64": 1254502400.0, - "65": 1254509568.0, + "65": 1254508544.0, "66": 1254501376.0, "67": 1254499328.0, - "68": 1254503936.0, - "69": 1254499328.0, + "68": 1254504960.0, + "69": 1254498816.0, "70": 1254502912.0, - "71": 1254507520.0, + "71": 1254508032.0, "72": 1254499328.0, - "73": 1254497280.0, - "74": 1254486016.0, + "73": 1254498304.0, + "74": 1254485504.0, "75": 1254504960.0, - "76": 1254507008.0, - "77": 1254504448.0, + "76": 1254506496.0, + "77": 1254503936.0, "78": 1254496256.0, - "79": 1254500864.0, - "80": 1254491648.0, - "81": 1254493696.0, - "82": 1254497280.0, + "79": 1254501376.0, + "80": 1254492160.0, + "81": 1254492160.0, + "82": 1254497792.0, "83": 1254505984.0, - "84": 1254489600.0, - "85": 1254505984.0, - "86": 1254500352.0, + "84": 1254490112.0, + "85": 1254507520.0, + "86": 1254499328.0, "87": 1254505472.0, - "88": 1254506496.0, - "89": 1254498304.0, - "90": 1254504448.0, - "91": 1254501888.0, - "92": 1254501888.0, + "88": 1254505472.0, + "89": 1254497792.0, + "90": 1254503424.0, + "91": 1254503424.0, + "92": 1254503424.0, "93": 1254499328.0, - "94": 1254494720.0, - "95": 1254504960.0, - "96": 1254503424.0, + "94": 1254497280.0, + "95": 1254505984.0, + "96": 1254502912.0, "97": 1254492672.0, - "98": 1254505984.0, - "99": 1254499328.0, - "100": 1254501888.0 + "98": 1254507008.0, + "99": 1254501376.0, + "100": 1254500864.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2066381824.0, - "2": 2543623168.0, - "3": 2544637440.0, - "4": 2550310912.0, - "5": 2550310912.0, - "6": 2550310912.0, - "7": 2550310912.0, - "8": 2550310912.0, - "9": 2550310912.0, - "10": 2550310912.0, - "11": 2550310912.0, - "12": 2550310912.0, - "13": 2550310912.0, - "14": 2550310912.0, - "15": 2550310912.0, - "16": 2550310912.0, - "17": 2554848768.0, - "18": 2554848768.0, - "19": 2554848768.0, - "20": 2554848768.0, - "21": 2554848768.0, - "22": 2554848768.0, - "23": 2554848768.0, - "24": 2554848768.0, - "25": 2554848768.0, - "26": 2554848768.0, - "27": 2554848768.0, - "28": 2554848768.0, - "29": 2554848768.0, - "30": 2554848768.0, - "31": 2556047872.0, - "32": 2556047872.0, - "33": 2556047872.0, - "34": 2556047872.0, - "35": 2556047872.0, - "36": 2556047872.0, - "37": 2556047872.0, - "38": 2556047872.0, - "39": 2556047872.0, - "40": 2556047872.0, - "41": 2556047872.0, - "42": 2556047872.0, - "43": 2556047872.0, - "44": 2556047872.0, - "45": 2556047872.0, - "46": 2556047872.0, - "47": 2556047872.0, - "48": 2556047872.0, - "49": 2556047872.0, - "50": 2556047872.0, - "51": 2556047872.0, - "52": 2556047872.0, - "53": 2556047872.0, - "54": 2556047872.0, - "55": 2556047872.0, - "56": 2556047872.0, - "57": 2556047872.0, - "58": 2556047872.0, - "59": 2556047872.0, - "60": 2556047872.0, - "61": 2556047872.0, - "62": 2556047872.0, - "63": 2556047872.0, - "64": 2556047872.0, - "65": 2556047872.0, - "66": 2556047872.0, - "67": 2556047872.0, - "68": 2556047872.0, - "69": 2556047872.0, - "70": 2556047872.0, - "71": 2556047872.0, - "72": 2556047872.0, - "73": 2556047872.0, - "74": 2556047872.0, - "75": 2556047872.0, - "76": 2556047872.0, - "77": 2556047872.0, - "78": 2556047872.0, - "79": 2556047872.0, - "80": 2556047872.0, - "81": 2556047872.0, - "82": 2556047872.0, - "83": 2556047872.0, - "84": 2556047872.0, - "85": 2556047872.0, - "86": 2556047872.0, - "87": 2556047872.0, - "88": 2556047872.0, - "89": 2556047872.0, - "90": 2556047872.0, - "91": 2556047872.0, - "92": 2556047872.0, - "93": 2556047872.0, - "94": 2556047872.0, - "95": 2556047872.0, - "96": 2556047872.0, - "97": 2556047872.0, - "98": 2556047872.0, - "99": 2556047872.0, - "100": 2556047872.0 + "1": 2066350080.0, + "2": 2542998528.0, + "3": 2544289792.0, + "4": 2549362688.0, + "5": 2549362688.0, + "6": 2549362688.0, + "7": 2549362688.0, + "8": 2549362688.0, + "9": 2549362688.0, + "10": 2549362688.0, + "11": 2549362688.0, + "12": 2549362688.0, + "13": 2549362688.0, + "14": 2549362688.0, + "15": 2549362688.0, + "16": 2549362688.0, + "17": 2552193536.0, + "18": 2552193536.0, + "19": 2552193536.0, + "20": 2552193536.0, + "21": 2552193536.0, + "22": 2552193536.0, + "23": 2552193536.0, + "24": 2552193536.0, + "25": 2552193536.0, + "26": 2552193536.0, + "27": 2552193536.0, + "28": 2552193536.0, + "29": 2552193536.0, + "30": 2552193536.0, + "31": 2555549696.0, + "32": 2555549696.0, + "33": 2555549696.0, + "34": 2555549696.0, + "35": 2555549696.0, + "36": 2555549696.0, + "37": 2555549696.0, + "38": 2555549696.0, + "39": 2555549696.0, + "40": 2555549696.0, + "41": 2555549696.0, + "42": 2555549696.0, + "43": 2555549696.0, + "44": 2555549696.0, + "45": 2555549696.0, + "46": 2555549696.0, + "47": 2555549696.0, + "48": 2555549696.0, + "49": 2555549696.0, + "50": 2555549696.0, + "51": 2555549696.0, + "52": 2555549696.0, + "53": 2555549696.0, + "54": 2555549696.0, + "55": 2555549696.0, + "56": 2555549696.0, + "57": 2555549696.0, + "58": 2555549696.0, + "59": 2555549696.0, + "60": 2555549696.0, + "61": 2555549696.0, + "62": 2555549696.0, + "63": 2555549696.0, + "64": 2555549696.0, + "65": 2555549696.0, + "66": 2555549696.0, + "67": 2555549696.0, + "68": 2555549696.0, + "69": 2555549696.0, + "70": 2555549696.0, + "71": 2555549696.0, + "72": 2555549696.0, + "73": 2555549696.0, + "74": 2555549696.0, + "75": 2555549696.0, + "76": 2555549696.0, + "77": 2555549696.0, + "78": 2555549696.0, + "79": 2555549696.0, + "80": 2555549696.0, + "81": 2555549696.0, + "82": 2555549696.0, + "83": 2555549696.0, + "84": 2555549696.0, + "85": 2555549696.0, + "86": 2555549696.0, + "87": 2555549696.0, + "88": 2555549696.0, + "89": 2555549696.0, + "90": 2555549696.0, + "91": 2555549696.0, + "92": 2555549696.0, + "93": 2555549696.0, + "94": 2555549696.0, + "95": 2555549696.0, + "96": 2555549696.0, + "97": 2555549696.0, + "98": 2555549696.0, + "99": 2555549696.0, + "100": 2555549696.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 4.80614, - "3": 0.71249, - "4": 0.40839, - "5": 0.39358, - "6": 0.3911, - "7": 0.39032, - "8": 0.38318, - "9": 0.70382, - "10": 0.34707, - "11": 0.34403, - "12": 0.34043, - "13": 0.33959, - "14": 0.33461, - "15": 0.34767, - "16": 0.33495, - "17": 0.34839, - "18": 0.33673, - "19": 0.33335, - "20": 0.33161, - "21": 0.32643, - "22": 0.33565, - "23": 0.33625, - "24": 0.33009, - "25": 0.33065, - "26": 0.33344, - "27": 0.33552, - "28": 0.33047, - "29": 0.33011, - "30": 0.33358, - "31": 0.34631, - "32": 0.33536, - "33": 0.33271, - "34": 0.33949, - "35": 0.33073, - "36": 0.32877, - "37": 0.32806, - "38": 0.33111, - "39": 0.33408, - "40": 0.33428, - "41": 0.34927, - "42": 1.47745, - "43": 0.48012, - "44": 0.33077, - "45": 0.33262, - "46": 0.34066, - "47": 0.33152, - "48": 0.33512, - "49": 0.34429, - "50": 0.33697, - "51": 0.34656, - "52": 0.337, - "53": 0.33133, - "54": 0.33172, - "55": 0.33188, - "56": 0.35163, - "57": 0.34162, - "58": 0.33258, - "59": 0.7122, - "60": 0.33979, - "61": 0.33569, - "62": 0.33523, - "63": 0.33864, - "64": 0.34776, - "65": 0.37658, - "66": 0.3377, - "67": 0.36916, - "68": 0.3452, - "69": 0.33854, - "70": 0.34023, - "71": 0.3544, - "72": 0.34395, - "73": 0.3567, - "74": 0.35025, - "75": 0.35164, - "76": 0.35012, - "77": 0.3364, - "78": 0.34491, - "79": 0.34789, - "80": 0.35388, - "81": 0.34075, - "82": 0.34743, - "83": 0.34211, - "84": 0.34722, - "85": 0.33956, - "86": 0.35402, - "87": 0.34301, - "88": 0.34056, - "89": 0.35764, - "90": 0.33476, - "91": 0.3539, - "92": 0.34448, - "93": 0.34895, - "94": 0.3624, - "95": 0.34001, - "96": 0.3382, - "97": 0.35217, - "98": 0.33252, - "99": 0.34909, - "100": 0.34966 + "2": 8.3521, + "3": 0.41669, + "4": 0.41284, + "5": 0.39807, + "6": 0.39421, + "7": 0.40258, + "8": 0.39083, + "9": 0.38556, + "10": 0.39045, + "11": 0.38493, + "12": 0.38652, + "13": 0.38654, + "14": 0.38244, + "15": 0.39635, + "16": 0.38332, + "17": 0.38972, + "18": 0.38226, + "19": 0.37884, + "20": 0.38559, + "21": 0.41074, + "22": 0.88305, + "23": 0.38557, + "24": 0.38914, + "25": 0.38815, + "26": 0.38762, + "27": 0.38501, + "28": 0.38776, + "29": 0.38603, + "30": 0.38705, + "31": 0.38791, + "32": 0.3888, + "33": 0.38221, + "34": 0.40037, + "35": 0.387, + "36": 0.3968, + "37": 0.3961, + "38": 0.38866, + "39": 0.3965, + "40": 0.389, + "41": 0.38873, + "42": 0.3895, + "43": 0.38428, + "44": 0.38845, + "45": 0.38913, + "46": 0.38268, + "47": 0.38233, + "48": 0.38844, + "49": 0.3963, + "50": 0.38408, + "51": 0.42069, + "52": 0.38714, + "53": 0.38472, + "54": 0.38779, + "55": 0.38914, + "56": 0.40949, + "57": 0.39708, + "58": 0.39612, + "59": 0.3944, + "60": 0.38052, + "61": 0.39057, + "62": 0.38649, + "63": 0.4128, + "64": 0.38573, + "65": 0.41257, + "66": 0.39183, + "67": 0.42577, + "68": 0.4009, + "69": 0.38896, + "70": 0.39152, + "71": 0.40707, + "72": 0.40087, + "73": 0.41537, + "74": 0.43436, + "75": 0.45456, + "76": 0.41193, + "77": 0.38627, + "78": 0.39302, + "79": 0.40532, + "80": 0.39666, + "81": 0.39606, + "82": 0.39713, + "83": 0.39956, + "84": 0.39716, + "85": 0.40563, + "86": 0.40773, + "87": 0.39254, + "88": 0.402, + "89": 0.40411, + "90": 0.40046, + "91": 0.41284, + "92": 0.39708, + "93": 0.40273, + "94": 0.42216, + "95": 0.40147, + "96": 0.39007, + "97": 0.40448, + "98": 0.40171, + "99": 0.40175, + "100": 0.39847 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_gb200.json index ce3d79128b1..a6dfbf5a4e0 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.80509, "2": 10.82386, - "3": 10.80196, - "4": 10.79424, - "5": 10.8277, - "6": 10.84005, - "7": 10.8365, - "8": 10.82828, - "9": 10.83477, - "10": 10.77496, - "11": 10.85204, - "12": 10.83903, - "13": 10.85207, - "14": 10.85914, - "15": 10.81681, - "16": 10.79456, - "17": 10.77491, - "18": 10.80399, - "19": 10.79956, - "20": 10.73801, - "21": 10.72487, - "22": 10.59177, - "23": 10.73098, - "24": 10.6406, - "25": 10.59018, - "26": 10.63555, - "27": 10.66245, - "28": 10.6472, - "29": 10.64163, - "30": 10.4518, - "31": 10.22249, - "32": 10.52995, - "33": 10.51998, - "34": 10.31247, - "35": 10.34796, - "36": 10.31677, - "37": 10.42804, - "38": 10.29194, - "39": 10.46881, - "40": 10.19257, - "41": 10.23159, - "42": 10.29766, - "43": 9.97363, - "44": 10.07169, - "45": 9.97015, - "46": 9.94713, - "47": 10.23179, - "48": 9.97593, - "49": 9.67748, - "50": 10.0144 + "3": 10.80187, + "4": 10.79381, + "5": 10.82782, + "6": 10.84103, + "7": 10.83602, + "8": 10.8288, + "9": 10.83444, + "10": 10.77452, + "11": 10.85212, + "12": 10.8401, + "13": 10.85164, + "14": 10.85986, + "15": 10.8172, + "16": 10.79433, + "17": 10.77581, + "18": 10.80351, + "19": 10.79946, + "20": 10.73849, + "21": 10.72577, + "22": 10.59186, + "23": 10.73082, + "24": 10.64095, + "25": 10.59122, + "26": 10.63588, + "27": 10.66274, + "28": 10.64842, + "29": 10.64155, + "30": 10.45205, + "31": 10.22209, + "32": 10.52903, + "33": 10.52002, + "34": 10.31278, + "35": 10.34843, + "36": 10.31619, + "37": 10.42852, + "38": 10.2922, + "39": 10.46863, + "40": 10.19288, + "41": 10.23178, + "42": 10.29807, + "43": 9.97377, + "44": 10.07159, + "45": 9.96973, + "46": 9.94753, + "47": 10.23169, + "48": 9.97573, + "49": 9.67784, + "50": 10.01511 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 31182.0, - "2": 33013.0, - "3": 33646.0, - "4": 32202.0, - "5": 36913.0, - "6": 37554.0, - "7": 35184.0, - "8": 32207.0, - "9": 34523.0, - "10": 29945.0, - "11": 38237.0, - "12": 35346.0, - "13": 37426.0, - "14": 38358.0, - "15": 35140.0, - "16": 36293.0, - "17": 35645.0, - "18": 35117.0, - "19": 35648.0, - "20": 32896.0, - "21": 33511.0, - "22": 30704.0, - "23": 38149.0, - "24": 32677.0, - "25": 31055.0, - "26": 34700.0, - "27": 35410.0, - "28": 37268.0, - "29": 37953.0, - "30": 33210.0, - "31": 30482.0, - "32": 36908.0, - "33": 38308.0, - "34": 33125.0, - "35": 34341.0, - "36": 34925.0, - "37": 38767.0, - "38": 35780.0, - "39": 38955.0, - "40": 36485.0, - "41": 36015.0, - "42": 37638.0, - "43": 33689.0, - "44": 33688.0, - "45": 35448.0, - "46": 36810.0, - "47": 40858.0, - "48": 35696.0, - "49": 34729.0, - "50": 39077.0 + "1": 31002.0, + "2": 33357.0, + "3": 34009.0, + "4": 32315.0, + "5": 36524.0, + "6": 37490.0, + "7": 35275.0, + "8": 31742.0, + "9": 34839.0, + "10": 29933.0, + "11": 37766.0, + "12": 35329.0, + "13": 37113.0, + "14": 38180.0, + "15": 35219.0, + "16": 36450.0, + "17": 35700.0, + "18": 34853.0, + "19": 35788.0, + "20": 32808.0, + "21": 33724.0, + "22": 30451.0, + "23": 38218.0, + "24": 32472.0, + "25": 31520.0, + "26": 34965.0, + "27": 35584.0, + "28": 37025.0, + "29": 38197.0, + "30": 33255.0, + "31": 30551.0, + "32": 36545.0, + "33": 38300.0, + "34": 33029.0, + "35": 34162.0, + "36": 35185.0, + "37": 38545.0, + "38": 35778.0, + "39": 39000.0, + "40": 36217.0, + "41": 36225.0, + "42": 37525.0, + "43": 33633.0, + "44": 33746.0, + "45": 35656.0, + "46": 36610.0, + "47": 40929.0, + "48": 35960.0, + "49": 34405.0, + "50": 39680.0 } }, "mem-allocated-bytes": { @@ -121,8 +121,8 @@ "1": 1027095040.0, "2": 1027093504.0, "3": 1027094528.0, - "4": 1027095040.0, - "5": 1027091968.0, + "4": 1027096064.0, + "5": 1027091456.0, "6": 1027091968.0, "7": 1027098112.0, "8": 1027097600.0, @@ -131,43 +131,43 @@ "11": 1027098624.0, "12": 1027094528.0, "13": 1027092480.0, - "14": 1027095040.0, - "15": 1027095040.0, + "14": 1027094528.0, + "15": 1027096064.0, "16": 1027091456.0, - "17": 1027101184.0, + "17": 1027102208.0, "18": 1027096064.0, - "19": 1027093504.0, + "19": 1027092992.0, "20": 1027093504.0, "21": 1027097088.0, "22": 1027100160.0, "23": 1027100160.0, "24": 1027095552.0, - "25": 1027097088.0, + "25": 1027096576.0, "26": 1027098112.0, - "27": 1027091456.0, + "27": 1027090432.0, "28": 1027090944.0, - "29": 1027091968.0, - "30": 1027099648.0, - "31": 1027109888.0, + "29": 1027092480.0, + "30": 1027099136.0, + "31": 1027108864.0, "32": 1027095552.0, "33": 1027090944.0, - "34": 1027098112.0, - "35": 1027103744.0, - "36": 1027098112.0, - "37": 1027092480.0, - "38": 1027091456.0, - "39": 1027095040.0, - "40": 1027095040.0, - "41": 1027100160.0, + "34": 1027098624.0, + "35": 1027103232.0, + "36": 1027097088.0, + "37": 1027091968.0, + "38": 1027092480.0, + "39": 1027097088.0, + "40": 1027094016.0, + "41": 1027099648.0, "42": 1027091968.0, "43": 1027098624.0, - "44": 1027098624.0, - "45": 1027096064.0, - "46": 1027104256.0, - "47": 1027093504.0, + "44": 1027098112.0, + "45": 1027095552.0, + "46": 1027103232.0, + "47": 1027092992.0, "48": 1027101184.0, "49": 1027096064.0, - "50": 1027095552.0 + "50": 1027096064.0 } }, "mem-max-allocated-bytes": { @@ -177,54 +177,54 @@ "values": { "1": 3059586560.0, "2": 3299159040.0, - "3": 3299482112.0, - "4": 3302137344.0, - "5": 3302137344.0, - "6": 3302137344.0, - "7": 3303535104.0, - "8": 3303535104.0, - "9": 3303535104.0, - "10": 3303535104.0, - "11": 3303535104.0, - "12": 3303535104.0, - "13": 3303535104.0, - "14": 3303535104.0, - "15": 3303535104.0, - "16": 3303535104.0, - "17": 3306910208.0, - "18": 3306910208.0, - "19": 3306910208.0, - "20": 3306910208.0, - "21": 3306910208.0, - "22": 3306910208.0, - "23": 3306910208.0, - "24": 3306910208.0, - "25": 3306910208.0, - "26": 3306910208.0, - "27": 3306910208.0, - "28": 3306910208.0, - "29": 3306910208.0, - "30": 3306910208.0, - "31": 3312495616.0, - "32": 3312495616.0, - "33": 3312495616.0, - "34": 3312495616.0, - "35": 3312495616.0, - "36": 3312495616.0, - "37": 3312495616.0, - "38": 3312495616.0, - "39": 3312495616.0, - "40": 3312495616.0, - "41": 3312495616.0, - "42": 3312495616.0, - "43": 3312495616.0, - "44": 3312495616.0, - "45": 3312495616.0, - "46": 3312495616.0, - "47": 3312495616.0, - "48": 3312495616.0, - "49": 3312495616.0, - "50": 3312495616.0 + "3": 3299596800.0, + "4": 3301077504.0, + "5": 3301077504.0, + "6": 3301077504.0, + "7": 3303559680.0, + "8": 3303559680.0, + "9": 3303559680.0, + "10": 3303559680.0, + "11": 3303559680.0, + "12": 3303559680.0, + "13": 3303559680.0, + "14": 3303559680.0, + "15": 3303559680.0, + "16": 3303559680.0, + "17": 3309215232.0, + "18": 3309215232.0, + "19": 3309215232.0, + "20": 3309215232.0, + "21": 3309215232.0, + "22": 3309215232.0, + "23": 3309215232.0, + "24": 3309215232.0, + "25": 3309215232.0, + "26": 3309215232.0, + "27": 3309215232.0, + "28": 3309215232.0, + "29": 3309215232.0, + "30": 3309215232.0, + "31": 3312232448.0, + "32": 3312232448.0, + "33": 3312232448.0, + "34": 3312232448.0, + "35": 3312232448.0, + "36": 3312232448.0, + "37": 3312232448.0, + "38": 3312232448.0, + "39": 3312232448.0, + "40": 3312232448.0, + "41": 3312232448.0, + "42": 3312232448.0, + "43": 3312232448.0, + "44": 3312232448.0, + "45": 3312232448.0, + "46": 3312232448.0, + "47": 3312232448.0, + "48": 3312232448.0, + "49": 3312232448.0, + "50": 3312232448.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.82645, - "3": 0.34371, - "4": 0.32704, - "5": 0.31536, - "6": 0.32001, - "7": 0.31919, - "8": 0.31719, - "9": 0.31876, - "10": 0.31015, - "11": 0.31546, - "12": 0.31198, - "13": 0.31518, - "14": 0.40567, - "15": 0.31856, - "16": 0.30868, - "17": 0.31352, - "18": 0.31536, - "19": 0.31164, - "20": 0.31286, - "21": 0.35519, - "22": 0.30985, - "23": 0.31256, - "24": 0.31727, - "25": 0.36651, - "26": 0.47287, - "27": 0.57438, - "28": 0.3575, - "29": 0.71431, - "30": 0.31163, - "31": 0.31877, - "32": 0.34436, - "33": 0.51773, - "34": 0.32292, - "35": 0.31651, - "36": 0.34162, - "37": 0.31339, - "38": 0.30524, - "39": 0.63856, - "40": 0.31883, - "41": 0.31475, - "42": 0.67365, - "43": 0.33393, - "44": 0.31389, - "45": 0.65089, - "46": 0.6524, - "47": 0.3061, - "48": 0.30487, - "49": 0.3295, - "50": 0.30784 + "2": 7.29519, + "3": 0.37851, + "4": 0.35418, + "5": 0.34964, + "6": 0.34723, + "7": 0.34884, + "8": 0.35114, + "9": 0.35017, + "10": 0.3435, + "11": 0.34534, + "12": 0.34556, + "13": 0.34396, + "14": 0.34314, + "15": 0.35702, + "16": 0.34971, + "17": 0.35336, + "18": 0.3455, + "19": 0.34238, + "20": 0.34439, + "21": 0.34735, + "22": 0.34647, + "23": 0.34565, + "24": 0.34394, + "25": 0.34386, + "26": 0.34371, + "27": 0.34879, + "28": 0.34725, + "29": 0.34331, + "30": 0.40195, + "31": 0.63878, + "32": 0.35713, + "33": 0.34361, + "34": 0.35443, + "35": 0.34915, + "36": 0.34927, + "37": 0.34804, + "38": 0.3503, + "39": 0.34684, + "40": 0.35049, + "41": 0.34977, + "42": 0.34489, + "43": 0.34941, + "44": 0.34597, + "45": 0.34643, + "46": 0.35037, + "47": 0.34324, + "48": 0.35489, + "49": 0.35451, + "50": 0.34614 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_gb200.json index f62929eef31..54bf682b5f9 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.80509, "2": 10.82386, - "3": 10.80196, - "4": 10.79424, - "5": 10.8277, - "6": 10.84005, - "7": 10.8365, - "8": 10.82828, - "9": 10.83477, - "10": 10.77496, - "11": 10.85204, - "12": 10.83903, - "13": 10.85207, - "14": 10.85914, - "15": 10.81681, - "16": 10.79456, - "17": 10.77491, - "18": 10.80399, - "19": 10.79956, - "20": 10.73801, - "21": 10.72487, - "22": 10.59177, - "23": 10.73098, - "24": 10.6406, - "25": 10.59018, - "26": 10.63555, - "27": 10.66245, - "28": 10.6472, - "29": 10.64163, - "30": 10.4518, - "31": 10.22249, - "32": 10.52995, - "33": 10.51998, - "34": 10.31247, - "35": 10.34796, - "36": 10.31677, - "37": 10.42804, - "38": 10.29194, - "39": 10.46881, - "40": 10.19257, - "41": 10.23159, - "42": 10.29766, - "43": 9.97363, - "44": 10.07169, - "45": 9.97015, - "46": 9.94713, - "47": 10.23179, - "48": 9.97593, - "49": 9.67748, - "50": 10.0144 + "3": 10.80187, + "4": 10.79381, + "5": 10.82782, + "6": 10.84103, + "7": 10.83602, + "8": 10.8288, + "9": 10.83444, + "10": 10.77452, + "11": 10.85212, + "12": 10.8401, + "13": 10.85164, + "14": 10.85986, + "15": 10.8172, + "16": 10.79433, + "17": 10.77581, + "18": 10.80351, + "19": 10.79946, + "20": 10.73849, + "21": 10.72577, + "22": 10.59186, + "23": 10.73082, + "24": 10.64095, + "25": 10.59122, + "26": 10.63588, + "27": 10.66274, + "28": 10.64842, + "29": 10.64155, + "30": 10.45205, + "31": 10.22209, + "32": 10.52903, + "33": 10.52002, + "34": 10.31278, + "35": 10.34843, + "36": 10.31619, + "37": 10.42852, + "38": 10.2922, + "39": 10.46863, + "40": 10.19288, + "41": 10.23178, + "42": 10.29807, + "43": 9.97377, + "44": 10.07159, + "45": 9.96973, + "46": 9.94753, + "47": 10.23169, + "48": 9.97573, + "49": 9.67784, + "50": 10.01511 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 31182.0, - "2": 33013.0, - "3": 33646.0, - "4": 32202.0, - "5": 36913.0, - "6": 37554.0, - "7": 35184.0, - "8": 32207.0, - "9": 34523.0, - "10": 29945.0, - "11": 38237.0, - "12": 35346.0, - "13": 37426.0, - "14": 38358.0, - "15": 35140.0, - "16": 36293.0, - "17": 35645.0, - "18": 35117.0, - "19": 35648.0, - "20": 32896.0, - "21": 33511.0, - "22": 30704.0, - "23": 38149.0, - "24": 32677.0, - "25": 31055.0, - "26": 34700.0, - "27": 35410.0, - "28": 37268.0, - "29": 37953.0, - "30": 33210.0, - "31": 30482.0, - "32": 36908.0, - "33": 38308.0, - "34": 33125.0, - "35": 34341.0, - "36": 34925.0, - "37": 38767.0, - "38": 35780.0, - "39": 38955.0, - "40": 36485.0, - "41": 36015.0, - "42": 37638.0, - "43": 33689.0, - "44": 33688.0, - "45": 35448.0, - "46": 36810.0, - "47": 40858.0, - "48": 35696.0, - "49": 34729.0, - "50": 39077.0 + "1": 31002.0, + "2": 33357.0, + "3": 34009.0, + "4": 32315.0, + "5": 36524.0, + "6": 37490.0, + "7": 35275.0, + "8": 31742.0, + "9": 34839.0, + "10": 29933.0, + "11": 37766.0, + "12": 35329.0, + "13": 37113.0, + "14": 38180.0, + "15": 35219.0, + "16": 36450.0, + "17": 35700.0, + "18": 34853.0, + "19": 35788.0, + "20": 32808.0, + "21": 33724.0, + "22": 30451.0, + "23": 38218.0, + "24": 32472.0, + "25": 31520.0, + "26": 34965.0, + "27": 35584.0, + "28": 37025.0, + "29": 38197.0, + "30": 33255.0, + "31": 30551.0, + "32": 36545.0, + "33": 38300.0, + "34": 33029.0, + "35": 34162.0, + "36": 35185.0, + "37": 38545.0, + "38": 35778.0, + "39": 39000.0, + "40": 36217.0, + "41": 36225.0, + "42": 37525.0, + "43": 33633.0, + "44": 33746.0, + "45": 35656.0, + "46": 36610.0, + "47": 40929.0, + "48": 35960.0, + "49": 34405.0, + "50": 39680.0 } }, "mem-allocated-bytes": { @@ -121,8 +121,8 @@ "1": 1027095040.0, "2": 1027093504.0, "3": 1027094528.0, - "4": 1027095040.0, - "5": 1027091968.0, + "4": 1027096064.0, + "5": 1027091456.0, "6": 1027091968.0, "7": 1027098112.0, "8": 1027097600.0, @@ -131,43 +131,43 @@ "11": 1027098624.0, "12": 1027094528.0, "13": 1027092480.0, - "14": 1027095040.0, - "15": 1027095040.0, + "14": 1027094528.0, + "15": 1027096064.0, "16": 1027091456.0, - "17": 1027101184.0, + "17": 1027102208.0, "18": 1027096064.0, - "19": 1027093504.0, + "19": 1027092992.0, "20": 1027093504.0, "21": 1027097088.0, "22": 1027100160.0, "23": 1027100160.0, "24": 1027095552.0, - "25": 1027097088.0, + "25": 1027096576.0, "26": 1027098112.0, - "27": 1027091456.0, + "27": 1027090432.0, "28": 1027090944.0, - "29": 1027091968.0, - "30": 1027099648.0, - "31": 1027109888.0, + "29": 1027092480.0, + "30": 1027099136.0, + "31": 1027108864.0, "32": 1027095552.0, "33": 1027090944.0, - "34": 1027098112.0, - "35": 1027103744.0, - "36": 1027098112.0, - "37": 1027092480.0, - "38": 1027091456.0, - "39": 1027095040.0, - "40": 1027095040.0, - "41": 1027100160.0, + "34": 1027098624.0, + "35": 1027103232.0, + "36": 1027097088.0, + "37": 1027091968.0, + "38": 1027092480.0, + "39": 1027097088.0, + "40": 1027094016.0, + "41": 1027099648.0, "42": 1027091968.0, "43": 1027098624.0, - "44": 1027098624.0, - "45": 1027096064.0, - "46": 1027104256.0, - "47": 1027093504.0, + "44": 1027098112.0, + "45": 1027095552.0, + "46": 1027103232.0, + "47": 1027092992.0, "48": 1027101184.0, "49": 1027096064.0, - "50": 1027095552.0 + "50": 1027096064.0 } }, "mem-max-allocated-bytes": { @@ -177,54 +177,54 @@ "values": { "1": 3059586560.0, "2": 3299159040.0, - "3": 3299482112.0, - "4": 3302137344.0, - "5": 3302137344.0, - "6": 3302137344.0, - "7": 3303535104.0, - "8": 3303535104.0, - "9": 3303535104.0, - "10": 3303535104.0, - "11": 3303535104.0, - "12": 3303535104.0, - "13": 3303535104.0, - "14": 3303535104.0, - "15": 3303535104.0, - "16": 3303535104.0, - "17": 3306910208.0, - "18": 3306910208.0, - "19": 3306910208.0, - "20": 3306910208.0, - "21": 3306910208.0, - "22": 3306910208.0, - "23": 3306910208.0, - "24": 3306910208.0, - "25": 3306910208.0, - "26": 3306910208.0, - "27": 3306910208.0, - "28": 3306910208.0, - "29": 3306910208.0, - "30": 3306910208.0, - "31": 3312495616.0, - "32": 3312495616.0, - "33": 3312495616.0, - "34": 3312495616.0, - "35": 3312495616.0, - "36": 3312495616.0, - "37": 3312495616.0, - "38": 3312495616.0, - "39": 3312495616.0, - "40": 3312495616.0, - "41": 3312495616.0, - "42": 3312495616.0, - "43": 3312495616.0, - "44": 3312495616.0, - "45": 3312495616.0, - "46": 3312495616.0, - "47": 3312495616.0, - "48": 3312495616.0, - "49": 3312495616.0, - "50": 3312495616.0 + "3": 3299596800.0, + "4": 3301077504.0, + "5": 3301077504.0, + "6": 3301077504.0, + "7": 3303559680.0, + "8": 3303559680.0, + "9": 3303559680.0, + "10": 3303559680.0, + "11": 3303559680.0, + "12": 3303559680.0, + "13": 3303559680.0, + "14": 3303559680.0, + "15": 3303559680.0, + "16": 3303559680.0, + "17": 3309215232.0, + "18": 3309215232.0, + "19": 3309215232.0, + "20": 3309215232.0, + "21": 3309215232.0, + "22": 3309215232.0, + "23": 3309215232.0, + "24": 3309215232.0, + "25": 3309215232.0, + "26": 3309215232.0, + "27": 3309215232.0, + "28": 3309215232.0, + "29": 3309215232.0, + "30": 3309215232.0, + "31": 3312232448.0, + "32": 3312232448.0, + "33": 3312232448.0, + "34": 3312232448.0, + "35": 3312232448.0, + "36": 3312232448.0, + "37": 3312232448.0, + "38": 3312232448.0, + "39": 3312232448.0, + "40": 3312232448.0, + "41": 3312232448.0, + "42": 3312232448.0, + "43": 3312232448.0, + "44": 3312232448.0, + "45": 3312232448.0, + "46": 3312232448.0, + "47": 3312232448.0, + "48": 3312232448.0, + "49": 3312232448.0, + "50": 3312232448.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 3.6307, - "3": 0.3854, - "4": 0.38116, - "5": 0.36866, - "6": 0.36756, - "7": 0.37196, - "8": 0.37096, - "9": 0.36719, - "10": 0.36516, - "11": 0.36882, - "12": 0.37126, - "13": 0.36294, - "14": 0.36799, - "15": 0.3669, - "16": 0.36835, - "17": 0.37548, - "18": 0.37236, - "19": 0.36274, - "20": 0.36388, - "21": 0.36581, - "22": 0.3703, - "23": 0.36921, - "24": 0.35712, - "25": 0.36049, - "26": 0.36512, - "27": 0.36657, - "28": 0.36074, - "29": 0.41887, - "30": 0.45698, - "31": 0.54747, - "32": 0.4695, - "33": 0.67157, - "34": 0.4186, - "35": 0.39703, - "36": 0.40139, - "37": 0.39345, - "38": 0.38789, - "39": 1.0807, - "40": 0.42023, - "41": 0.3945, - "42": 0.39312, - "43": 0.41319, - "44": 0.40657, - "45": 0.4003, - "46": 0.3986, - "47": 0.38501, - "48": 0.38618, - "49": 0.38586, - "50": 0.38297 + "2": 7.18117, + "3": 0.39274, + "4": 0.37481, + "5": 0.36417, + "6": 0.35956, + "7": 0.36415, + "8": 0.35948, + "9": 0.36277, + "10": 0.35527, + "11": 0.35596, + "12": 0.35345, + "13": 0.35585, + "14": 0.35251, + "15": 0.35628, + "16": 0.35485, + "17": 0.35537, + "18": 0.35705, + "19": 0.35236, + "20": 0.35638, + "21": 0.35274, + "22": 0.35311, + "23": 0.35362, + "24": 0.35458, + "25": 0.35318, + "26": 0.3568, + "27": 0.358, + "28": 0.35273, + "29": 0.3547, + "30": 0.35339, + "31": 0.35691, + "32": 0.35295, + "33": 0.3534, + "34": 0.35976, + "35": 0.35279, + "36": 0.35422, + "37": 0.35326, + "38": 0.35572, + "39": 0.35404, + "40": 0.35512, + "41": 0.3531, + "42": 0.35645, + "43": 0.35611, + "44": 0.3541, + "45": 0.35746, + "46": 0.3587, + "47": 0.35184, + "48": 0.35122, + "49": 0.37644, + "50": 0.35636 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_gb200.json index 682fa44a64d..295fb5f4394 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.82137, "2": 10.8271, - "3": 10.81279, - "4": 10.80424, - "5": 10.84481, - "6": 10.85159, - "7": 10.82705, - "8": 10.83127, - "9": 10.8396, - "10": 10.79638, - "11": 10.85834, - "12": 10.8443, - "13": 10.8625, - "14": 10.86559, - "15": 10.8001, - "16": 10.78718, - "17": 10.7639, - "18": 10.78578, - "19": 10.78836, - "20": 10.71249, - "21": 10.68241, - "22": 10.54353, - "23": 10.69825, - "24": 10.58633, - "25": 10.52721, - "26": 10.58871, - "27": 10.60408, - "28": 10.57696, - "29": 10.57897, - "30": 10.36401, - "31": 10.10796, - "32": 10.44854, - "33": 10.4401, - "34": 10.20252, - "35": 10.25069, - "36": 10.21055, - "37": 10.32849, - "38": 10.17511, - "39": 10.38336, - "40": 10.05674, - "41": 10.10841, - "42": 10.18865, - "43": 9.80582, - "44": 9.91887, - "45": 9.79924, - "46": 9.78948, - "47": 10.11342, - "48": 9.82499, - "49": 9.49844, - "50": 9.87311 + "3": 10.81248, + "4": 10.80431, + "5": 10.84555, + "6": 10.85135, + "7": 10.8266, + "8": 10.83182, + "9": 10.83857, + "10": 10.79652, + "11": 10.85828, + "12": 10.84347, + "13": 10.86198, + "14": 10.86485, + "15": 10.80032, + "16": 10.78672, + "17": 10.764, + "18": 10.78575, + "19": 10.7879, + "20": 10.7129, + "21": 10.68224, + "22": 10.54288, + "23": 10.69786, + "24": 10.58577, + "25": 10.5273, + "26": 10.58757, + "27": 10.60399, + "28": 10.57739, + "29": 10.57879, + "30": 10.3642, + "31": 10.10688, + "32": 10.4486, + "33": 10.44, + "34": 10.20208, + "35": 10.25024, + "36": 10.21081, + "37": 10.32827, + "38": 10.1751, + "39": 10.3836, + "40": 10.05714, + "41": 10.10843, + "42": 10.18876, + "43": 9.80599, + "44": 9.91863, + "45": 9.79912, + "46": 9.78919, + "47": 10.11344, + "48": 9.82468, + "49": 9.49866, + "50": 9.87288 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 27059.0, - "2": 29311.0, - "3": 28886.0, - "4": 27768.0, - "5": 32694.0, - "6": 33260.0, - "7": 31409.0, - "8": 27342.0, - "9": 30401.0, - "10": 25524.0, - "11": 33805.0, - "12": 31146.0, - "13": 33161.0, - "14": 33991.0, - "15": 31160.0, - "16": 32445.0, - "17": 30974.0, - "18": 31151.0, - "19": 31742.0, - "20": 28624.0, - "21": 29115.0, - "22": 26827.0, - "23": 34472.0, - "24": 29096.0, - "25": 27239.0, - "26": 30910.0, - "27": 31915.0, - "28": 33968.0, - "29": 36017.0, - "30": 30702.0, - "31": 27384.0, - "32": 33681.0, - "33": 35476.0, - "34": 30160.0, - "35": 31419.0, - "36": 32568.0, - "37": 36189.0, - "38": 33607.0, - "39": 37731.0, - "40": 34463.0, - "41": 33229.0, - "42": 35616.0, - "43": 32361.0, - "44": 31908.0, - "45": 33571.0, - "46": 33618.0, - "47": 38873.0, - "48": 35034.0, - "49": 34407.0, - "50": 37669.0 + "1": 27109.0, + "2": 29210.0, + "3": 29328.0, + "4": 28362.0, + "5": 32327.0, + "6": 33596.0, + "7": 31658.0, + "8": 27593.0, + "9": 30282.0, + "10": 25192.0, + "11": 33625.0, + "12": 31223.0, + "13": 33038.0, + "14": 33706.0, + "15": 31239.0, + "16": 32543.0, + "17": 31248.0, + "18": 31432.0, + "19": 31321.0, + "20": 28782.0, + "21": 28937.0, + "22": 27212.0, + "23": 34514.0, + "24": 28886.0, + "25": 27441.0, + "26": 31167.0, + "27": 31499.0, + "28": 33834.0, + "29": 35571.0, + "30": 30114.0, + "31": 27267.0, + "32": 34060.0, + "33": 35537.0, + "34": 30434.0, + "35": 31397.0, + "36": 32396.0, + "37": 36184.0, + "38": 33474.0, + "39": 37945.0, + "40": 34570.0, + "41": 33384.0, + "42": 35263.0, + "43": 31969.0, + "44": 32501.0, + "45": 33391.0, + "46": 33699.0, + "47": 39054.0, + "48": 34292.0, + "49": 34706.0, + "50": 37480.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1559412224.0, - "2": 1558616064.0, - "3": 1558495744.0, - "4": 1559267328.0, - "5": 1558842880.0, - "6": 1559098368.0, - "7": 1558495744.0, - "8": 1558546432.0, - "9": 1558495744.0, - "10": 1558546432.0, - "11": 1558597120.0, - "12": 1558546432.0, - "13": 1558597120.0, - "14": 1558546432.0, - "15": 1558904320.0, - "16": 1558647808.0, - "17": 1558597120.0, - "18": 1558889472.0, - "19": 1558597120.0, - "20": 1559229440.0, - "21": 1558597120.0, - "22": 1558758400.0, - "23": 1559698944.0, - "24": 1559078912.0, - "25": 1559052800.0, - "26": 1558647808.0, - "27": 1559382528.0, - "28": 1558749184.0, - "29": 1558830592.0, - "30": 1558749184.0, - "31": 1558915584.0, - "32": 1559541760.0, - "33": 1558698496.0, - "34": 1558749184.0, - "35": 1559422464.0, - "36": 1558863872.0, - "37": 1558799872.0, - "38": 1558749184.0, - "39": 1559397888.0, - "40": 1559002112.0, - "41": 1558799872.0, - "42": 1558850560.0, - "43": 1559724544.0, - "44": 1558850560.0, - "45": 1558901248.0, - "46": 1559175168.0, - "47": 1558901248.0, - "48": 1558850560.0, - "49": 1558901248.0, - "50": 1559632896.0 + "1": 1558575616.0, + "2": 1557988352.0, + "3": 1557668352.0, + "4": 1558026240.0, + "5": 1558042624.0, + "6": 1558044160.0, + "7": 1557568512.0, + "8": 1557750272.0, + "9": 1558148096.0, + "10": 1557619200.0, + "11": 1557669888.0, + "12": 1557971456.0, + "13": 1557854208.0, + "14": 1557692928.0, + "15": 1557967872.0, + "16": 1557992960.0, + "17": 1558166528.0, + "18": 1557720576.0, + "19": 1558161408.0, + "20": 1557720576.0, + "21": 1558300672.0, + "22": 1558105600.0, + "23": 1557771264.0, + "24": 1557720576.0, + "25": 1558467584.0, + "26": 1557720576.0, + "27": 1557771264.0, + "28": 1557821952.0, + "29": 1557922816.0, + "30": 1557821952.0, + "31": 1557771264.0, + "32": 1558044160.0, + "33": 1557771264.0, + "34": 1557821952.0, + "35": 1558429696.0, + "36": 1558039040.0, + "37": 1557872640.0, + "38": 1557821952.0, + "39": 1557872640.0, + "40": 1557923328.0, + "41": 1557872640.0, + "42": 1557923328.0, + "43": 1558315008.0, + "44": 1558451712.0, + "45": 1557872640.0, + "46": 1558763008.0, + "47": 1557974016.0, + "48": 1558113792.0, + "49": 1557974016.0, + "50": 1557923328.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3495116800.0, - "2": 4054579712.0, - "3": 4062724096.0, - "4": 4062724096.0, - "5": 4062724096.0, - "6": 4062724096.0, - "7": 4070930432.0, - "8": 4070930432.0, - "9": 4073446400.0, - "10": 4073446400.0, - "11": 4073446400.0, - "12": 4073446400.0, - "13": 4073446400.0, - "14": 4075493888.0, - "15": 4075493888.0, - "16": 4075493888.0, - "17": 4075493888.0, - "18": 4075493888.0, - "19": 4075493888.0, - "20": 4075493888.0, - "21": 4075493888.0, - "22": 4079303168.0, - "23": 4096666624.0, - "24": 4096666624.0, - "25": 4096666624.0, - "26": 4096666624.0, - "27": 4096666624.0, - "28": 4096666624.0, - "29": 4096666624.0, - "30": 4096666624.0, - "31": 4105302016.0, - "32": 4105302016.0, - "33": 4105302016.0, - "34": 4105302016.0, - "35": 4105302016.0, - "36": 4105302016.0, - "37": 4105302016.0, - "38": 4105302016.0, - "39": 4105302016.0, - "40": 4105302016.0, - "41": 4105302016.0, - "42": 4105302016.0, - "43": 4105302016.0, - "44": 4105302016.0, - "45": 4105302016.0, - "46": 4105302016.0, - "47": 4105302016.0, - "48": 4105302016.0, - "49": 4105302016.0, - "50": 4105302016.0 + "1": 3497768960.0, + "2": 4052572672.0, + "3": 4065197056.0, + "4": 4065197056.0, + "5": 4065197056.0, + "6": 4065197056.0, + "7": 4067999232.0, + "8": 4072688640.0, + "9": 4073913856.0, + "10": 4073997312.0, + "11": 4073997312.0, + "12": 4073997312.0, + "13": 4073997312.0, + "14": 4073997312.0, + "15": 4073997312.0, + "16": 4073997312.0, + "17": 4073997312.0, + "18": 4073997312.0, + "19": 4073997312.0, + "20": 4073997312.0, + "21": 4073997312.0, + "22": 4082313216.0, + "23": 4095283712.0, + "24": 4095283712.0, + "25": 4095283712.0, + "26": 4095283712.0, + "27": 4095283712.0, + "28": 4095283712.0, + "29": 4095283712.0, + "30": 4095283712.0, + "31": 4105076224.0, + "32": 4105076224.0, + "33": 4105076224.0, + "34": 4105076224.0, + "35": 4105076224.0, + "36": 4105076224.0, + "37": 4105076224.0, + "38": 4105076224.0, + "39": 4105076224.0, + "40": 4105076224.0, + "41": 4105076224.0, + "42": 4105076224.0, + "43": 4105076224.0, + "44": 4105076224.0, + "45": 4105076224.0, + "46": 4105076224.0, + "47": 4105076224.0, + "48": 4105076224.0, + "49": 4105076224.0, + "50": 4105076224.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 6.04776, - "3": 0.43191, - "4": 0.39355, - "5": 0.39556, - "6": 0.39818, - "7": 0.39915, - "8": 0.39139, - "9": 0.41074, - "10": 0.45245, - "11": 0.45849, - "12": 0.46806, - "13": 0.46943, - "14": 0.47411, - "15": 0.48525, - "16": 0.47939, - "17": 0.47872, - "18": 0.4715, - "19": 0.4792, - "20": 0.46531, - "21": 0.46809, - "22": 0.46348, - "23": 0.47875, - "24": 0.83175, - "25": 0.50009, - "26": 0.4884, - "27": 0.82926, - "28": 0.50184, - "29": 0.50509, - "30": 0.49725, - "31": 0.50602, - "32": 0.84607, - "33": 0.50581, - "34": 0.49849, - "35": 0.50057, - "36": 0.5007, - "37": 0.50598, - "38": 0.50147, - "39": 0.51593, - "40": 0.51491, - "41": 0.50337, - "42": 0.48945, - "43": 0.49729, - "44": 0.49341, - "45": 0.4898, - "46": 0.49624, - "47": 0.51146, - "48": 0.49582, - "49": 0.49624, - "50": 0.49469 + "2": 10.06157, + "3": 0.4856, + "4": 0.46049, + "5": 0.43948, + "6": 0.43183, + "7": 0.44034, + "8": 0.43195, + "9": 0.43638, + "10": 0.42655, + "11": 0.43018, + "12": 0.43393, + "13": 0.43838, + "14": 0.43523, + "15": 0.43328, + "16": 0.42037, + "17": 0.42241, + "18": 0.43362, + "19": 0.42614, + "20": 0.42864, + "21": 0.43965, + "22": 0.43536, + "23": 0.43455, + "24": 0.42899, + "25": 0.43715, + "26": 0.43265, + "27": 0.4308, + "28": 0.44277, + "29": 0.43872, + "30": 0.43503, + "31": 0.44237, + "32": 0.43534, + "33": 0.43287, + "34": 0.42957, + "35": 0.43485, + "36": 0.43978, + "37": 0.43306, + "38": 0.43652, + "39": 0.43267, + "40": 0.43312, + "41": 0.43916, + "42": 0.43795, + "43": 0.44772, + "44": 0.43327, + "45": 0.44442, + "46": 0.43359, + "47": 0.43871, + "48": 0.44893, + "49": 0.43793, + "50": 0.42994 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_gb200.json index 113a491b0ba..5e07fb3bfad 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.81199, "2": 10.82649, - "3": 10.81384, - "4": 10.79509, - "5": 10.83534, - "6": 10.84275, - "7": 10.83571, - "8": 10.83439, - "9": 10.83696, - "10": 10.78957, - "11": 10.85974, - "12": 10.84264, - "13": 10.84986, - "14": 10.86378, - "15": 10.80482, - "16": 10.79204, - "17": 10.7636, - "18": 10.78823, - "19": 10.78841, - "20": 10.70796, - "21": 10.68628, - "22": 10.53299, - "23": 10.691, - "24": 10.58061, - "25": 10.5289, - "26": 10.57723, - "27": 10.58971, - "28": 10.5643, - "29": 10.56693, - "30": 10.35124, - "31": 10.09414, - "32": 10.43287, - "33": 10.43231, - "34": 10.19673, - "35": 10.23457, - "36": 10.19059, - "37": 10.31658, - "38": 10.16469, - "39": 10.37482, - "40": 10.05031, - "41": 10.10005, - "42": 10.1774, - "43": 9.79407, - "44": 9.91934, - "45": 9.7932, - "46": 9.78104, - "47": 10.10607, - "48": 9.8118, - "49": 9.48096, - "50": 9.86752, - "51": 9.8069, - "52": 9.70296, - "53": 10.03508, - "54": 9.92052, - "55": 9.84588, - "56": 9.58072, - "57": 9.43445, - "58": 9.79856, - "59": 9.54419, - "60": 9.45288, - "61": 9.65801, - "62": 9.95366, - "63": 9.34015, - "64": 9.73433, - "65": 8.90213, - "66": 9.6667, - "67": 9.33687, - "68": 9.7563, - "69": 9.77598, - "70": 9.70281, + "3": 10.81402, + "4": 10.79444, + "5": 10.8356, + "6": 10.84311, + "7": 10.83557, + "8": 10.83498, + "9": 10.83668, + "10": 10.78964, + "11": 10.85912, + "12": 10.84339, + "13": 10.84997, + "14": 10.86414, + "15": 10.80576, + "16": 10.7918, + "17": 10.76394, + "18": 10.78766, + "19": 10.78774, + "20": 10.70812, + "21": 10.6864, + "22": 10.53307, + "23": 10.69044, + "24": 10.5809, + "25": 10.52886, + "26": 10.57744, + "27": 10.58939, + "28": 10.56471, + "29": 10.56607, + "30": 10.35103, + "31": 10.09367, + "32": 10.43199, + "33": 10.43216, + "34": 10.19633, + "35": 10.23455, + "36": 10.19036, + "37": 10.31682, + "38": 10.16475, + "39": 10.3741, + "40": 10.05088, + "41": 10.10003, + "42": 10.17734, + "43": 9.79377, + "44": 9.91897, + "45": 9.79315, + "46": 9.78119, + "47": 10.10601, + "48": 9.81175, + "49": 9.4813, + "50": 9.86738, + "51": 9.80706, + "52": 9.70288, + "53": 10.03514, + "54": 9.92065, + "55": 9.84605, + "56": 9.58055, + "57": 9.43481, + "58": 9.79877, + "59": 9.54386, + "60": 9.4523, + "61": 9.65803, + "62": 9.95373, + "63": 9.34019, + "64": 9.73453, + "65": 8.90212, + "66": 9.66653, + "67": 9.33709, + "68": 9.75619, + "69": 9.77579, + "70": 9.70272, "71": 9.60206, - "72": 9.543, + "72": 9.54307, "73": 9.4557, - "74": 8.87804, - "75": 9.37677, - "76": 9.03816, - "77": 10.03912, - "78": 9.69714, - "79": 9.35195, - "80": 9.37278, - "81": 9.45649, - "82": 9.6802, - "83": 9.27723, - "84": 9.39341, - "85": 9.58928, - "86": 9.05151, - "87": 9.57623, - "88": 9.72869, - "89": 9.57637, - "90": 9.80884, - "91": 9.30719, - "92": 9.33823, - "93": 9.05712, - "94": 8.80375, - "95": 9.5091, - "96": 9.50777, - "97": 9.27751, - "98": 9.65271, - "99": 8.87009, - "100": 9.38142 + "74": 8.87807, + "75": 9.37673, + "76": 9.03809, + "77": 10.03878, + "78": 9.69735, + "79": 9.35192, + "80": 9.37284, + "81": 9.45647, + "82": 9.67999, + "83": 9.27725, + "84": 9.39356, + "85": 9.58912, + "86": 9.05149, + "87": 9.57627, + "88": 9.72865, + "89": 9.5761, + "90": 9.80906, + "91": 9.30685, + "92": 9.33841, + "93": 9.05655, + "94": 8.80359, + "95": 9.50883, + "96": 9.50764, + "97": 9.27773, + "98": 9.65276, + "99": 8.87014, + "100": 9.38138 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 26865.0, - "2": 29306.0, - "3": 29361.0, - "4": 28339.0, - "5": 32501.0, - "6": 33051.0, - "7": 31429.0, - "8": 27274.0, - "9": 30849.0, - "10": 25253.0, - "11": 34123.0, - "12": 30710.0, - "13": 33513.0, - "14": 33611.0, - "15": 31132.0, - "16": 32283.0, - "17": 31523.0, - "18": 30937.0, - "19": 31324.0, - "20": 28686.0, - "21": 29644.0, - "22": 27366.0, - "23": 34392.0, - "24": 29052.0, - "25": 27947.0, - "26": 31335.0, - "27": 31669.0, - "28": 33909.0, - "29": 35204.0, - "30": 30468.0, - "31": 27904.0, - "32": 33358.0, - "33": 35896.0, - "34": 30365.0, - "35": 31692.0, - "36": 32966.0, - "37": 35992.0, - "38": 33308.0, - "39": 38061.0, - "40": 34579.0, - "41": 33534.0, - "42": 36447.0, - "43": 32600.0, - "44": 32178.0, - "45": 34034.0, - "46": 34910.0, - "47": 39009.0, - "48": 34943.0, - "49": 34977.0, - "50": 38519.0, - "51": 36877.0, - "52": 36443.0, - "53": 43145.0, - "54": 41676.0, - "55": 38684.0, - "56": 41454.0, - "57": 35771.0, - "58": 41538.0, - "59": 39697.0, - "60": 56137.0, - "61": 59394.0, - "62": 2137056.0, - "63": 36401.0, - "64": 50930.0, - "65": 43788.0, - "66": 2139459.0, - "67": 2137025.0, - "68": 2137005.0, - "69": 2139555.0, - "70": 2140268.0, - "71": 2138613.0, - "72": 2139093.0, - "73": 2141321.0, - "74": 2137048.0, - "75": 2136852.0, - "76": 2140757.0, - "77": 2140654.0, - "78": 2141929.0, - "79": 2142543.0, - "80": 2142157.0, - "81": 2145547.0, - "82": 2144670.0, - "83": 2140858.0, - "84": 2140984.0, - "85": 2145921.0, - "86": 149825.0, - "87": 2144700.0, - "88": 2142479.0, - "89": 2140988.0, - "90": 2144684.0, - "91": 2143848.0, - "92": 2142027.0, - "93": 2139531.0, - "94": 2145775.0, - "95": 2143141.0, - "96": 2146259.0, - "97": 2140268.0, - "98": 2143316.0, - "99": 2144369.0, - "100": 2143057.0 + "1": 26523.0, + "2": 29472.0, + "3": 29140.0, + "4": 28787.0, + "5": 32154.0, + "6": 33150.0, + "7": 31503.0, + "8": 27488.0, + "9": 30851.0, + "10": 25539.0, + "11": 33735.0, + "12": 30721.0, + "13": 33360.0, + "14": 33374.0, + "15": 30838.0, + "16": 32360.0, + "17": 31588.0, + "18": 31016.0, + "19": 31320.0, + "20": 28419.0, + "21": 29325.0, + "22": 27567.0, + "23": 34221.0, + "24": 28953.0, + "25": 27716.0, + "26": 31399.0, + "27": 31596.0, + "28": 33689.0, + "29": 35335.0, + "30": 30311.0, + "31": 27380.0, + "32": 33651.0, + "33": 36080.0, + "34": 30178.0, + "35": 31974.0, + "36": 32609.0, + "37": 36504.0, + "38": 33985.0, + "39": 38040.0, + "40": 34901.0, + "41": 33629.0, + "42": 35751.0, + "43": 32869.0, + "44": 32821.0, + "45": 34054.0, + "46": 34067.0, + "47": 40493.0, + "48": 35146.0, + "49": 34756.0, + "50": 38467.0, + "51": 36706.0, + "52": 36171.0, + "53": 42698.0, + "54": 41471.0, + "55": 38216.0, + "56": 41916.0, + "57": 36330.0, + "58": 41283.0, + "59": 40081.0, + "60": 55891.0, + "61": 59793.0, + "62": 2137191.0, + "63": 36446.0, + "64": 128493.0, + "65": 43769.0, + "66": 2139269.0, + "67": 2137293.0, + "68": 2136798.0, + "69": 2139311.0, + "70": 2140379.0, + "71": 2138932.0, + "72": 2138654.0, + "73": 2141565.0, + "74": 2137087.0, + "75": 2137011.0, + "76": 2140501.0, + "77": 2140898.0, + "78": 2142043.0, + "79": 2142782.0, + "80": 2141568.0, + "81": 2145750.0, + "82": 2144812.0, + "83": 2141262.0, + "84": 2140595.0, + "85": 2145583.0, + "86": 2140562.0, + "87": 2144769.0, + "88": 2142291.0, + "89": 2140641.0, + "90": 2144794.0, + "91": 2143597.0, + "92": 2141696.0, + "93": 2139704.0, + "94": 2145839.0, + "95": 2142840.0, + "96": 2145981.0, + "97": 2140183.0, + "98": 2143585.0, + "99": 2143931.0, + "100": 2142923.0 } }, "mem-allocated-bytes": { @@ -220,104 +220,104 @@ "values": { "1": 787591680.0, "2": 787578880.0, - "3": 787593728.0, + "3": 787594240.0, "4": 787568128.0, "5": 787563008.0, "6": 787585536.0, - "7": 787578368.0, - "8": 787582976.0, + "7": 787578880.0, + "8": 787582464.0, "9": 787581952.0, - "10": 787592192.0, + "10": 787591680.0, "11": 787569152.0, - "12": 787570688.0, + "12": 787570176.0, "13": 787579392.0, - "14": 787582976.0, - "15": 787565568.0, - "16": 787572224.0, - "17": 787566592.0, - "18": 787547648.0, - "19": 787566592.0, - "20": 787537408.0, + "14": 787582464.0, + "15": 787566592.0, + "16": 787572736.0, + "17": 787567104.0, + "18": 787546624.0, + "19": 787567104.0, + "20": 787536384.0, "21": 787540992.0, - "22": 787540480.0, - "23": 787548672.0, - "24": 787542016.0, - "25": 787534336.0, + "22": 787541504.0, + "23": 787549696.0, + "24": 787540992.0, + "25": 787533824.0, "26": 787548672.0, - "27": 787509760.0, - "28": 787504640.0, - "29": 787499520.0, - "30": 787494912.0, - "31": 787510784.0, - "32": 787501056.0, + "27": 787510784.0, + "28": 787505152.0, + "29": 787500544.0, + "30": 787493376.0, + "31": 787511808.0, + "32": 787501568.0, "33": 787482624.0, "34": 787486208.0, "35": 787483136.0, - "36": 787482624.0, - "37": 787460608.0, - "38": 787457536.0, - "39": 787461632.0, - "40": 787457536.0, - "41": 787466752.0, - "42": 787432448.0, - "43": 787450368.0, + "36": 787484160.0, + "37": 787461120.0, + "38": 787457024.0, + "39": 787462144.0, + "40": 787456512.0, + "41": 787467264.0, + "42": 787431936.0, + "43": 787449856.0, "44": 787436032.0, - "45": 787411456.0, - "46": 787460608.0, - "47": 787412992.0, + "45": 787411968.0, + "46": 787460096.0, + "47": 787413504.0, "48": 787440128.0, - "49": 787409920.0, + "49": 787410432.0, "50": 787396096.0, - "51": 787388416.0, - "52": 787415040.0, - "53": 787377664.0, - "54": 787403264.0, + "51": 787388928.0, + "52": 787413504.0, + "53": 787377152.0, + "54": 787404288.0, "55": 787375104.0, "56": 787362304.0, "57": 787405824.0, - "58": 787356160.0, - "59": 787378688.0, - "60": 787380224.0, - "61": 787337216.0, - "62": 787331584.0, - "63": 787368960.0, + "58": 787355648.0, + "59": 787378176.0, + "60": 787379712.0, + "61": 787339264.0, + "62": 787331072.0, + "63": 787369472.0, "64": 787339264.0, "65": 787403776.0, - "66": 787330048.0, - "67": 787337728.0, - "68": 787324416.0, - "69": 787335680.0, - "70": 787328512.0, - "71": 787331584.0, - "72": 787341312.0, - "73": 787353088.0, - "74": 787366400.0, - "75": 787342848.0, - "76": 787344384.0, - "77": 787345920.0, + "66": 787329024.0, + "67": 787337216.0, + "68": 787323904.0, + "69": 787335168.0, + "70": 787329536.0, + "71": 787331072.0, + "72": 787341824.0, + "73": 787351552.0, + "74": 787365376.0, + "75": 787343360.0, + "76": 787343872.0, + "77": 787344896.0, "78": 787371520.0, - "79": 787366400.0, - "80": 787390464.0, - "81": 787385344.0, - "82": 787395584.0, - "83": 787403776.0, + "79": 787366912.0, + "80": 787387904.0, + "81": 787384832.0, + "82": 787393536.0, + "83": 787403264.0, "84": 787397632.0, - "85": 787398144.0, - "86": 787411968.0, + "85": 787397120.0, + "86": 787410432.0, "87": 787389952.0, - "88": 787387904.0, - "89": 787400704.0, - "90": 787379712.0, - "91": 787401216.0, - "92": 787399168.0, - "93": 787391488.0, - "94": 787392000.0, + "88": 787387392.0, + "89": 787400192.0, + "90": 787379200.0, + "91": 787400192.0, + "92": 787397632.0, + "93": 787390976.0, + "94": 787393024.0, "95": 787398656.0, - "96": 787395584.0, + "96": 787397120.0, "97": 787403776.0, - "98": 787396608.0, - "99": 787406848.0, - "100": 787410432.0 + "98": 787398144.0, + "99": 787408896.0, + "100": 787411968.0 } }, "mem-max-allocated-bytes": { @@ -327,104 +327,104 @@ "values": { "1": 2662647296.0, "2": 2662647296.0, - "3": 2665052672.0, - "4": 2665052672.0, - "5": 2665052672.0, - "6": 2665052672.0, - "7": 2665052672.0, - "8": 2665052672.0, - "9": 2665052672.0, - "10": 2665052672.0, - "11": 2665052672.0, - "12": 2665052672.0, - "13": 2665052672.0, - "14": 2665052672.0, - "15": 2665052672.0, - "16": 2665052672.0, - "17": 2665052672.0, - "18": 2665052672.0, - "19": 2665052672.0, - "20": 2665052672.0, - "21": 2665052672.0, - "22": 2665052672.0, - "23": 2665052672.0, - "24": 2665052672.0, - "25": 2665052672.0, - "26": 2665052672.0, - "27": 2665052672.0, - "28": 2665052672.0, - "29": 2665052672.0, - "30": 2665052672.0, - "31": 2665052672.0, - "32": 2665052672.0, - "33": 2665052672.0, - "34": 2665052672.0, - "35": 2665052672.0, - "36": 2665052672.0, - "37": 2665052672.0, - "38": 2665052672.0, - "39": 2665052672.0, - "40": 2665052672.0, - "41": 2665052672.0, - "42": 2665052672.0, - "43": 2665052672.0, - "44": 2665052672.0, - "45": 2665052672.0, - "46": 2665052672.0, - "47": 2665052672.0, - "48": 2665052672.0, - "49": 2665052672.0, - "50": 2665052672.0, - "51": 2665052672.0, - "52": 2665052672.0, - "53": 2665052672.0, - "54": 2665052672.0, - "55": 2665052672.0, - "56": 2665052672.0, - "57": 2665052672.0, - "58": 2665052672.0, - "59": 2665052672.0, - "60": 2665052672.0, - "61": 2665052672.0, - "62": 2665052672.0, - "63": 2665052672.0, - "64": 2665052672.0, - "65": 2665052672.0, - "66": 2665052672.0, - "67": 2665052672.0, - "68": 2665052672.0, - "69": 2665052672.0, - "70": 2665052672.0, - "71": 2665052672.0, - "72": 2665052672.0, - "73": 2665052672.0, - "74": 2665052672.0, - "75": 2665052672.0, - "76": 2665052672.0, - "77": 2665052672.0, - "78": 2665052672.0, - "79": 2665052672.0, - "80": 2665052672.0, - "81": 2665052672.0, - "82": 2665052672.0, - "83": 2665052672.0, - "84": 2665052672.0, - "85": 2665052672.0, - "86": 2665052672.0, - "87": 2665052672.0, - "88": 2665052672.0, - "89": 2665052672.0, - "90": 2665052672.0, - "91": 2665052672.0, - "92": 2665052672.0, - "93": 2665052672.0, - "94": 2665052672.0, - "95": 2665052672.0, - "96": 2665052672.0, - "97": 2665052672.0, - "98": 2665052672.0, - "99": 2665052672.0, - "100": 2665052672.0 + "3": 2665063424.0, + "4": 2665063424.0, + "5": 2665063424.0, + "6": 2665063424.0, + "7": 2665063424.0, + "8": 2665063424.0, + "9": 2665063424.0, + "10": 2665063424.0, + "11": 2665063424.0, + "12": 2665063424.0, + "13": 2665063424.0, + "14": 2665063424.0, + "15": 2665063424.0, + "16": 2665063424.0, + "17": 2665063424.0, + "18": 2665063424.0, + "19": 2665063424.0, + "20": 2665063424.0, + "21": 2665063424.0, + "22": 2665063424.0, + "23": 2665063424.0, + "24": 2665063424.0, + "25": 2665063424.0, + "26": 2665063424.0, + "27": 2665063424.0, + "28": 2665063424.0, + "29": 2665063424.0, + "30": 2665063424.0, + "31": 2665063424.0, + "32": 2665063424.0, + "33": 2665063424.0, + "34": 2665063424.0, + "35": 2665063424.0, + "36": 2665063424.0, + "37": 2665063424.0, + "38": 2665063424.0, + "39": 2665063424.0, + "40": 2665063424.0, + "41": 2665063424.0, + "42": 2665063424.0, + "43": 2665063424.0, + "44": 2665063424.0, + "45": 2665063424.0, + "46": 2665063424.0, + "47": 2665063424.0, + "48": 2665063424.0, + "49": 2665063424.0, + "50": 2665063424.0, + "51": 2665063424.0, + "52": 2665063424.0, + "53": 2665063424.0, + "54": 2665063424.0, + "55": 2665063424.0, + "56": 2665063424.0, + "57": 2665063424.0, + "58": 2665063424.0, + "59": 2665063424.0, + "60": 2665063424.0, + "61": 2665063424.0, + "62": 2665063424.0, + "63": 2665063424.0, + "64": 2665063424.0, + "65": 2665063424.0, + "66": 2665063424.0, + "67": 2665063424.0, + "68": 2665063424.0, + "69": 2665063424.0, + "70": 2665063424.0, + "71": 2665063424.0, + "72": 2665063424.0, + "73": 2665063424.0, + "74": 2665063424.0, + "75": 2665063424.0, + "76": 2665063424.0, + "77": 2665063424.0, + "78": 2665063424.0, + "79": 2665063424.0, + "80": 2665063424.0, + "81": 2665063424.0, + "82": 2665063424.0, + "83": 2665063424.0, + "84": 2665063424.0, + "85": 2665063424.0, + "86": 2665063424.0, + "87": 2665063424.0, + "88": 2665063424.0, + "89": 2665063424.0, + "90": 2665063424.0, + "91": 2665063424.0, + "92": 2665063424.0, + "93": 2665063424.0, + "94": 2665063424.0, + "95": 2665063424.0, + "96": 2665063424.0, + "97": 2665063424.0, + "98": 2665063424.0, + "99": 2665063424.0, + "100": 2665063424.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 4.78579, - "3": 0.53829, - "4": 0.5501, - "5": 0.52877, - "6": 0.53341, - "7": 0.53101, - "8": 0.52594, - "9": 0.52656, - "10": 0.52721, - "11": 0.51907, - "12": 0.52113, - "13": 0.52417, - "14": 0.52392, - "15": 0.53475, - "16": 0.52116, - "17": 0.52656, - "18": 0.52034, - "19": 0.52016, - "20": 0.52199, - "21": 0.53183, - "22": 0.53661, - "23": 0.54084, - "24": 0.52495, - "25": 0.53128, - "26": 0.52735, - "27": 0.54335, - "28": 0.52654, - "29": 0.53834, - "30": 0.53606, - "31": 0.53938, - "32": 0.53598, - "33": 0.53326, - "34": 0.54444, - "35": 0.53164, - "36": 0.5404, - "37": 0.54568, - "38": 0.54552, - "39": 0.5366, - "40": 0.54027, - "41": 0.53525, - "42": 0.55075, - "43": 0.53886, - "44": 0.53665, - "45": 0.55089, - "46": 0.5331, - "47": 0.54482, - "48": 0.53151, - "49": 0.53493, - "50": 0.53302, - "51": 0.52424, - "52": 0.52434, - "53": 0.51687, - "54": 0.52816, - "55": 0.53022, - "56": 0.53577, - "57": 0.53245, - "58": 0.53568, - "59": 0.54753, - "60": 0.53813, - "61": 0.53815, - "62": 0.5366, - "63": 0.54423, - "64": 0.5344, - "65": 0.53864, - "66": 0.54089, - "67": 0.53579, - "68": 0.54777, - "69": 0.54032, - "70": 0.54348, - "71": 0.5411, - "72": 0.54019, - "73": 0.53851, - "74": 0.54021, - "75": 0.53784, - "76": 0.53954, - "77": 0.54237, - "78": 0.53049, - "79": 0.57915, - "80": 0.57307, - "81": 0.56876, - "82": 0.56781, - "83": 0.56481, - "84": 0.55385, - "85": 0.56577, - "86": 0.569, - "87": 0.5621, - "88": 0.56698, - "89": 0.55835, - "90": 0.85395, - "91": 0.56888, - "92": 0.55621, - "93": 0.57143, - "94": 0.5584, - "95": 0.56204, - "96": 0.5656, - "97": 0.5491, - "98": 0.56348, - "99": 0.5607, - "100": 0.56258 + "2": 5.98635, + "3": 0.52282, + "4": 0.49693, + "5": 0.4928, + "6": 0.48259, + "7": 0.4965, + "8": 0.47878, + "9": 0.47293, + "10": 0.47259, + "11": 0.47135, + "12": 0.47375, + "13": 0.46469, + "14": 0.4653, + "15": 0.47382, + "16": 0.48208, + "17": 0.47932, + "18": 0.46393, + "19": 0.46346, + "20": 0.47236, + "21": 0.4714, + "22": 0.47499, + "23": 0.47258, + "24": 0.46914, + "25": 0.47024, + "26": 0.46574, + "27": 0.47482, + "28": 0.47982, + "29": 0.48899, + "30": 0.49411, + "31": 0.48791, + "32": 0.48868, + "33": 0.48565, + "34": 0.48033, + "35": 0.48225, + "36": 0.47838, + "37": 0.48688, + "38": 0.48265, + "39": 0.48609, + "40": 0.48829, + "41": 0.48993, + "42": 0.49163, + "43": 0.48738, + "44": 0.48033, + "45": 0.48298, + "46": 0.49224, + "47": 0.47934, + "48": 0.48869, + "49": 0.47492, + "50": 0.47463, + "51": 0.49442, + "52": 0.4729, + "53": 0.47381, + "54": 0.47741, + "55": 0.48415, + "56": 0.48472, + "57": 0.49879, + "58": 0.48585, + "59": 0.49378, + "60": 0.49224, + "61": 0.48445, + "62": 0.47883, + "63": 0.48658, + "64": 0.48416, + "65": 0.47652, + "66": 0.47867, + "67": 0.5028, + "68": 0.48553, + "69": 0.48415, + "70": 0.47946, + "71": 0.47869, + "72": 0.47973, + "73": 0.48056, + "74": 0.48003, + "75": 0.48769, + "76": 0.4697, + "77": 0.47534, + "78": 0.46682, + "79": 0.47552, + "80": 0.47839, + "81": 0.48653, + "82": 0.48245, + "83": 0.48713, + "84": 0.4737, + "85": 0.47339, + "86": 0.47528, + "87": 0.48514, + "88": 0.47048, + "89": 0.47146, + "90": 0.81332, + "91": 0.4747, + "92": 0.47449, + "93": 0.47825, + "94": 0.47459, + "95": 0.47757, + "96": 0.47444, + "97": 0.46924, + "98": 0.47068, + "99": 0.47128, + "100": 0.47481 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json index dffbbf25de6..37f74714dfc 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_a100.json @@ -6,54 +6,54 @@ "values": { "1": 10.82207, "2": 10.84178, - "3": 10.81126, - "4": 10.82219, - "5": 10.8455, - "6": 10.86291, - "7": 10.84399, - "8": 10.84652, - "9": 10.84916, - "10": 10.78879, - "11": 10.8581, - "12": 10.84415, - "13": 10.87153, - "14": 10.87463, - "15": 10.83396, - "16": 10.8091, - "17": 10.79098, - "18": 10.81032, - "19": 10.80535, - "20": 10.73557, - "21": 10.71472, - "22": 10.57762, - "23": 10.72594, - "24": 10.61811, - "25": 10.58114, - "26": 10.63747, - "27": 10.63794, - "28": 10.60614, - "29": 10.61062, - "30": 10.40965, - "31": 10.16941, - "32": 10.49897, + "3": 10.81113, + "4": 10.82234, + "5": 10.84489, + "6": 10.86304, + "7": 10.84427, + "8": 10.84693, + "9": 10.85001, + "10": 10.7897, + "11": 10.85766, + "12": 10.84396, + "13": 10.87181, + "14": 10.87455, + "15": 10.83399, + "16": 10.80919, + "17": 10.791, + "18": 10.81017, + "19": 10.80568, + "20": 10.73563, + "21": 10.71417, + "22": 10.57691, + "23": 10.72597, + "24": 10.61823, + "25": 10.58154, + "26": 10.63745, + "27": 10.63717, + "28": 10.60574, + "29": 10.61026, + "30": 10.40985, + "31": 10.16959, + "32": 10.49887, "33": 10.49702, - "34": 10.26142, - "35": 10.31452, - "36": 10.2851, - "37": 10.3895, - "38": 10.2473, + "34": 10.26149, + "35": 10.31439, + "36": 10.28489, + "37": 10.38874, + "38": 10.24737, "39": 10.43792, - "40": 10.14599, - "41": 10.19691, - "42": 10.26122, - "43": 9.91082, - "44": 10.02318, - "45": 9.91674, - "46": 9.89463, - "47": 10.19281, - "48": 9.93104, - "49": 9.61208, - "50": 9.97427 + "40": 10.14606, + "41": 10.19685, + "42": 10.26102, + "43": 9.91027, + "44": 10.02323, + "45": 9.91719, + "46": 9.89453, + "47": 10.19329, + "48": 9.93042, + "49": 9.61243, + "50": 9.97437 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4986.0, - "2": 5272.0, - "3": 5309.0, - "4": 5162.0, - "5": 5824.0, - "6": 5990.0, - "7": 5433.0, - "8": 5101.0, - "9": 5654.0, - "10": 4736.0, - "11": 6213.0, - "12": 5723.0, - "13": 5952.0, - "14": 6073.0, - "15": 5503.0, - "16": 5808.0, - "17": 5545.0, - "18": 5647.0, - "19": 5555.0, - "20": 5120.0, - "21": 5578.0, - "22": 5097.0, - "23": 5992.0, - "24": 5204.0, - "25": 5016.0, - "26": 5487.0, - "27": 5618.0, - "28": 5994.0, - "29": 6202.0, - "30": 5538.0, - "31": 4762.0, - "32": 6010.0, - "33": 6302.0, - "34": 5312.0, - "35": 5783.0, - "36": 5716.0, - "37": 6562.0, - "38": 6183.0, - "39": 6964.0, - "40": 6220.0, - "41": 6139.0, - "42": 6368.0, - "43": 5900.0, - "44": 5754.0, - "45": 5814.0, - "46": 5882.0, - "47": 6818.0, - "48": 6495.0, - "49": 6047.0, - "50": 6623.0 + "1": 5051.0, + "2": 5315.0, + "3": 5393.0, + "4": 5144.0, + "5": 5971.0, + "6": 5897.0, + "7": 5465.0, + "8": 5099.0, + "9": 5504.0, + "10": 4793.0, + "11": 6028.0, + "12": 5837.0, + "13": 6020.0, + "14": 5895.0, + "15": 5534.0, + "16": 5710.0, + "17": 5651.0, + "18": 5450.0, + "19": 5649.0, + "20": 5103.0, + "21": 5625.0, + "22": 5074.0, + "23": 6044.0, + "24": 5183.0, + "25": 4901.0, + "26": 5454.0, + "27": 5628.0, + "28": 6018.0, + "29": 6058.0, + "30": 5571.0, + "31": 4731.0, + "32": 5867.0, + "33": 6453.0, + "34": 5334.0, + "35": 5685.0, + "36": 5687.0, + "37": 6801.0, + "38": 6247.0, + "39": 6933.0, + "40": 6105.0, + "41": 6091.0, + "42": 6453.0, + "43": 5778.0, + "44": 5946.0, + "45": 5869.0, + "46": 6171.0, + "47": 6709.0, + "48": 6336.0, + "49": 6103.0, + "50": 6672.0 } }, "mem-allocated-bytes": { @@ -124,47 +124,47 @@ "4": 598360576.0, "5": 598358016.0, "6": 598358016.0, - "7": 598354432.0, - "8": 598359040.0, - "9": 598358016.0, + "7": 598355456.0, + "8": 598360576.0, + "9": 598356992.0, "10": 598356992.0, "11": 598358016.0, "12": 598358016.0, "13": 598359040.0, "14": 598359040.0, "15": 598359040.0, - "16": 598358528.0, + "16": 598360576.0, "17": 598352384.0, "18": 598358016.0, "19": 598359040.0, "20": 598357504.0, - "21": 598358016.0, + "21": 598359040.0, "22": 598354432.0, "23": 598355968.0, - "24": 598356480.0, + "24": 598357504.0, "25": 598358528.0, - "26": 598357504.0, + "26": 598356480.0, "27": 598360064.0, "28": 598358016.0, - "29": 598356480.0, + "29": 598357504.0, "30": 598359552.0, "31": 598354944.0, - "32": 598356992.0, + "32": 598355968.0, "33": 598359552.0, - "34": 598358016.0, - "35": 598356480.0, - "36": 598356992.0, - "37": 598358016.0, + "34": 598356992.0, + "35": 598355968.0, + "36": 598357504.0, + "37": 598359040.0, "38": 598358016.0, - "39": 598357504.0, + "39": 598356992.0, "40": 598357504.0, - "41": 598352384.0, + "41": 598351872.0, "42": 598357504.0, "43": 598352384.0, "44": 598355456.0, "45": 598355968.0, "46": 598351872.0, - "47": 598359040.0, + "47": 598359552.0, "48": 598354944.0, "49": 598353408.0, "50": 598356992.0 @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 855598080.0, - "2": 1083234304.0, - "3": 1083234304.0, - "4": 1083234304.0, - "5": 1083234304.0, - "6": 1083493888.0, - "7": 1083493888.0, - "8": 1083493888.0, - "9": 1083493888.0, - "10": 1083493888.0, - "11": 1083493888.0, - "12": 1083493888.0, - "13": 1083493888.0, - "14": 1084195840.0, - "15": 1084195840.0, - "16": 1084195840.0, - "17": 1084195840.0, - "18": 1084195840.0, - "19": 1084195840.0, - "20": 1084195840.0, - "21": 1084195840.0, - "22": 1084195840.0, - "23": 1084195840.0, - "24": 1084195840.0, - "25": 1084195840.0, - "26": 1084195840.0, - "27": 1084195840.0, - "28": 1084195840.0, - "29": 1084195840.0, - "30": 1084195840.0, - "31": 1084195840.0, - "32": 1084195840.0, - "33": 1084195840.0, - "34": 1084195840.0, - "35": 1084195840.0, - "36": 1084195840.0, - "37": 1084195840.0, - "38": 1084195840.0, - "39": 1084195840.0, - "40": 1084195840.0, - "41": 1084195840.0, - "42": 1084195840.0, - "43": 1084195840.0, - "44": 1084195840.0, - "45": 1084195840.0, - "46": 1084195840.0, - "47": 1084195840.0, - "48": 1084195840.0, - "49": 1084195840.0, - "50": 1084195840.0 + "1": 849854464.0, + "2": 1083133952.0, + "3": 1083133952.0, + "4": 1083459584.0, + "5": 1083459584.0, + "6": 1084528128.0, + "7": 1084528128.0, + "8": 1084734976.0, + "9": 1084734976.0, + "10": 1084734976.0, + "11": 1084734976.0, + "12": 1084734976.0, + "13": 1084734976.0, + "14": 1084734976.0, + "15": 1084734976.0, + "16": 1084734976.0, + "17": 1084734976.0, + "18": 1084734976.0, + "19": 1084734976.0, + "20": 1084734976.0, + "21": 1084734976.0, + "22": 1084734976.0, + "23": 1084734976.0, + "24": 1084734976.0, + "25": 1084734976.0, + "26": 1084734976.0, + "27": 1084734976.0, + "28": 1084734976.0, + "29": 1084734976.0, + "30": 1084734976.0, + "31": 1084734976.0, + "32": 1084734976.0, + "33": 1084734976.0, + "34": 1084734976.0, + "35": 1084734976.0, + "36": 1084734976.0, + "37": 1084734976.0, + "38": 1084734976.0, + "39": 1084734976.0, + "40": 1084734976.0, + "41": 1084734976.0, + "42": 1084734976.0, + "43": 1084734976.0, + "44": 1084734976.0, + "45": 1084734976.0, + "46": 1084734976.0, + "47": 1084734976.0, + "48": 1084734976.0, + "49": 1084734976.0, + "50": 1084734976.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12.15002, - "2": 0.70236, - "3": 0.6774, - "4": 0.6698, - "5": 0.66613, - "6": 0.65685, - "7": 0.65852, - "8": 1.19123, - "9": 0.65621, - "10": 1.09603, - "11": 0.65688, - "12": 0.65983, - "13": 0.6521, - "14": 0.65135, - "15": 0.65551, - "16": 0.64995, - "17": 0.6532, - "18": 0.65306, - "19": 0.65221, - "20": 0.65239, - "21": 0.65356, - "22": 0.6536, - "23": 0.65416, - "24": 0.65298, - "25": 0.65469, - "26": 0.65391, - "27": 0.65289, - "28": 1.1109, - "29": 0.65365, - "30": 0.65326, - "31": 0.68599, - "32": 0.65366, - "33": 0.65416, - "34": 0.6538, - "35": 0.65304, - "36": 0.65351, - "37": 0.65423, - "38": 0.6542, - "39": 0.65254, - "40": 0.65386, - "41": 0.65384, - "42": 0.65434, - "43": 0.65537, - "44": 0.65573, - "45": 0.65342, - "46": 0.65451, - "47": 0.6535, - "48": 0.65377, - "49": 0.65522, - "50": 0.65221 + "1": "nan", + "2": 10.29335, + "3": 0.67878, + "4": 0.6592, + "5": 0.65824, + "6": 0.65977, + "7": 0.65877, + "8": 0.66094, + "9": 0.65786, + "10": 0.65658, + "11": 0.6579, + "12": 0.65517, + "13": 0.65666, + "14": 0.65608, + "15": 0.65601, + "16": 0.65482, + "17": 0.65822, + "18": 0.65711, + "19": 0.65603, + "20": 0.65609, + "21": 0.6565, + "22": 0.67194, + "23": 0.65625, + "24": 0.66529, + "25": 0.66578, + "26": 0.67383, + "27": 0.65687, + "28": 0.65467, + "29": 0.65676, + "30": 0.65789, + "31": 0.65676, + "32": 0.65719, + "33": 0.65687, + "34": 0.65653, + "35": 0.65849, + "36": 0.65693, + "37": 0.65595, + "38": 0.6557, + "39": 0.65719, + "40": 0.65771, + "41": 0.65694, + "42": 0.6571, + "43": 0.65669, + "44": 0.65684, + "45": 0.65629, + "46": 0.65907, + "47": 0.65645, + "48": 0.6576, + "49": 0.65594, + "50": 0.65339 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json index bfea64b8438..ce15ba8d8b8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.79193, "2": 10.81245, - "3": 10.79181, - "4": 10.78209, - "5": 10.82295, - "6": 10.83309, - "7": 10.81351, - "8": 10.81215, - "9": 10.81457, - "10": 10.76068, - "11": 10.84185, - "12": 10.82404, - "13": 10.83895, - "14": 10.84433, - "15": 10.79974, - "16": 10.78654, - "17": 10.76789, - "18": 10.77495, - "19": 10.77669, - "20": 10.71893, - "21": 10.69691, + "3": 10.7918, + "4": 10.78248, + "5": 10.8229, + "6": 10.83325, + "7": 10.81381, + "8": 10.8124, + "9": 10.81419, + "10": 10.76095, + "11": 10.84109, + "12": 10.82369, + "13": 10.83874, + "14": 10.84415, + "15": 10.79967, + "16": 10.78628, + "17": 10.76773, + "18": 10.77429, + "19": 10.77699, + "20": 10.71877, + "21": 10.69736, "22": 10.5691, - "23": 10.7131, - "24": 10.59975, - "25": 10.56123, - "26": 10.60735, - "27": 10.63093, - "28": 10.6064, - "29": 10.61213, - "30": 10.39823, - "31": 10.16422, - "32": 10.49019, - "33": 10.48385, - "34": 10.26645, - "35": 10.31743, - "36": 10.28264, - "37": 10.39002, - "38": 10.25116, - "39": 10.43811, - "40": 10.1403, - "41": 10.19191, - "42": 10.25886, - "43": 9.91588, - "44": 10.02837, - "45": 9.91815, - "46": 9.89353, - "47": 10.20144, - "48": 9.92509, - "49": 9.62973, - "50": 9.97857 + "23": 10.71318, + "24": 10.59944, + "25": 10.56048, + "26": 10.60755, + "27": 10.63083, + "28": 10.60651, + "29": 10.61165, + "30": 10.3982, + "31": 10.16412, + "32": 10.49049, + "33": 10.4843, + "34": 10.2663, + "35": 10.31711, + "36": 10.28281, + "37": 10.39018, + "38": 10.25141, + "39": 10.43825, + "40": 10.14073, + "41": 10.19205, + "42": 10.25794, + "43": 9.91532, + "44": 10.0282, + "45": 9.91826, + "46": 9.89373, + "47": 10.20145, + "48": 9.9251, + "49": 9.63015, + "50": 9.979 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5192.0, - "2": 5510.0, - "3": 5508.0, - "4": 5240.0, - "5": 6136.0, - "6": 6180.0, - "7": 5549.0, - "8": 5242.0, - "9": 5717.0, - "10": 4818.0, - "11": 6299.0, - "12": 5746.0, - "13": 6110.0, - "14": 6165.0, - "15": 5683.0, - "16": 5805.0, - "17": 5758.0, - "18": 5546.0, - "19": 5787.0, - "20": 5231.0, - "21": 5741.0, - "22": 5126.0, - "23": 6019.0, - "24": 5410.0, - "25": 5100.0, - "26": 5630.0, - "27": 5627.0, - "28": 6146.0, - "29": 6174.0, - "30": 5570.0, - "31": 4768.0, - "32": 5926.0, - "33": 6348.0, - "34": 5389.0, - "35": 5856.0, - "36": 5741.0, - "37": 6611.0, - "38": 6262.0, - "39": 6971.0, - "40": 6094.0, - "41": 6227.0, - "42": 6622.0, - "43": 5761.0, - "44": 5929.0, - "45": 5769.0, - "46": 6141.0, - "47": 6909.0, - "48": 6650.0, - "49": 6100.0, - "50": 6753.0 + "1": 5164.0, + "2": 5460.0, + "3": 5476.0, + "4": 5164.0, + "5": 5919.0, + "6": 6081.0, + "7": 5512.0, + "8": 5296.0, + "9": 5667.0, + "10": 4778.0, + "11": 6210.0, + "12": 5685.0, + "13": 6050.0, + "14": 6114.0, + "15": 5595.0, + "16": 5966.0, + "17": 5549.0, + "18": 5693.0, + "19": 5735.0, + "20": 5229.0, + "21": 5764.0, + "22": 4962.0, + "23": 5984.0, + "24": 5373.0, + "25": 5199.0, + "26": 5592.0, + "27": 5735.0, + "28": 6127.0, + "29": 6298.0, + "30": 5597.0, + "31": 4765.0, + "32": 5954.0, + "33": 6464.0, + "34": 5371.0, + "35": 5671.0, + "36": 5796.0, + "37": 6657.0, + "38": 6074.0, + "39": 6859.0, + "40": 6040.0, + "41": 6237.0, + "42": 6570.0, + "43": 5866.0, + "44": 5828.0, + "45": 5998.0, + "46": 5969.0, + "47": 6792.0, + "48": 6699.0, + "49": 6241.0, + "50": 6831.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 627716608.0, - "2": 627719168.0, - "3": 627717632.0, - "4": 627719680.0, - "5": 627717120.0, - "6": 627717120.0, - "7": 627719680.0, - "8": 627716608.0, - "9": 627718144.0, - "10": 627718144.0, - "11": 627717632.0, - "12": 627718144.0, - "13": 627719168.0, - "14": 627718144.0, - "15": 627722240.0, - "16": 627718144.0, - "17": 627720704.0, - "18": 627719680.0, - "19": 627719168.0, - "20": 627718144.0, - "21": 627718656.0, - "22": 627723264.0, - "23": 627720192.0, - "24": 627719680.0, - "25": 627718144.0, - "26": 627719168.0, - "27": 627719168.0, - "28": 627718144.0, - "29": 627718144.0, - "30": 627719168.0, - "31": 627719168.0, - "32": 627719168.0, - "33": 627717632.0, - "34": 627719680.0, - "35": 627721216.0, - "36": 627717120.0, - "37": 627719168.0, - "38": 627721216.0, - "39": 627719168.0, - "40": 627718656.0, - "41": 627718144.0, - "42": 627717632.0, - "43": 627717120.0, - "44": 627718656.0, - "45": 627717632.0, - "46": 627717120.0, - "47": 627719168.0, - "48": 627718144.0, - "49": 627716608.0, - "50": 627716096.0 + "1": 628503040.0, + "2": 628505600.0, + "3": 628504064.0, + "4": 628506112.0, + "5": 628504576.0, + "6": 628503552.0, + "7": 628507136.0, + "8": 628503040.0, + "9": 628504576.0, + "10": 628503552.0, + "11": 628505088.0, + "12": 628504576.0, + "13": 628505600.0, + "14": 628504576.0, + "15": 628508672.0, + "16": 628503552.0, + "17": 628507136.0, + "18": 628506112.0, + "19": 628504576.0, + "20": 628505600.0, + "21": 628506112.0, + "22": 628509696.0, + "23": 628506624.0, + "24": 628506112.0, + "25": 628505600.0, + "26": 628504576.0, + "27": 628505600.0, + "28": 628504576.0, + "29": 628504576.0, + "30": 628504064.0, + "31": 628506624.0, + "32": 628505600.0, + "33": 628504064.0, + "34": 628506624.0, + "35": 628508160.0, + "36": 628503552.0, + "37": 628504576.0, + "38": 628506112.0, + "39": 628504576.0, + "40": 628505088.0, + "41": 628505088.0, + "42": 628504576.0, + "43": 628503040.0, + "44": 628504576.0, + "45": 628503040.0, + "46": 628503552.0, + "47": 628504576.0, + "48": 628504576.0, + "49": 628503040.0, + "50": 628502528.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 879803392.0, - "2": 1114769920.0, - "3": 1114769920.0, - "4": 1116293632.0, - "5": 1116293632.0, - "6": 1116293632.0, - "7": 1116293632.0, - "8": 1116293632.0, - "9": 1116293632.0, - "10": 1116293632.0, - "11": 1116293632.0, - "12": 1116293632.0, - "13": 1116293632.0, - "14": 1116293632.0, - "15": 1116293632.0, - "16": 1116293632.0, - "17": 1116293632.0, - "18": 1116293632.0, - "19": 1116293632.0, - "20": 1116293632.0, - "21": 1116293632.0, - "22": 1116293632.0, - "23": 1116293632.0, - "24": 1116293632.0, - "25": 1116293632.0, - "26": 1116293632.0, - "27": 1116293632.0, - "28": 1116293632.0, - "29": 1116293632.0, - "30": 1116293632.0, - "31": 1116293632.0, - "32": 1116293632.0, - "33": 1116293632.0, - "34": 1116293632.0, - "35": 1116293632.0, - "36": 1116293632.0, - "37": 1116293632.0, - "38": 1116293632.0, - "39": 1116293632.0, - "40": 1116293632.0, - "41": 1116293632.0, - "42": 1116293632.0, - "43": 1116293632.0, - "44": 1116293632.0, - "45": 1116293632.0, - "46": 1116293632.0, - "47": 1116293632.0, - "48": 1116293632.0, - "49": 1116293632.0, - "50": 1116293632.0 + "1": 882678784.0, + "2": 1114974720.0, + "3": 1114974720.0, + "4": 1116527104.0, + "5": 1116527104.0, + "6": 1116527104.0, + "7": 1116527104.0, + "8": 1116527104.0, + "9": 1116527104.0, + "10": 1116527104.0, + "11": 1116527104.0, + "12": 1116527104.0, + "13": 1116527104.0, + "14": 1116527104.0, + "15": 1116527104.0, + "16": 1116527104.0, + "17": 1116527104.0, + "18": 1116527104.0, + "19": 1116527104.0, + "20": 1116527104.0, + "21": 1116527104.0, + "22": 1116527104.0, + "23": 1116527104.0, + "24": 1116527104.0, + "25": 1116527104.0, + "26": 1116527104.0, + "27": 1116527104.0, + "28": 1116527104.0, + "29": 1116527104.0, + "30": 1116527104.0, + "31": 1116527104.0, + "32": 1116527104.0, + "33": 1116527104.0, + "34": 1116527104.0, + "35": 1116527104.0, + "36": 1116527104.0, + "37": 1116527104.0, + "38": 1116527104.0, + "39": 1116527104.0, + "40": 1116527104.0, + "41": 1116527104.0, + "42": 1116527104.0, + "43": 1116527104.0, + "44": 1116527104.0, + "45": 1116527104.0, + "46": 1116527104.0, + "47": 1116527104.0, + "48": 1116527104.0, + "49": 1116527104.0, + "50": 1116527104.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 7.71846, - "3": 0.76188, - "4": 0.74577, - "5": 0.73403, - "6": 0.73193, - "7": 0.73107, - "8": 0.72199, - "9": 0.726, - "10": 0.71891, - "11": 0.72723, - "12": 0.71504, - "13": 0.71448, - "14": 0.71551, - "15": 0.71936, - "16": 0.71512, - "17": 0.73948, - "18": 0.83787, - "19": 0.94178, - "20": 0.98096, - "21": 0.71399, - "22": 0.87302, - "23": 0.71359, - "24": 0.7104, - "25": 0.70807, - "26": 0.71636, - "27": 0.70864, - "28": 0.72237, - "29": 0.7163, - "30": 0.7153, - "31": 0.71793, - "32": 0.70846, - "33": 0.7079, - "34": 0.71058, - "35": 0.71492, - "36": 0.72031, - "37": 0.71537, - "38": 0.70333, - "39": 0.70449, - "40": 0.71725, - "41": 0.72322, - "42": 0.7105, - "43": 0.70421, - "44": 0.70441, - "45": 0.70449, - "46": 0.7091, - "47": 0.70989, - "48": 0.70781, - "49": 0.71985, - "50": 0.70534 + "2": 12.4857, + "3": 0.83608, + "4": 0.96022, + "5": 0.85369, + "6": 0.81142, + "7": 0.81711, + "8": 0.81096, + "9": 0.81865, + "10": 0.80421, + "11": 0.81494, + "12": 0.81722, + "13": 0.81082, + "14": 0.80883, + "15": 0.81671, + "16": 0.80836, + "17": 0.80711, + "18": 0.80172, + "19": 0.80702, + "20": 0.80318, + "21": 0.80759, + "22": 0.80555, + "23": 0.80545, + "24": 0.80349, + "25": 0.80504, + "26": 0.80694, + "27": 0.80341, + "28": 0.80371, + "29": 0.81135, + "30": 0.81323, + "31": 0.82006, + "32": 0.84418, + "33": 0.80637, + "34": 0.80425, + "35": 0.80548, + "36": 0.80173, + "37": 0.80569, + "38": 0.80918, + "39": 0.81029, + "40": 0.80714, + "41": 0.81129, + "42": 0.80866, + "43": 0.81108, + "44": 0.80864, + "45": 0.8052, + "46": 0.81737, + "47": 0.81206, + "48": 0.81149, + "49": 0.82012, + "50": 0.80486 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json index d2a07cdf1dd..cf306109a97 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json @@ -6,54 +6,54 @@ "values": { "1": 10.82207, "2": 10.84178, - "3": 10.81126, - "4": 10.82219, - "5": 10.8455, - "6": 10.86291, - "7": 10.84399, - "8": 10.84652, - "9": 10.84916, - "10": 10.78879, - "11": 10.8581, - "12": 10.84415, - "13": 10.87153, - "14": 10.87463, - "15": 10.83396, - "16": 10.8091, - "17": 10.79098, - "18": 10.81032, - "19": 10.80535, - "20": 10.73557, - "21": 10.71472, - "22": 10.57762, - "23": 10.72594, - "24": 10.61811, - "25": 10.58114, - "26": 10.63747, - "27": 10.63794, - "28": 10.60614, - "29": 10.61062, - "30": 10.40965, - "31": 10.16941, - "32": 10.49897, + "3": 10.81113, + "4": 10.82234, + "5": 10.84489, + "6": 10.86304, + "7": 10.84427, + "8": 10.84693, + "9": 10.85001, + "10": 10.7897, + "11": 10.85766, + "12": 10.84396, + "13": 10.87181, + "14": 10.87455, + "15": 10.83399, + "16": 10.80919, + "17": 10.791, + "18": 10.81017, + "19": 10.80568, + "20": 10.73563, + "21": 10.71417, + "22": 10.57691, + "23": 10.72597, + "24": 10.61823, + "25": 10.58154, + "26": 10.63745, + "27": 10.63717, + "28": 10.60574, + "29": 10.61026, + "30": 10.40985, + "31": 10.16959, + "32": 10.49887, "33": 10.49702, - "34": 10.26142, - "35": 10.31452, - "36": 10.2851, - "37": 10.3895, - "38": 10.2473, + "34": 10.26149, + "35": 10.31439, + "36": 10.28489, + "37": 10.38874, + "38": 10.24737, "39": 10.43792, - "40": 10.14599, - "41": 10.19691, - "42": 10.26122, - "43": 9.91082, - "44": 10.02318, - "45": 9.91674, - "46": 9.89463, - "47": 10.19281, - "48": 9.93104, - "49": 9.61208, - "50": 9.97427 + "40": 10.14606, + "41": 10.19685, + "42": 10.26102, + "43": 9.91027, + "44": 10.02323, + "45": 9.91719, + "46": 9.89453, + "47": 10.19329, + "48": 9.93042, + "49": 9.61243, + "50": 9.97437 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4986.0, - "2": 5272.0, - "3": 5309.0, - "4": 5162.0, - "5": 5824.0, - "6": 5990.0, - "7": 5433.0, - "8": 5101.0, - "9": 5654.0, - "10": 4736.0, - "11": 6213.0, - "12": 5723.0, - "13": 5952.0, - "14": 6073.0, - "15": 5503.0, - "16": 5808.0, - "17": 5545.0, - "18": 5647.0, - "19": 5555.0, - "20": 5120.0, - "21": 5578.0, - "22": 5097.0, - "23": 5992.0, - "24": 5204.0, - "25": 5016.0, - "26": 5487.0, - "27": 5618.0, - "28": 5994.0, - "29": 6202.0, - "30": 5538.0, - "31": 4762.0, - "32": 6010.0, - "33": 6302.0, - "34": 5312.0, - "35": 5783.0, - "36": 5716.0, - "37": 6562.0, - "38": 6183.0, - "39": 6964.0, - "40": 6220.0, - "41": 6139.0, - "42": 6368.0, - "43": 5900.0, - "44": 5754.0, - "45": 5814.0, - "46": 5882.0, - "47": 6818.0, - "48": 6495.0, - "49": 6047.0, - "50": 6623.0 + "1": 5051.0, + "2": 5315.0, + "3": 5393.0, + "4": 5144.0, + "5": 5971.0, + "6": 5897.0, + "7": 5465.0, + "8": 5099.0, + "9": 5504.0, + "10": 4793.0, + "11": 6028.0, + "12": 5837.0, + "13": 6020.0, + "14": 5895.0, + "15": 5534.0, + "16": 5710.0, + "17": 5651.0, + "18": 5450.0, + "19": 5649.0, + "20": 5103.0, + "21": 5625.0, + "22": 5074.0, + "23": 6044.0, + "24": 5183.0, + "25": 4901.0, + "26": 5454.0, + "27": 5628.0, + "28": 6018.0, + "29": 6058.0, + "30": 5571.0, + "31": 4731.0, + "32": 5867.0, + "33": 6453.0, + "34": 5334.0, + "35": 5685.0, + "36": 5687.0, + "37": 6801.0, + "38": 6247.0, + "39": 6933.0, + "40": 6105.0, + "41": 6091.0, + "42": 6453.0, + "43": 5778.0, + "44": 5946.0, + "45": 5869.0, + "46": 6171.0, + "47": 6709.0, + "48": 6336.0, + "49": 6103.0, + "50": 6672.0 } }, "mem-allocated-bytes": { @@ -124,47 +124,47 @@ "4": 598360576.0, "5": 598358016.0, "6": 598358016.0, - "7": 598354432.0, - "8": 598359040.0, - "9": 598358016.0, + "7": 598355456.0, + "8": 598360576.0, + "9": 598356992.0, "10": 598356992.0, "11": 598358016.0, "12": 598358016.0, "13": 598359040.0, "14": 598359040.0, "15": 598359040.0, - "16": 598358528.0, + "16": 598360576.0, "17": 598352384.0, "18": 598358016.0, "19": 598359040.0, "20": 598357504.0, - "21": 598358016.0, + "21": 598359040.0, "22": 598354432.0, "23": 598355968.0, - "24": 598356480.0, + "24": 598357504.0, "25": 598358528.0, - "26": 598357504.0, + "26": 598356480.0, "27": 598360064.0, "28": 598358016.0, - "29": 598356480.0, + "29": 598357504.0, "30": 598359552.0, "31": 598354944.0, - "32": 598356992.0, + "32": 598355968.0, "33": 598359552.0, - "34": 598358016.0, - "35": 598356480.0, - "36": 598356992.0, - "37": 598358016.0, + "34": 598356992.0, + "35": 598355968.0, + "36": 598357504.0, + "37": 598359040.0, "38": 598358016.0, - "39": 598357504.0, + "39": 598356992.0, "40": 598357504.0, - "41": 598352384.0, + "41": 598351872.0, "42": 598357504.0, "43": 598352384.0, "44": 598355456.0, "45": 598355968.0, "46": 598351872.0, - "47": 598359040.0, + "47": 598359552.0, "48": 598354944.0, "49": 598353408.0, "50": 598356992.0 @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 855598080.0, - "2": 1083234304.0, - "3": 1083234304.0, - "4": 1083234304.0, - "5": 1083234304.0, - "6": 1083493888.0, - "7": 1083493888.0, - "8": 1083493888.0, - "9": 1083493888.0, - "10": 1083493888.0, - "11": 1083493888.0, - "12": 1083493888.0, - "13": 1083493888.0, - "14": 1084195840.0, - "15": 1084195840.0, - "16": 1084195840.0, - "17": 1084195840.0, - "18": 1084195840.0, - "19": 1084195840.0, - "20": 1084195840.0, - "21": 1084195840.0, - "22": 1084195840.0, - "23": 1084195840.0, - "24": 1084195840.0, - "25": 1084195840.0, - "26": 1084195840.0, - "27": 1084195840.0, - "28": 1084195840.0, - "29": 1084195840.0, - "30": 1084195840.0, - "31": 1084195840.0, - "32": 1084195840.0, - "33": 1084195840.0, - "34": 1084195840.0, - "35": 1084195840.0, - "36": 1084195840.0, - "37": 1084195840.0, - "38": 1084195840.0, - "39": 1084195840.0, - "40": 1084195840.0, - "41": 1084195840.0, - "42": 1084195840.0, - "43": 1084195840.0, - "44": 1084195840.0, - "45": 1084195840.0, - "46": 1084195840.0, - "47": 1084195840.0, - "48": 1084195840.0, - "49": 1084195840.0, - "50": 1084195840.0 + "1": 849854464.0, + "2": 1083133952.0, + "3": 1083133952.0, + "4": 1083459584.0, + "5": 1083459584.0, + "6": 1084528128.0, + "7": 1084528128.0, + "8": 1084734976.0, + "9": 1084734976.0, + "10": 1084734976.0, + "11": 1084734976.0, + "12": 1084734976.0, + "13": 1084734976.0, + "14": 1084734976.0, + "15": 1084734976.0, + "16": 1084734976.0, + "17": 1084734976.0, + "18": 1084734976.0, + "19": 1084734976.0, + "20": 1084734976.0, + "21": 1084734976.0, + "22": 1084734976.0, + "23": 1084734976.0, + "24": 1084734976.0, + "25": 1084734976.0, + "26": 1084734976.0, + "27": 1084734976.0, + "28": 1084734976.0, + "29": 1084734976.0, + "30": 1084734976.0, + "31": 1084734976.0, + "32": 1084734976.0, + "33": 1084734976.0, + "34": 1084734976.0, + "35": 1084734976.0, + "36": 1084734976.0, + "37": 1084734976.0, + "38": 1084734976.0, + "39": 1084734976.0, + "40": 1084734976.0, + "41": 1084734976.0, + "42": 1084734976.0, + "43": 1084734976.0, + "44": 1084734976.0, + "45": 1084734976.0, + "46": 1084734976.0, + "47": 1084734976.0, + "48": 1084734976.0, + "49": 1084734976.0, + "50": 1084734976.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12.18178, - "2": 0.71018, - "3": 0.6513, - "4": 0.63757, - "5": 0.63692, - "6": 1.25031, - "7": 0.63769, - "8": 0.6385, - "9": 1.00487, - "10": 0.63706, - "11": 0.63646, - "12": 0.63826, - "13": 0.63654, - "14": 0.63609, - "15": 0.64, - "16": 0.6373, - "17": 0.63737, - "18": 0.63625, - "19": 0.63624, - "20": 0.63844, - "21": 0.6361, - "22": 0.63788, - "23": 0.63738, - "24": 0.63546, - "25": 0.63758, - "26": 0.63704, - "27": 0.63992, - "28": 0.64468, - "29": 0.64456, - "30": 0.6501, - "31": 0.64571, - "32": 0.64554, - "33": 0.64543, - "34": 0.64396, - "35": 0.64389, - "36": 0.64513, - "37": 0.6451, - "38": 0.64723, - "39": 0.6454, - "40": 0.64512, - "41": 0.64629, - "42": 0.64576, - "43": 0.64737, - "44": 0.64709, - "45": 0.64517, - "46": 0.64605, - "47": 0.64625, - "48": 0.64627, - "49": 0.64638, - "50": 0.64367 + "1": "nan", + "2": 7.45654, + "3": 0.66799, + "4": 0.85068, + "5": 0.77572, + "6": 0.68201, + "7": 0.67921, + "8": 0.67951, + "9": 0.72589, + "10": 0.68668, + "11": 0.6775, + "12": 0.67297, + "13": 0.68537, + "14": 0.67587, + "15": 0.66706, + "16": 0.66844, + "17": 0.72815, + "18": 0.71789, + "19": 0.68666, + "20": 0.66516, + "21": 0.66541, + "22": 0.66565, + "23": 0.6614, + "24": 0.6618, + "25": 0.66037, + "26": 0.66246, + "27": 0.63822, + "28": 0.6393, + "29": 0.63682, + "30": 0.63816, + "31": 0.63919, + "32": 0.63822, + "33": 0.64356, + "34": 0.64371, + "35": 0.6378, + "36": 0.63816, + "37": 0.63762, + "38": 0.63747, + "39": 0.63722, + "40": 0.6399, + "41": 0.63856, + "42": 0.63914, + "43": 0.63893, + "44": 0.63818, + "45": 0.63902, + "46": 0.63953, + "47": 0.64, + "48": 0.63873, + "49": 0.63892, + "50": 0.63814 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_gb200.json index 2bcdb30bc50..27a9727334c 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.79193, "2": 10.81245, - "3": 10.79181, - "4": 10.78209, - "5": 10.82295, - "6": 10.83309, - "7": 10.81351, - "8": 10.81215, - "9": 10.81457, - "10": 10.76068, - "11": 10.84185, - "12": 10.82404, - "13": 10.83895, - "14": 10.84433, - "15": 10.79974, - "16": 10.78654, - "17": 10.76789, - "18": 10.77495, - "19": 10.77669, - "20": 10.71893, - "21": 10.69691, + "3": 10.7918, + "4": 10.78248, + "5": 10.8229, + "6": 10.83325, + "7": 10.81381, + "8": 10.8124, + "9": 10.81419, + "10": 10.76095, + "11": 10.84109, + "12": 10.82369, + "13": 10.83874, + "14": 10.84415, + "15": 10.79967, + "16": 10.78628, + "17": 10.76773, + "18": 10.77429, + "19": 10.77699, + "20": 10.71877, + "21": 10.69736, "22": 10.5691, - "23": 10.7131, - "24": 10.59975, - "25": 10.56123, - "26": 10.60735, - "27": 10.63093, - "28": 10.6064, - "29": 10.61213, - "30": 10.39823, - "31": 10.16422, - "32": 10.49019, - "33": 10.48385, - "34": 10.26645, - "35": 10.31743, - "36": 10.28264, - "37": 10.39002, - "38": 10.25116, - "39": 10.43811, - "40": 10.1403, - "41": 10.19191, - "42": 10.25886, - "43": 9.91588, - "44": 10.02837, - "45": 9.91815, - "46": 9.89353, - "47": 10.20144, - "48": 9.92509, - "49": 9.62973, - "50": 9.97857 + "23": 10.71318, + "24": 10.59944, + "25": 10.56048, + "26": 10.60755, + "27": 10.63083, + "28": 10.60651, + "29": 10.61165, + "30": 10.3982, + "31": 10.16412, + "32": 10.49049, + "33": 10.4843, + "34": 10.2663, + "35": 10.31711, + "36": 10.28281, + "37": 10.39018, + "38": 10.25141, + "39": 10.43825, + "40": 10.14073, + "41": 10.19205, + "42": 10.25794, + "43": 9.91532, + "44": 10.0282, + "45": 9.91826, + "46": 9.89373, + "47": 10.20145, + "48": 9.9251, + "49": 9.63015, + "50": 9.979 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5192.0, - "2": 5510.0, - "3": 5508.0, - "4": 5240.0, - "5": 6136.0, - "6": 6180.0, - "7": 5549.0, - "8": 5242.0, - "9": 5717.0, - "10": 4818.0, - "11": 6299.0, - "12": 5746.0, - "13": 6110.0, - "14": 6165.0, - "15": 5683.0, - "16": 5805.0, - "17": 5758.0, - "18": 5546.0, - "19": 5787.0, - "20": 5231.0, - "21": 5741.0, - "22": 5126.0, - "23": 6019.0, - "24": 5410.0, - "25": 5100.0, - "26": 5630.0, - "27": 5627.0, - "28": 6146.0, - "29": 6174.0, - "30": 5570.0, - "31": 4768.0, - "32": 5926.0, - "33": 6348.0, - "34": 5389.0, - "35": 5856.0, - "36": 5741.0, - "37": 6611.0, - "38": 6262.0, - "39": 6971.0, - "40": 6094.0, - "41": 6227.0, - "42": 6622.0, - "43": 5761.0, - "44": 5929.0, - "45": 5769.0, - "46": 6141.0, - "47": 6909.0, - "48": 6650.0, - "49": 6100.0, - "50": 6753.0 + "1": 5164.0, + "2": 5460.0, + "3": 5476.0, + "4": 5164.0, + "5": 5919.0, + "6": 6081.0, + "7": 5512.0, + "8": 5296.0, + "9": 5667.0, + "10": 4778.0, + "11": 6210.0, + "12": 5685.0, + "13": 6050.0, + "14": 6114.0, + "15": 5595.0, + "16": 5966.0, + "17": 5549.0, + "18": 5693.0, + "19": 5735.0, + "20": 5229.0, + "21": 5764.0, + "22": 4962.0, + "23": 5984.0, + "24": 5373.0, + "25": 5199.0, + "26": 5592.0, + "27": 5735.0, + "28": 6127.0, + "29": 6298.0, + "30": 5597.0, + "31": 4765.0, + "32": 5954.0, + "33": 6464.0, + "34": 5371.0, + "35": 5671.0, + "36": 5796.0, + "37": 6657.0, + "38": 6074.0, + "39": 6859.0, + "40": 6040.0, + "41": 6237.0, + "42": 6570.0, + "43": 5866.0, + "44": 5828.0, + "45": 5998.0, + "46": 5969.0, + "47": 6792.0, + "48": 6699.0, + "49": 6241.0, + "50": 6831.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 627716608.0, - "2": 627719168.0, - "3": 627717632.0, - "4": 627719680.0, - "5": 627717120.0, - "6": 627717120.0, - "7": 627719680.0, - "8": 627716608.0, - "9": 627718144.0, - "10": 627718144.0, - "11": 627717632.0, - "12": 627718144.0, - "13": 627719168.0, - "14": 627718144.0, - "15": 627722240.0, - "16": 627718144.0, - "17": 627720704.0, - "18": 627719680.0, - "19": 627719168.0, - "20": 627718144.0, - "21": 627718656.0, - "22": 627723264.0, - "23": 627720192.0, - "24": 627719680.0, - "25": 627718144.0, - "26": 627719168.0, - "27": 627719168.0, - "28": 627718144.0, - "29": 627718144.0, - "30": 627719168.0, - "31": 627719168.0, - "32": 627719168.0, - "33": 627717632.0, - "34": 627719680.0, - "35": 627721216.0, - "36": 627717120.0, - "37": 627719168.0, - "38": 627721216.0, - "39": 627719168.0, - "40": 627718656.0, - "41": 627718144.0, - "42": 627717632.0, - "43": 627717120.0, - "44": 627718656.0, - "45": 627717632.0, - "46": 627717120.0, - "47": 627719168.0, - "48": 627718144.0, - "49": 627716608.0, - "50": 627716096.0 + "1": 628503040.0, + "2": 628505600.0, + "3": 628504064.0, + "4": 628506112.0, + "5": 628504576.0, + "6": 628503552.0, + "7": 628507136.0, + "8": 628503040.0, + "9": 628504576.0, + "10": 628503552.0, + "11": 628505088.0, + "12": 628504576.0, + "13": 628505600.0, + "14": 628504576.0, + "15": 628508672.0, + "16": 628503552.0, + "17": 628507136.0, + "18": 628506112.0, + "19": 628504576.0, + "20": 628505600.0, + "21": 628506112.0, + "22": 628509696.0, + "23": 628506624.0, + "24": 628506112.0, + "25": 628505600.0, + "26": 628504576.0, + "27": 628505600.0, + "28": 628504576.0, + "29": 628504576.0, + "30": 628504064.0, + "31": 628506624.0, + "32": 628505600.0, + "33": 628504064.0, + "34": 628506624.0, + "35": 628508160.0, + "36": 628503552.0, + "37": 628504576.0, + "38": 628506112.0, + "39": 628504576.0, + "40": 628505088.0, + "41": 628505088.0, + "42": 628504576.0, + "43": 628503040.0, + "44": 628504576.0, + "45": 628503040.0, + "46": 628503552.0, + "47": 628504576.0, + "48": 628504576.0, + "49": 628503040.0, + "50": 628502528.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 879803392.0, - "2": 1114769920.0, - "3": 1114769920.0, - "4": 1116293632.0, - "5": 1116293632.0, - "6": 1116293632.0, - "7": 1116293632.0, - "8": 1116293632.0, - "9": 1116293632.0, - "10": 1116293632.0, - "11": 1116293632.0, - "12": 1116293632.0, - "13": 1116293632.0, - "14": 1116293632.0, - "15": 1116293632.0, - "16": 1116293632.0, - "17": 1116293632.0, - "18": 1116293632.0, - "19": 1116293632.0, - "20": 1116293632.0, - "21": 1116293632.0, - "22": 1116293632.0, - "23": 1116293632.0, - "24": 1116293632.0, - "25": 1116293632.0, - "26": 1116293632.0, - "27": 1116293632.0, - "28": 1116293632.0, - "29": 1116293632.0, - "30": 1116293632.0, - "31": 1116293632.0, - "32": 1116293632.0, - "33": 1116293632.0, - "34": 1116293632.0, - "35": 1116293632.0, - "36": 1116293632.0, - "37": 1116293632.0, - "38": 1116293632.0, - "39": 1116293632.0, - "40": 1116293632.0, - "41": 1116293632.0, - "42": 1116293632.0, - "43": 1116293632.0, - "44": 1116293632.0, - "45": 1116293632.0, - "46": 1116293632.0, - "47": 1116293632.0, - "48": 1116293632.0, - "49": 1116293632.0, - "50": 1116293632.0 + "1": 882678784.0, + "2": 1114974720.0, + "3": 1114974720.0, + "4": 1116527104.0, + "5": 1116527104.0, + "6": 1116527104.0, + "7": 1116527104.0, + "8": 1116527104.0, + "9": 1116527104.0, + "10": 1116527104.0, + "11": 1116527104.0, + "12": 1116527104.0, + "13": 1116527104.0, + "14": 1116527104.0, + "15": 1116527104.0, + "16": 1116527104.0, + "17": 1116527104.0, + "18": 1116527104.0, + "19": 1116527104.0, + "20": 1116527104.0, + "21": 1116527104.0, + "22": 1116527104.0, + "23": 1116527104.0, + "24": 1116527104.0, + "25": 1116527104.0, + "26": 1116527104.0, + "27": 1116527104.0, + "28": 1116527104.0, + "29": 1116527104.0, + "30": 1116527104.0, + "31": 1116527104.0, + "32": 1116527104.0, + "33": 1116527104.0, + "34": 1116527104.0, + "35": 1116527104.0, + "36": 1116527104.0, + "37": 1116527104.0, + "38": 1116527104.0, + "39": 1116527104.0, + "40": 1116527104.0, + "41": 1116527104.0, + "42": 1116527104.0, + "43": 1116527104.0, + "44": 1116527104.0, + "45": 1116527104.0, + "46": 1116527104.0, + "47": 1116527104.0, + "48": 1116527104.0, + "49": 1116527104.0, + "50": 1116527104.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 7.52257, - "3": 0.74502, - "4": 0.74089, - "5": 0.73009, - "6": 0.73041, - "7": 0.73704, - "8": 0.71933, - "9": 0.72466, - "10": 1.0546, - "11": 0.71525, - "12": 0.71298, - "13": 0.71412, - "14": 0.71521, - "15": 0.71883, - "16": 0.71464, - "17": 0.72192, - "18": 1.32991, - "19": 0.92083, - "20": 0.72233, - "21": 0.71533, - "22": 0.7144, - "23": 0.71011, - "24": 0.71396, - "25": 0.70984, - "26": 0.7111, - "27": 0.71496, - "28": 0.71187, - "29": 0.71729, - "30": 0.72095, - "31": 0.71436, - "32": 0.70963, - "33": 0.71384, - "34": 0.71534, - "35": 0.7148, - "36": 0.71389, - "37": 0.71097, - "38": 0.71244, - "39": 0.7048, - "40": 0.715, - "41": 1.08196, - "42": 0.71129, - "43": 0.73716, - "44": 0.72639, - "45": 0.71182, - "46": 0.71576, - "47": 0.72917, - "48": 0.72017, - "49": 0.72166, - "50": 0.70656 + "2": 12.28166, + "3": 0.8423, + "4": 0.84011, + "5": 0.81701, + "6": 0.82136, + "7": 0.82417, + "8": 0.81946, + "9": 0.81975, + "10": 0.81813, + "11": 0.82078, + "12": 0.83017, + "13": 0.82021, + "14": 0.81852, + "15": 0.81964, + "16": 0.81778, + "17": 0.85722, + "18": 0.81428, + "19": 0.8163, + "20": 0.81226, + "21": 0.81898, + "22": 0.81825, + "23": 0.81827, + "24": 0.81386, + "25": 0.81609, + "26": 0.81305, + "27": 0.81825, + "28": 0.81785, + "29": 0.81412, + "30": 0.81316, + "31": 0.81642, + "32": 0.81661, + "33": 0.81787, + "34": 0.81723, + "35": 0.82087, + "36": 0.81683, + "37": 0.81788, + "38": 0.81728, + "39": 0.81548, + "40": 0.81851, + "41": 0.82697, + "42": 0.9091, + "43": 0.8141, + "44": 0.81178, + "45": 0.81052, + "46": 0.81949, + "47": 0.81737, + "48": 0.81316, + "49": 0.82751, + "50": 0.80977 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json index e3b2e326fda..a10a199ae66 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_a100.json @@ -6,54 +6,54 @@ "values": { "1": 10.79175, "2": 10.80907, - "3": 10.81011, - "4": 10.78146, - "5": 10.82288, - "6": 10.84057, - "7": 10.81192, - "8": 10.80005, - "9": 10.81667, - "10": 10.7688, - "11": 10.8618, - "12": 10.84042, - "13": 10.84452, - "14": 10.86421, - "15": 10.79157, - "16": 10.78199, - "17": 10.75122, - "18": 10.79446, - "19": 10.79523, - "20": 10.71001, - "21": 10.68811, - "22": 10.53736, - "23": 10.7066, - "24": 10.58865, - "25": 10.54662, - "26": 10.59492, - "27": 10.62142, - "28": 10.5969, - "29": 10.60036, - "30": 10.39407, - "31": 10.12951, - "32": 10.49684, - "33": 10.48779, - "34": 10.24347, - "35": 10.30461, - "36": 10.26056, - "37": 10.38859, - "38": 10.24848, - "39": 10.43799, - "40": 10.13303, - "41": 10.18651, - "42": 10.25823, - "43": 9.892, - "44": 10.02576, - "45": 9.90015, - "46": 9.88387, - "47": 10.19565, - "48": 9.91255, - "49": 9.60147, - "50": 9.97874 + "3": 10.81039, + "4": 10.78127, + "5": 10.82308, + "6": 10.84083, + "7": 10.81171, + "8": 10.79973, + "9": 10.81744, + "10": 10.76857, + "11": 10.86217, + "12": 10.84084, + "13": 10.8452, + "14": 10.86332, + "15": 10.79106, + "16": 10.78185, + "17": 10.75084, + "18": 10.7944, + "19": 10.79546, + "20": 10.70984, + "21": 10.68874, + "22": 10.53812, + "23": 10.70639, + "24": 10.58861, + "25": 10.54517, + "26": 10.59545, + "27": 10.6213, + "28": 10.59623, + "29": 10.60109, + "30": 10.39455, + "31": 10.12997, + "32": 10.49682, + "33": 10.48802, + "34": 10.24299, + "35": 10.305, + "36": 10.26115, + "37": 10.38853, + "38": 10.24853, + "39": 10.43727, + "40": 10.13312, + "41": 10.1867, + "42": 10.25835, + "43": 9.89244, + "44": 10.02594, + "45": 9.90006, + "46": 9.88449, + "47": 10.19557, + "48": 9.91284, + "49": 9.60182, + "50": 9.97848 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5656.0, - "2": 6018.0, - "3": 5790.0, - "4": 5941.0, - "5": 6476.0, - "6": 6653.0, - "7": 6287.0, - "8": 5875.0, - "9": 6239.0, - "10": 5453.0, - "11": 6936.0, - "12": 6711.0, - "13": 6655.0, - "14": 6814.0, - "15": 6233.0, - "16": 6533.0, - "17": 6397.0, - "18": 6112.0, - "19": 6678.0, - "20": 5837.0, - "21": 6403.0, - "22": 5715.0, - "23": 6744.0, - "24": 6051.0, - "25": 5811.0, - "26": 6104.0, - "27": 6484.0, - "28": 6884.0, - "29": 7253.0, - "30": 6047.0, - "31": 5593.0, - "32": 6625.0, - "33": 7054.0, - "34": 6104.0, - "35": 6712.0, - "36": 6684.0, - "37": 7523.0, - "38": 7273.0, - "39": 7620.0, - "40": 7062.0, - "41": 6895.0, - "42": 7426.0, - "43": 6713.0, - "44": 6664.0, - "45": 6681.0, - "46": 6923.0, - "47": 7705.0, - "48": 7248.0, - "49": 7331.0, - "50": 7527.0 + "1": 5743.0, + "2": 5999.0, + "3": 5905.0, + "4": 5938.0, + "5": 6445.0, + "6": 6621.0, + "7": 6274.0, + "8": 5877.0, + "9": 6334.0, + "10": 5348.0, + "11": 6889.0, + "12": 6531.0, + "13": 6773.0, + "14": 6834.0, + "15": 6263.0, + "16": 6473.0, + "17": 6190.0, + "18": 6261.0, + "19": 6546.0, + "20": 5906.0, + "21": 6290.0, + "22": 5727.0, + "23": 6736.0, + "24": 5914.0, + "25": 5888.0, + "26": 6127.0, + "27": 6455.0, + "28": 6974.0, + "29": 7148.0, + "30": 6270.0, + "31": 5639.0, + "32": 6815.0, + "33": 7063.0, + "34": 6218.0, + "35": 6708.0, + "36": 6496.0, + "37": 7484.0, + "38": 7231.0, + "39": 7687.0, + "40": 7025.0, + "41": 7081.0, + "42": 7161.0, + "43": 6589.0, + "44": 6743.0, + "45": 6790.0, + "46": 6933.0, + "47": 7570.0, + "48": 7413.0, + "49": 7262.0, + "50": 7706.0 } }, "mem-allocated-bytes": { @@ -122,52 +122,52 @@ "2": 458211840.0, "3": 458215424.0, "4": 458211840.0, - "5": 458213376.0, + "5": 458213888.0, "6": 458213888.0, - "7": 458216448.0, + "7": 458215936.0, "8": 458216448.0, "9": 458212864.0, - "10": 458215936.0, - "11": 458213888.0, + "10": 458215424.0, + "11": 458212864.0, "12": 458213888.0, "13": 458214400.0, "14": 458215424.0, - "15": 458215424.0, + "15": 458214912.0, "16": 458212864.0, - "17": 458214400.0, - "18": 458214400.0, - "19": 458214400.0, - "20": 458214400.0, + "17": 458214912.0, + "18": 458214912.0, + "19": 458215424.0, + "20": 458214912.0, "21": 458211840.0, - "22": 458218496.0, + "22": 458219520.0, "23": 458214912.0, "24": 458214400.0, "25": 458211840.0, - "26": 458215936.0, - "27": 458210816.0, + "26": 458215424.0, + "27": 458209792.0, "28": 458213888.0, - "29": 458212864.0, + "29": 458213888.0, "30": 458211840.0, "31": 458219008.0, "32": 458214400.0, "33": 458214912.0, "34": 458211840.0, - "35": 458215936.0, + "35": 458215424.0, "36": 458212864.0, - "37": 458215424.0, + "37": 458214400.0, "38": 458213888.0, "39": 458213888.0, "40": 458213376.0, "41": 458216960.0, - "42": 458215424.0, + "42": 458214912.0, "43": 458216960.0, "44": 458213376.0, - "45": 458214400.0, + "45": 458213888.0, "46": 458216448.0, - "47": 458213376.0, + "47": 458212352.0, "48": 458213888.0, - "49": 458215424.0, - "50": 458214912.0 + "49": 458216448.0, + "50": 458215424.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1029256704.0, - "2": 1193177088.0, - "3": 1193177088.0, - "4": 1193686016.0, - "5": 1193686016.0, - "6": 1193686016.0, - "7": 1193686016.0, - "8": 1193686016.0, - "9": 1193771520.0, - "10": 1193771520.0, - "11": 1193771520.0, - "12": 1193771520.0, - "13": 1193771520.0, - "14": 1193771520.0, - "15": 1193771520.0, - "16": 1193771520.0, - "17": 1193771520.0, - "18": 1193771520.0, - "19": 1193771520.0, - "20": 1193771520.0, - "21": 1193771520.0, - "22": 1193918464.0, - "23": 1193918464.0, - "24": 1193918464.0, - "25": 1193918464.0, - "26": 1193918464.0, - "27": 1193918464.0, - "28": 1193918464.0, - "29": 1193918464.0, - "30": 1193918464.0, - "31": 1193918464.0, - "32": 1193918464.0, - "33": 1193918464.0, - "34": 1193918464.0, - "35": 1193918464.0, - "36": 1193918464.0, - "37": 1193918464.0, - "38": 1193918464.0, - "39": 1193918464.0, - "40": 1194139136.0, - "41": 1194139136.0, - "42": 1194139136.0, - "43": 1194249728.0, - "44": 1194249728.0, - "45": 1194249728.0, - "46": 1194249728.0, - "47": 1194249728.0, - "48": 1194249728.0, - "49": 1194249728.0, - "50": 1194249728.0 + "1": 1028204032.0, + "2": 1193069568.0, + "3": 1193083392.0, + "4": 1194512384.0, + "5": 1194512384.0, + "6": 1194512384.0, + "7": 1194512384.0, + "8": 1194512384.0, + "9": 1194512384.0, + "10": 1194512384.0, + "11": 1194512384.0, + "12": 1194512384.0, + "13": 1194512384.0, + "14": 1194512384.0, + "15": 1195728896.0, + "16": 1195728896.0, + "17": 1195728896.0, + "18": 1195728896.0, + "19": 1195728896.0, + "20": 1195728896.0, + "21": 1195728896.0, + "22": 1195728896.0, + "23": 1195728896.0, + "24": 1195728896.0, + "25": 1195728896.0, + "26": 1195728896.0, + "27": 1195728896.0, + "28": 1195728896.0, + "29": 1195728896.0, + "30": 1195728896.0, + "31": 1195728896.0, + "32": 1195728896.0, + "33": 1195728896.0, + "34": 1195728896.0, + "35": 1195728896.0, + "36": 1195728896.0, + "37": 1195728896.0, + "38": 1195728896.0, + "39": 1195728896.0, + "40": 1195728896.0, + "41": 1195728896.0, + "42": 1195728896.0, + "43": 1195728896.0, + "44": 1195728896.0, + "45": 1195728896.0, + "46": 1195728896.0, + "47": 1195728896.0, + "48": 1195728896.0, + "49": 1195728896.0, + "50": 1195728896.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6.67874, - "2": 0.59048, - "3": 0.55954, - "4": 0.55064, - "5": 0.54285, - "6": 0.54344, - "7": 0.54862, - "8": 0.542, - "9": 0.54738, - "10": 0.54947, - "11": 0.53996, - "12": 0.54615, - "13": 0.54407, - "14": 0.54098, - "15": 0.55148, - "16": 0.54024, - "17": 0.54784, - "18": 0.54329, - "19": 0.54213, - "20": 0.55192, - "21": 0.53901, - "22": 0.54612, - "23": 0.54495, - "24": 0.54254, - "25": 0.55242, - "26": 0.53958, - "27": 0.54346, - "28": 0.5466, - "29": 0.54048, - "30": 0.55385, - "31": 0.54112, - "32": 0.54404, - "33": 0.54779, - "34": 0.54049, - "35": 0.53889, - "36": 0.53823, - "37": 0.54013, - "38": 0.53918, - "39": 0.53801, - "40": 0.5394, - "41": 0.53905, - "42": 0.53797, - "43": 0.53957, - "44": 0.5384, - "45": 0.53795, - "46": 0.53859, - "47": 0.54222, - "48": 0.53881, - "49": 0.5401, - "50": 0.53746 + "1": "nan", + "2": 4.55953, + "3": 0.55516, + "4": 0.53746, + "5": 0.53478, + "6": 0.53498, + "7": 0.5367, + "8": 0.53629, + "9": 0.53522, + "10": 0.53431, + "11": 0.53368, + "12": 0.53492, + "13": 0.53533, + "14": 0.5449, + "15": 0.53453, + "16": 0.53447, + "17": 0.53496, + "18": 0.53336, + "19": 0.53494, + "20": 0.53505, + "21": 0.53514, + "22": 0.53519, + "23": 0.53691, + "24": 0.53531, + "25": 0.53509, + "26": 0.53425, + "27": 0.53381, + "28": 0.53396, + "29": 0.53282, + "30": 0.53481, + "31": 0.53441, + "32": 0.53325, + "33": 0.54597, + "34": 0.53535, + "35": 0.53466, + "36": 0.53377, + "37": 0.53356, + "38": 0.53288, + "39": 0.53419, + "40": 0.53467, + "41": 0.53334, + "42": 0.53455, + "43": 0.53489, + "44": 0.53505, + "45": 0.53371, + "46": 0.53538, + "47": 0.53461, + "48": 0.53464, + "49": 0.53506, + "50": 0.53332 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json index 6ec10f4f931..c53f8ba9f79 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.80475, "2": 10.821, - "3": 10.8216, - "4": 10.79306, - "5": 10.84831, - "6": 10.85888, - "7": 10.83177, - "8": 10.82362, - "9": 10.83757, - "10": 10.78732, - "11": 10.86732, - "12": 10.85395, - "13": 10.86171, - "14": 10.88343, - "15": 10.79765, - "16": 10.79986, - "17": 10.76238, - "18": 10.80286, - "19": 10.7945, - "20": 10.71733, - "21": 10.70194, - "22": 10.55147, - "23": 10.72167, - "24": 10.60698, - "25": 10.54614, - "26": 10.6136, - "27": 10.63974, - "28": 10.60486, - "29": 10.62277, - "30": 10.41109, - "31": 10.1456, - "32": 10.51017, - "33": 10.50089, - "34": 10.25812, - "35": 10.3154, - "36": 10.27895, - "37": 10.41061, - "38": 10.25908, - "39": 10.45334, - "40": 10.1604, - "41": 10.20557, - "42": 10.26792, - "43": 9.90468, - "44": 10.03233, - "45": 9.91098, - "46": 9.87857, - "47": 10.20952, - "48": 9.93178, - "49": 9.61584, - "50": 9.98565 + "3": 10.82137, + "4": 10.79315, + "5": 10.8483, + "6": 10.85935, + "7": 10.83174, + "8": 10.82386, + "9": 10.83754, + "10": 10.78771, + "11": 10.86699, + "12": 10.85365, + "13": 10.86137, + "14": 10.88332, + "15": 10.79759, + "16": 10.80014, + "17": 10.76189, + "18": 10.80285, + "19": 10.79428, + "20": 10.71656, + "21": 10.70165, + "22": 10.55146, + "23": 10.72122, + "24": 10.60742, + "25": 10.54634, + "26": 10.61335, + "27": 10.63973, + "28": 10.60466, + "29": 10.62274, + "30": 10.41087, + "31": 10.14603, + "32": 10.50965, + "33": 10.50142, + "34": 10.25863, + "35": 10.31568, + "36": 10.27941, + "37": 10.41098, + "38": 10.2593, + "39": 10.45366, + "40": 10.1605, + "41": 10.20637, + "42": 10.26762, + "43": 9.90459, + "44": 10.03234, + "45": 9.91164, + "46": 9.87875, + "47": 10.20947, + "48": 9.93144, + "49": 9.61602, + "50": 9.98541 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5474.0, - "2": 5853.0, - "3": 5875.0, - "4": 6041.0, - "5": 6601.0, - "6": 6654.0, - "7": 6135.0, - "8": 5761.0, - "9": 6505.0, - "10": 5497.0, - "11": 6994.0, - "12": 6523.0, - "13": 6807.0, - "14": 6969.0, - "15": 6154.0, - "16": 6667.0, - "17": 6368.0, - "18": 6298.0, - "19": 6353.0, - "20": 5998.0, - "21": 6264.0, - "22": 5628.0, - "23": 6620.0, - "24": 6063.0, - "25": 5649.0, - "26": 6226.0, - "27": 6409.0, - "28": 6790.0, - "29": 7055.0, - "30": 6430.0, - "31": 5565.0, - "32": 6615.0, - "33": 6969.0, - "34": 6107.0, - "35": 6538.0, - "36": 6486.0, - "37": 7272.0, - "38": 6923.0, - "39": 7497.0, - "40": 6997.0, - "41": 6747.0, - "42": 7228.0, - "43": 6629.0, - "44": 6752.0, - "45": 6557.0, - "46": 6904.0, - "47": 7474.0, - "48": 7165.0, - "49": 7244.0, - "50": 7331.0 + "1": 5649.0, + "2": 5830.0, + "3": 6085.0, + "4": 5929.0, + "5": 6605.0, + "6": 6671.0, + "7": 6190.0, + "8": 5974.0, + "9": 6573.0, + "10": 5417.0, + "11": 6943.0, + "12": 6397.0, + "13": 6815.0, + "14": 6932.0, + "15": 6292.0, + "16": 6446.0, + "17": 6413.0, + "18": 6231.0, + "19": 6288.0, + "20": 5995.0, + "21": 6301.0, + "22": 5746.0, + "23": 6701.0, + "24": 5988.0, + "25": 5734.0, + "26": 6127.0, + "27": 6238.0, + "28": 6771.0, + "29": 7217.0, + "30": 6275.0, + "31": 5518.0, + "32": 6530.0, + "33": 7079.0, + "34": 6130.0, + "35": 6701.0, + "36": 6367.0, + "37": 7301.0, + "38": 6815.0, + "39": 7720.0, + "40": 6748.0, + "41": 6679.0, + "42": 7340.0, + "43": 6563.0, + "44": 6570.0, + "45": 6542.0, + "46": 7029.0, + "47": 7320.0, + "48": 7144.0, + "49": 7259.0, + "50": 7400.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 458212352.0, - "2": 458212864.0, - "3": 458211328.0, - "4": 458212864.0, - "5": 458212352.0, - "6": 458213376.0, - "7": 458212864.0, - "8": 458213888.0, - "9": 458213376.0, - "10": 458212864.0, - "11": 458210816.0, - "12": 458210304.0, - "13": 458211840.0, - "14": 458213376.0, - "15": 458214400.0, - "16": 458215424.0, - "17": 458212864.0, - "18": 458210816.0, - "19": 458211840.0, - "20": 458212352.0, - "21": 458213888.0, - "22": 458213888.0, - "23": 458211328.0, - "24": 458211840.0, - "25": 458211840.0, - "26": 458212864.0, - "27": 458212352.0, - "28": 458212864.0, - "29": 458211840.0, - "30": 458211840.0, - "31": 458213376.0, - "32": 458211328.0, - "33": 458210304.0, - "34": 458213888.0, - "35": 458214912.0, - "36": 458211328.0, - "37": 458210816.0, - "38": 458211840.0, - "39": 458213376.0, - "40": 458211328.0, - "41": 458213888.0, - "42": 458211840.0, - "43": 458214400.0, - "44": 458213888.0, - "45": 458210816.0, - "46": 458213888.0, - "47": 458211328.0, - "48": 458212352.0, - "49": 458212352.0, - "50": 458210816.0 + "1": 458736640.0, + "2": 458737152.0, + "3": 458735616.0, + "4": 458737152.0, + "5": 458736640.0, + "6": 458737664.0, + "7": 458737152.0, + "8": 458738176.0, + "9": 458738688.0, + "10": 458737152.0, + "11": 458735104.0, + "12": 458734592.0, + "13": 458737664.0, + "14": 458737664.0, + "15": 458737664.0, + "16": 458739712.0, + "17": 458737152.0, + "18": 458735104.0, + "19": 458736128.0, + "20": 458736640.0, + "21": 458735616.0, + "22": 458736640.0, + "23": 458735616.0, + "24": 458737152.0, + "25": 458737152.0, + "26": 458737152.0, + "27": 458735616.0, + "28": 458737152.0, + "29": 458735104.0, + "30": 458736128.0, + "31": 458737664.0, + "32": 458736128.0, + "33": 458734592.0, + "34": 458738176.0, + "35": 458739200.0, + "36": 458735616.0, + "37": 458735104.0, + "38": 458736128.0, + "39": 458737664.0, + "40": 458735616.0, + "41": 458737664.0, + "42": 458735616.0, + "43": 458738688.0, + "44": 458737664.0, + "45": 458735616.0, + "46": 458738688.0, + "47": 458735616.0, + "48": 458736640.0, + "49": 458736640.0, + "50": 458734080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1026068480.0, - "2": 1192152064.0, - "3": 1192152064.0, - "4": 1192205312.0, - "5": 1192205312.0, - "6": 1192205312.0, - "7": 1192205312.0, - "8": 1192205312.0, - "9": 1192205312.0, - "10": 1192205312.0, - "11": 1192205312.0, - "12": 1192205312.0, - "13": 1192349184.0, - "14": 1192349184.0, - "15": 1192506368.0, - "16": 1192506368.0, - "17": 1192506368.0, - "18": 1192506368.0, - "19": 1192506368.0, - "20": 1192506368.0, - "21": 1192506368.0, - "22": 1192506368.0, - "23": 1192506368.0, - "24": 1192506368.0, - "25": 1192506368.0, - "26": 1192506368.0, - "27": 1192506368.0, - "28": 1192506368.0, - "29": 1192506368.0, - "30": 1192506368.0, - "31": 1192506368.0, - "32": 1192506368.0, - "33": 1192506368.0, - "34": 1192506368.0, - "35": 1192506368.0, - "36": 1192506368.0, - "37": 1192506368.0, - "38": 1192506368.0, - "39": 1192506368.0, - "40": 1192506368.0, - "41": 1192506368.0, - "42": 1192506368.0, - "43": 1192506368.0, - "44": 1192506368.0, - "45": 1192506368.0, - "46": 1192506368.0, - "47": 1192506368.0, - "48": 1192506368.0, - "49": 1192506368.0, - "50": 1192506368.0 + "1": 1026235392.0, + "2": 1191006720.0, + "3": 1191138304.0, + "4": 1192048640.0, + "5": 1192048640.0, + "6": 1192048640.0, + "7": 1192048640.0, + "8": 1192048640.0, + "9": 1192048640.0, + "10": 1192327680.0, + "11": 1192465920.0, + "12": 1192465920.0, + "13": 1192641024.0, + "14": 1192641024.0, + "15": 1192641024.0, + "16": 1192641024.0, + "17": 1192641024.0, + "18": 1192641024.0, + "19": 1192641024.0, + "20": 1192641024.0, + "21": 1192641024.0, + "22": 1192641024.0, + "23": 1192641024.0, + "24": 1192641024.0, + "25": 1192641024.0, + "26": 1192641024.0, + "27": 1192641024.0, + "28": 1192641024.0, + "29": 1192641024.0, + "30": 1192641024.0, + "31": 1192641024.0, + "32": 1192641024.0, + "33": 1192641024.0, + "34": 1192641024.0, + "35": 1192641024.0, + "36": 1192641024.0, + "37": 1192641024.0, + "38": 1192641024.0, + "39": 1193289216.0, + "40": 1193289216.0, + "41": 1193289216.0, + "42": 1193289216.0, + "43": 1193289216.0, + "44": 1193289216.0, + "45": 1193289216.0, + "46": 1193289216.0, + "47": 1193289216.0, + "48": 1193289216.0, + "49": 1193289216.0, + "50": 1193289216.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.43711, - "2": 0.5648, - "3": 0.46103, - "4": 0.42843, - "5": 0.39023, - "6": 0.40228, - "7": 0.39933, - "8": 0.40801, - "9": 0.41661, - "10": 0.41115, - "11": 0.40919, - "12": 0.38713, - "13": 0.3967, - "14": 0.39634, - "15": 0.3917, - "16": 0.38895, - "17": 0.39488, - "18": 0.38262, - "19": 0.38633, - "20": 0.38778, - "21": 0.37793, - "22": 0.38122, - "23": 0.3785, - "24": 0.38176, - "25": 0.37936, - "26": 0.38399, - "27": 0.37425, - "28": 0.38373, - "29": 0.37674, - "30": 0.38541, - "31": 0.38748, - "32": 0.37483, - "33": 0.37931, - "34": 0.38691, - "35": 0.39293, - "36": 0.38011, - "37": 0.37641, - "38": 0.37714, - "39": 0.37754, - "40": 0.3929, - "41": 0.37984, - "42": 0.37748, - "43": 0.39504, - "44": 0.38155, - "45": 0.39617, - "46": 0.42631, - "47": 0.39497, - "48": 0.39432, - "49": 0.40482, - "50": 0.37964 + "1": "nan", + "2": 7.35614, + "3": 0.42662, + "4": 0.4194, + "5": 0.38561, + "6": 0.38215, + "7": 0.44042, + "8": 0.39648, + "9": 0.39179, + "10": 0.38601, + "11": 0.37867, + "12": 0.39696, + "13": 0.37723, + "14": 0.37583, + "15": 0.38932, + "16": 0.37862, + "17": 0.38218, + "18": 0.38721, + "19": 0.39068, + "20": 0.3855, + "21": 0.37737, + "22": 0.37975, + "23": 0.3763, + "24": 0.37755, + "25": 0.37814, + "26": 0.37613, + "27": 0.38794, + "28": 0.37908, + "29": 0.37583, + "30": 0.37897, + "31": 0.39194, + "32": 0.37775, + "33": 0.37581, + "34": 0.38903, + "35": 0.38954, + "36": 0.378, + "37": 0.37692, + "38": 0.37717, + "39": 0.37356, + "40": 0.37832, + "41": 0.38595, + "42": 0.38169, + "43": 0.38907, + "44": 0.38237, + "45": 0.37792, + "46": 0.414, + "47": 0.3933, + "48": 0.38767, + "49": 0.39999, + "50": 0.37905 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json index b4462fc931e..890246c909f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_a100.json @@ -6,54 +6,54 @@ "values": { "1": 10.79175, "2": 10.80907, - "3": 10.81011, - "4": 10.78146, - "5": 10.82288, - "6": 10.84057, - "7": 10.81192, - "8": 10.80005, - "9": 10.81667, - "10": 10.7688, - "11": 10.8618, - "12": 10.84042, - "13": 10.84452, - "14": 10.86421, - "15": 10.79157, - "16": 10.78199, - "17": 10.75122, - "18": 10.79446, - "19": 10.79523, - "20": 10.71001, - "21": 10.68811, - "22": 10.53736, - "23": 10.7066, - "24": 10.58865, - "25": 10.54662, - "26": 10.59492, - "27": 10.62142, - "28": 10.5969, - "29": 10.60036, - "30": 10.39407, - "31": 10.12951, - "32": 10.49684, - "33": 10.48779, - "34": 10.24347, - "35": 10.30461, - "36": 10.26056, - "37": 10.38859, - "38": 10.24848, - "39": 10.43799, - "40": 10.13303, - "41": 10.18651, - "42": 10.25823, - "43": 9.892, - "44": 10.02576, - "45": 9.90015, - "46": 9.88387, - "47": 10.19565, - "48": 9.91255, - "49": 9.60147, - "50": 9.97874 + "3": 10.81039, + "4": 10.78127, + "5": 10.82308, + "6": 10.84083, + "7": 10.81171, + "8": 10.79973, + "9": 10.81744, + "10": 10.76857, + "11": 10.86217, + "12": 10.84084, + "13": 10.8452, + "14": 10.86332, + "15": 10.79106, + "16": 10.78185, + "17": 10.75084, + "18": 10.7944, + "19": 10.79546, + "20": 10.70984, + "21": 10.68874, + "22": 10.53812, + "23": 10.70639, + "24": 10.58861, + "25": 10.54517, + "26": 10.59545, + "27": 10.6213, + "28": 10.59623, + "29": 10.60109, + "30": 10.39455, + "31": 10.12997, + "32": 10.49682, + "33": 10.48802, + "34": 10.24299, + "35": 10.305, + "36": 10.26115, + "37": 10.38853, + "38": 10.24853, + "39": 10.43727, + "40": 10.13312, + "41": 10.1867, + "42": 10.25835, + "43": 9.89244, + "44": 10.02594, + "45": 9.90006, + "46": 9.88449, + "47": 10.19557, + "48": 9.91284, + "49": 9.60182, + "50": 9.97848 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5656.0, - "2": 6018.0, - "3": 5790.0, - "4": 5941.0, - "5": 6476.0, - "6": 6653.0, - "7": 6287.0, - "8": 5875.0, - "9": 6239.0, - "10": 5453.0, - "11": 6936.0, - "12": 6711.0, - "13": 6655.0, - "14": 6814.0, - "15": 6233.0, - "16": 6533.0, - "17": 6397.0, - "18": 6112.0, - "19": 6678.0, - "20": 5837.0, - "21": 6403.0, - "22": 5715.0, - "23": 6744.0, - "24": 6051.0, - "25": 5811.0, - "26": 6104.0, - "27": 6484.0, - "28": 6884.0, - "29": 7253.0, - "30": 6047.0, - "31": 5593.0, - "32": 6625.0, - "33": 7054.0, - "34": 6104.0, - "35": 6712.0, - "36": 6684.0, - "37": 7523.0, - "38": 7273.0, - "39": 7620.0, - "40": 7062.0, - "41": 6895.0, - "42": 7426.0, - "43": 6713.0, - "44": 6664.0, - "45": 6681.0, - "46": 6923.0, - "47": 7705.0, - "48": 7248.0, - "49": 7331.0, - "50": 7527.0 + "1": 5743.0, + "2": 5999.0, + "3": 5905.0, + "4": 5938.0, + "5": 6445.0, + "6": 6621.0, + "7": 6274.0, + "8": 5877.0, + "9": 6334.0, + "10": 5348.0, + "11": 6889.0, + "12": 6531.0, + "13": 6773.0, + "14": 6834.0, + "15": 6263.0, + "16": 6473.0, + "17": 6190.0, + "18": 6261.0, + "19": 6546.0, + "20": 5906.0, + "21": 6290.0, + "22": 5727.0, + "23": 6736.0, + "24": 5914.0, + "25": 5888.0, + "26": 6127.0, + "27": 6455.0, + "28": 6974.0, + "29": 7148.0, + "30": 6270.0, + "31": 5639.0, + "32": 6815.0, + "33": 7063.0, + "34": 6218.0, + "35": 6708.0, + "36": 6496.0, + "37": 7484.0, + "38": 7231.0, + "39": 7687.0, + "40": 7025.0, + "41": 7081.0, + "42": 7161.0, + "43": 6589.0, + "44": 6743.0, + "45": 6790.0, + "46": 6933.0, + "47": 7570.0, + "48": 7413.0, + "49": 7262.0, + "50": 7706.0 } }, "mem-allocated-bytes": { @@ -122,52 +122,52 @@ "2": 458211840.0, "3": 458215424.0, "4": 458211840.0, - "5": 458213376.0, + "5": 458213888.0, "6": 458213888.0, - "7": 458216448.0, + "7": 458215936.0, "8": 458216448.0, "9": 458212864.0, - "10": 458215936.0, - "11": 458213888.0, + "10": 458215424.0, + "11": 458212864.0, "12": 458213888.0, "13": 458214400.0, "14": 458215424.0, - "15": 458215424.0, + "15": 458214912.0, "16": 458212864.0, - "17": 458214400.0, - "18": 458214400.0, - "19": 458214400.0, - "20": 458214400.0, + "17": 458214912.0, + "18": 458214912.0, + "19": 458215424.0, + "20": 458214912.0, "21": 458211840.0, - "22": 458218496.0, + "22": 458219520.0, "23": 458214912.0, "24": 458214400.0, "25": 458211840.0, - "26": 458215936.0, - "27": 458210816.0, + "26": 458215424.0, + "27": 458209792.0, "28": 458213888.0, - "29": 458212864.0, + "29": 458213888.0, "30": 458211840.0, "31": 458219008.0, "32": 458214400.0, "33": 458214912.0, "34": 458211840.0, - "35": 458215936.0, + "35": 458215424.0, "36": 458212864.0, - "37": 458215424.0, + "37": 458214400.0, "38": 458213888.0, "39": 458213888.0, "40": 458213376.0, "41": 458216960.0, - "42": 458215424.0, + "42": 458214912.0, "43": 458216960.0, "44": 458213376.0, - "45": 458214400.0, + "45": 458213888.0, "46": 458216448.0, - "47": 458213376.0, + "47": 458212352.0, "48": 458213888.0, - "49": 458215424.0, - "50": 458214912.0 + "49": 458216448.0, + "50": 458215424.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1029256704.0, - "2": 1193177088.0, - "3": 1193177088.0, - "4": 1193686016.0, - "5": 1193686016.0, - "6": 1193686016.0, - "7": 1193686016.0, - "8": 1193686016.0, - "9": 1193771520.0, - "10": 1193771520.0, - "11": 1193771520.0, - "12": 1193771520.0, - "13": 1193771520.0, - "14": 1193771520.0, - "15": 1193771520.0, - "16": 1193771520.0, - "17": 1193771520.0, - "18": 1193771520.0, - "19": 1193771520.0, - "20": 1193771520.0, - "21": 1193771520.0, - "22": 1193918464.0, - "23": 1193918464.0, - "24": 1193918464.0, - "25": 1193918464.0, - "26": 1193918464.0, - "27": 1193918464.0, - "28": 1193918464.0, - "29": 1193918464.0, - "30": 1193918464.0, - "31": 1193918464.0, - "32": 1193918464.0, - "33": 1193918464.0, - "34": 1193918464.0, - "35": 1193918464.0, - "36": 1193918464.0, - "37": 1193918464.0, - "38": 1193918464.0, - "39": 1193918464.0, - "40": 1194139136.0, - "41": 1194139136.0, - "42": 1194139136.0, - "43": 1194249728.0, - "44": 1194249728.0, - "45": 1194249728.0, - "46": 1194249728.0, - "47": 1194249728.0, - "48": 1194249728.0, - "49": 1194249728.0, - "50": 1194249728.0 + "1": 1028204032.0, + "2": 1193069568.0, + "3": 1193083392.0, + "4": 1194512384.0, + "5": 1194512384.0, + "6": 1194512384.0, + "7": 1194512384.0, + "8": 1194512384.0, + "9": 1194512384.0, + "10": 1194512384.0, + "11": 1194512384.0, + "12": 1194512384.0, + "13": 1194512384.0, + "14": 1194512384.0, + "15": 1195728896.0, + "16": 1195728896.0, + "17": 1195728896.0, + "18": 1195728896.0, + "19": 1195728896.0, + "20": 1195728896.0, + "21": 1195728896.0, + "22": 1195728896.0, + "23": 1195728896.0, + "24": 1195728896.0, + "25": 1195728896.0, + "26": 1195728896.0, + "27": 1195728896.0, + "28": 1195728896.0, + "29": 1195728896.0, + "30": 1195728896.0, + "31": 1195728896.0, + "32": 1195728896.0, + "33": 1195728896.0, + "34": 1195728896.0, + "35": 1195728896.0, + "36": 1195728896.0, + "37": 1195728896.0, + "38": 1195728896.0, + "39": 1195728896.0, + "40": 1195728896.0, + "41": 1195728896.0, + "42": 1195728896.0, + "43": 1195728896.0, + "44": 1195728896.0, + "45": 1195728896.0, + "46": 1195728896.0, + "47": 1195728896.0, + "48": 1195728896.0, + "49": 1195728896.0, + "50": 1195728896.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 6.42299, - "2": 0.59069, - "3": 0.56496, - "4": 0.54736, - "5": 0.54792, - "6": 0.57731, - "7": 0.54778, - "8": 0.54659, - "9": 0.54833, - "10": 0.54497, - "11": 0.55076, - "12": 0.55595, - "13": 0.54721, - "14": 0.54614, - "15": 0.5457, - "16": 0.54774, - "17": 0.54518, - "18": 0.54582, - "19": 0.5467, - "20": 0.54611, - "21": 0.54622, - "22": 0.54617, - "23": 0.54622, - "24": 0.54547, - "25": 0.54796, - "26": 0.54413, - "27": 0.5458, - "28": 0.54598, - "29": 0.54813, - "30": 0.54556, - "31": 0.54684, - "32": 0.54789, - "33": 0.57275, - "34": 0.54705, - "35": 0.54545, - "36": 0.54414, - "37": 0.54225, - "38": 0.54504, - "39": 0.54284, - "40": 0.54185, - "41": 0.54578, - "42": 0.54542, - "43": 0.54621, - "44": 0.54447, - "45": 0.54521, - "46": 0.5449, - "47": 0.54529, - "48": 0.54403, - "49": 0.56089, - "50": 0.54374 + "1": "nan", + "2": 5.39653, + "3": 0.54844, + "4": 0.53143, + "5": 0.52936, + "6": 0.5307, + "7": 0.53171, + "8": 0.52977, + "9": 0.5296, + "10": 0.52973, + "11": 0.52988, + "12": 0.52899, + "13": 0.53004, + "14": 0.52976, + "15": 0.5302, + "16": 0.52852, + "17": 0.52991, + "18": 0.52961, + "19": 0.52906, + "20": 0.53311, + "21": 0.53111, + "22": 0.53049, + "23": 0.52977, + "24": 0.52985, + "25": 0.52861, + "26": 0.52859, + "27": 0.52946, + "28": 0.5289, + "29": 0.5285, + "30": 0.53038, + "31": 0.53184, + "32": 0.52975, + "33": 0.5291, + "34": 0.52987, + "35": 0.52942, + "36": 0.52992, + "37": 0.52962, + "38": 0.52867, + "39": 0.52796, + "40": 0.53013, + "41": 0.52972, + "42": 0.52845, + "43": 0.52857, + "44": 0.52933, + "45": 0.52943, + "46": 0.52979, + "47": 0.5281, + "48": 0.52831, + "49": 0.52914, + "50": 0.52676 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json index 64dc8751e92..0617e255ebc 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.80475, "2": 10.821, - "3": 10.8216, - "4": 10.79306, - "5": 10.84831, - "6": 10.85888, - "7": 10.83177, - "8": 10.82362, - "9": 10.83757, - "10": 10.78732, - "11": 10.86732, - "12": 10.85395, - "13": 10.86171, - "14": 10.88343, - "15": 10.79765, - "16": 10.79986, - "17": 10.76238, - "18": 10.80286, - "19": 10.7945, - "20": 10.71733, - "21": 10.70194, - "22": 10.55147, - "23": 10.72167, - "24": 10.60698, - "25": 10.54614, - "26": 10.6136, - "27": 10.63974, - "28": 10.60486, - "29": 10.62277, - "30": 10.41109, - "31": 10.1456, - "32": 10.51017, - "33": 10.50089, - "34": 10.25812, - "35": 10.3154, - "36": 10.27895, - "37": 10.41061, - "38": 10.25908, - "39": 10.45334, - "40": 10.1604, - "41": 10.20557, - "42": 10.26792, - "43": 9.90468, - "44": 10.03233, - "45": 9.91098, - "46": 9.87857, - "47": 10.20952, - "48": 9.93178, - "49": 9.61584, - "50": 9.98565 + "3": 10.82137, + "4": 10.79315, + "5": 10.8483, + "6": 10.85935, + "7": 10.83174, + "8": 10.82386, + "9": 10.83754, + "10": 10.78771, + "11": 10.86699, + "12": 10.85365, + "13": 10.86137, + "14": 10.88332, + "15": 10.79759, + "16": 10.80014, + "17": 10.76189, + "18": 10.80285, + "19": 10.79428, + "20": 10.71656, + "21": 10.70165, + "22": 10.55146, + "23": 10.72122, + "24": 10.60742, + "25": 10.54634, + "26": 10.61335, + "27": 10.63973, + "28": 10.60466, + "29": 10.62274, + "30": 10.41087, + "31": 10.14603, + "32": 10.50965, + "33": 10.50142, + "34": 10.25863, + "35": 10.31568, + "36": 10.27941, + "37": 10.41098, + "38": 10.2593, + "39": 10.45366, + "40": 10.1605, + "41": 10.20637, + "42": 10.26762, + "43": 9.90459, + "44": 10.03234, + "45": 9.91164, + "46": 9.87875, + "47": 10.20947, + "48": 9.93144, + "49": 9.61602, + "50": 9.98541 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 5474.0, - "2": 5853.0, - "3": 5875.0, - "4": 6041.0, - "5": 6601.0, - "6": 6654.0, - "7": 6135.0, - "8": 5761.0, - "9": 6505.0, - "10": 5497.0, - "11": 6994.0, - "12": 6523.0, - "13": 6807.0, - "14": 6969.0, - "15": 6154.0, - "16": 6667.0, - "17": 6368.0, - "18": 6298.0, - "19": 6353.0, - "20": 5998.0, - "21": 6264.0, - "22": 5628.0, - "23": 6620.0, - "24": 6063.0, - "25": 5649.0, - "26": 6226.0, - "27": 6409.0, - "28": 6790.0, - "29": 7055.0, - "30": 6430.0, - "31": 5565.0, - "32": 6615.0, - "33": 6969.0, - "34": 6107.0, - "35": 6538.0, - "36": 6486.0, - "37": 7272.0, - "38": 6923.0, - "39": 7497.0, - "40": 6997.0, - "41": 6747.0, - "42": 7228.0, - "43": 6629.0, - "44": 6752.0, - "45": 6557.0, - "46": 6904.0, - "47": 7474.0, - "48": 7165.0, - "49": 7244.0, - "50": 7331.0 + "1": 5649.0, + "2": 5830.0, + "3": 6085.0, + "4": 5929.0, + "5": 6605.0, + "6": 6671.0, + "7": 6190.0, + "8": 5974.0, + "9": 6573.0, + "10": 5417.0, + "11": 6943.0, + "12": 6397.0, + "13": 6815.0, + "14": 6932.0, + "15": 6292.0, + "16": 6446.0, + "17": 6413.0, + "18": 6231.0, + "19": 6288.0, + "20": 5995.0, + "21": 6301.0, + "22": 5746.0, + "23": 6701.0, + "24": 5988.0, + "25": 5734.0, + "26": 6127.0, + "27": 6238.0, + "28": 6771.0, + "29": 7217.0, + "30": 6275.0, + "31": 5518.0, + "32": 6530.0, + "33": 7079.0, + "34": 6130.0, + "35": 6701.0, + "36": 6367.0, + "37": 7301.0, + "38": 6815.0, + "39": 7720.0, + "40": 6748.0, + "41": 6679.0, + "42": 7340.0, + "43": 6563.0, + "44": 6570.0, + "45": 6542.0, + "46": 7029.0, + "47": 7320.0, + "48": 7144.0, + "49": 7259.0, + "50": 7400.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 458212352.0, - "2": 458212864.0, - "3": 458211328.0, - "4": 458212864.0, - "5": 458212352.0, - "6": 458213376.0, - "7": 458212864.0, - "8": 458213888.0, - "9": 458213376.0, - "10": 458212864.0, - "11": 458210816.0, - "12": 458210304.0, - "13": 458211840.0, - "14": 458213376.0, - "15": 458214400.0, - "16": 458215424.0, - "17": 458212864.0, - "18": 458210816.0, - "19": 458211840.0, - "20": 458212352.0, - "21": 458213888.0, - "22": 458213888.0, - "23": 458211328.0, - "24": 458211840.0, - "25": 458211840.0, - "26": 458212864.0, - "27": 458212352.0, - "28": 458212864.0, - "29": 458211840.0, - "30": 458211840.0, - "31": 458213376.0, - "32": 458211328.0, - "33": 458210304.0, - "34": 458213888.0, - "35": 458214912.0, - "36": 458211328.0, - "37": 458210816.0, - "38": 458211840.0, - "39": 458213376.0, - "40": 458211328.0, - "41": 458213888.0, - "42": 458211840.0, - "43": 458214400.0, - "44": 458213888.0, - "45": 458210816.0, - "46": 458213888.0, - "47": 458211328.0, - "48": 458212352.0, - "49": 458212352.0, - "50": 458210816.0 + "1": 458736640.0, + "2": 458737152.0, + "3": 458735616.0, + "4": 458737152.0, + "5": 458736640.0, + "6": 458737664.0, + "7": 458737152.0, + "8": 458738176.0, + "9": 458738688.0, + "10": 458737152.0, + "11": 458735104.0, + "12": 458734592.0, + "13": 458737664.0, + "14": 458737664.0, + "15": 458737664.0, + "16": 458739712.0, + "17": 458737152.0, + "18": 458735104.0, + "19": 458736128.0, + "20": 458736640.0, + "21": 458735616.0, + "22": 458736640.0, + "23": 458735616.0, + "24": 458737152.0, + "25": 458737152.0, + "26": 458737152.0, + "27": 458735616.0, + "28": 458737152.0, + "29": 458735104.0, + "30": 458736128.0, + "31": 458737664.0, + "32": 458736128.0, + "33": 458734592.0, + "34": 458738176.0, + "35": 458739200.0, + "36": 458735616.0, + "37": 458735104.0, + "38": 458736128.0, + "39": 458737664.0, + "40": 458735616.0, + "41": 458737664.0, + "42": 458735616.0, + "43": 458738688.0, + "44": 458737664.0, + "45": 458735616.0, + "46": 458738688.0, + "47": 458735616.0, + "48": 458736640.0, + "49": 458736640.0, + "50": 458734080.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1026068480.0, - "2": 1192152064.0, - "3": 1192152064.0, - "4": 1192205312.0, - "5": 1192205312.0, - "6": 1192205312.0, - "7": 1192205312.0, - "8": 1192205312.0, - "9": 1192205312.0, - "10": 1192205312.0, - "11": 1192205312.0, - "12": 1192205312.0, - "13": 1192349184.0, - "14": 1192349184.0, - "15": 1192506368.0, - "16": 1192506368.0, - "17": 1192506368.0, - "18": 1192506368.0, - "19": 1192506368.0, - "20": 1192506368.0, - "21": 1192506368.0, - "22": 1192506368.0, - "23": 1192506368.0, - "24": 1192506368.0, - "25": 1192506368.0, - "26": 1192506368.0, - "27": 1192506368.0, - "28": 1192506368.0, - "29": 1192506368.0, - "30": 1192506368.0, - "31": 1192506368.0, - "32": 1192506368.0, - "33": 1192506368.0, - "34": 1192506368.0, - "35": 1192506368.0, - "36": 1192506368.0, - "37": 1192506368.0, - "38": 1192506368.0, - "39": 1192506368.0, - "40": 1192506368.0, - "41": 1192506368.0, - "42": 1192506368.0, - "43": 1192506368.0, - "44": 1192506368.0, - "45": 1192506368.0, - "46": 1192506368.0, - "47": 1192506368.0, - "48": 1192506368.0, - "49": 1192506368.0, - "50": 1192506368.0 + "1": 1026235392.0, + "2": 1191006720.0, + "3": 1191138304.0, + "4": 1192048640.0, + "5": 1192048640.0, + "6": 1192048640.0, + "7": 1192048640.0, + "8": 1192048640.0, + "9": 1192048640.0, + "10": 1192327680.0, + "11": 1192465920.0, + "12": 1192465920.0, + "13": 1192641024.0, + "14": 1192641024.0, + "15": 1192641024.0, + "16": 1192641024.0, + "17": 1192641024.0, + "18": 1192641024.0, + "19": 1192641024.0, + "20": 1192641024.0, + "21": 1192641024.0, + "22": 1192641024.0, + "23": 1192641024.0, + "24": 1192641024.0, + "25": 1192641024.0, + "26": 1192641024.0, + "27": 1192641024.0, + "28": 1192641024.0, + "29": 1192641024.0, + "30": 1192641024.0, + "31": 1192641024.0, + "32": 1192641024.0, + "33": 1192641024.0, + "34": 1192641024.0, + "35": 1192641024.0, + "36": 1192641024.0, + "37": 1192641024.0, + "38": 1192641024.0, + "39": 1193289216.0, + "40": 1193289216.0, + "41": 1193289216.0, + "42": 1193289216.0, + "43": 1193289216.0, + "44": 1193289216.0, + "45": 1193289216.0, + "46": 1193289216.0, + "47": 1193289216.0, + "48": 1193289216.0, + "49": 1193289216.0, + "50": 1193289216.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13.13083, - "2": 0.49339, - "3": 0.43067, - "4": 0.43124, - "5": 0.38622, - "6": 0.39174, - "7": 0.39833, - "8": 0.39421, - "9": 0.3937, - "10": 0.38682, - "11": 0.39333, - "12": 0.38647, - "13": 0.38364, - "14": 0.38374, - "15": 0.38593, - "16": 0.38263, - "17": 0.39915, - "18": 0.38564, - "19": 0.38954, - "20": 0.38955, - "21": 0.38216, - "22": 0.38466, - "23": 0.38551, - "24": 0.38195, - "25": 0.38416, - "26": 0.38554, - "27": 0.38123, - "28": 0.38882, - "29": 0.43011, - "30": 0.38995, - "31": 0.39202, - "32": 0.38203, - "33": 0.38777, - "34": 0.39058, - "35": 0.39634, - "36": 0.38496, - "37": 0.38112, - "38": 0.38052, - "39": 0.37771, - "40": 0.38438, - "41": 0.38696, - "42": 0.38029, - "43": 0.39638, - "44": 0.38187, - "45": 0.38285, - "46": 0.42266, - "47": 0.3977, - "48": 0.39566, - "49": 0.40884, - "50": 0.38389 + "1": "nan", + "2": 7.92806, + "3": 0.42884, + "4": 0.4181, + "5": 0.38785, + "6": 0.38798, + "7": 0.39804, + "8": 0.39885, + "9": 0.39837, + "10": 0.38582, + "11": 0.38128, + "12": 0.3877, + "13": 0.39005, + "14": 0.3857, + "15": 0.38979, + "16": 0.3855, + "17": 0.39486, + "18": 0.38449, + "19": 0.39881, + "20": 0.39407, + "21": 0.38587, + "22": 0.38917, + "23": 0.3851, + "24": 0.38917, + "25": 0.38635, + "26": 0.38598, + "27": 0.38618, + "28": 0.3898, + "29": 0.3871, + "30": 0.39096, + "31": 0.40269, + "32": 0.38598, + "33": 0.39054, + "34": 0.40412, + "35": 0.40047, + "36": 0.38707, + "37": 0.38889, + "38": 0.3882, + "39": 0.38375, + "40": 0.38903, + "41": 0.39487, + "42": 0.3921, + "43": 0.4011, + "44": 0.39501, + "45": 0.38847, + "46": 0.4285, + "47": 0.40517, + "48": 0.40246, + "49": 0.40981, + "50": 0.39109 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json index 3e910ef7869..d3e7b4da4e1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_gb200.json @@ -6,54 +6,54 @@ "values": { "1": 10.79574, "2": 10.81485, - "3": 10.78713, - "4": 10.78269, - "5": 10.82015, - "6": 10.83331, - "7": 10.81116, - "8": 10.81446, - "9": 10.81645, - "10": 10.75997, - "11": 10.8388, - "12": 10.81544, - "13": 10.84141, - "14": 10.8476, - "15": 10.79857, - "16": 10.78544, - "17": 10.77004, - "18": 10.77906, - "19": 10.7689, - "20": 10.71392, - "21": 10.69182, - "22": 10.56438, - "23": 10.70939, - "24": 10.60304, - "25": 10.55748, - "26": 10.60238, - "27": 10.62835, - "28": 10.59772, - "29": 10.61013, - "30": 10.40394, - "31": 10.17092, - "32": 10.49069, - "33": 10.48436, - "34": 10.26719, - "35": 10.31532, - "36": 10.27654, - "37": 10.39353, - "38": 10.24536, - "39": 10.43863, - "40": 10.13998, - "41": 10.19151, - "42": 10.25868, - "43": 9.9191, - "44": 10.03026, - "45": 9.92187, - "46": 9.89763, - "47": 10.1946, - "48": 9.93001, - "49": 9.62787, - "50": 9.97966 + "3": 10.78727, + "4": 10.78304, + "5": 10.81989, + "6": 10.83351, + "7": 10.81125, + "8": 10.8139, + "9": 10.81641, + "10": 10.75982, + "11": 10.83861, + "12": 10.81549, + "13": 10.84169, + "14": 10.84748, + "15": 10.79858, + "16": 10.78574, + "17": 10.77021, + "18": 10.77928, + "19": 10.76898, + "20": 10.71384, + "21": 10.69186, + "22": 10.56455, + "23": 10.70953, + "24": 10.60188, + "25": 10.55758, + "26": 10.6026, + "27": 10.62881, + "28": 10.59778, + "29": 10.61003, + "30": 10.4043, + "31": 10.17042, + "32": 10.49112, + "33": 10.48455, + "34": 10.26721, + "35": 10.31534, + "36": 10.27691, + "37": 10.39297, + "38": 10.24497, + "39": 10.43848, + "40": 10.13979, + "41": 10.19223, + "42": 10.25887, + "43": 9.91922, + "44": 10.03027, + "45": 9.92197, + "46": 9.89723, + "47": 10.19434, + "48": 9.92968, + "49": 9.62843, + "50": 9.97977 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 13025.0, - "2": 14911.0, - "3": 14651.0, - "4": 13760.0, - "5": 16297.0, - "6": 16032.0, - "7": 15521.0, - "8": 13170.0, - "9": 15403.0, - "10": 12605.0, - "11": 16803.0, - "12": 15289.0, - "13": 16415.0, - "14": 16182.0, - "15": 15127.0, - "16": 16135.0, - "17": 15282.0, - "18": 15280.0, - "19": 15379.0, - "20": 13642.0, - "21": 14281.0, - "22": 13476.0, - "23": 16892.0, - "24": 13920.0, - "25": 13236.0, - "26": 15256.0, - "27": 15454.0, - "28": 15973.0, - "29": 16892.0, - "30": 14103.0, - "31": 13113.0, - "32": 16067.0, - "33": 16788.0, - "34": 14559.0, - "35": 14974.0, - "36": 15798.0, - "37": 17569.0, - "38": 16172.0, - "39": 17774.0, - "40": 16088.0, - "41": 16616.0, - "42": 17149.0, - "43": 15487.0, - "44": 15110.0, - "45": 16499.0, - "46": 17407.0, - "47": 19502.0, - "48": 16568.0, - "49": 16613.0, - "50": 18892.0 + "1": 12991.0, + "2": 14647.0, + "3": 14597.0, + "4": 13579.0, + "5": 16353.0, + "6": 16061.0, + "7": 15477.0, + "8": 13223.0, + "9": 15156.0, + "10": 12636.0, + "11": 16856.0, + "12": 15160.0, + "13": 16551.0, + "14": 16239.0, + "15": 15226.0, + "16": 16061.0, + "17": 15397.0, + "18": 15451.0, + "19": 15161.0, + "20": 13625.0, + "21": 14545.0, + "22": 13271.0, + "23": 16890.0, + "24": 13779.0, + "25": 13265.0, + "26": 14968.0, + "27": 15467.0, + "28": 16066.0, + "29": 16732.0, + "30": 14531.0, + "31": 13042.0, + "32": 16229.0, + "33": 16950.0, + "34": 14393.0, + "35": 14998.0, + "36": 15721.0, + "37": 17452.0, + "38": 16404.0, + "39": 17975.0, + "40": 16453.0, + "41": 16448.0, + "42": 17213.0, + "43": 15580.0, + "44": 14866.0, + "45": 16460.0, + "46": 17136.0, + "47": 19531.0, + "48": 16471.0, + "49": 16688.0, + "50": 18961.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 625796096.0, - "2": 625850368.0, - "3": 625987072.0, - "4": 625831424.0, - "5": 625794048.0, - "6": 625789952.0, - "7": 625830912.0, - "8": 625794048.0, - "9": 625861120.0, - "10": 625806848.0, - "11": 625795584.0, - "12": 626022912.0, - "13": 625802240.0, - "14": 625853952.0, - "15": 625796608.0, - "16": 625793024.0, - "17": 625798144.0, - "18": 625802240.0, - "19": 625792000.0, - "20": 625793536.0, - "21": 626690048.0, - "22": 626176000.0, - "23": 626092032.0, - "24": 625794560.0, - "25": 626540544.0, - "26": 625934848.0, - "27": 625799168.0, - "28": 625801728.0, - "29": 625793536.0, - "30": 626191360.0, - "31": 626149376.0, - "32": 626774016.0, - "33": 625792512.0, - "34": 625793024.0, - "35": 625851904.0, - "36": 625809408.0, - "37": 625794048.0, - "38": 625827328.0, - "39": 625865216.0, - "40": 625831936.0, - "41": 626081280.0, - "42": 626046464.0, - "43": 625792000.0, - "44": 625792000.0, - "45": 626266112.0, - "46": 626042880.0, - "47": 625789440.0, - "48": 625905152.0, - "49": 625883648.0, - "50": 626099712.0 + "1": 627893248.0, + "2": 628091904.0, + "3": 627973120.0, + "4": 627928576.0, + "5": 627940352.0, + "6": 627909120.0, + "7": 627932672.0, + "8": 628246528.0, + "9": 627952128.0, + "10": 627889664.0, + "11": 627977728.0, + "12": 628083200.0, + "13": 627912704.0, + "14": 627889664.0, + "15": 627917312.0, + "16": 627889664.0, + "17": 627895296.0, + "18": 627897344.0, + "19": 627950592.0, + "20": 627968000.0, + "21": 627994624.0, + "22": 627895296.0, + "23": 627894272.0, + "24": 627890688.0, + "25": 628020224.0, + "26": 628020736.0, + "27": 628694528.0, + "28": 627783680.0, + "29": 627811840.0, + "30": 627985408.0, + "31": 628240384.0, + "32": 627893248.0, + "33": 627946496.0, + "34": 627910656.0, + "35": 627893248.0, + "36": 627896320.0, + "37": 628513280.0, + "38": 627889152.0, + "39": 627890176.0, + "40": 627921920.0, + "41": 627889664.0, + "42": 627889664.0, + "43": 627888128.0, + "44": 627889664.0, + "45": 627887616.0, + "46": 628792320.0, + "47": 627899904.0, + "48": 627887616.0, + "49": 627982848.0, + "50": 627890176.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1844879360.0, - "2": 2076399104.0, - "3": 2076399104.0, - "4": 2079827456.0, - "5": 2079827456.0, - "6": 2079827456.0, - "7": 2079827456.0, - "8": 2079827456.0, - "9": 2079827456.0, - "10": 2079827456.0, - "11": 2079827456.0, - "12": 2079827456.0, - "13": 2079827456.0, - "14": 2079827456.0, - "15": 2079827456.0, - "16": 2079827456.0, - "17": 2079827456.0, - "18": 2079827456.0, - "19": 2079827456.0, - "20": 2079827456.0, - "21": 2079827456.0, - "22": 2079827456.0, - "23": 2079827456.0, - "24": 2079827456.0, - "25": 2079827456.0, - "26": 2079827456.0, - "27": 2079827456.0, - "28": 2079827456.0, - "29": 2079827456.0, - "30": 2079827456.0, - "31": 2079827456.0, - "32": 2079827456.0, - "33": 2079827456.0, - "34": 2079827456.0, - "35": 2079827456.0, - "36": 2079827456.0, - "37": 2079827456.0, - "38": 2079827456.0, - "39": 2079827456.0, - "40": 2079827456.0, - "41": 2079827456.0, - "42": 2079827456.0, - "43": 2079827456.0, - "44": 2079827456.0, - "45": 2079827456.0, - "46": 2079827456.0, - "47": 2079827456.0, - "48": 2079827456.0, - "49": 2079827456.0, - "50": 2079827456.0 + "1": 1844857856.0, + "2": 2078309888.0, + "3": 2078309888.0, + "4": 2081169920.0, + "5": 2081169920.0, + "6": 2081169920.0, + "7": 2081169920.0, + "8": 2081169920.0, + "9": 2081169920.0, + "10": 2081169920.0, + "11": 2081169920.0, + "12": 2081169920.0, + "13": 2081169920.0, + "14": 2081169920.0, + "15": 2081276416.0, + "16": 2081276416.0, + "17": 2081276416.0, + "18": 2081276416.0, + "19": 2081276416.0, + "20": 2081276416.0, + "21": 2081276416.0, + "22": 2081276416.0, + "23": 2081276416.0, + "24": 2081276416.0, + "25": 2081276416.0, + "26": 2081276416.0, + "27": 2081276416.0, + "28": 2081276416.0, + "29": 2081276416.0, + "30": 2081276416.0, + "31": 2081276416.0, + "32": 2081276416.0, + "33": 2081276416.0, + "34": 2081276416.0, + "35": 2081276416.0, + "36": 2081276416.0, + "37": 2081276416.0, + "38": 2081276416.0, + "39": 2081276416.0, + "40": 2081276416.0, + "41": 2081276416.0, + "42": 2081276416.0, + "43": 2081276416.0, + "44": 2081276416.0, + "45": 2081276416.0, + "46": 2081276416.0, + "47": 2081276416.0, + "48": 2081276416.0, + "49": 2081276416.0, + "50": 2081276416.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 5.62487, - "3": 0.39428, - "4": 0.3711, - "5": 0.36777, - "6": 0.36423, - "7": 0.35408, - "8": 0.35462, - "9": 0.35588, - "10": 0.35204, - "11": 0.35155, - "12": 0.35049, - "13": 0.35818, - "14": 0.35461, - "15": 0.36874, - "16": 0.367, - "17": 0.37423, - "18": 0.36926, - "19": 0.37139, - "20": 0.37109, - "21": 0.37066, - "22": 0.37237, - "23": 0.37636, - "24": 0.37618, - "25": 0.37461, - "26": 0.37622, - "27": 0.37576, - "28": 0.37551, - "29": 0.3765, - "30": 0.3787, - "31": 0.38695, - "32": 0.37235, - "33": 0.37931, - "34": 0.37817, - "35": 0.3749, - "36": 0.37829, - "37": 0.37774, - "38": 0.3755, - "39": 0.37889, - "40": 0.37688, - "41": 0.38007, - "42": 0.37324, - "43": 0.36948, - "44": 0.37523, - "45": 0.37464, - "46": 0.38496, - "47": 0.3737, - "48": 0.37892, - "49": 0.39066, - "50": 0.37612 + "2": 8.65526, + "3": 0.42519, + "4": 0.40037, + "5": 0.39893, + "6": 0.39093, + "7": 0.39009, + "8": 0.39, + "9": 0.39299, + "10": 0.38898, + "11": 0.39153, + "12": 0.38826, + "13": 0.38884, + "14": 0.38722, + "15": 0.38852, + "16": 0.38716, + "17": 0.38566, + "18": 0.39003, + "19": 0.38566, + "20": 0.38156, + "21": 0.38572, + "22": 0.38689, + "23": 0.38747, + "24": 0.3855, + "25": 0.38305, + "26": 0.38467, + "27": 0.38312, + "28": 0.38663, + "29": 0.38453, + "30": 0.38017, + "31": 0.38989, + "32": 0.383, + "33": 0.38673, + "34": 0.38978, + "35": 0.38346, + "36": 0.38743, + "37": 0.38384, + "38": 0.38229, + "39": 0.38945, + "40": 0.3918, + "41": 0.38611, + "42": 0.3826, + "43": 0.38483, + "44": 0.38281, + "45": 0.38601, + "46": 0.3983, + "47": 0.38626, + "48": 0.39207, + "49": 0.39903, + "50": 0.38062 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json index 8928145fcbb..a6938ff40f2 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel/golden_values_dev_dgx_h100.json @@ -6,54 +6,54 @@ "values": { "1": 10.81692, "2": 10.82534, - "3": 10.82401, - "4": 10.79801, - "5": 10.8415, - "6": 10.85912, - "7": 10.81927, - "8": 10.81789, - "9": 10.83554, - "10": 10.78266, - "11": 10.85455, - "12": 10.84582, - "13": 10.84996, - "14": 10.87821, - "15": 10.80684, - "16": 10.80662, - "17": 10.76305, - "18": 10.80188, - "19": 10.79303, - "20": 10.73474, - "21": 10.71067, - "22": 10.57636, - "23": 10.7196, - "24": 10.63305, - "25": 10.56916, - "26": 10.62589, - "27": 10.64466, - "28": 10.60792, - "29": 10.61761, - "30": 10.42214, - "31": 10.17719, - "32": 10.50701, - "33": 10.50561, - "34": 10.27485, - "35": 10.3276, - "36": 10.29275, - "37": 10.40262, - "38": 10.25679, - "39": 10.43615, - "40": 10.16589, - "41": 10.20032, - "42": 10.27424, - "43": 9.93044, - "44": 10.04415, - "45": 9.92936, - "46": 9.89984, - "47": 10.18573, - "48": 9.93082, - "49": 9.6257, - "50": 9.98437 + "3": 10.82425, + "4": 10.79835, + "5": 10.84149, + "6": 10.85958, + "7": 10.81932, + "8": 10.81811, + "9": 10.8359, + "10": 10.78241, + "11": 10.85501, + "12": 10.845, + "13": 10.8499, + "14": 10.87795, + "15": 10.80637, + "16": 10.80682, + "17": 10.76314, + "18": 10.80211, + "19": 10.79309, + "20": 10.73505, + "21": 10.71033, + "22": 10.57666, + "23": 10.71956, + "24": 10.63346, + "25": 10.56915, + "26": 10.62644, + "27": 10.64462, + "28": 10.60798, + "29": 10.61786, + "30": 10.42195, + "31": 10.17755, + "32": 10.50694, + "33": 10.50593, + "34": 10.27534, + "35": 10.32709, + "36": 10.29294, + "37": 10.40264, + "38": 10.25671, + "39": 10.43584, + "40": 10.1662, + "41": 10.20061, + "42": 10.27446, + "43": 9.92989, + "44": 10.04353, + "45": 9.92946, + "46": 9.89981, + "47": 10.18589, + "48": 9.93134, + "49": 9.62589, + "50": 9.98435 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 12899.0, - "2": 14592.0, - "3": 14243.0, - "4": 13886.0, - "5": 15732.0, - "6": 16250.0, - "7": 15453.0, - "8": 13386.0, - "9": 15159.0, - "10": 12804.0, - "11": 16441.0, - "12": 14951.0, - "13": 16151.0, - "14": 16330.0, - "15": 15144.0, - "16": 15588.0, - "17": 15315.0, - "18": 14902.0, - "19": 15436.0, - "20": 13814.0, - "21": 13977.0, - "22": 12814.0, - "23": 16615.0, - "24": 13785.0, - "25": 13451.0, - "26": 14681.0, - "27": 15288.0, - "28": 16290.0, - "29": 16880.0, - "30": 14583.0, - "31": 13272.0, - "32": 15972.0, - "33": 16904.0, - "34": 14406.0, - "35": 14981.0, - "36": 15576.0, - "37": 17584.0, - "38": 16136.0, - "39": 17650.0, - "40": 16506.0, - "41": 16391.0, - "42": 17008.0, - "43": 15459.0, - "44": 15097.0, - "45": 16136.0, - "46": 16845.0, - "47": 19101.0, - "48": 16405.0, - "49": 16558.0, - "50": 18439.0 + "1": 12988.0, + "2": 14581.0, + "3": 14629.0, + "4": 13993.0, + "5": 16038.0, + "6": 16416.0, + "7": 15537.0, + "8": 13088.0, + "9": 14947.0, + "10": 12672.0, + "11": 16430.0, + "12": 15080.0, + "13": 15805.0, + "14": 16282.0, + "15": 15182.0, + "16": 15760.0, + "17": 15427.0, + "18": 15046.0, + "19": 15395.0, + "20": 13537.0, + "21": 13941.0, + "22": 12878.0, + "23": 16723.0, + "24": 14054.0, + "25": 13061.0, + "26": 14780.0, + "27": 15479.0, + "28": 16079.0, + "29": 16571.0, + "30": 14763.0, + "31": 12985.0, + "32": 15925.0, + "33": 16870.0, + "34": 14353.0, + "35": 15089.0, + "36": 15352.0, + "37": 17464.0, + "38": 16285.0, + "39": 17957.0, + "40": 16621.0, + "41": 16454.0, + "42": 16859.0, + "43": 15314.0, + "44": 15307.0, + "45": 16417.0, + "46": 17005.0, + "47": 19138.0, + "48": 16601.0, + "49": 16564.0, + "50": 18745.0 } }, "mem-allocated-bytes": { @@ -118,56 +118,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 625530880.0, - "2": 625483264.0, - "3": 625484800.0, - "4": 625516032.0, - "5": 625759744.0, - "6": 625774592.0, - "7": 625485312.0, - "8": 625568256.0, - "9": 625519616.0, - "10": 625655808.0, - "11": 625630720.0, - "12": 625482240.0, - "13": 625488384.0, - "14": 625819136.0, - "15": 625982976.0, - "16": 625500160.0, - "17": 625613312.0, - "18": 625494016.0, - "19": 625484288.0, - "20": 625508864.0, - "21": 625486336.0, - "22": 625486848.0, - "23": 625632768.0, - "24": 625487872.0, - "25": 625484288.0, - "26": 625753088.0, - "27": 625513984.0, - "28": 625483264.0, - "29": 625698816.0, - "30": 625967104.0, - "31": 625477632.0, - "32": 625523200.0, - "33": 625484288.0, - "34": 625481216.0, - "35": 625479680.0, - "36": 625554432.0, - "37": 625554944.0, - "38": 625487360.0, - "39": 625504768.0, - "40": 625481216.0, - "41": 625481728.0, - "42": 625481728.0, - "43": 626760192.0, - "44": 625598464.0, - "45": 625534464.0, - "46": 625603072.0, - "47": 625509376.0, - "48": 626520576.0, - "49": 625630720.0, - "50": 625565696.0 + "1": 626041856.0, + "2": 626082304.0, + "3": 626342912.0, + "4": 627062784.0, + "5": 626040320.0, + "6": 626118656.0, + "7": 626089984.0, + "8": 626075136.0, + "9": 626074624.0, + "10": 626041856.0, + "11": 626954240.0, + "12": 626144768.0, + "13": 626040832.0, + "14": 626037760.0, + "15": 626042368.0, + "16": 626081280.0, + "17": 626153984.0, + "18": 626056192.0, + "19": 626041344.0, + "20": 626070528.0, + "21": 626043904.0, + "22": 626040832.0, + "23": 626071040.0, + "24": 626407936.0, + "25": 626120704.0, + "26": 626217984.0, + "27": 626289664.0, + "28": 626401280.0, + "29": 626043904.0, + "30": 626057216.0, + "31": 626034688.0, + "32": 626039808.0, + "33": 626041856.0, + "34": 626091520.0, + "35": 626880512.0, + "36": 626065408.0, + "37": 626088448.0, + "38": 626063872.0, + "39": 626053632.0, + "40": 626297344.0, + "41": 626072576.0, + "42": 626039808.0, + "43": 626035200.0, + "44": 626271232.0, + "45": 626083328.0, + "46": 626032128.0, + "47": 626070528.0, + "48": 626230272.0, + "49": 626186752.0, + "50": 626039808.0 } }, "mem-max-allocated-bytes": { @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 1845331456.0, - "2": 2075684352.0, - "3": 2075684352.0, - "4": 2078547456.0, - "5": 2078547456.0, - "6": 2078547456.0, - "7": 2078547456.0, - "8": 2078547456.0, - "9": 2078547456.0, - "10": 2078547456.0, - "11": 2078547456.0, - "12": 2078547456.0, - "13": 2078547456.0, - "14": 2078547456.0, - "15": 2078547456.0, - "16": 2078547456.0, - "17": 2078547456.0, - "18": 2078547456.0, - "19": 2078547456.0, - "20": 2078547456.0, - "21": 2078547456.0, - "22": 2078547456.0, - "23": 2078547456.0, - "24": 2078547456.0, - "25": 2078547456.0, - "26": 2078547456.0, - "27": 2078547456.0, - "28": 2078547456.0, - "29": 2078547456.0, - "30": 2078547456.0, - "31": 2078547456.0, - "32": 2078547456.0, - "33": 2078547456.0, - "34": 2078547456.0, - "35": 2078547456.0, - "36": 2078547456.0, - "37": 2078547456.0, - "38": 2078547456.0, - "39": 2078547456.0, - "40": 2078547456.0, - "41": 2078547456.0, - "42": 2078547456.0, - "43": 2078547456.0, - "44": 2078547456.0, - "45": 2078547456.0, - "46": 2078547456.0, - "47": 2078547456.0, - "48": 2078547456.0, - "49": 2078547456.0, - "50": 2078547456.0 + "1": 1844949504.0, + "2": 2075497984.0, + "3": 2076485632.0, + "4": 2078283776.0, + "5": 2078283776.0, + "6": 2078283776.0, + "7": 2078283776.0, + "8": 2078283776.0, + "9": 2078283776.0, + "10": 2078283776.0, + "11": 2078283776.0, + "12": 2078283776.0, + "13": 2078292480.0, + "14": 2078292480.0, + "15": 2078292480.0, + "16": 2078292480.0, + "17": 2078292480.0, + "18": 2078292480.0, + "19": 2078292480.0, + "20": 2078292480.0, + "21": 2078387200.0, + "22": 2078387200.0, + "23": 2078387200.0, + "24": 2078387200.0, + "25": 2078387200.0, + "26": 2078387200.0, + "27": 2078387200.0, + "28": 2078387200.0, + "29": 2078387200.0, + "30": 2078490624.0, + "31": 2078490624.0, + "32": 2078490624.0, + "33": 2078490624.0, + "34": 2078490624.0, + "35": 2078490624.0, + "36": 2078490624.0, + "37": 2078490624.0, + "38": 2078490624.0, + "39": 2078490624.0, + "40": 2078490624.0, + "41": 2078490624.0, + "42": 2078490624.0, + "43": 2078490624.0, + "44": 2078490624.0, + "45": 2078490624.0, + "46": 2078490624.0, + "47": 2078490624.0, + "48": 2078490624.0, + "49": 2078490624.0, + "50": 2078490624.0 } }, "iteration-time": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 14.22688, - "2": 0.36404, - "3": 0.28777, - "4": 0.27054, - "5": 0.24844, - "6": 0.23753, - "7": 0.2541, - "8": 0.2395, - "9": 0.23675, - "10": 0.23301, - "11": 0.25454, - "12": 0.22665, - "13": 0.23214, - "14": 0.22521, - "15": 0.24748, - "16": 0.2636, - "17": 0.2605, - "18": 0.24164, - "19": 0.24627, - "20": 0.25668, - "21": 0.24329, - "22": 0.24722, - "23": 0.25378, - "24": 0.22642, - "25": 0.22497, - "26": 0.22495, - "27": 0.2239, - "28": 0.22848, - "29": 0.22515, - "30": 0.22501, - "31": 0.27252, - "32": 0.22744, - "33": 0.22453, - "34": 0.23411, - "35": 0.22556, - "36": 0.2278, - "37": 0.22109, - "38": 0.22459, - "39": 0.22077, - "40": 0.22097, - "41": 0.23428, - "42": 0.22009, - "43": 0.23227, - "44": 0.22717, - "45": 0.23445, - "46": 0.23886, - "47": 0.22667, - "48": 0.23204, - "49": 0.27864, - "50": 0.22287 + "1": "nan", + "2": 7.53997, + "3": 0.27369, + "4": 0.2629, + "5": 0.24127, + "6": 0.23879, + "7": 0.2486, + "8": 0.22975, + "9": 0.23492, + "10": 0.22756, + "11": 0.23734, + "12": 0.22618, + "13": 0.23035, + "14": 0.22237, + "15": 0.24274, + "16": 0.22308, + "17": 0.23173, + "18": 0.22258, + "19": 0.22214, + "20": 0.22713, + "21": 0.21893, + "22": 0.22057, + "23": 0.23133, + "24": 0.22415, + "25": 0.22094, + "26": 0.21957, + "27": 0.22081, + "28": 0.21989, + "29": 0.21978, + "30": 0.21904, + "31": 0.26218, + "32": 0.2204, + "33": 0.22048, + "34": 0.2318, + "35": 0.21999, + "36": 0.23031, + "37": 0.21978, + "38": 0.22301, + "39": 0.21796, + "40": 0.22181, + "41": 0.23147, + "42": 0.218, + "43": 0.2244, + "44": 0.22, + "45": 0.22268, + "46": 0.24114, + "47": 0.22482, + "48": 0.22419, + "49": 0.29207, + "50": 0.23757 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json index 8710366a4a2..0bcdf051fe8 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json @@ -6,104 +6,104 @@ "values": { "1": 10.94839, "2": 10.94024, - "3": 10.95902, - "4": 10.9592, - "5": 10.93942, - "6": 10.95284, - "7": 10.95227, - "8": 10.93987, - "9": 10.94518, - "10": 10.94146, - "11": 10.94366, - "12": 10.93351, - "13": 10.92937, - "14": 10.93117, - "15": 10.87714, - "16": 10.88218, - "17": 10.87388, - "18": 10.86829, - "19": 10.86292, - "20": 10.78627, - "21": 10.73278, - "22": 10.62202, - "23": 10.72355, - "24": 10.61784, - "25": 10.54739, - "26": 10.64163, - "27": 10.63354, - "28": 10.59007, - "29": 10.59937, - "30": 10.36921, - "31": 10.1175, - "32": 10.457, - "33": 10.45238, - "34": 10.18943, - "35": 10.24409, - "36": 10.20779, - "37": 10.32099, - "38": 10.17141, - "39": 10.39579, - "40": 10.03318, - "41": 10.08573, - "42": 10.17487, - "43": 9.7274, - "44": 9.88257, - "45": 9.73978, - "46": 9.72104, - "47": 10.08354, - "48": 9.75251, - "49": 9.39373, - "50": 9.83765, - "51": 9.76236, - "52": 9.65444, - "53": 10.01594, + "3": 10.95883, + "4": 10.95945, + "5": 10.93922, + "6": 10.95356, + "7": 10.95188, + "8": 10.94008, + "9": 10.94584, + "10": 10.94155, + "11": 10.94418, + "12": 10.93363, + "13": 10.92884, + "14": 10.93169, + "15": 10.87633, + "16": 10.88177, + "17": 10.87371, + "18": 10.86872, + "19": 10.86381, + "20": 10.7867, + "21": 10.73203, + "22": 10.62158, + "23": 10.72338, + "24": 10.61801, + "25": 10.54715, + "26": 10.64147, + "27": 10.63358, + "28": 10.5896, + "29": 10.59904, + "30": 10.36872, + "31": 10.11579, + "32": 10.45756, + "33": 10.45241, + "34": 10.18915, + "35": 10.2439, + "36": 10.20736, + "37": 10.32093, + "38": 10.17157, + "39": 10.39578, + "40": 10.03339, + "41": 10.08583, + "42": 10.17461, + "43": 9.7269, + "44": 9.88264, + "45": 9.73977, + "46": 9.72083, + "47": 10.08359, + "48": 9.75242, + "49": 9.39338, + "50": 9.83766, + "51": 9.76205, + "52": 9.65359, + "53": 10.01542, "54": 9.86969, - "55": 9.79645, - "56": 9.53492, - "57": 9.365, - "58": 9.75243, - "59": 9.4751, - "60": 9.40362, - "61": 9.59124, - "62": 9.91012, - "63": 9.24082, - "64": 9.67728, - "65": 8.79731, - "66": 9.60544, - "67": 9.24212, - "68": 9.70475, - "69": 9.70741, - "70": 9.65988, - "71": 9.50626, - "72": 9.45834, - "73": 9.38692, - "74": 8.79461, - "75": 9.32175, - "76": 8.92857, - "77": 9.99456, - "78": 9.63104, - "79": 9.26692, - "80": 9.29144, - "81": 9.37768, - "82": 9.60984, - "83": 9.21108, - "84": 9.33667, - "85": 9.52726, - "86": 8.94539, - "87": 9.49937, - "88": 9.67766, - "89": 9.49525, - "90": 9.7509, - "91": 9.22918, - "92": 9.25394, - "93": 8.96194, - "94": 8.69021, - "95": 9.43531, - "96": 9.39886, - "97": 9.19199, - "98": 9.57248, - "99": 8.75688, - "100": 9.2924 + "55": 9.79359, + "56": 9.53523, + "57": 9.36504, + "58": 9.75196, + "59": 9.47483, + "60": 9.40403, + "61": 9.59091, + "62": 9.91004, + "63": 9.24114, + "64": 9.67715, + "65": 8.7955, + "66": 9.60545, + "67": 9.24275, + "68": 9.70506, + "69": 9.70768, + "70": 9.65927, + "71": 9.50665, + "72": 9.45743, + "73": 9.38631, + "74": 8.79319, + "75": 9.32104, + "76": 8.92761, + "77": 9.9944, + "78": 9.63108, + "79": 9.26663, + "80": 9.29169, + "81": 9.37916, + "82": 9.60951, + "83": 9.21123, + "84": 9.3369, + "85": 9.52732, + "86": 8.94678, + "87": 9.49933, + "88": 9.67822, + "89": 9.49501, + "90": 9.75097, + "91": 9.22906, + "92": 9.25338, + "93": 8.96161, + "94": 8.68814, + "95": 9.43597, + "96": 9.3988, + "97": 9.19106, + "98": 9.57241, + "99": 8.7571, + "100": 9.29259 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 22750392.0, - "2": 22953048.0, - "3": 22604332.0, - "4": 23266194.0, - "5": 22735592.0, - "6": 23061740.0, - "7": 22793278.0, - "8": 22960884.0, - "9": 22865532.0, - "10": 22950250.0, - "11": 22499592.0, - "12": 22455936.0, - "13": 22948014.0, - "14": 22384528.0, - "15": 22846172.0, - "16": 22856720.0, - "17": 22836352.0, - "18": 22590198.0, - "19": 22627006.0, - "20": 22712338.0, - "21": 22762590.0, - "22": 22816896.0, - "23": 22545280.0, - "24": 22794372.0, - "25": 22841964.0, - "26": 22549700.0, - "27": 22464724.0, - "28": 22453634.0, - "29": 22534620.0, - "30": 22636106.0, - "31": 22989484.0, - "32": 22593994.0, - "33": 22565948.0, - "34": 22855396.0, - "35": 22813606.0, - "36": 22595412.0, - "37": 22499234.0, - "38": 22926180.0, - "39": 22825052.0, - "40": 22675880.0, - "41": 22671624.0, - "42": 22682188.0, - "43": 23015228.0, - "44": 22766040.0, - "45": 22679588.0, - "46": 22915144.0, - "47": 22642744.0, - "48": 24003236.0, - "49": 23786618.0, - "50": 22931756.0, - "51": 23866290.0, - "52": 23807188.0, - "53": 24007482.0, - "54": 23916892.0, - "55": 23571308.0, - "56": 23954192.0, - "57": 24211600.0, - "58": 23914524.0, - "59": 23771900.0, - "60": 23813638.0, - "61": 23795512.0, - "62": 23739412.0, - "63": 23917700.0, - "64": 23895780.0, - "65": 24147262.0, - "66": 23794750.0, - "67": 23983810.0, - "68": 23674060.0, - "69": 23647770.0, - "70": 23907338.0, - "71": 23818256.0, - "72": 23723392.0, - "73": 22754048.0, - "74": 25181258.0, - "75": 24144968.0, - "76": 23976372.0, - "77": 22260516.0, - "78": 23862138.0, - "79": 23806872.0, - "80": 23773052.0, - "81": 25020468.0, - "82": 22812998.0, - "83": 23911992.0, - "84": 25144028.0, - "85": 22725432.0, - "86": 24205484.0, - "87": 24851672.0, - "88": 23700260.0, - "89": 22505492.0, - "90": 24062928.0, - "91": 22790310.0, - "92": 24923596.0, - "93": 23722104.0, - "94": 23993086.0, - "95": 24140048.0, - "96": 23909352.0, - "97": 23668280.0, - "98": 23832272.0, - "99": 23985032.0, - "100": 24101560.0 + "1": 22750380.0, + "2": 22952988.0, + "3": 22604404.0, + "4": 23266260.0, + "5": 22735464.0, + "6": 23061776.0, + "7": 22793284.0, + "8": 22960752.0, + "9": 22865586.0, + "10": 22950244.0, + "11": 22499508.0, + "12": 22455914.0, + "13": 22948128.0, + "14": 22384492.0, + "15": 22846200.0, + "16": 22856740.0, + "17": 22836348.0, + "18": 22590092.0, + "19": 22626980.0, + "20": 22712346.0, + "21": 22762584.0, + "22": 22816872.0, + "23": 22545200.0, + "24": 22794424.0, + "25": 22841840.0, + "26": 22549600.0, + "27": 22464668.0, + "28": 22453636.0, + "29": 22534564.0, + "30": 22636080.0, + "31": 22989416.0, + "32": 22593964.0, + "33": 22565836.0, + "34": 22855482.0, + "35": 22813636.0, + "36": 22595464.0, + "37": 22499300.0, + "38": 22926232.0, + "39": 22825020.0, + "40": 22675796.0, + "41": 22671596.0, + "42": 22682664.0, + "43": 23014302.0, + "44": 22765298.0, + "45": 22680068.0, + "46": 22911564.0, + "47": 23691886.0, + "48": 24003214.0, + "49": 23786576.0, + "50": 22930692.0, + "51": 23866304.0, + "52": 23807258.0, + "53": 24007416.0, + "54": 23916884.0, + "55": 23571264.0, + "56": 23954262.0, + "57": 24211706.0, + "58": 23914568.0, + "59": 23771788.0, + "60": 23813668.0, + "61": 23795444.0, + "62": 23739602.0, + "63": 23917932.0, + "64": 23895652.0, + "65": 24147024.0, + "66": 23795416.0, + "67": 23983764.0, + "68": 23671776.0, + "69": 23647440.0, + "70": 23907516.0, + "71": 23816848.0, + "72": 23723142.0, + "73": 22754780.0, + "74": 25180576.0, + "75": 24143882.0, + "76": 23976376.0, + "77": 22260324.0, + "78": 23862220.0, + "79": 23806848.0, + "80": 23773316.0, + "81": 25020610.0, + "82": 23858356.0, + "83": 23911948.0, + "84": 25143954.0, + "85": 23774768.0, + "86": 24201996.0, + "87": 23801536.0, + "88": 23701450.0, + "89": 22506106.0, + "90": 23014912.0, + "91": 22789294.0, + "92": 24923556.0, + "93": 23721120.0, + "94": 23993696.0, + "95": 24140284.0, + "96": 23909332.0, + "97": 23668404.0, + "98": 24881320.0, + "99": 23981986.0, + "100": 24101576.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 810140160.0, - "2": 804531200.0, - "3": 804531200.0, - "4": 934860800.0, - "5": 934860800.0, - "6": 934860800.0, - "7": 934860800.0, - "8": 934860800.0, - "9": 938611712.0, - "10": 938104832.0, - "11": 938379264.0, - "12": 934860800.0, - "13": 934860800.0, - "14": 934860800.0, - "15": 934860800.0, - "16": 942249984.0, - "17": 941443072.0, - "18": 937990144.0, - "19": 937548800.0, - "20": 937498624.0, - "21": 934860800.0, - "22": 934860800.0, - "23": 941533184.0, - "24": 942114816.0, - "25": 942398464.0, - "26": 934860800.0, - "27": 934860800.0, - "28": 934860800.0, - "29": 934860800.0, - "30": 934860800.0, - "31": 934860800.0, - "32": 934860800.0, - "33": 934860800.0, - "34": 941477888.0, - "35": 934860800.0, - "36": 934860800.0, - "37": 934860800.0, - "38": 934860800.0, - "39": 934860800.0, - "40": 934860800.0, - "41": 940742656.0, - "42": 940742656.0, - "43": 940742656.0, - "44": 940968960.0, - "45": 941581312.0, - "46": 934860800.0, - "47": 934860800.0, - "48": 940742656.0, - "49": 934860800.0, - "50": 934860800.0, - "51": 934860800.0, - "52": 940742656.0, - "53": 937498624.0, - "54": 937498624.0, - "55": 937498624.0, - "56": 937498624.0, - "57": 938199040.0, - "58": 937498624.0, - "59": 937498624.0, - "60": 940742656.0, - "61": 934860800.0, - "62": 934860800.0, - "63": 934860800.0, - "64": 934860800.0, - "65": 934860800.0, - "66": 934860800.0, - "67": 934860800.0, - "68": 934860800.0, - "69": 934860800.0, - "70": 934860800.0, - "71": 934860800.0, - "72": 934860800.0, - "73": 934860800.0, - "74": 934860800.0, - "75": 934860800.0, - "76": 934860800.0, - "77": 934860800.0, - "78": 934860800.0, - "79": 938199040.0, - "80": 938199040.0, - "81": 937498624.0, - "82": 938061824.0, - "83": 938412032.0, - "84": 937498624.0, - "85": 938199040.0, - "86": 938445824.0, - "87": 937498624.0, - "88": 937498624.0, - "89": 934860800.0, - "90": 934860800.0, - "91": 934860800.0, - "92": 940742656.0, - "93": 940742656.0, - "94": 938104832.0, - "95": 941451264.0, - "96": 940742656.0, - "97": 941542400.0, - "98": 938104832.0, - "99": 940742656.0, - "100": 938104832.0 + "1": 815727104.0, + "2": 804506624.0, + "3": 768707584.0, + "4": 935311360.0, + "5": 938555392.0, + "6": 935311360.0, + "7": 935311360.0, + "8": 938555392.0, + "9": 935311360.0, + "10": 935311360.0, + "11": 935311360.0, + "12": 938380288.0, + "13": 941193216.0, + "14": 935311360.0, + "15": 938637312.0, + "16": 941229056.0, + "17": 935311360.0, + "18": 935311360.0, + "19": 938677248.0, + "20": 935311360.0, + "21": 935311360.0, + "22": 938555392.0, + "23": 935311360.0, + "24": 935311360.0, + "25": 938298368.0, + "26": 942405632.0, + "27": 935311360.0, + "28": 935311360.0, + "29": 941799424.0, + "30": 935311360.0, + "31": 938982400.0, + "32": 941586432.0, + "33": 935311360.0, + "34": 935311360.0, + "35": 941193216.0, + "36": 935311360.0, + "37": 938044416.0, + "38": 935311360.0, + "39": 935311360.0, + "40": 938053632.0, + "41": 941635584.0, + "42": 935311360.0, + "43": 941193216.0, + "44": 935311360.0, + "45": 935311360.0, + "46": 938274816.0, + "47": 935311360.0, + "48": 935311360.0, + "49": 935311360.0, + "50": 938555392.0, + "51": 937949184.0, + "52": 941804544.0, + "53": 935311360.0, + "54": 941193216.0, + "55": 935311360.0, + "56": 935311360.0, + "57": 937949184.0, + "58": 935311360.0, + "59": 935311360.0, + "60": 938555392.0, + "61": 935311360.0, + "62": 937949184.0, + "63": 938555392.0, + "64": 935311360.0, + "65": 935311360.0, + "66": 937949184.0, + "67": 941193216.0, + "68": 935311360.0, + "69": 935311360.0, + "70": 937949184.0, + "71": 941193216.0, + "72": 935311360.0, + "73": 935311360.0, + "74": 935311360.0, + "75": 937949184.0, + "76": 938555392.0, + "77": 935311360.0, + "78": 935311360.0, + "79": 937949184.0, + "80": 941326336.0, + "81": 935311360.0, + "82": 937949184.0, + "83": 941193216.0, + "84": 935311360.0, + "85": 935311360.0, + "86": 937949184.0, + "87": 935311360.0, + "88": 935311360.0, + "89": 937949184.0, + "90": 941193216.0, + "91": 935311360.0, + "92": 935311360.0, + "93": 935311360.0, + "94": 937949184.0, + "95": 941193216.0, + "96": 935311360.0, + "97": 935311360.0, + "98": 938555392.0, + "99": 935311360.0, + "100": 935311360.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 1017976320.0, - "2": 1226964480.0, - "3": 1228012032.0, - "4": 1300063744.0, - "5": 1300063744.0, - "6": 1300223488.0, - "7": 1300891648.0, - "8": 1300891648.0, - "9": 1300891648.0, - "10": 1303292416.0, - "11": 1303292416.0, - "12": 1303292416.0, - "13": 1303292416.0, - "14": 1303292416.0, - "15": 1303292416.0, - "16": 1303292416.0, - "17": 1303292416.0, - "18": 1303292416.0, - "19": 1303292416.0, - "20": 1303292416.0, - "21": 1303292416.0, - "22": 1303292416.0, - "23": 1303292416.0, - "24": 1303292416.0, - "25": 1303292416.0, - "26": 1303292416.0, - "27": 1303292416.0, - "28": 1303292416.0, - "29": 1303292416.0, - "30": 1303292416.0, - "31": 1303292416.0, - "32": 1303292416.0, - "33": 1303292416.0, - "34": 1303292416.0, - "35": 1303292416.0, - "36": 1303292416.0, - "37": 1303292416.0, - "38": 1303292416.0, - "39": 1303292416.0, - "40": 1303292416.0, - "41": 1303292416.0, - "42": 1303292416.0, - "43": 1303292416.0, - "44": 1303292416.0, - "45": 1303292416.0, - "46": 1303292416.0, - "47": 1303292416.0, - "48": 1303292416.0, - "49": 1303292416.0, - "50": 1303292416.0, - "51": 1303292416.0, - "52": 1303292416.0, - "53": 1303292416.0, - "54": 1303292416.0, - "55": 1303292416.0, - "56": 1303292416.0, - "57": 1303292416.0, - "58": 1303292416.0, - "59": 1303292416.0, - "60": 1303292416.0, - "61": 1303292416.0, - "62": 1303292416.0, - "63": 1303292416.0, - "64": 1303292416.0, - "65": 1303292416.0, - "66": 1303292416.0, - "67": 1303292416.0, - "68": 1303292416.0, - "69": 1303292416.0, - "70": 1303292416.0, - "71": 1303292416.0, - "72": 1303292416.0, - "73": 1303292416.0, - "74": 1303292416.0, - "75": 1303292416.0, - "76": 1303292416.0, - "77": 1303292416.0, - "78": 1303292416.0, - "79": 1303292416.0, - "80": 1303292416.0, - "81": 1303292416.0, - "82": 1303292416.0, - "83": 1303292416.0, - "84": 1303292416.0, - "85": 1303292416.0, - "86": 1303292416.0, - "87": 1303292416.0, - "88": 1303292416.0, - "89": 1303292416.0, - "90": 1303292416.0, - "91": 1303292416.0, - "92": 1303292416.0, - "93": 1303292416.0, - "94": 1303292416.0, - "95": 1303292416.0, - "96": 1303292416.0, - "97": 1303292416.0, - "98": 1303292416.0, - "99": 1303292416.0, - "100": 1303292416.0 + "1": 966314496.0, + "2": 1175641088.0, + "3": 1179495424.0, + "4": 1250092544.0, + "5": 1250092544.0, + "6": 1250357248.0, + "7": 1250357248.0, + "8": 1250946048.0, + "9": 1250946048.0, + "10": 1250946048.0, + "11": 1250946048.0, + "12": 1250946048.0, + "13": 1250946048.0, + "14": 1250946048.0, + "15": 1250946048.0, + "16": 1250946048.0, + "17": 1250946048.0, + "18": 1250946048.0, + "19": 1250946048.0, + "20": 1250946048.0, + "21": 1250946048.0, + "22": 1251286016.0, + "23": 1251286016.0, + "24": 1251655168.0, + "25": 1251655168.0, + "26": 1251655168.0, + "27": 1251655168.0, + "28": 1251655168.0, + "29": 1251655168.0, + "30": 1251655168.0, + "31": 1251655168.0, + "32": 1251655168.0, + "33": 1251655168.0, + "34": 1251655168.0, + "35": 1251655168.0, + "36": 1251655168.0, + "37": 1251655168.0, + "38": 1251655168.0, + "39": 1251655168.0, + "40": 1251655168.0, + "41": 1251655168.0, + "42": 1251655168.0, + "43": 1251655168.0, + "44": 1251655168.0, + "45": 1251655168.0, + "46": 1251655168.0, + "47": 1251655168.0, + "48": 1251655168.0, + "49": 1251655168.0, + "50": 1251655168.0, + "51": 1251655168.0, + "52": 1251655168.0, + "53": 1251655168.0, + "54": 1251655168.0, + "55": 1251655168.0, + "56": 1251655168.0, + "57": 1251655168.0, + "58": 1251655168.0, + "59": 1251655168.0, + "60": 1251655168.0, + "61": 1251655168.0, + "62": 1251655168.0, + "63": 1251655168.0, + "64": 1251655168.0, + "65": 1251655168.0, + "66": 1251655168.0, + "67": 1251655168.0, + "68": 1251655168.0, + "69": 1251655168.0, + "70": 1251655168.0, + "71": 1251655168.0, + "72": 1251655168.0, + "73": 1251655168.0, + "74": 1251655168.0, + "75": 1251655168.0, + "76": 1251655168.0, + "77": 1251655168.0, + "78": 1251655168.0, + "79": 1251655168.0, + "80": 1251655168.0, + "81": 1251655168.0, + "82": 1251655168.0, + "83": 1251655168.0, + "84": 1251655168.0, + "85": 1251655168.0, + "86": 1251655168.0, + "87": 1251655168.0, + "88": 1251655168.0, + "89": 1251655168.0, + "90": 1251655168.0, + "91": 1251655168.0, + "92": 1251655168.0, + "93": 1251655168.0, + "94": 1251655168.0, + "95": 1251655168.0, + "96": 1251655168.0, + "97": 1251655168.0, + "98": 1251655168.0, + "99": 1251655168.0, + "100": 1251655168.0 } }, "mtp_1 loss": { @@ -434,104 +434,104 @@ "values": { "1": 10.89507, "2": 10.90521, - "3": 10.90879, - "4": 10.86406, - "5": 10.91765, - "6": 10.92332, - "7": 10.90072, - "8": 10.8906, - "9": 10.90544, - "10": 10.88636, - "11": 10.93328, - "12": 10.91582, - "13": 10.90917, - "14": 10.92294, - "15": 10.89802, - "16": 10.90337, - "17": 10.88446, - "18": 10.90526, - "19": 10.90011, - "20": 10.88775, - "21": 10.88103, - "22": 10.85514, - "23": 10.89267, - "24": 10.87352, - "25": 10.86182, - "26": 10.87152, - "27": 10.88847, - "28": 10.87872, - "29": 10.88744, - "30": 10.87297, - "31": 10.80177, - "32": 10.8732, - "33": 10.88219, - "34": 10.83823, - "35": 10.85291, - "36": 10.84901, - "37": 10.85873, - "38": 10.83148, - "39": 10.86289, - "40": 10.82147, - "41": 10.82913, - "42": 10.84798, - "43": 10.7908, - "44": 10.81431, - "45": 10.7879, - "46": 10.78018, - "47": 10.83142, - "48": 10.78854, - "49": 10.71024, - "50": 10.76861, - "51": 10.76331, - "52": 10.73945, - "53": 10.80126, - "54": 10.77704, - "55": 10.765, - "56": 10.71649, - "57": 10.67368, - "58": 10.75107, - "59": 10.69607, - "60": 10.66236, - "61": 10.69617, - "62": 10.77167, - "63": 10.6134, - "64": 10.70881, - "65": 10.49259, - "66": 10.66843, - "67": 10.58084, - "68": 10.68215, - "69": 10.68669, - "70": 10.67296, + "3": 10.90891, + "4": 10.86405, + "5": 10.91795, + "6": 10.92308, + "7": 10.90092, + "8": 10.8905, + "9": 10.90564, + "10": 10.88627, + "11": 10.93361, + "12": 10.91588, + "13": 10.90939, + "14": 10.9234, + "15": 10.89846, + "16": 10.90396, + "17": 10.88447, + "18": 10.90582, + "19": 10.90022, + "20": 10.88707, + "21": 10.88163, + "22": 10.85466, + "23": 10.89315, + "24": 10.87384, + "25": 10.862, + "26": 10.87173, + "27": 10.88833, + "28": 10.87871, + "29": 10.888, + "30": 10.87291, + "31": 10.80089, + "32": 10.87349, + "33": 10.88255, + "34": 10.83851, + "35": 10.85278, + "36": 10.84955, + "37": 10.85859, + "38": 10.8318, + "39": 10.86269, + "40": 10.82201, + "41": 10.8292, + "42": 10.84807, + "43": 10.79058, + "44": 10.8144, + "45": 10.78763, + "46": 10.77993, + "47": 10.83153, + "48": 10.78853, + "49": 10.7101, + "50": 10.7689, + "51": 10.76353, + "52": 10.7394, + "53": 10.8014, + "54": 10.77742, + "55": 10.76695, + "56": 10.71545, + "57": 10.67279, + "58": 10.75064, + "59": 10.69451, + "60": 10.66153, + "61": 10.69547, + "62": 10.77153, + "63": 10.61306, + "64": 10.70871, + "65": 10.49004, + "66": 10.66861, + "67": 10.58123, + "68": 10.6819, + "69": 10.6866, + "70": 10.6736, "71": 10.64397, - "72": 10.60997, - "73": 10.56734, - "74": 10.38624, - "75": 10.53623, - "76": 10.40297, - "77": 10.75436, - "78": 10.62548, - "79": 10.47858, - "80": 10.47388, - "81": 10.5143, - "82": 10.58579, - "83": 10.43913, - "84": 10.45418, - "85": 10.55042, - "86": 10.27831, - "87": 10.51067, - "88": 10.60469, - "89": 10.5084, - "90": 10.60243, - "91": 10.38487, - "92": 10.38165, - "93": 10.23549, - "94": 10.07844, - "95": 10.42709, - "96": 10.44697, - "97": 10.31686, - "98": 10.4968, - "99": 10.04966, - "100": 10.32944 + "72": 10.6101, + "73": 10.56737, + "74": 10.38578, + "75": 10.53646, + "76": 10.40232, + "77": 10.75426, + "78": 10.62498, + "79": 10.47825, + "80": 10.47326, + "81": 10.51209, + "82": 10.58547, + "83": 10.43904, + "84": 10.45482, + "85": 10.55079, + "86": 10.2791, + "87": 10.51114, + "88": 10.60507, + "89": 10.50871, + "90": 10.60312, + "91": 10.38605, + "92": 10.38222, + "93": 10.2369, + "94": 10.08056, + "95": 10.42693, + "96": 10.44672, + "97": 10.31582, + "98": 10.49658, + "99": 10.04836, + "100": 10.32882 } }, "iteration-time": { @@ -540,105 +540,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 19.93451, - "3": 2.31445, - "4": 5.28856, - "5": 1.09994, - "6": 1.09399, - "7": 1.09697, - "8": 1.09872, - "9": 1.17005, - "10": 1.10071, - "11": 1.0994, - "12": 1.08313, - "13": 1.09364, - "14": 1.09082, - "15": 1.09269, - "16": 1.08133, - "17": 1.08872, - "18": 1.09032, - "19": 1.10458, - "20": 1.10126, - "21": 1.09029, - "22": 1.19723, - "23": 1.36303, - "24": 1.39758, - "25": 1.40863, - "26": 1.40985, - "27": 1.40231, - "28": 1.42816, - "29": 1.37678, - "30": 1.40545, - "31": 1.40841, - "32": 1.40385, - "33": 1.39528, - "34": 1.4028, - "35": 1.41768, - "36": 1.40649, - "37": 1.41813, - "38": 1.40674, - "39": 1.38881, - "40": 1.40998, - "41": 1.37775, - "42": 1.39701, - "43": 1.3967, - "44": 1.40408, - "45": 1.40972, - "46": 1.41116, - "47": 1.40427, - "48": 1.38905, - "49": 1.42541, - "50": 1.40474, - "51": 1.40708, - "52": 1.37484, - "53": 1.38539, - "54": 1.39988, - "55": 1.39635, - "56": 1.41326, - "57": 1.40012, - "58": 1.40599, - "59": 1.41023, - "60": 1.4209, - "61": 1.41702, - "62": 1.40134, - "63": 1.40282, - "64": 1.40573, - "65": 1.41933, - "66": 1.40057, - "67": 1.41526, - "68": 1.40285, - "69": 1.41947, - "70": 1.37747, - "71": 1.41206, - "72": 1.39123, - "73": 1.42381, - "74": 1.40806, - "75": 1.40032, - "76": 1.41783, - "77": 1.39133, - "78": 1.41146, - "79": 1.42648, - "80": 1.40774, - "81": 1.40046, - "82": 1.39158, - "83": 1.4079, - "84": 1.40469, - "85": 1.39689, - "86": 1.41401, - "87": 1.40637, - "88": 1.40569, - "89": 1.45225, - "90": 1.39469, - "91": 1.39677, - "92": 1.39569, - "93": 1.38882, - "94": 1.40133, - "95": 1.41493, - "96": 1.40659, - "97": 1.39059, - "98": 1.40044, - "99": 1.41118, - "100": 1.39159 + "2": 25.3001, + "3": 2.37449, + "4": 6.45526, + "5": 1.25943, + "6": 1.2406, + "7": 1.2439, + "8": 1.23826, + "9": 1.23638, + "10": 1.23645, + "11": 1.28556, + "12": 1.24755, + "13": 1.24428, + "14": 1.23765, + "15": 1.23841, + "16": 1.23701, + "17": 1.23629, + "18": 1.23686, + "19": 1.2385, + "20": 1.24373, + "21": 1.23809, + "22": 1.24616, + "23": 1.23693, + "24": 1.23357, + "25": 1.23233, + "26": 1.23893, + "27": 1.23811, + "28": 1.23473, + "29": 1.23994, + "30": 1.24448, + "31": 1.23919, + "32": 1.23674, + "33": 1.23353, + "34": 1.24176, + "35": 1.2521, + "36": 1.25339, + "37": 1.23958, + "38": 1.2423, + "39": 1.2407, + "40": 1.23768, + "41": 1.23529, + "42": 1.26123, + "43": 1.24517, + "44": 1.22842, + "45": 1.22883, + "46": 1.24531, + "47": 1.23707, + "48": 1.23315, + "49": 1.23288, + "50": 1.23004, + "51": 1.28252, + "52": 1.22628, + "53": 1.22943, + "54": 1.28301, + "55": 1.22868, + "56": 1.23725, + "57": 1.23032, + "58": 1.23924, + "59": 1.24181, + "60": 1.24927, + "61": 1.24821, + "62": 1.25233, + "63": 1.2192, + "64": 1.23724, + "65": 1.24262, + "66": 1.23425, + "67": 1.22341, + "68": 1.22875, + "69": 1.2369, + "70": 1.23637, + "71": 1.22418, + "72": 1.23267, + "73": 1.22945, + "74": 1.22797, + "75": 1.21647, + "76": 1.22687, + "77": 1.23339, + "78": 1.23518, + "79": 1.22748, + "80": 1.24985, + "81": 1.23345, + "82": 1.24687, + "83": 1.23815, + "84": 1.21836, + "85": 1.23899, + "86": 1.23973, + "87": 1.2356, + "88": 1.23475, + "89": 1.23433, + "90": 1.23924, + "91": 1.22203, + "92": 1.22429, + "93": 1.23054, + "94": 1.23476, + "95": 1.23033, + "96": 1.23182, + "97": 1.2322, + "98": 1.2363, + "99": 1.2319, + "100": 1.2316 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index f4a701a2e4d..92f66b60691 100644 --- a/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/t5/t5_11b_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -4,31 +4,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 10.74903, - "2": 10.75924, - "3": 16.15622, - "4": 20.1728, - "5": 11.07413, - "6": 10.29087, - "7": 10.31369, - "8": 10.31557, - "9": 9.68992, - "10": 9.25112, - "11": 9.43376, - "12": 9.8267, - "13": 8.88334, - "14": 8.49023, - "15": 8.79113, - "16": 7.95739, - "17": 7.70005, - "18": 7.81826, - "19": 8.21562, - "20": 8.16452, - "21": 7.833, - "22": 7.71899, - "23": 7.88724, - "24": 7.70093, - "25": 7.78994 + "1": 10.72478, + "2": 10.75508, + "3": 16.36503, + "4": 19.85665, + "5": 11.26807, + "6": 10.26242, + "7": 10.27643, + "8": 10.21919, + "9": 9.67706, + "10": 9.28114, + "11": 9.41509, + "12": 9.80636, + "13": 8.90098, + "14": 8.48799, + "15": 8.79292, + "16": 7.96872, + "17": 7.70291, + "18": 7.79927, + "19": 8.18669, + "20": 8.15261, + "21": 7.82296, + "22": 7.71245, + "23": 7.86818, + "24": 7.65708, + "25": 7.77835 } }, "num-zeros": { @@ -36,31 +36,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 245867.0, - "2": 256817.0, - "3": 248438.0, - "4": 233541.0, - "5": 251594.0, - "6": 259588.0, - "7": 256938.0, - "8": 237612.0, - "9": 241154.0, - "10": 252461.0, - "11": 288146.0, - "12": 248712.0, - "13": 241371.0, - "14": 228365.0, - "15": 261948.0, - "16": 237032.0, - "17": 249760.0, - "18": 251590.0, - "19": 257104.0, - "20": 248292.0, - "21": 231805.0, - "22": 223805.0, - "23": 247959.0, - "24": 250798.0, - "25": 237325.0 + "1": 245877.0, + "2": 256678.0, + "3": 248449.0, + "4": 233627.0, + "5": 251461.0, + "6": 259612.0, + "7": 256977.0, + "8": 237781.0, + "9": 241225.0, + "10": 252449.0, + "11": 288363.0, + "12": 248683.0, + "13": 241476.0, + "14": 228497.0, + "15": 262029.0, + "16": 236949.0, + "17": 249661.0, + "18": 251680.0, + "19": 257143.0, + "20": 248220.0, + "21": 231800.0, + "22": 223145.0, + "23": 247912.0, + "24": 250781.0, + "25": 238528.0 } }, "mem-allocated-bytes": { @@ -101,30 +101,30 @@ "step_interval": 1, "values": { "1": 40735715328.0, - "2": 44991991808.0, - "3": 44993564672.0, - "4": 44993564672.0, - "5": 44993564672.0, - "6": 44993564672.0, - "7": 44993564672.0, - "8": 44993564672.0, - "9": 44993564672.0, - "10": 44993564672.0, - "11": 44993564672.0, - "12": 44993564672.0, - "13": 44993564672.0, - "14": 44993564672.0, - "15": 44993564672.0, - "16": 44993564672.0, - "17": 44993564672.0, - "18": 44993564672.0, - "19": 44993564672.0, - "20": 44993564672.0, - "21": 44993564672.0, - "22": 44993564672.0, - "23": 44993564672.0, - "24": 44993564672.0, - "25": 44993564672.0 + "2": 44989894656.0, + "3": 44989894656.0, + "4": 44989894656.0, + "5": 44989894656.0, + "6": 44989894656.0, + "7": 44989894656.0, + "8": 44989894656.0, + "9": 44989894656.0, + "10": 44989894656.0, + "11": 44989894656.0, + "12": 44989894656.0, + "13": 44989894656.0, + "14": 44989894656.0, + "15": 44989894656.0, + "16": 44989894656.0, + "17": 44989894656.0, + "18": 44989894656.0, + "19": 44989894656.0, + "20": 44989894656.0, + "21": 44989894656.0, + "22": 44989894656.0, + "23": 44989894656.0, + "24": 44989894656.0, + "25": 44989894656.0 } }, "iteration-time": { @@ -132,31 +132,31 @@ "end_step": 25, "step_interval": 1, "values": { - "1": 25.74522, - "2": 0.73559, - "3": 0.40581, - "4": 0.38308, - "5": 0.37606, - "6": 0.37631, - "7": 0.39269, - "8": 0.37902, - "9": 0.37764, - "10": 0.8554, - "11": 0.95952, - "12": 0.37861, - "13": 0.38954, - "14": 0.42497, - "15": 0.37698, - "16": 0.37629, - "17": 0.37835, - "18": 0.3766, - "19": 0.37494, - "20": 0.42005, - "21": 0.38011, - "22": 0.37713, - "23": 0.37617, - "24": 0.37515, - "25": 0.37401 + "1": "nan", + "2": 6.67745, + "3": 0.39735, + "4": 0.37954, + "5": 0.37756, + "6": 0.38019, + "7": 0.37708, + "8": 0.37784, + "9": 0.37652, + "10": 0.379, + "11": 0.37836, + "12": 0.38173, + "13": 0.37692, + "14": 0.37886, + "15": 0.37923, + "16": 0.37855, + "17": 0.3788, + "18": 0.37899, + "19": 0.37732, + "20": 0.37813, + "21": 0.37908, + "22": 0.38047, + "23": 0.38247, + "24": 0.38016, + "25": 0.37866 } } } \ No newline at end of file From e1e68de2c7d2078bc9e145ceed16e5ad707e81cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 4 Mar 2026 09:32:23 +0000 Subject: [PATCH 08/12] bump to 26.02 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .gitlab/stages/01.build.yml | 4 ++-- docker/.ngc_version.dev | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab/stages/01.build.yml b/.gitlab/stages/01.build.yml index 498041e5646..f5a80c4074d 100644 --- a/.gitlab/stages/01.build.yml +++ b/.gitlab/stages/01.build.yml @@ -64,12 +64,12 @@ test:pre_build_image: - IMAGE: CI_MCORE_DEV_IMAGE FILE: Dockerfile.ci.dev IMAGE_TYPE: dev - BASE_IMAGE: nvcr.io/nvidia/pytorch:26.01-py3 + BASE_IMAGE: nvcr.io/nvidia/pytorch:26.02-py3 PLATFORM: amd64 - IMAGE: CI_MCORE_DEV_IMAGE FILE: Dockerfile.ci.dev IMAGE_TYPE: dev - BASE_IMAGE: nvcr.io/nvidia/pytorch:26.01-py3 + BASE_IMAGE: nvcr.io/nvidia/pytorch:26.02-py3 PLATFORM: arm64 - IMAGE: UTILITY_IMAGE FILE: Dockerfile.linting diff --git a/docker/.ngc_version.dev b/docker/.ngc_version.dev index 3efd88dbe34..2c33440d4e2 100644 --- a/docker/.ngc_version.dev +++ b/docker/.ngc_version.dev @@ -1 +1 @@ -nvcr.io/nvidia/pytorch:26.01-py3 \ No newline at end of file +nvcr.io/nvidia/pytorch:26.02-py3 \ No newline at end of file From 49b244f5230015c00f3814dce4e26eadfc8213ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 4 Mar 2026 11:45:58 +0000 Subject: [PATCH 09/12] update nightlies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 372 ++--- .../golden_values_dev_dgx_h100.json | 398 +++--- .../golden_values_dev_dgx_h100.json | 362 ++--- .../golden_values_dev_dgx_h100.json | 340 ++--- .../golden_values_dev_dgx_h100.json | 792 +++++------ .../golden_values_dev_dgx_h100_2nd.json | 500 +++---- .../golden_values_dev_dgx_h100.json | 614 ++++----- .../golden_values_dev_dgx_h100_2nd.json | 500 +++---- .../golden_values_dev_dgx_h100.json | 296 ++-- .../golden_values_dev_dgx_gb200.json | 1192 ++++++++--------- 10 files changed, 2683 insertions(+), 2683 deletions(-) diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json index 403d1fe1253..3e66eee8511 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.60763, - "2": 10.59402, - "3": 10.60795, - "4": 10.59589, - "5": 10.60931, - "6": 10.59415, - "7": 10.58965, - "8": 10.59915, - "9": 10.59851, - "10": 10.5837, - "11": 10.58821, - "12": 10.58578, - "13": 10.5893, - "14": 10.58439, - "15": 10.594, - "16": 10.57546, - "17": 10.56566, - "18": 10.57411, - "19": 10.56959, - "20": 10.56742, - "21": 10.55501, - "22": 10.50754, - "23": 10.4905, - "24": 10.48088, - "25": 10.46596, - "26": 10.47202, - "27": 10.46396, - "28": 10.45992, - "29": 10.41203, - "30": 10.31839, - "31": 10.27417, - "32": 10.2348, - "33": 10.24377, - "34": 10.18839, - "35": 10.21156, - "36": 10.17092, - "37": 10.15115, - "38": 10.13027, - "39": 10.09771, - "40": 10.05356, - "41": 9.9979, - "42": 9.94057, - "43": 9.92471, - "44": 9.87692, - "45": 9.85234, - "46": 9.79379, - "47": 9.78024, - "48": 9.75534, - "49": 9.79726, - "50": 9.75564 + "1": 10.6058, + "2": 10.59439, + "3": 10.60666, + "4": 10.59694, + "5": 10.60801, + "6": 10.59385, + "7": 10.59024, + "8": 10.59823, + "9": 10.59943, + "10": 10.58255, + "11": 10.58702, + "12": 10.58576, + "13": 10.58784, + "14": 10.58378, + "15": 10.59588, + "16": 10.57567, + "17": 10.56579, + "18": 10.57285, + "19": 10.57119, + "20": 10.56957, + "21": 10.55592, + "22": 10.5062, + "23": 10.49068, + "24": 10.48236, + "25": 10.4659, + "26": 10.4721, + "27": 10.46507, + "28": 10.4612, + "29": 10.41239, + "30": 10.31803, + "31": 10.27411, + "32": 10.23313, + "33": 10.24369, + "34": 10.18811, + "35": 10.21171, + "36": 10.17156, + "37": 10.15188, + "38": 10.12924, + "39": 10.09758, + "40": 10.05346, + "41": 9.99651, + "42": 9.93927, + "43": 9.92391, + "44": 9.87838, + "45": 9.85224, + "46": 9.79483, + "47": 9.77927, + "48": 9.75556, + "49": 9.7976, + "50": 9.75512 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2282.0, - "2": 2715.0, - "3": 2495.0, - "4": 2373.0, - "5": 2854.0, - "6": 2551.0, - "7": 1613.0, - "8": 2242.0, - "9": 2185.0, - "10": 2245.0, - "11": 2404.0, - "12": 2675.0, - "13": 2384.0, - "14": 2122.0, - "15": 2765.0, - "16": 2433.0, - "17": 2643.0, - "18": 2686.0, - "19": 2739.0, - "20": 2298.0, - "21": 2297.0, - "22": 2666.0, - "23": 2414.0, - "24": 2554.0, - "25": 2425.0, - "26": 2476.0, - "27": 2675.0, - "28": 2528.0, - "29": 2604.0, - "30": 2574.0, - "31": 3120.0, - "32": 2892.0, - "33": 2709.0, - "34": 2790.0, - "35": 3081.0, - "36": 3174.0, - "37": 3030.0, - "38": 2712.0, - "39": 2550.0, - "40": 2185.0, - "41": 3638.0, - "42": 3606.0, - "43": 3383.0, - "44": 3558.0, - "45": 3606.0, - "46": 2694.0, - "47": 2103.0, - "48": 3161.0, - "49": 3268.0, - "50": 3644.0 + "1": 2164.0, + "2": 2598.0, + "3": 2397.0, + "4": 2435.0, + "5": 2760.0, + "6": 2574.0, + "7": 1590.0, + "8": 2259.0, + "9": 2191.0, + "10": 2314.0, + "11": 2323.0, + "12": 2752.0, + "13": 2379.0, + "14": 2067.0, + "15": 2764.0, + "16": 2387.0, + "17": 2585.0, + "18": 2590.0, + "19": 2717.0, + "20": 2296.0, + "21": 2367.0, + "22": 2651.0, + "23": 2494.0, + "24": 2477.0, + "25": 2363.0, + "26": 2418.0, + "27": 2726.0, + "28": 2441.0, + "29": 2578.0, + "30": 2578.0, + "31": 3162.0, + "32": 2876.0, + "33": 2668.0, + "34": 2746.0, + "35": 2986.0, + "36": 3165.0, + "37": 2960.0, + "38": 2576.0, + "39": 2493.0, + "40": 2187.0, + "41": 3548.0, + "42": 3480.0, + "43": 3559.0, + "44": 3629.0, + "45": 3713.0, + "46": 2753.0, + "47": 2111.0, + "48": 3199.0, + "49": 3266.0, + "50": 3649.0 } }, "mem-allocated-bytes": { @@ -119,52 +119,52 @@ "step_interval": 1, "values": { "1": 3434522112.0, - "2": 3435308544.0, + "2": 3434522112.0, "3": 3435308544.0, - "4": 3435308544.0, + "4": 3434522112.0, "5": 3435308544.0, - "6": 3435308544.0, + "6": 3434522112.0, "7": 3434522112.0, "8": 3434522112.0, "9": 3434522112.0, - "10": 3435308544.0, - "11": 3434522112.0, + "10": 3434522112.0, + "11": 3435308544.0, "12": 3434522112.0, - "13": 3434522112.0, + "13": 3435308544.0, "14": 3434522112.0, "15": 3435308544.0, - "16": 3435308544.0, + "16": 3434522112.0, "17": 3435308544.0, - "18": 3434522112.0, + "18": 3435308544.0, "19": 3435308544.0, "20": 3435308544.0, - "21": 3435308544.0, - "22": 3434522112.0, + "21": 3434522112.0, + "22": 3435308544.0, "23": 3435308544.0, - "24": 3434522112.0, + "24": 3435308544.0, "25": 3435308544.0, - "26": 3434522112.0, - "27": 3434522112.0, - "28": 3434522112.0, + "26": 3435308544.0, + "27": 3435308544.0, + "28": 3435308544.0, "29": 3434522112.0, "30": 3435308544.0, - "31": 3434522112.0, - "32": 3435308544.0, + "31": 3435308544.0, + "32": 3434522112.0, "33": 3434522112.0, - "34": 3435308544.0, + "34": 3434522112.0, "35": 3435308544.0, "36": 3434522112.0, - "37": 3434522112.0, - "38": 3434522112.0, - "39": 3434522112.0, - "40": 3434522112.0, - "41": 3435308544.0, + "37": 3435308544.0, + "38": 3435308544.0, + "39": 3435308544.0, + "40": 3435308544.0, + "41": 3434522112.0, "42": 3435308544.0, "43": 3435308544.0, "44": 3434522112.0, - "45": 3434522112.0, + "45": 3435308544.0, "46": 3435308544.0, - "47": 3435308544.0, + "47": 3434522112.0, "48": 3434522112.0, "49": 3434522112.0, "50": 3434522112.0 @@ -176,14 +176,14 @@ "step_interval": 1, "values": { "1": 4230456320.0, - "2": 5708704256.0, - "3": 5708704256.0, - "4": 5708704256.0, + "2": 5708442112.0, + "3": 5708442624.0, + "4": 5708702208.0, "5": 5708704256.0, "6": 5708704256.0, - "7": 5708704768.0, - "8": 5708704768.0, - "9": 5708704768.0, + "7": 5708704256.0, + "8": 5708704256.0, + "9": 5708704256.0, "10": 5708704768.0, "11": 5708704768.0, "12": 5708704768.0, @@ -215,13 +215,13 @@ "38": 5708704768.0, "39": 5708704768.0, "40": 5708704768.0, - "41": 5708704768.0, - "42": 5708704768.0, - "43": 5709227520.0, - "44": 5709227520.0, - "45": 5709227520.0, - "46": 5709227520.0, - "47": 5709227520.0, + "41": 5709229056.0, + "42": 5709229056.0, + "43": 5709229056.0, + "44": 5709229056.0, + "45": 5709229056.0, + "46": 5709229056.0, + "47": 5709229056.0, "48": 5709229056.0, "49": 5709229056.0, "50": 5709229056.0 @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 5.98593, - "3": 0.46994, - "4": 0.44538, - "5": 0.44329, - "6": 0.45802, - "7": 0.44298, - "8": 0.43592, - "9": 0.43535, - "10": 0.43911, - "11": 0.43744, - "12": 0.43512, - "13": 0.43522, - "14": 0.43513, - "15": 0.43723, - "16": 0.43945, - "17": 0.43667, - "18": 0.43792, - "19": 0.43525, - "20": 0.43711, - "21": 0.4375, - "22": 0.43715, - "23": 0.43657, - "24": 0.43624, - "25": 0.43653, - "26": 0.43652, - "27": 0.43638, - "28": 0.43842, - "29": 0.43702, - "30": 0.43458, - "31": 0.43557, - "32": 0.43539, - "33": 0.43697, - "34": 0.43601, - "35": 0.43849, - "36": 0.70822, - "37": 0.69532, - "38": 0.43677, - "39": 0.74727, - "40": 0.4357, - "41": 0.46813, - "42": 0.88067, - "43": 0.89866, - "44": 0.43802, - "45": 0.85415, - "46": 0.45011, - "47": 0.44994, - "48": 0.72982, - "49": 0.66393, - "50": 0.46635 + "2": 5.67735, + "3": 0.46538, + "4": 0.44317, + "5": 0.45285, + "6": 0.44518, + "7": 0.44638, + "8": 0.44504, + "9": 0.44399, + "10": 0.46034, + "11": 0.44765, + "12": 0.45862, + "13": 0.45283, + "14": 0.44416, + "15": 0.45982, + "16": 0.45122, + "17": 0.45378, + "18": 0.46228, + "19": 0.47453, + "20": 0.47019, + "21": 0.47211, + "22": 0.47214, + "23": 0.46966, + "24": 0.4777, + "25": 0.47444, + "26": 0.47352, + "27": 0.47688, + "28": 0.46351, + "29": 0.46755, + "30": 0.47715, + "31": 0.47888, + "32": 0.47071, + "33": 0.47276, + "34": 0.46642, + "35": 0.46047, + "36": 0.77366, + "37": 0.46374, + "38": 0.69893, + "39": 0.463, + "40": 0.46301, + "41": 0.49028, + "42": 0.4547, + "43": 0.45453, + "44": 0.44383, + "45": 0.4536, + "46": 0.69663, + "47": 0.4428, + "48": 0.8495, + "49": 0.44224, + "50": 0.44077 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json index 78cfbf66189..8de0aa83fd4 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp1_pp4_vp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.50326, - "2": 10.50525, - "3": 10.50338, - "4": 10.50978, - "5": 10.49527, - "6": 10.5069, - "7": 10.50655, - "8": 10.49761, - "9": 10.50205, - "10": 10.51242, - "11": 10.51237, - "12": 10.50319, - "13": 10.50028, - "14": 10.49326, - "15": 10.497, - "16": 10.46708, - "17": 10.46988, - "18": 10.4867, - "19": 10.47169, - "20": 10.47363, - "21": 10.45966, - "22": 10.41973, - "23": 10.41804, - "24": 10.41547, - "25": 10.37511, - "26": 10.38999, - "27": 10.35241, - "28": 10.37465, - "29": 10.32145, - "30": 10.22366, - "31": 10.17929, - "32": 10.15937, - "33": 10.16243, - "34": 10.1286, - "35": 10.10432, - "36": 10.08333, - "37": 10.076, - "38": 10.08394, - "39": 10.02227, - "40": 9.98037, - "41": 9.93077, - "42": 9.87377, - "43": 9.87387, - "44": 9.83581, - "45": 9.81097, - "46": 9.74786, - "47": 9.73156, - "48": 9.71596, - "49": 9.7651, - "50": 9.72689 + "1": 10.50482, + "2": 10.50404, + "3": 10.50335, + "4": 10.51055, + "5": 10.49447, + "6": 10.50588, + "7": 10.50579, + "8": 10.49765, + "9": 10.50309, + "10": 10.51146, + "11": 10.5116, + "12": 10.50313, + "13": 10.49975, + "14": 10.49371, + "15": 10.49863, + "16": 10.46726, + "17": 10.47291, + "18": 10.48815, + "19": 10.47125, + "20": 10.47433, + "21": 10.45812, + "22": 10.42243, + "23": 10.41688, + "24": 10.415, + "25": 10.37598, + "26": 10.38949, + "27": 10.35236, + "28": 10.37411, + "29": 10.32166, + "30": 10.22264, + "31": 10.17961, + "32": 10.15824, + "33": 10.16353, + "34": 10.12996, + "35": 10.10316, + "36": 10.08378, + "37": 10.07513, + "38": 10.08249, + "39": 10.02305, + "40": 9.97736, + "41": 9.93166, + "42": 9.87391, + "43": 9.87469, + "44": 9.83745, + "45": 9.8109, + "46": 9.74766, + "47": 9.73199, + "48": 9.71587, + "49": 9.76612, + "50": 9.72673 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2491.0, - "2": 1900.0, - "3": 1708.0, - "4": 2380.0, - "5": 2253.0, - "6": 1958.0, - "7": 2398.0, - "8": 2384.0, - "9": 2206.0, - "10": 2330.0, - "11": 2833.0, - "12": 1956.0, - "13": 2178.0, - "14": 2805.0, - "15": 2182.0, - "16": 2715.0, - "17": 2621.0, - "18": 2629.0, - "19": 2501.0, - "20": 2632.0, - "21": 2047.0, - "22": 2655.0, - "23": 2487.0, - "24": 2912.0, - "25": 2624.0, - "26": 2640.0, - "27": 2746.0, - "28": 2787.0, - "29": 2569.0, - "30": 3013.0, - "31": 2385.0, - "32": 2773.0, - "33": 2477.0, - "34": 2594.0, - "35": 2823.0, - "36": 3033.0, - "37": 3108.0, - "38": 2842.0, - "39": 2647.0, - "40": 3551.0, - "41": 1852.0, - "42": 1566.0, - "43": 1733.0, - "44": 3130.0, - "45": 3843.0, - "46": 3331.0, - "47": 2862.0, - "48": 3103.0, - "49": 2820.0, - "50": 2198.0 + "1": 2506.0, + "2": 1849.0, + "3": 1757.0, + "4": 2346.0, + "5": 2174.0, + "6": 1956.0, + "7": 2422.0, + "8": 2334.0, + "9": 2295.0, + "10": 2272.0, + "11": 2686.0, + "12": 2029.0, + "13": 2184.0, + "14": 2776.0, + "15": 2217.0, + "16": 2711.0, + "17": 2660.0, + "18": 2725.0, + "19": 2491.0, + "20": 2552.0, + "21": 2109.0, + "22": 2579.0, + "23": 2652.0, + "24": 2857.0, + "25": 2631.0, + "26": 2667.0, + "27": 2807.0, + "28": 2878.0, + "29": 2502.0, + "30": 2942.0, + "31": 2461.0, + "32": 2807.0, + "33": 2440.0, + "34": 2620.0, + "35": 2635.0, + "36": 3128.0, + "37": 3145.0, + "38": 2873.0, + "39": 2718.0, + "40": 3464.0, + "41": 1848.0, + "42": 1575.0, + "43": 1705.0, + "44": 3092.0, + "45": 3737.0, + "46": 3190.0, + "47": 2792.0, + "48": 3198.0, + "49": 2825.0, + "50": 2268.0 } }, "mem-allocated-bytes": { @@ -148,7 +148,7 @@ "28": 2091169280.0, "29": 2091169280.0, "30": 2091169280.0, - "31": 2091693568.0, + "31": 2091169280.0, "32": 2091169280.0, "33": 2091169280.0, "34": 2091169280.0, @@ -175,56 +175,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 4421348864.0, + "1": 4421607424.0, "2": 5294192128.0, - "3": 5294716416.0, - "4": 5294716416.0, - "5": 5294716416.0, - "6": 5294716416.0, - "7": 5294716416.0, - "8": 5294716416.0, - "9": 5294716416.0, - "10": 5294716416.0, - "11": 5294716416.0, - "12": 5294716416.0, - "13": 5294716416.0, - "14": 5294716416.0, - "15": 5294716416.0, - "16": 5294716416.0, - "17": 5294716416.0, - "18": 5294716416.0, - "19": 5294716416.0, - "20": 5294716416.0, - "21": 5294716416.0, - "22": 5294716416.0, - "23": 5294716416.0, - "24": 5294716416.0, - "25": 5294716416.0, - "26": 5294716416.0, - "27": 5294716416.0, - "28": 5294716416.0, - "29": 5294716416.0, - "30": 5294716416.0, - "31": 5294716416.0, - "32": 5294716416.0, - "33": 5294716416.0, - "34": 5294716416.0, - "35": 5294716416.0, - "36": 5294716416.0, - "37": 5294716416.0, - "38": 5294716416.0, - "39": 5294716416.0, - "40": 5294716416.0, - "41": 5294716416.0, - "42": 5294716416.0, - "43": 5294716416.0, - "44": 5294716416.0, - "45": 5294716416.0, - "46": 5294716416.0, - "47": 5294716416.0, - "48": 5294716416.0, - "49": 5294716416.0, - "50": 5294716416.0 + "3": 5294972416.0, + "4": 5294972416.0, + "5": 5294972416.0, + "6": 5294972416.0, + "7": 5294972416.0, + "8": 5294972416.0, + "9": 5294972416.0, + "10": 5294972416.0, + "11": 5294972416.0, + "12": 5294972416.0, + "13": 5294972416.0, + "14": 5294974976.0, + "15": 5294974976.0, + "16": 5294974976.0, + "17": 5294974976.0, + "18": 5294974976.0, + "19": 5294974976.0, + "20": 5294974976.0, + "21": 5294974976.0, + "22": 5294974976.0, + "23": 5294974976.0, + "24": 5294974976.0, + "25": 5294974976.0, + "26": 5294974976.0, + "27": 5294974976.0, + "28": 5294974976.0, + "29": 5294974976.0, + "30": 5294974976.0, + "31": 5294974976.0, + "32": 5294974976.0, + "33": 5294974976.0, + "34": 5294974976.0, + "35": 5294974976.0, + "36": 5294974976.0, + "37": 5294974976.0, + "38": 5294974976.0, + "39": 5294974976.0, + "40": 5294974976.0, + "41": 5294974976.0, + "42": 5294974976.0, + "43": 5294974976.0, + "44": 5294974976.0, + "45": 5294974976.0, + "46": 5294974976.0, + "47": 5294974976.0, + "48": 5294974976.0, + "49": 5294974976.0, + "50": 5294974976.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 8.01999, - "3": 0.56866, - "4": 0.53972, - "5": 0.54716, - "6": 0.55645, - "7": 0.54019, - "8": 0.54218, - "9": 0.55986, - "10": 0.56452, - "11": 0.5598, - "12": 0.53842, - "13": 0.55086, - "14": 0.54615, - "15": 0.56416, - "16": 0.56092, - "17": 1.0193, - "18": 1.23706, - "19": 1.271, - "20": 1.26275, - "21": 1.76427, - "22": 1.84856, - "23": 1.41201, - "24": 0.92392, - "25": 0.54441, - "26": 0.53908, - "27": 0.5709, - "28": 0.5559, - "29": 0.56284, - "30": 0.53843, - "31": 0.54401, - "32": 0.52817, - "33": 0.54638, - "34": 0.57179, - "35": 0.58599, - "36": 0.56335, - "37": 0.56457, - "38": 0.55358, - "39": 0.5496, - "40": 0.55457, - "41": 0.57059, - "42": 0.53866, - "43": 0.55125, - "44": 0.55925, - "45": 0.54021, - "46": 1.06149, - "47": 0.84305, - "48": 1.10614, - "49": 1.45848, - "50": 1.12052 + "2": 9.5335, + "3": 1.30837, + "4": 1.62237, + "5": 0.5831, + "6": 0.55361, + "7": 0.55652, + "8": 0.56899, + "9": 0.54724, + "10": 0.57357, + "11": 0.55773, + "12": 0.56429, + "13": 0.58955, + "14": 0.55646, + "15": 0.55011, + "16": 0.5672, + "17": 0.57005, + "18": 0.58104, + "19": 0.56942, + "20": 0.57257, + "21": 0.94199, + "22": 0.9596, + "23": 1.90344, + "24": 1.67005, + "25": 1.36424, + "26": 0.95329, + "27": 0.58604, + "28": 0.60973, + "29": 0.80315, + "30": 1.15752, + "31": 1.23267, + "32": 1.08008, + "33": 0.79288, + "34": 0.86693, + "35": 1.039, + "36": 0.55509, + "37": 0.57816, + "38": 0.56698, + "39": 0.54523, + "40": 0.54733, + "41": 0.55193, + "42": 0.55812, + "43": 0.54191, + "44": 0.56678, + "45": 0.56852, + "46": 0.55367, + "47": 0.54988, + "48": 0.53628, + "49": 0.5317, + "50": 0.82851 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json index 48744b5c021..62565f6e1a9 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.52605, - "2": 10.54515, - "3": 10.53013, - "4": 10.54119, - "5": 10.53738, - "6": 10.54198, - "7": 10.54842, - "8": 10.54207, - "9": 10.53241, - "10": 10.52878, - "11": 10.53233, - "12": 10.53794, - "13": 10.54081, - "14": 10.52465, - "15": 10.52208, - "16": 10.50333, - "17": 10.51348, - "18": 10.50677, - "19": 10.50509, - "20": 10.50927, - "21": 10.50899, - "22": 10.44601, - "23": 10.43118, - "24": 10.44177, - "25": 10.40785, - "26": 10.41558, - "27": 10.39996, - "28": 10.40992, - "29": 10.36533, - "30": 10.28201, - "31": 10.23061, - "32": 10.20558, - "33": 10.21632, - "34": 10.17137, - "35": 10.14581, - "36": 10.1275, - "37": 10.11512, - "38": 10.11943, - "39": 10.0799, - "40": 10.01106, - "41": 9.96694, - "42": 9.92772, - "43": 9.92016, - "44": 9.86405, - "45": 9.83804, - "46": 9.77979, - "47": 9.77207, - "48": 9.7488, - "49": 9.77845, - "50": 9.75235 + "1": 10.52645, + "2": 10.54456, + "3": 10.53193, + "4": 10.54022, + "5": 10.53535, + "6": 10.54151, + "7": 10.5484, + "8": 10.54184, + "9": 10.53228, + "10": 10.52935, + "11": 10.53017, + "12": 10.53822, + "13": 10.53952, + "14": 10.5246, + "15": 10.52341, + "16": 10.50474, + "17": 10.50999, + "18": 10.50779, + "19": 10.50524, + "20": 10.50897, + "21": 10.50843, + "22": 10.44624, + "23": 10.43293, + "24": 10.4404, + "25": 10.40743, + "26": 10.41491, + "27": 10.39907, + "28": 10.41139, + "29": 10.36479, + "30": 10.28278, + "31": 10.23063, + "32": 10.20397, + "33": 10.21432, + "34": 10.16982, + "35": 10.14699, + "36": 10.12773, + "37": 10.11576, + "38": 10.11765, + "39": 10.0794, + "40": 10.01113, + "41": 9.96704, + "42": 9.92741, + "43": 9.91953, + "44": 9.86314, + "45": 9.83745, + "46": 9.77938, + "47": 9.77244, + "48": 9.74908, + "49": 9.77818, + "50": 9.75284 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2665.0, - "2": 2118.0, - "3": 1770.0, - "4": 2558.0, - "5": 2317.0, - "6": 2052.0, - "7": 2434.0, - "8": 2608.0, - "9": 2485.0, - "10": 2440.0, - "11": 2673.0, - "12": 2025.0, - "13": 2299.0, - "14": 2842.0, - "15": 2136.0, - "16": 2721.0, - "17": 2652.0, - "18": 2538.0, - "19": 2662.0, - "20": 2845.0, - "21": 2242.0, - "22": 2745.0, - "23": 2672.0, - "24": 2739.0, - "25": 2663.0, - "26": 2756.0, - "27": 2696.0, - "28": 2775.0, - "29": 2509.0, - "30": 2967.0, - "31": 2508.0, - "32": 2862.0, - "33": 2542.0, - "34": 2619.0, - "35": 2758.0, - "36": 3043.0, - "37": 3327.0, - "38": 2725.0, - "39": 2853.0, - "40": 3511.0, - "41": 1709.0, - "42": 1452.0, - "43": 1696.0, - "44": 2955.0, - "45": 3662.0, - "46": 3297.0, - "47": 3146.0, - "48": 2674.0, - "49": 2478.0, - "50": 2078.0 + "1": 2666.0, + "2": 2067.0, + "3": 1740.0, + "4": 2413.0, + "5": 2309.0, + "6": 1987.0, + "7": 2322.0, + "8": 2592.0, + "9": 2591.0, + "10": 2533.0, + "11": 2715.0, + "12": 2046.0, + "13": 2261.0, + "14": 2888.0, + "15": 2213.0, + "16": 2816.0, + "17": 2691.0, + "18": 2624.0, + "19": 2673.0, + "20": 2764.0, + "21": 2223.0, + "22": 2764.0, + "23": 2776.0, + "24": 2764.0, + "25": 2607.0, + "26": 2809.0, + "27": 2690.0, + "28": 2732.0, + "29": 2548.0, + "30": 2826.0, + "31": 2488.0, + "32": 2859.0, + "33": 2434.0, + "34": 2658.0, + "35": 2681.0, + "36": 2966.0, + "37": 3337.0, + "38": 2852.0, + "39": 2838.0, + "40": 3427.0, + "41": 1730.0, + "42": 1524.0, + "43": 1786.0, + "44": 2869.0, + "45": 3620.0, + "46": 3383.0, + "47": 3171.0, + "48": 2667.0, + "49": 2621.0, + "50": 2099.0 } }, "mem-allocated-bytes": { @@ -152,7 +152,7 @@ "32": 1796646400.0, "33": 1796646400.0, "34": 1796646400.0, - "35": 1797694976.0, + "35": 1796646400.0, "36": 1796646400.0, "37": 1796646400.0, "38": 1796646400.0, @@ -176,16 +176,16 @@ "step_interval": 1, "values": { "1": 2376915456.0, - "2": 3126723584.0, - "3": 3126724096.0, - "4": 3126724096.0, - "5": 3126724096.0, - "6": 3126724096.0, - "7": 3126724096.0, - "8": 3126724096.0, - "9": 3126724096.0, - "10": 3126724096.0, - "11": 3126724096.0, + "2": 3124626944.0, + "3": 3124626944.0, + "4": 3126723584.0, + "5": 3126723584.0, + "6": 3126723584.0, + "7": 3126723584.0, + "8": 3126723584.0, + "9": 3126723584.0, + "10": 3126723584.0, + "11": 3126723584.0, "12": 3126724096.0, "13": 3126724096.0, "14": 3126724096.0, @@ -204,27 +204,27 @@ "27": 3126724096.0, "28": 3126724096.0, "29": 3126724096.0, - "30": 3128821248.0, - "31": 3128821248.0, - "32": 3128821248.0, - "33": 3128821248.0, - "34": 3128821248.0, - "35": 3128821248.0, - "36": 3128821248.0, - "37": 3128821248.0, - "38": 3128821248.0, - "39": 3128821248.0, - "40": 3128821248.0, - "41": 3128821248.0, - "42": 3128821248.0, - "43": 3128821248.0, - "44": 3128821248.0, - "45": 3128821248.0, - "46": 3128821248.0, - "47": 3128821248.0, - "48": 3128821248.0, - "49": 3128821248.0, - "50": 3128821248.0 + "30": 3126724096.0, + "31": 3126724096.0, + "32": 3126724096.0, + "33": 3126724096.0, + "34": 3126724096.0, + "35": 3126724096.0, + "36": 3126724096.0, + "37": 3126724096.0, + "38": 3126724096.0, + "39": 3126724096.0, + "40": 3126724096.0, + "41": 3126724096.0, + "42": 3126724096.0, + "43": 3126724096.0, + "44": 3126724096.0, + "45": 3126724096.0, + "46": 3126724096.0, + "47": 3126724096.0, + "48": 3126724096.0, + "49": 3126724096.0, + "50": 3126724096.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 7.30404, - "3": 0.94091, - "4": 0.92204, - "5": 0.92114, - "6": 0.92111, - "7": 0.90621, - "8": 0.90764, - "9": 0.90745, - "10": 0.90814, - "11": 0.91042, - "12": 0.90559, - "13": 0.90811, - "14": 0.9575, - "15": 0.92282, - "16": 0.92228, - "17": 0.91866, - "18": 0.91983, - "19": 0.92061, - "20": 0.91825, - "21": 1.25693, - "22": 1.10637, - "23": 1.15254, - "24": 0.90684, - "25": 0.9132, - "26": 1.16081, - "27": 0.90621, - "28": 1.16419, - "29": 0.90868, - "30": 1.24759, - "31": 0.94976, - "32": 0.9232, - "33": 0.90997, - "34": 0.90937, - "35": 0.90824, - "36": 0.91023, - "37": 0.90952, - "38": 0.91076, - "39": 0.90936, - "40": 0.9063, - "41": 0.92378, - "42": 0.92283, - "43": 0.92095, - "44": 0.91236, - "45": 0.90954, - "46": 0.90765, - "47": 0.90877, - "48": 1.08773, - "49": 0.9072, - "50": 0.91697 + "2": 6.84093, + "3": 0.92462, + "4": 0.90312, + "5": 0.90076, + "6": 0.90077, + "7": 0.89943, + "8": 0.89853, + "9": 0.90198, + "10": 0.90263, + "11": 0.90328, + "12": 0.90775, + "13": 0.90847, + "14": 0.91571, + "15": 0.91424, + "16": 0.90232, + "17": 0.90274, + "18": 0.902, + "19": 0.91326, + "20": 0.92886, + "21": 1.16397, + "22": 0.91397, + "23": 1.11344, + "24": 0.90352, + "25": 0.93256, + "26": 1.42352, + "27": 1.19905, + "28": 1.171, + "29": 0.89989, + "30": 0.90194, + "31": 0.899, + "32": 1.16818, + "33": 1.1969, + "34": 0.97514, + "35": 0.94924, + "36": 0.90134, + "37": 0.89922, + "38": 0.89724, + "39": 0.89848, + "40": 0.89833, + "41": 0.89969, + "42": 0.90019, + "43": 0.89851, + "44": 0.89779, + "45": 0.89774, + "46": 0.90025, + "47": 0.90031, + "48": 0.89743, + "49": 0.89782, + "50": 0.89739 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json index 5fcd4069a97..8c62ea6ef08 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_local_spec/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.52646, - "2": 10.54515, - "3": 10.5313, - "4": 10.53837, - "5": 10.53725, - "6": 10.54437, - "7": 10.54661, - "8": 10.54186, - "9": 10.53417, - "10": 10.53023, - "11": 10.53138, - "12": 10.53806, - "13": 10.53901, - "14": 10.52435, - "15": 10.52291, - "16": 10.50388, - "17": 10.51258, - "18": 10.50728, - "19": 10.505, - "20": 10.50929, - "21": 10.51045, - "22": 10.4465, - "23": 10.43169, - "24": 10.44107, - "25": 10.40841, - "26": 10.4161, - "27": 10.39933, - "28": 10.41056, - "29": 10.36605, - "30": 10.28358, - "31": 10.23042, - "32": 10.205, - "33": 10.21542, - "34": 10.17025, - "35": 10.14669, - "36": 10.12668, - "37": 10.11652, - "38": 10.11826, - "39": 10.08113, - "40": 10.01198, - "41": 9.9667, - "42": 9.92772, - "43": 9.91963, - "44": 9.86358, - "45": 9.83787, - "46": 9.77904, - "47": 9.77267, - "48": 9.74922, - "49": 9.77992, - "50": 9.75251 + "1": 10.52596, + "2": 10.54321, + "3": 10.53116, + "4": 10.53974, + "5": 10.53655, + "6": 10.54373, + "7": 10.54832, + "8": 10.54246, + "9": 10.53183, + "10": 10.52865, + "11": 10.53105, + "12": 10.53775, + "13": 10.53988, + "14": 10.52454, + "15": 10.52156, + "16": 10.5035, + "17": 10.51146, + "18": 10.50989, + "19": 10.50555, + "20": 10.51076, + "21": 10.51027, + "22": 10.44578, + "23": 10.43283, + "24": 10.44166, + "25": 10.408, + "26": 10.41467, + "27": 10.39903, + "28": 10.4109, + "29": 10.36686, + "30": 10.28291, + "31": 10.23023, + "32": 10.20472, + "33": 10.21514, + "34": 10.17075, + "35": 10.14683, + "36": 10.12771, + "37": 10.11687, + "38": 10.11942, + "39": 10.08135, + "40": 10.01225, + "41": 9.96658, + "42": 9.92742, + "43": 9.92083, + "44": 9.86425, + "45": 9.83851, + "46": 9.77845, + "47": 9.77179, + "48": 9.74924, + "49": 9.77833, + "50": 9.75333 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 2680.0, - "2": 2049.0, - "3": 1714.0, - "4": 2460.0, - "5": 2301.0, - "6": 1981.0, - "7": 2388.0, - "8": 2533.0, - "9": 2517.0, - "10": 2512.0, - "11": 2671.0, - "12": 1939.0, - "13": 2323.0, - "14": 2763.0, - "15": 2225.0, - "16": 2787.0, - "17": 2755.0, - "18": 2621.0, - "19": 2763.0, - "20": 2794.0, - "21": 2179.0, - "22": 2877.0, - "23": 2632.0, - "24": 2826.0, - "25": 2691.0, - "26": 2767.0, - "27": 2730.0, - "28": 2782.0, - "29": 2521.0, - "30": 2921.0, - "31": 2472.0, - "32": 2921.0, - "33": 2388.0, - "34": 2551.0, - "35": 2647.0, - "36": 3023.0, - "37": 3267.0, - "38": 2786.0, - "39": 3010.0, - "40": 3454.0, - "41": 1758.0, - "42": 1488.0, - "43": 1763.0, - "44": 2999.0, - "45": 3594.0, - "46": 3353.0, - "47": 3172.0, - "48": 2692.0, - "49": 2463.0, - "50": 2115.0 + "1": 2713.0, + "2": 2130.0, + "3": 1804.0, + "4": 2472.0, + "5": 2340.0, + "6": 1969.0, + "7": 2395.0, + "8": 2562.0, + "9": 2508.0, + "10": 2500.0, + "11": 2625.0, + "12": 1980.0, + "13": 2314.0, + "14": 2908.0, + "15": 2177.0, + "16": 2818.0, + "17": 2644.0, + "18": 2571.0, + "19": 2716.0, + "20": 2752.0, + "21": 2233.0, + "22": 2772.0, + "23": 2589.0, + "24": 2800.0, + "25": 2621.0, + "26": 2753.0, + "27": 2770.0, + "28": 2961.0, + "29": 2587.0, + "30": 2894.0, + "31": 2527.0, + "32": 2956.0, + "33": 2369.0, + "34": 2509.0, + "35": 2671.0, + "36": 3081.0, + "37": 3300.0, + "38": 2748.0, + "39": 2941.0, + "40": 3471.0, + "41": 1674.0, + "42": 1507.0, + "43": 1708.0, + "44": 2828.0, + "45": 3611.0, + "46": 3389.0, + "47": 3137.0, + "48": 2686.0, + "49": 2606.0, + "50": 2150.0 } }, "mem-allocated-bytes": { @@ -176,27 +176,27 @@ "step_interval": 1, "values": { "1": 2347554304.0, - "2": 3097360384.0, - "3": 3097360384.0, - "4": 3097360384.0, - "5": 3097360384.0, - "6": 3097360384.0, - "7": 3097360384.0, - "8": 3097360384.0, - "9": 3097362944.0, - "10": 3097362944.0, - "11": 3097362944.0, - "12": 3097362944.0, - "13": 3097362944.0, - "14": 3097362944.0, - "15": 3097362944.0, - "16": 3097362944.0, - "17": 3097362944.0, - "18": 3097362944.0, - "19": 3097362944.0, - "20": 3097362944.0, - "21": 3097362944.0, - "22": 3097362944.0, + "2": 3097359360.0, + "3": 3097361920.0, + "4": 3097361920.0, + "5": 3097361920.0, + "6": 3097361920.0, + "7": 3097361920.0, + "8": 3097361920.0, + "9": 3097361920.0, + "10": 3097361920.0, + "11": 3097361920.0, + "12": 3097361920.0, + "13": 3097361920.0, + "14": 3097361920.0, + "15": 3097361920.0, + "16": 3097361920.0, + "17": 3097361920.0, + "18": 3097361920.0, + "19": 3097361920.0, + "20": 3097361920.0, + "21": 3097361920.0, + "22": 3097361920.0, "23": 3097362944.0, "24": 3097362944.0, "25": 3097362944.0, @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 6.65278, - "3": 0.80729, - "4": 0.75309, - "5": 0.75657, - "6": 0.75567, - "7": 0.75051, - "8": 0.79758, - "9": 0.74801, - "10": 0.74907, - "11": 0.75864, - "12": 0.77822, - "13": 0.77353, - "14": 0.7675, - "15": 0.74681, - "16": 0.74699, - "17": 0.74713, - "18": 0.74909, - "19": 1.17339, - "20": 1.04917, - "21": 1.0602, - "22": 0.75705, - "23": 1.40883, - "24": 1.35546, - "25": 0.77334, - "26": 0.77065, - "27": 0.7506, - "28": 0.76835, - "29": 0.75344, - "30": 0.7614, - "31": 0.76286, - "32": 0.76206, - "33": 0.7584, - "34": 0.76586, - "35": 0.79058, - "36": 0.74744, - "37": 0.75671, - "38": 0.75006, - "39": 0.75076, - "40": 0.74876, - "41": 0.74984, - "42": 0.75073, - "43": 0.87349, - "44": 0.8073, - "45": 0.77888, - "46": 0.99536, - "47": 0.76688, - "48": 0.77466, - "49": 1.15924, - "50": 0.74701 + "2": 6.22924, + "3": 0.82565, + "4": 0.76734, + "5": 0.81289, + "6": 0.7677, + "7": 0.77043, + "8": 0.77276, + "9": 0.76969, + "10": 0.77067, + "11": 0.79791, + "12": 0.7664, + "13": 0.76205, + "14": 0.75458, + "15": 0.75145, + "16": 0.75003, + "17": 0.7506, + "18": 0.75092, + "19": 0.74811, + "20": 0.74743, + "21": 0.76346, + "22": 0.76016, + "23": 0.76441, + "24": 0.76477, + "25": 0.76052, + "26": 0.95583, + "27": 1.13429, + "28": 0.75924, + "29": 1.10063, + "30": 0.94433, + "31": 0.76585, + "32": 0.76275, + "33": 0.76681, + "34": 0.76106, + "35": 0.76023, + "36": 0.75761, + "37": 0.76086, + "38": 0.76453, + "39": 0.76282, + "40": 0.76061, + "41": 0.7604, + "42": 0.76276, + "43": 0.78308, + "44": 0.77747, + "45": 0.7563, + "46": 0.76289, + "47": 0.76138, + "48": 0.75941, + "49": 0.76251, + "50": 0.76162 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json index 40bea4ac462..4fa87cc633a 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.52515, - "2": 10.54464, - "3": 10.53225, - "4": 10.53963, - "5": 10.53658, - "6": 10.54252, - "7": 10.54919, - "8": 10.54127, - "9": 10.53395, - "10": 10.52862, - "11": 10.53067, - "12": 10.53697, - "13": 10.53945, - "14": 10.52443, - "15": 10.52179, - "16": 10.50323, - "17": 10.5127, - "18": 10.50939, - "19": 10.50594, - "20": 10.50997, - "21": 10.51032, - "22": 10.44577, - "23": 10.43367, - "24": 10.44118, - "25": 10.40813, - "26": 10.41475, - "27": 10.39983, - "28": 10.41071, - "29": 10.36795, - "30": 10.28189, - "31": 10.23052, - "32": 10.20533, - "33": 10.21364, - "34": 10.17063, - "35": 10.14593, - "36": 10.12739, - "37": 10.11585, - "38": 10.11919, - "39": 10.07882, - "40": 10.01252, - "41": 9.96692, - "42": 9.92758, - "43": 9.92049, - "44": 9.86484, - "45": 9.83822, - "46": 9.77998, - "47": 9.77239, - "48": 9.74982, - "49": 9.778, - "50": 9.75365, - "51": 9.76005, - "52": 9.70986, - "53": 9.67159, - "54": 9.69242, - "55": 9.67995, - "56": 9.67294, - "57": 9.60193, - "58": 9.61969, - "59": 9.54603, - "60": 9.6122, - "61": 9.54787, - "62": 9.53722, - "63": 9.52349, - "64": 9.51067, - "65": 9.52347, - "66": 9.49062, - "67": 9.45803, - "68": 9.44212, - "69": 9.44324, - "70": 9.43922, + "1": 10.52601, + "2": 10.54351, + "3": 10.53142, + "4": 10.5395, + "5": 10.53689, + "6": 10.54178, + "7": 10.54797, + "8": 10.54141, + "9": 10.53367, + "10": 10.5298, + "11": 10.53043, + "12": 10.53682, + "13": 10.53964, + "14": 10.52514, + "15": 10.52209, + "16": 10.50425, + "17": 10.51196, + "18": 10.50665, + "19": 10.50453, + "20": 10.51117, + "21": 10.50765, + "22": 10.44507, + "23": 10.43323, + "24": 10.44209, + "25": 10.40722, + "26": 10.4149, + "27": 10.40035, + "28": 10.40991, + "29": 10.36666, + "30": 10.28147, + "31": 10.23014, + "32": 10.20472, + "33": 10.21463, + "34": 10.17016, + "35": 10.14747, + "36": 10.12881, + "37": 10.1167, + "38": 10.1182, + "39": 10.07942, + "40": 10.01111, + "41": 9.9674, + "42": 9.92692, + "43": 9.91985, + "44": 9.86362, + "45": 9.83829, + "46": 9.7801, + "47": 9.77153, + "48": 9.7494, + "49": 9.77867, + "50": 9.75389, + "51": 9.76065, + "52": 9.70994, + "53": 9.67136, + "54": 9.69181, + "55": 9.67972, + "56": 9.67279, + "57": 9.60184, + "58": 9.62028, + "59": 9.54565, + "60": 9.61284, + "61": 9.54674, + "62": 9.53683, + "63": 9.52273, + "64": 9.50968, + "65": 9.52321, + "66": 9.49015, + "67": 9.45837, + "68": 9.44138, + "69": 9.44339, + "70": 9.43981, "71": 9.47041, - "72": 9.45725, - "73": 9.40536, - "74": 9.45635, - "75": 9.40507, - "76": 9.37316, - "77": 9.34396, - "78": 9.37786, - "79": 9.41166, - "80": 9.34493, - "81": 9.33096, - "82": 9.34642, - "83": 9.31611, - "84": 9.29902, - "85": 9.33654, - "86": 9.26861, - "87": 9.31388, - "88": 9.29805, - "89": 9.26894, - "90": 9.34087, - "91": 9.25631, - "92": 9.29651, - "93": 9.29935, - "94": 9.27574, - "95": 9.28048, - "96": 9.18131, - "97": 9.26438, - "98": 9.19722, - "99": 9.21951, - "100": 9.22923 + "72": 9.45692, + "73": 9.40581, + "74": 9.4563, + "75": 9.4049, + "76": 9.37355, + "77": 9.34341, + "78": 9.37752, + "79": 9.41164, + "80": 9.34491, + "81": 9.33083, + "82": 9.34702, + "83": 9.31548, + "84": 9.29966, + "85": 9.33603, + "86": 9.26938, + "87": 9.3147, + "88": 9.29899, + "89": 9.26908, + "90": 9.3414, + "91": 9.25694, + "92": 9.29673, + "93": 9.30033, + "94": 9.27553, + "95": 9.28012, + "96": 9.18247, + "97": 9.26433, + "98": 9.19698, + "99": 9.21983, + "100": 9.22928 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2694.0, - "2": 2038.0, - "3": 1725.0, - "4": 2486.0, - "5": 2286.0, - "6": 2006.0, - "7": 2358.0, - "8": 2557.0, - "9": 2439.0, - "10": 2467.0, - "11": 2695.0, - "12": 2001.0, - "13": 2274.0, - "14": 2852.0, - "15": 2293.0, - "16": 2718.0, - "17": 2764.0, - "18": 2543.0, - "19": 2783.0, - "20": 2742.0, - "21": 2199.0, - "22": 2754.0, - "23": 2704.0, - "24": 2827.0, - "25": 2664.0, - "26": 2810.0, - "27": 2678.0, - "28": 2751.0, - "29": 2624.0, - "30": 2875.0, - "31": 2498.0, - "32": 2970.0, - "33": 2380.0, - "34": 2542.0, - "35": 2774.0, - "36": 2985.0, - "37": 3282.0, - "38": 2706.0, - "39": 2924.0, - "40": 3560.0, - "41": 1578.0, - "42": 1531.0, - "43": 1744.0, - "44": 2892.0, - "45": 3574.0, - "46": 3340.0, - "47": 3082.0, - "48": 2605.0, - "49": 2496.0, - "50": 2102.0, - "51": 1769.0, - "52": 2645.0, - "53": 3852.0, - "54": 3615.0, - "55": 3365.0, - "56": 4146.0, - "57": 3860.0, - "58": 4285.0, - "59": 1773.0, - "60": 2698.0, - "61": 2192.0, - "62": 3981.0, - "63": 3916.0, - "64": 4576.0, - "65": 3081.0, - "66": 1913.0, - "67": 2157.0, - "68": 4109.0, - "69": 4392.0, - "70": 4007.0, - "71": 2078.0, - "72": 4121.0, - "73": 3482.0, - "74": 2587.0, - "75": 5381.0, - "76": 2630.0, - "77": 4087.0, - "78": 4282.0, - "79": 2267.0, - "80": 3523.0, - "81": 3970.0, - "82": 3684.0, - "83": 4798.0, - "84": 5334.0, - "85": 4550.0, - "86": 4009.0, - "87": 3707.0, - "88": 4522.0, - "89": 3812.0, - "90": 4600.0, - "91": 4730.0, - "92": 3955.0, - "93": 3787.0, - "94": 2962.0, - "95": 4073.0, - "96": 3648.0, - "97": 3327.0, + "1": 2686.0, + "2": 2112.0, + "3": 1726.0, + "4": 2429.0, + "5": 2317.0, + "6": 1996.0, + "7": 2391.0, + "8": 2491.0, + "9": 2530.0, + "10": 2541.0, + "11": 2794.0, + "12": 2028.0, + "13": 2246.0, + "14": 2836.0, + "15": 2210.0, + "16": 2792.0, + "17": 2741.0, + "18": 2616.0, + "19": 2637.0, + "20": 2632.0, + "21": 2254.0, + "22": 2774.0, + "23": 2646.0, + "24": 2860.0, + "25": 2649.0, + "26": 2785.0, + "27": 2714.0, + "28": 2871.0, + "29": 2584.0, + "30": 2857.0, + "31": 2358.0, + "32": 2860.0, + "33": 2392.0, + "34": 2594.0, + "35": 2659.0, + "36": 2989.0, + "37": 3336.0, + "38": 2737.0, + "39": 2910.0, + "40": 3487.0, + "41": 1658.0, + "42": 1491.0, + "43": 1695.0, + "44": 2915.0, + "45": 3654.0, + "46": 3338.0, + "47": 3130.0, + "48": 2755.0, + "49": 2501.0, + "50": 2055.0, + "51": 1801.0, + "52": 2509.0, + "53": 3939.0, + "54": 3535.0, + "55": 3374.0, + "56": 4337.0, + "57": 3885.0, + "58": 4270.0, + "59": 1680.0, + "60": 2699.0, + "61": 2164.0, + "62": 3916.0, + "63": 3894.0, + "64": 4603.0, + "65": 2991.0, + "66": 1986.0, + "67": 2166.0, + "68": 3973.0, + "69": 4403.0, + "70": 4023.0, + "71": 2134.0, + "72": 4119.0, + "73": 3386.0, + "74": 2513.0, + "75": 5337.0, + "76": 2567.0, + "77": 4093.0, + "78": 4363.0, + "79": 2309.0, + "80": 3651.0, + "81": 3958.0, + "82": 3578.0, + "83": 4861.0, + "84": 5338.0, + "85": 4380.0, + "86": 3952.0, + "87": 3705.0, + "88": 4503.0, + "89": 3942.0, + "90": 4550.0, + "91": 4821.0, + "92": 3934.0, + "93": 3732.0, + "94": 3104.0, + "95": 4118.0, + "96": 3769.0, + "97": 3296.0, "98": 4531.0, - "99": 3795.0, - "100": 3279.0 + "99": 3736.0, + "100": 3372.0 } }, "mem-allocated-bytes": { @@ -234,7 +234,7 @@ "14": 1796646400.0, "15": 1796646400.0, "16": 1796646400.0, - "17": 1796646400.0, + "17": 1797694976.0, "18": 1796646400.0, "19": 1796646400.0, "20": 1796646400.0, @@ -327,104 +327,104 @@ "values": { "1": 2376915456.0, "2": 3124626944.0, - "3": 3124626944.0, - "4": 3124626944.0, - "5": 3124626944.0, - "6": 3124626944.0, - "7": 3124626944.0, - "8": 3124626944.0, - "9": 3124626944.0, - "10": 3124626944.0, - "11": 3124626944.0, - "12": 3124626944.0, - "13": 3124626944.0, - "14": 3124626944.0, - "15": 3124626944.0, - "16": 3124626944.0, - "17": 3124626944.0, - "18": 3124626944.0, - "19": 3124626944.0, - "20": 3124626944.0, - "21": 3124626944.0, - "22": 3124626944.0, - "23": 3124626944.0, - "24": 3124626944.0, - "25": 3124626944.0, - "26": 3124626944.0, - "27": 3124626944.0, - "28": 3124626944.0, - "29": 3124626944.0, - "30": 3124626944.0, - "31": 3124626944.0, - "32": 3124626944.0, - "33": 3124626944.0, - "34": 3124626944.0, - "35": 3124626944.0, - "36": 3124626944.0, - "37": 3124626944.0, - "38": 3124626944.0, - "39": 3124626944.0, - "40": 3124626944.0, - "41": 3124626944.0, - "42": 3124626944.0, - "43": 3124626944.0, - "44": 3124626944.0, - "45": 3124626944.0, - "46": 3124626944.0, - "47": 3124626944.0, - "48": 3124626944.0, - "49": 3124626944.0, - "50": 3124626944.0, - "51": 3124626944.0, - "52": 3124626944.0, - "53": 3124626944.0, - "54": 3124626944.0, - "55": 3124626944.0, - "56": 3124626944.0, - "57": 3124626944.0, - "58": 3124626944.0, - "59": 3124626944.0, - "60": 3124626944.0, - "61": 3124626944.0, - "62": 3124626944.0, - "63": 3124626944.0, - "64": 3124626944.0, - "65": 3124626944.0, - "66": 3124626944.0, - "67": 3124626944.0, - "68": 3124626944.0, - "69": 3124626944.0, - "70": 3124626944.0, - "71": 3124626944.0, - "72": 3124626944.0, - "73": 3124626944.0, - "74": 3124626944.0, - "75": 3124626944.0, - "76": 3124626944.0, - "77": 3124626944.0, - "78": 3124626944.0, - "79": 3124626944.0, - "80": 3124626944.0, - "81": 3124626944.0, - "82": 3124626944.0, - "83": 3124626944.0, - "84": 3124626944.0, - "85": 3124626944.0, - "86": 3124626944.0, - "87": 3124626944.0, - "88": 3124626944.0, - "89": 3124626944.0, - "90": 3124626944.0, - "91": 3124626944.0, - "92": 3124626944.0, - "93": 3124626944.0, - "94": 3124626944.0, - "95": 3124626944.0, - "96": 3124626944.0, - "97": 3124626944.0, - "98": 3124626944.0, - "99": 3124626944.0, - "100": 3124626944.0 + "3": 3125669888.0, + "4": 3126723584.0, + "5": 3126723584.0, + "6": 3126723584.0, + "7": 3126723584.0, + "8": 3126723584.0, + "9": 3126723584.0, + "10": 3126723584.0, + "11": 3126723584.0, + "12": 3126723584.0, + "13": 3126723584.0, + "14": 3126723584.0, + "15": 3126723584.0, + "16": 3126723584.0, + "17": 3126724096.0, + "18": 3126724096.0, + "19": 3126724096.0, + "20": 3126724096.0, + "21": 3126724096.0, + "22": 3126724096.0, + "23": 3126724096.0, + "24": 3126724096.0, + "25": 3126724096.0, + "26": 3126724096.0, + "27": 3126724096.0, + "28": 3126724096.0, + "29": 3126724096.0, + "30": 3126724096.0, + "31": 3126724096.0, + "32": 3126724096.0, + "33": 3126724096.0, + "34": 3126724096.0, + "35": 3126724096.0, + "36": 3126724096.0, + "37": 3126724096.0, + "38": 3126724096.0, + "39": 3126724096.0, + "40": 3126724096.0, + "41": 3126724096.0, + "42": 3126724096.0, + "43": 3126724096.0, + "44": 3126724096.0, + "45": 3126724096.0, + "46": 3126724096.0, + "47": 3126724096.0, + "48": 3126724096.0, + "49": 3126724096.0, + "50": 3126724096.0, + "51": 3126724096.0, + "52": 3126724096.0, + "53": 3126724096.0, + "54": 3126724096.0, + "55": 3126724096.0, + "56": 3126724096.0, + "57": 3126724096.0, + "58": 3126724096.0, + "59": 3126724096.0, + "60": 3126724096.0, + "61": 3126724096.0, + "62": 3126724096.0, + "63": 3126724096.0, + "64": 3126724096.0, + "65": 3126724096.0, + "66": 3126724096.0, + "67": 3126724096.0, + "68": 3126724096.0, + "69": 3126724096.0, + "70": 3126724096.0, + "71": 3126724096.0, + "72": 3126724096.0, + "73": 3126724096.0, + "74": 3126724096.0, + "75": 3126724096.0, + "76": 3126724096.0, + "77": 3126724096.0, + "78": 3126724096.0, + "79": 3126724096.0, + "80": 3126724096.0, + "81": 3126724096.0, + "82": 3126724096.0, + "83": 3126724096.0, + "84": 3126724096.0, + "85": 3126724096.0, + "86": 3126724096.0, + "87": 3126724096.0, + "88": 3126724096.0, + "89": 3126724096.0, + "90": 3126724096.0, + "91": 3126724096.0, + "92": 3126724096.0, + "93": 3126724096.0, + "94": 3126724096.0, + "95": 3126724096.0, + "96": 3126724096.0, + "97": 3126724096.0, + "98": 3126724096.0, + "99": 3126724096.0, + "100": 3126724096.0 } }, "iteration-time": { @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 6.77679, - "3": 0.96083, - "4": 0.93727, - "5": 0.88655, - "6": 0.88656, - "7": 0.88571, - "8": 0.8873, - "9": 0.8882, - "10": 0.8882, - "11": 0.88472, - "12": 0.88772, - "13": 0.88421, - "14": 0.88527, - "15": 0.88246, - "16": 0.88256, - "17": 0.88373, - "18": 0.88359, - "19": 0.8828, - "20": 1.45042, - "21": 1.16338, - "22": 1.44778, - "23": 0.88114, - "24": 0.88173, - "25": 0.88445, - "26": 0.8792, - "27": 0.8816, - "28": 0.87992, - "29": 0.88178, - "30": 0.88128, - "31": 0.88303, - "32": 0.88483, - "33": 0.88377, - "34": 0.88155, - "35": 0.88366, - "36": 0.89127, - "37": 0.88175, - "38": 0.88225, - "39": 0.88197, - "40": 0.88138, - "41": 0.88142, - "42": 0.88687, - "43": 0.88349, - "44": 0.88194, - "45": 0.88206, - "46": 0.88445, - "47": 1.08645, - "48": 0.8826, - "49": 1.27411, - "50": 1.07698, - "51": 1.09616, - "52": 1.26661, - "53": 0.88344, - "54": 0.91118, - "55": 0.88351, - "56": 0.88478, - "57": 0.88696, - "58": 0.8847, - "59": 0.88986, - "60": 0.88289, - "61": 0.88661, - "62": 0.88371, - "63": 0.8837, - "64": 0.88432, - "65": 1.76327, - "66": 1.32625, - "67": 0.88423, - "68": 0.88521, - "69": 0.88368, - "70": 1.47401, - "71": 0.8851, - "72": 0.88573, - "73": 0.88815, - "74": 0.89154, - "75": 0.99337, - "76": 1.04308, - "77": 0.8854, - "78": 1.08033, - "79": 1.08794, - "80": 1.03415, - "81": 1.02193, - "82": 0.99549, - "83": 0.88423, - "84": 0.89046, - "85": 0.8859, - "86": 0.8846, - "87": 0.88492, - "88": 0.8837, - "89": 0.88611, - "90": 0.88537, - "91": 0.8864, - "92": 0.92431, - "93": 1.42975, - "94": 1.42328, - "95": 1.38667, - "96": 0.88689, - "97": 0.88623, - "98": 0.88695, - "99": 0.88564, - "100": 0.88402 + "2": 6.99686, + "3": 0.98665, + "4": 0.95367, + "5": 0.95835, + "6": 0.95407, + "7": 0.95943, + "8": 0.96707, + "9": 0.96894, + "10": 0.97278, + "11": 0.9629, + "12": 0.94632, + "13": 0.91076, + "14": 0.89395, + "15": 0.89664, + "16": 0.89423, + "17": 0.89416, + "18": 0.89471, + "19": 0.8941, + "20": 0.90642, + "21": 0.90213, + "22": 1.17335, + "23": 1.18944, + "24": 0.89938, + "25": 1.55048, + "26": 1.14083, + "27": 1.30825, + "28": 0.8966, + "29": 0.89522, + "30": 0.92513, + "31": 0.92675, + "32": 0.92412, + "33": 0.90522, + "34": 0.89109, + "35": 0.89358, + "36": 0.89373, + "37": 0.89645, + "38": 0.89337, + "39": 0.8973, + "40": 0.89495, + "41": 0.8907, + "42": 0.89353, + "43": 0.89216, + "44": 0.93623, + "45": 0.98287, + "46": 0.92096, + "47": 0.89438, + "48": 0.89433, + "49": 0.89702, + "50": 0.89346, + "51": 0.90154, + "52": 1.0923, + "53": 1.09519, + "54": 1.04757, + "55": 1.08783, + "56": 1.01373, + "57": 1.22558, + "58": 0.89437, + "59": 0.8962, + "60": 0.90104, + "61": 0.89551, + "62": 0.89739, + "63": 0.8949, + "64": 0.89427, + "65": 0.89408, + "66": 0.89624, + "67": 0.89469, + "68": 0.89456, + "69": 0.89914, + "70": 0.89456, + "71": 0.89414, + "72": 0.89369, + "73": 0.89746, + "74": 0.89637, + "75": 0.89802, + "76": 0.89601, + "77": 1.3471, + "78": 0.89592, + "79": 1.07894, + "80": 0.89631, + "81": 0.89512, + "82": 1.20327, + "83": 0.89845, + "84": 1.46761, + "85": 1.0144, + "86": 1.22181, + "87": 0.89902, + "88": 0.89926, + "89": 0.91566, + "90": 0.89697, + "91": 0.89891, + "92": 1.28446, + "93": 0.89937, + "94": 0.89763, + "95": 1.27257, + "96": 0.89727, + "97": 0.89642, + "98": 0.89955, + "99": 0.89683, + "100": 0.90469 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json index a5b9c2f1ab2..7990f924ec5 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist/golden_values_dev_dgx_h100_2nd.json @@ -54,56 +54,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 9.71492, - "52": 9.66464, - "53": 9.60912, - "54": 9.62726, - "55": 9.6101, - "56": 9.61721, - "57": 9.56794, - "58": 9.52741, - "59": 9.51674, - "60": 9.51863, - "61": 9.53132, - "62": 9.45018, - "63": 9.4572, - "64": 9.43437, - "65": 9.45816, - "66": 9.43669, - "67": 9.39678, - "68": 9.36478, - "69": 9.40956, - "70": 9.37595, - "71": 9.41738, - "72": 9.42564, - "73": 9.37611, - "74": 9.41543, - "75": 9.3788, - "76": 9.28012, - "77": 9.32212, - "78": 9.35744, - "79": 9.3215, - "80": 9.31497, - "81": 9.26785, - "82": 9.34183, - "83": 9.32151, - "84": 9.24796, - "85": 9.35033, - "86": 9.224, - "87": 9.30611, - "88": 9.29894, - "89": 9.22704, - "90": 9.28479, - "91": 9.2311, - "92": 9.27474, - "93": 9.19219, - "94": 9.23969, - "95": 9.28, - "96": 9.17525, - "97": 9.21888, - "98": 9.1721, - "99": 9.16455, - "100": 9.1482 + "51": 9.76001, + "52": 9.70981, + "53": 9.67192, + "54": 9.69327, + "55": 9.67994, + "56": 9.67301, + "57": 9.60209, + "58": 9.61874, + "59": 9.54659, + "60": 9.61259, + "61": 9.54836, + "62": 9.53716, + "63": 9.52333, + "64": 9.51044, + "65": 9.52202, + "66": 9.4896, + "67": 9.4575, + "68": 9.44091, + "69": 9.44314, + "70": 9.43974, + "71": 9.4698, + "72": 9.45651, + "73": 9.40468, + "74": 9.45623, + "75": 9.40499, + "76": 9.37331, + "77": 9.34347, + "78": 9.37859, + "79": 9.41089, + "80": 9.34502, + "81": 9.33074, + "82": 9.34623, + "83": 9.31635, + "84": 9.29926, + "85": 9.33611, + "86": 9.26905, + "87": 9.31456, + "88": 9.29844, + "89": 9.26888, + "90": 9.34061, + "91": 9.25718, + "92": 9.29635, + "93": 9.29969, + "94": 9.2754, + "95": 9.28117, + "96": 9.18254, + "97": 9.26445, + "98": 9.1966, + "99": 9.21992, + "100": 9.22929 } }, "num-zeros": { @@ -161,56 +161,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 1961.0, - "52": 2445.0, - "53": 3654.0, - "54": 3489.0, - "55": 3419.0, - "56": 4364.0, - "57": 4145.0, - "58": 4155.0, - "59": 1699.0, - "60": 2358.0, - "61": 2070.0, - "62": 4094.0, - "63": 3516.0, - "64": 4287.0, - "65": 2891.0, - "66": 1733.0, - "67": 1914.0, - "68": 4420.0, - "69": 4479.0, - "70": 4656.0, - "71": 2135.0, - "72": 4476.0, - "73": 4048.0, - "74": 3199.0, - "75": 4735.0, - "76": 2218.0, - "77": 4952.0, - "78": 4158.0, - "79": 2657.0, - "80": 3846.0, - "81": 3472.0, - "82": 2979.0, - "83": 5364.0, - "84": 4430.0, - "85": 4249.0, - "86": 3509.0, - "87": 4817.0, - "88": 3434.0, - "89": 4711.0, - "90": 4448.0, - "91": 4374.0, - "92": 3507.0, - "93": 5549.0, - "94": 3635.0, - "95": 4540.0, - "96": 3659.0, - "97": 3756.0, - "98": 4513.0, - "99": 4491.0, - "100": 3445.0 + "51": 1818.0, + "52": 2620.0, + "53": 3744.0, + "54": 3604.0, + "55": 3334.0, + "56": 4304.0, + "57": 4040.0, + "58": 4220.0, + "59": 1787.0, + "60": 2678.0, + "61": 2295.0, + "62": 3929.0, + "63": 3820.0, + "64": 4560.0, + "65": 3100.0, + "66": 2039.0, + "67": 2163.0, + "68": 4135.0, + "69": 4393.0, + "70": 4091.0, + "71": 2120.0, + "72": 4062.0, + "73": 3510.0, + "74": 2614.0, + "75": 5305.0, + "76": 2601.0, + "77": 4058.0, + "78": 4315.0, + "79": 2234.0, + "80": 3448.0, + "81": 4090.0, + "82": 3752.0, + "83": 4925.0, + "84": 5349.0, + "85": 4450.0, + "86": 4011.0, + "87": 3738.0, + "88": 4415.0, + "89": 3811.0, + "90": 4620.0, + "91": 4703.0, + "92": 4036.0, + "93": 3711.0, + "94": 3059.0, + "95": 4017.0, + "96": 3793.0, + "97": 3300.0, + "98": 4562.0, + "99": 3832.0, + "100": 3458.0 } }, "mem-allocated-bytes": { @@ -268,56 +268,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 1786112512.0, - "52": 1786112512.0, - "53": 1786112512.0, - "54": 1786112512.0, - "55": 1786112512.0, - "56": 1786112512.0, - "57": 1786112512.0, - "58": 1786112512.0, - "59": 1786112512.0, - "60": 1786112512.0, - "61": 1786112512.0, - "62": 1786112512.0, - "63": 1786112512.0, - "64": 1786112512.0, - "65": 1786112512.0, - "66": 1786112512.0, - "67": 1786112512.0, - "68": 1786112512.0, - "69": 1786112512.0, - "70": 1786112512.0, - "71": 1786112512.0, - "72": 1786112512.0, - "73": 1786112512.0, - "74": 1786112512.0, - "75": 1786112512.0, - "76": 1786112512.0, - "77": 1786112512.0, - "78": 1786112512.0, - "79": 1786112512.0, - "80": 1786112512.0, - "81": 1786112512.0, - "82": 1786112512.0, - "83": 1786112512.0, - "84": 1786112512.0, - "85": 1786112512.0, - "86": 1786112512.0, - "87": 1786112512.0, - "88": 1786112512.0, - "89": 1786112512.0, - "90": 1786112512.0, - "91": 1786112512.0, - "92": 1786112512.0, - "93": 1786112512.0, - "94": 1786112512.0, - "95": 1786112512.0, - "96": 1786112512.0, - "97": 1786112512.0, - "98": 1786112512.0, - "99": 1786112512.0, - "100": 1786112512.0 + "51": 1796646400.0, + "52": 1796646400.0, + "53": 1796646400.0, + "54": 1796646400.0, + "55": 1796646400.0, + "56": 1796646400.0, + "57": 1796646400.0, + "58": 1796646400.0, + "59": 1796646400.0, + "60": 1796646400.0, + "61": 1796646400.0, + "62": 1796646400.0, + "63": 1796646400.0, + "64": 1796646400.0, + "65": 1796646400.0, + "66": 1796646400.0, + "67": 1796646400.0, + "68": 1796646400.0, + "69": 1796646400.0, + "70": 1796646400.0, + "71": 1796646400.0, + "72": 1796646400.0, + "73": 1796646400.0, + "74": 1796646400.0, + "75": 1796646400.0, + "76": 1796646400.0, + "77": 1796646400.0, + "78": 1796646400.0, + "79": 1796646400.0, + "80": 1796646400.0, + "81": 1796646400.0, + "82": 1796646400.0, + "83": 1796646400.0, + "84": 1796646400.0, + "85": 1796646400.0, + "86": 1796646400.0, + "87": 1796646400.0, + "88": 1796646400.0, + "89": 1796646400.0, + "90": 1796646400.0, + "91": 1796646400.0, + "92": 1796646400.0, + "93": 1796646400.0, + "94": 1796646400.0, + "95": 1796646400.0, + "96": 1796646400.0, + "97": 1796646400.0, + "98": 1796646400.0, + "99": 1796646400.0, + "100": 1796646400.0 } }, "mem-max-allocated-bytes": { @@ -375,56 +375,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 3110419456.0, - "52": 3110421504.0, - "53": 3110421504.0, - "54": 3110421504.0, - "55": 3110421504.0, - "56": 3110421504.0, - "57": 3110421504.0, - "58": 3110421504.0, - "59": 3110421504.0, - "60": 3110421504.0, - "61": 3110421504.0, - "62": 3110421504.0, - "63": 3110421504.0, - "64": 3110421504.0, - "65": 3110421504.0, - "66": 3110421504.0, - "67": 3110421504.0, - "68": 3110421504.0, - "69": 3110421504.0, - "70": 3110421504.0, - "71": 3110421504.0, - "72": 3110421504.0, - "73": 3110421504.0, - "74": 3110421504.0, - "75": 3110421504.0, - "76": 3110421504.0, - "77": 3110421504.0, - "78": 3110421504.0, - "79": 3110421504.0, - "80": 3110421504.0, - "81": 3110421504.0, - "82": 3110421504.0, - "83": 3110421504.0, - "84": 3110421504.0, - "85": 3110421504.0, - "86": 3110421504.0, - "87": 3110421504.0, - "88": 3110421504.0, - "89": 3110421504.0, - "90": 3110421504.0, - "91": 3110421504.0, - "92": 3110421504.0, - "93": 3110421504.0, - "94": 3110421504.0, - "95": 3110421504.0, - "96": 3110421504.0, - "97": 3110421504.0, - "98": 3110421504.0, - "99": 3110421504.0, - "100": 3110421504.0 + "51": 3124624896.0, + "52": 3124626944.0, + "53": 3124626944.0, + "54": 3124626944.0, + "55": 3124626944.0, + "56": 3124626944.0, + "57": 3124626944.0, + "58": 3124626944.0, + "59": 3124626944.0, + "60": 3124626944.0, + "61": 3124626944.0, + "62": 3124626944.0, + "63": 3124626944.0, + "64": 3124626944.0, + "65": 3124626944.0, + "66": 3124626944.0, + "67": 3124626944.0, + "68": 3124626944.0, + "69": 3124626944.0, + "70": 3124626944.0, + "71": 3124626944.0, + "72": 3124626944.0, + "73": 3124626944.0, + "74": 3124626944.0, + "75": 3124626944.0, + "76": 3124626944.0, + "77": 3124626944.0, + "78": 3124626944.0, + "79": 3124626944.0, + "80": 3124626944.0, + "81": 3124626944.0, + "82": 3124626944.0, + "83": 3124626944.0, + "84": 3124626944.0, + "85": 3124626944.0, + "86": 3124626944.0, + "87": 3124626944.0, + "88": 3124626944.0, + "89": 3124626944.0, + "90": 3124626944.0, + "91": 3124626944.0, + "92": 3124626944.0, + "93": 3124626944.0, + "94": 3124626944.0, + "95": 3126723584.0, + "96": 3126723584.0, + "97": 3126723584.0, + "98": 3126723584.0, + "99": 3126723584.0, + "100": 3126723584.0 } }, "iteration-time": { @@ -482,56 +482,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 10.75043, - "52": 1.0039, - "53": 0.95516, - "54": 0.91159, - "55": 0.90836, - "56": 0.94785, - "57": 1.16936, - "58": 1.19663, - "59": 1.28755, - "60": 0.88429, - "61": 0.8835, - "62": 0.91894, - "63": 0.88317, - "64": 0.89119, - "65": 0.88844, - "66": 1.26569, - "67": 0.88764, - "68": 0.88401, - "69": 0.89243, - "70": 0.8883, - "71": 0.89113, - "72": 0.91101, - "73": 0.89072, - "74": 2.04797, - "75": 0.90184, - "76": 0.93408, - "77": 1.2869, - "78": 0.95072, - "79": 0.96458, - "80": 0.90559, - "81": 0.95787, - "82": 0.90855, - "83": 1.71942, - "84": 0.94521, - "85": 0.88307, - "86": 0.88152, - "87": 0.89039, - "88": 0.88803, - "89": 0.90894, - "90": 0.89894, - "91": 1.05886, - "92": 1.19588, - "93": 1.37335, - "94": 0.8898, - "95": 1.07004, - "96": 0.88806, - "97": 0.89083, - "98": 0.90547, - "99": 0.94317, - "100": 0.90081 + "51": "nan", + "52": 6.03911, + "53": 0.97416, + "54": 0.94779, + "55": 0.9496, + "56": 0.94003, + "57": 0.95104, + "58": 0.9583, + "59": 0.94134, + "60": 0.94902, + "61": 0.94593, + "62": 0.93544, + "63": 0.94434, + "64": 0.94553, + "65": 0.92679, + "66": 0.90378, + "67": 0.90184, + "68": 0.91076, + "69": 0.90954, + "70": 0.91372, + "71": 0.91281, + "72": 0.91175, + "73": 0.91046, + "74": 0.91357, + "75": 0.90873, + "76": 0.91033, + "77": 0.91092, + "78": 1.19718, + "79": 1.185, + "80": 1.2732, + "81": 0.90464, + "82": 0.90482, + "83": 0.90412, + "84": 0.90648, + "85": 0.9074, + "86": 0.91479, + "87": 0.91427, + "88": 0.91177, + "89": 0.91209, + "90": 0.913, + "91": 0.9133, + "92": 0.98243, + "93": 0.91047, + "94": 0.91069, + "95": 0.91618, + "96": 0.91277, + "97": 0.90968, + "98": 0.91034, + "99": 0.9131, + "100": 0.91106 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json index cbc6ad4a652..4712c9642eb 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.52631, - "2": 10.54437, - "3": 10.53254, - "4": 10.54074, - "5": 10.5359, - "6": 10.54329, - "7": 10.54765, - "8": 10.54167, - "9": 10.53321, - "10": 10.52933, - "11": 10.53062, - "12": 10.53814, - "13": 10.53802, - "14": 10.52489, - "15": 10.52257, - "16": 10.50286, - "17": 10.51143, - "18": 10.5081, - "19": 10.50518, - "20": 10.51059, - "21": 10.51051, - "22": 10.44691, - "23": 10.43219, - "24": 10.44067, - "25": 10.40702, - "26": 10.41509, - "27": 10.39929, - "28": 10.41147, - "29": 10.36654, - "30": 10.28105, - "31": 10.23151, - "32": 10.2049, - "33": 10.21579, - "34": 10.17143, - "35": 10.14594, - "36": 10.12636, - "37": 10.11518, - "38": 10.11834, - "39": 10.08081, - "40": 10.0113, - "41": 9.96736, - "42": 9.92723, - "43": 9.92086, - "44": 9.86387, - "45": 9.83849, - "46": 9.77899, - "47": 9.77275, - "48": 9.74926, - "49": 9.77905, - "50": 9.75337, - "51": 9.75957, - "52": 9.71049, - "53": 9.67204, - "54": 9.69247, - "55": 9.68095, - "56": 9.67223, - "57": 9.60238, - "58": 9.61977, - "59": 9.54652, - "60": 9.61145, - "61": 9.54865, - "62": 9.53743, - "63": 9.52268, - "64": 9.51137, - "65": 9.52253, - "66": 9.49069, - "67": 9.45779, - "68": 9.44155, - "69": 9.44406, - "70": 9.4415, - "71": 9.47005, - "72": 9.4581, - "73": 9.40624, - "74": 9.45654, - "75": 9.40466, - "76": 9.37369, - "77": 9.34406, - "78": 9.37846, - "79": 9.41118, - "80": 9.34482, - "81": 9.33075, - "82": 9.34654, - "83": 9.31619, - "84": 9.29945, - "85": 9.33659, - "86": 9.26918, - "87": 9.31391, - "88": 9.29854, - "89": 9.26934, - "90": 9.34147, - "91": 9.25663, - "92": 9.29671, - "93": 9.2992, - "94": 9.27519, - "95": 9.28018, - "96": 9.18148, - "97": 9.2644, - "98": 9.19676, - "99": 9.21954, - "100": 9.22959 + "1": 10.52634, + "2": 10.54394, + "3": 10.5306, + "4": 10.53915, + "5": 10.53736, + "6": 10.54276, + "7": 10.54863, + "8": 10.54251, + "9": 10.53329, + "10": 10.52978, + "11": 10.53007, + "12": 10.53673, + "13": 10.53813, + "14": 10.52576, + "15": 10.52231, + "16": 10.50365, + "17": 10.51315, + "18": 10.50793, + "19": 10.50456, + "20": 10.51123, + "21": 10.50921, + "22": 10.44602, + "23": 10.43379, + "24": 10.44158, + "25": 10.40803, + "26": 10.41435, + "27": 10.39916, + "28": 10.41005, + "29": 10.36503, + "30": 10.28095, + "31": 10.23063, + "32": 10.20496, + "33": 10.21452, + "34": 10.17231, + "35": 10.14712, + "36": 10.12654, + "37": 10.11588, + "38": 10.11932, + "39": 10.07968, + "40": 10.01061, + "41": 9.96748, + "42": 9.92769, + "43": 9.92134, + "44": 9.8637, + "45": 9.8385, + "46": 9.77967, + "47": 9.77376, + "48": 9.74993, + "49": 9.77779, + "50": 9.75265, + "51": 9.75918, + "52": 9.71034, + "53": 9.67218, + "54": 9.69223, + "55": 9.68028, + "56": 9.67264, + "57": 9.60149, + "58": 9.61974, + "59": 9.54676, + "60": 9.6121, + "61": 9.54752, + "62": 9.53694, + "63": 9.52296, + "64": 9.51154, + "65": 9.5227, + "66": 9.4903, + "67": 9.45836, + "68": 9.44167, + "69": 9.44343, + "70": 9.43953, + "71": 9.4694, + "72": 9.45656, + "73": 9.40533, + "74": 9.45671, + "75": 9.40506, + "76": 9.37324, + "77": 9.34315, + "78": 9.37815, + "79": 9.41129, + "80": 9.3449, + "81": 9.33054, + "82": 9.34678, + "83": 9.31634, + "84": 9.29988, + "85": 9.33642, + "86": 9.26915, + "87": 9.31448, + "88": 9.29912, + "89": 9.26873, + "90": 9.34053, + "91": 9.25641, + "92": 9.29649, + "93": 9.30007, + "94": 9.27502, + "95": 9.28051, + "96": 9.1819, + "97": 9.26424, + "98": 9.19719, + "99": 9.2201, + "100": 9.22924 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 2617.0, - "2": 2094.0, - "3": 1798.0, - "4": 2505.0, - "5": 2297.0, - "6": 2078.0, - "7": 2301.0, - "8": 2625.0, - "9": 2497.0, - "10": 2504.0, - "11": 2784.0, - "12": 1904.0, - "13": 2303.0, - "14": 2875.0, - "15": 2193.0, - "16": 2770.0, - "17": 2665.0, - "18": 2573.0, - "19": 2627.0, - "20": 2816.0, - "21": 2300.0, - "22": 2823.0, - "23": 2599.0, - "24": 2828.0, - "25": 2674.0, - "26": 2747.0, - "27": 2770.0, - "28": 2854.0, - "29": 2523.0, - "30": 2875.0, - "31": 2490.0, - "32": 2860.0, - "33": 2319.0, - "34": 2527.0, - "35": 2726.0, - "36": 3054.0, - "37": 3300.0, - "38": 2754.0, - "39": 2733.0, - "40": 3533.0, - "41": 1743.0, - "42": 1529.0, - "43": 1772.0, - "44": 2961.0, - "45": 3611.0, - "46": 3413.0, - "47": 3127.0, - "48": 2770.0, - "49": 2539.0, - "50": 2141.0, - "51": 1761.0, - "52": 2660.0, - "53": 3915.0, - "54": 3652.0, - "55": 3296.0, - "56": 4245.0, - "57": 4057.0, - "58": 4098.0, - "59": 1783.0, - "60": 2705.0, - "61": 2237.0, - "62": 3914.0, - "63": 3917.0, - "64": 4487.0, + "1": 2642.0, + "2": 2057.0, + "3": 1774.0, + "4": 2472.0, + "5": 2334.0, + "6": 2020.0, + "7": 2380.0, + "8": 2615.0, + "9": 2506.0, + "10": 2485.0, + "11": 2740.0, + "12": 2047.0, + "13": 2284.0, + "14": 2999.0, + "15": 2316.0, + "16": 2793.0, + "17": 2703.0, + "18": 2559.0, + "19": 2658.0, + "20": 2877.0, + "21": 2289.0, + "22": 2854.0, + "23": 2674.0, + "24": 2823.0, + "25": 2703.0, + "26": 2766.0, + "27": 2777.0, + "28": 2824.0, + "29": 2554.0, + "30": 3008.0, + "31": 2462.0, + "32": 2838.0, + "33": 2359.0, + "34": 2572.0, + "35": 2593.0, + "36": 2948.0, + "37": 3259.0, + "38": 2748.0, + "39": 2896.0, + "40": 3397.0, + "41": 1673.0, + "42": 1525.0, + "43": 1785.0, + "44": 2918.0, + "45": 3588.0, + "46": 3318.0, + "47": 3155.0, + "48": 2691.0, + "49": 2544.0, + "50": 2069.0, + "51": 1754.0, + "52": 2405.0, + "53": 3877.0, + "54": 3680.0, + "55": 3301.0, + "56": 4203.0, + "57": 3903.0, + "58": 4287.0, + "59": 1780.0, + "60": 2719.0, + "61": 2170.0, + "62": 3896.0, + "63": 3952.0, + "64": 4456.0, "65": 3080.0, - "66": 1921.0, - "67": 2164.0, - "68": 4161.0, - "69": 4434.0, - "70": 4023.0, - "71": 2111.0, - "72": 4044.0, - "73": 3510.0, - "74": 2619.0, - "75": 5231.0, - "76": 2626.0, - "77": 4109.0, - "78": 4337.0, - "79": 2321.0, - "80": 3502.0, - "81": 3952.0, - "82": 3644.0, - "83": 4827.0, - "84": 5477.0, - "85": 4396.0, - "86": 3953.0, - "87": 3499.0, - "88": 4439.0, - "89": 3874.0, - "90": 4637.0, - "91": 4734.0, - "92": 3999.0, - "93": 3762.0, - "94": 3075.0, - "95": 4043.0, - "96": 3804.0, - "97": 3339.0, - "98": 4824.0, - "99": 3730.0, - "100": 3312.0 + "66": 2012.0, + "67": 2191.0, + "68": 4097.0, + "69": 4428.0, + "70": 4042.0, + "71": 2079.0, + "72": 3949.0, + "73": 3432.0, + "74": 2586.0, + "75": 5224.0, + "76": 2600.0, + "77": 4059.0, + "78": 4370.0, + "79": 2233.0, + "80": 3534.0, + "81": 4048.0, + "82": 3671.0, + "83": 4884.0, + "84": 5401.0, + "85": 4417.0, + "86": 4056.0, + "87": 3648.0, + "88": 4350.0, + "89": 3898.0, + "90": 4709.0, + "91": 4692.0, + "92": 3936.0, + "93": 3682.0, + "94": 3069.0, + "95": 4033.0, + "96": 3784.0, + "97": 3383.0, + "98": 4601.0, + "99": 3850.0, + "100": 3294.0 } }, "mem-allocated-bytes": { @@ -326,15 +326,15 @@ "step_interval": 1, "values": { "1": 2347554304.0, - "2": 3097362432.0, - "3": 3097362944.0, - "4": 3097362944.0, - "5": 3097362944.0, - "6": 3097362944.0, - "7": 3097362944.0, - "8": 3097362944.0, - "9": 3097362944.0, - "10": 3097362944.0, + "2": 3095265792.0, + "3": 3095265792.0, + "4": 3095265792.0, + "5": 3095265792.0, + "6": 3095265792.0, + "7": 3095265792.0, + "8": 3097356800.0, + "9": 3097356800.0, + "10": 3097360896.0, "11": 3097362944.0, "12": 3097362944.0, "13": 3097362944.0, @@ -433,105 +433,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 6.64764, - "3": 0.79084, - "4": 0.75471, - "5": 0.75484, - "6": 0.75223, - "7": 0.75145, - "8": 0.75475, - "9": 0.75533, - "10": 0.75399, - "11": 0.75254, - "12": 0.77237, - "13": 0.76941, - "14": 0.75371, - "15": 0.74878, - "16": 0.75748, - "17": 0.75013, - "18": 0.75393, - "19": 0.75308, - "20": 0.82461, - "21": 1.14815, - "22": 1.40873, - "23": 0.78134, - "24": 1.13678, - "25": 0.77065, - "26": 0.77109, - "27": 0.75236, - "28": 0.7541, - "29": 0.97476, - "30": 0.98559, - "31": 0.75096, - "32": 0.7479, - "33": 0.74863, - "34": 0.74963, - "35": 0.74885, - "36": 0.82711, - "37": 0.85378, - "38": 0.76888, - "39": 0.81485, - "40": 0.75679, - "41": 0.74893, - "42": 0.75104, - "43": 0.7494, - "44": 0.75881, - "45": 0.75257, - "46": 0.75827, - "47": 0.7504, - "48": 0.98004, - "49": 0.91266, - "50": 0.75782, - "51": 1.21882, - "52": 0.75137, - "53": 0.98393, - "54": 0.74921, - "55": 0.7528, - "56": 0.75403, - "57": 0.75214, - "58": 0.99844, - "59": 0.75039, - "60": 0.91137, - "61": 0.75353, - "62": 0.75465, - "63": 0.76022, - "64": 0.74973, - "65": 0.7503, - "66": 0.75508, - "67": 0.75386, - "68": 0.7506, - "69": 0.75959, - "70": 0.75257, - "71": 0.75584, - "72": 0.75851, - "73": 0.75296, - "74": 0.74943, - "75": 0.755, - "76": 0.99487, - "77": 0.75879, - "78": 0.92637, - "79": 0.75765, - "80": 0.91589, - "81": 1.0084, - "82": 0.75509, - "83": 1.18005, - "84": 0.75533, - "85": 0.76431, - "86": 0.81253, - "87": 1.14347, - "88": 0.75555, - "89": 0.75535, - "90": 1.13878, - "91": 0.75648, - "92": 0.75589, - "93": 0.75482, - "94": 0.75398, - "95": 0.75489, - "96": 0.75471, - "97": 0.7583, - "98": 0.75293, - "99": 0.752, - "100": 0.74962 + "2": 7.06508, + "3": 0.77864, + "4": 0.75718, + "5": 0.75459, + "6": 0.75257, + "7": 0.75088, + "8": 0.75164, + "9": 0.7607, + "10": 0.75336, + "11": 0.7798, + "12": 0.76111, + "13": 0.76339, + "14": 0.75564, + "15": 0.76373, + "16": 0.75249, + "17": 0.75665, + "18": 0.75242, + "19": 0.75178, + "20": 0.75234, + "21": 0.75172, + "22": 0.75118, + "23": 1.15411, + "24": 1.03138, + "25": 1.09133, + "26": 1.02757, + "27": 1.07288, + "28": 1.4512, + "29": 0.75209, + "30": 0.7502, + "31": 0.75481, + "32": 0.75023, + "33": 0.75077, + "34": 0.7503, + "35": 0.74975, + "36": 0.7553, + "37": 0.75022, + "38": 0.75169, + "39": 0.74874, + "40": 0.75008, + "41": 0.75221, + "42": 0.75188, + "43": 0.75244, + "44": 0.75196, + "45": 0.74964, + "46": 0.75163, + "47": 0.75373, + "48": 0.75401, + "49": 0.99089, + "50": 0.75414, + "51": 0.76853, + "52": 0.7548, + "53": 0.9997, + "54": 0.99015, + "55": 0.97331, + "56": 0.79026, + "57": 0.75717, + "58": 0.75812, + "59": 0.97529, + "60": 0.75727, + "61": 0.75876, + "62": 0.7568, + "63": 0.75762, + "64": 0.75672, + "65": 0.75695, + "66": 0.7565, + "67": 0.75713, + "68": 0.7622, + "69": 0.75637, + "70": 0.75576, + "71": 0.76033, + "72": 0.75535, + "73": 0.75715, + "74": 0.75704, + "75": 0.75889, + "76": 0.75795, + "77": 0.7605, + "78": 0.90863, + "79": 0.75748, + "80": 0.75712, + "81": 0.95431, + "82": 1.16954, + "83": 0.9664, + "84": 0.7532, + "85": 0.75198, + "86": 0.91713, + "87": 0.75057, + "88": 0.96928, + "89": 1.16801, + "90": 0.7753, + "91": 0.7575, + "92": 0.75427, + "93": 0.77919, + "94": 0.76699, + "95": 0.76557, + "96": 0.76692, + "97": 0.77111, + "98": 0.76771, + "99": 0.75017, + "100": 1.17729 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json index de97d194787..d95395a05bc 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp2_pp2_resume_torch_dist_local_spec/golden_values_dev_dgx_h100_2nd.json @@ -54,56 +54,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 9.71501, - "52": 9.66488, - "53": 9.60917, - "54": 9.62733, - "55": 9.61022, - "56": 9.61723, - "57": 9.56794, - "58": 9.52733, - "59": 9.51677, - "60": 9.5188, - "61": 9.53149, - "62": 9.45031, - "63": 9.45717, - "64": 9.43441, - "65": 9.45812, - "66": 9.43672, - "67": 9.39687, - "68": 9.36469, - "69": 9.40964, - "70": 9.37606, - "71": 9.41737, - "72": 9.42585, - "73": 9.37601, - "74": 9.4154, - "75": 9.37896, - "76": 9.28004, - "77": 9.32212, - "78": 9.35755, - "79": 9.3216, - "80": 9.31491, - "81": 9.26783, - "82": 9.342, - "83": 9.32159, - "84": 9.24786, - "85": 9.35018, - "86": 9.22384, - "87": 9.30618, - "88": 9.29905, - "89": 9.22708, - "90": 9.28498, - "91": 9.23123, - "92": 9.27487, - "93": 9.19233, - "94": 9.23985, - "95": 9.28002, - "96": 9.17532, - "97": 9.21898, - "98": 9.17203, - "99": 9.16444, - "100": 9.14821 + "51": 9.76075, + "52": 9.71056, + "53": 9.67176, + "54": 9.6927, + "55": 9.67989, + "56": 9.67286, + "57": 9.60245, + "58": 9.61961, + "59": 9.54672, + "60": 9.61178, + "61": 9.5471, + "62": 9.53744, + "63": 9.52367, + "64": 9.51028, + "65": 9.52244, + "66": 9.4897, + "67": 9.45768, + "68": 9.44085, + "69": 9.44346, + "70": 9.4408, + "71": 9.4698, + "72": 9.4568, + "73": 9.40593, + "74": 9.45636, + "75": 9.40517, + "76": 9.37307, + "77": 9.34299, + "78": 9.37809, + "79": 9.41123, + "80": 9.34466, + "81": 9.33103, + "82": 9.34625, + "83": 9.3164, + "84": 9.29915, + "85": 9.33639, + "86": 9.26971, + "87": 9.31419, + "88": 9.29965, + "89": 9.26919, + "90": 9.34128, + "91": 9.25648, + "92": 9.29665, + "93": 9.29974, + "94": 9.27612, + "95": 9.28092, + "96": 9.18147, + "97": 9.26466, + "98": 9.19714, + "99": 9.21941, + "100": 9.22968 } }, "num-zeros": { @@ -161,56 +161,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 1900.0, - "52": 2483.0, - "53": 3763.0, - "54": 3478.0, - "55": 3412.0, - "56": 4400.0, - "57": 4019.0, - "58": 4253.0, - "59": 1805.0, - "60": 2457.0, - "61": 2045.0, - "62": 3994.0, - "63": 3650.0, - "64": 4466.0, - "65": 2968.0, - "66": 1837.0, - "67": 1961.0, - "68": 4347.0, - "69": 4441.0, - "70": 4452.0, - "71": 2131.0, - "72": 4523.0, - "73": 4105.0, - "74": 3300.0, - "75": 4651.0, - "76": 2216.0, - "77": 4932.0, - "78": 4218.0, - "79": 2784.0, - "80": 3824.0, - "81": 3472.0, - "82": 2976.0, - "83": 5282.0, - "84": 4464.0, - "85": 4344.0, - "86": 3460.0, - "87": 4774.0, - "88": 3426.0, - "89": 4600.0, - "90": 4360.0, - "91": 4283.0, - "92": 3362.0, - "93": 5633.0, - "94": 3676.0, - "95": 4610.0, - "96": 3449.0, - "97": 3751.0, - "98": 4524.0, - "99": 4399.0, - "100": 3295.0 + "51": 1750.0, + "52": 2572.0, + "53": 3843.0, + "54": 3567.0, + "55": 3310.0, + "56": 4186.0, + "57": 3977.0, + "58": 4223.0, + "59": 1816.0, + "60": 2651.0, + "61": 2206.0, + "62": 3937.0, + "63": 3877.0, + "64": 4554.0, + "65": 3105.0, + "66": 1946.0, + "67": 2137.0, + "68": 4040.0, + "69": 4485.0, + "70": 4153.0, + "71": 2129.0, + "72": 4015.0, + "73": 3458.0, + "74": 2507.0, + "75": 5373.0, + "76": 2693.0, + "77": 4154.0, + "78": 4384.0, + "79": 2285.0, + "80": 3506.0, + "81": 3999.0, + "82": 3623.0, + "83": 4827.0, + "84": 5368.0, + "85": 4391.0, + "86": 3955.0, + "87": 3601.0, + "88": 4436.0, + "89": 3880.0, + "90": 4593.0, + "91": 4804.0, + "92": 3875.0, + "93": 3586.0, + "94": 2988.0, + "95": 4054.0, + "96": 3792.0, + "97": 3452.0, + "98": 4640.0, + "99": 3861.0, + "100": 3325.0 } }, "mem-allocated-bytes": { @@ -268,56 +268,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 1769334272.0, - "52": 1769334272.0, - "53": 1769334272.0, - "54": 1769334272.0, - "55": 1769334272.0, - "56": 1769334272.0, - "57": 1769334272.0, - "58": 1769334272.0, - "59": 1769334272.0, - "60": 1769334272.0, - "61": 1769334272.0, - "62": 1769334272.0, - "63": 1769334272.0, - "64": 1769334272.0, - "65": 1769334272.0, - "66": 1769334272.0, - "67": 1769334272.0, - "68": 1769334272.0, - "69": 1769334272.0, - "70": 1769334272.0, - "71": 1769334272.0, - "72": 1769334272.0, - "73": 1769334272.0, - "74": 1769334272.0, - "75": 1769334272.0, - "76": 1769334272.0, - "77": 1769334272.0, - "78": 1769334272.0, - "79": 1769334272.0, - "80": 1769334272.0, - "81": 1769334272.0, - "82": 1769334272.0, - "83": 1769334272.0, - "84": 1769334272.0, - "85": 1769334272.0, - "86": 1769334272.0, - "87": 1769334272.0, - "88": 1769334272.0, - "89": 1769334272.0, - "90": 1769334272.0, - "91": 1769334272.0, - "92": 1769334272.0, - "93": 1769334272.0, - "94": 1769334272.0, - "95": 1769334272.0, - "96": 1769334272.0, - "97": 1769334272.0, - "98": 1769334272.0, - "99": 1769334272.0, - "100": 1769334272.0 + "51": 1779868160.0, + "52": 1779868160.0, + "53": 1779868160.0, + "54": 1779868160.0, + "55": 1779868160.0, + "56": 1779868160.0, + "57": 1779868160.0, + "58": 1779868160.0, + "59": 1779868160.0, + "60": 1779868160.0, + "61": 1779868160.0, + "62": 1779868160.0, + "63": 1779868160.0, + "64": 1779868160.0, + "65": 1779868160.0, + "66": 1779868160.0, + "67": 1779868160.0, + "68": 1779868160.0, + "69": 1779868160.0, + "70": 1779868160.0, + "71": 1779868160.0, + "72": 1779868160.0, + "73": 1779868160.0, + "74": 1779868160.0, + "75": 1779868160.0, + "76": 1779868160.0, + "77": 1779868160.0, + "78": 1779868160.0, + "79": 1779868160.0, + "80": 1779868160.0, + "81": 1779868160.0, + "82": 1779868160.0, + "83": 1779868160.0, + "84": 1779868160.0, + "85": 1779868160.0, + "86": 1779868160.0, + "87": 1779868160.0, + "88": 1779868160.0, + "89": 1779868160.0, + "90": 1779868160.0, + "91": 1779868160.0, + "92": 1779868160.0, + "93": 1779868160.0, + "94": 1779868160.0, + "95": 1779868160.0, + "96": 1779868160.0, + "97": 1779868160.0, + "98": 1779868160.0, + "99": 1779868160.0, + "100": 1779868160.0 } }, "mem-max-allocated-bytes": { @@ -375,56 +375,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 3081058304.0, - "52": 3081060352.0, - "53": 3081060352.0, - "54": 3081060352.0, - "55": 3081060352.0, - "56": 3081060352.0, - "57": 3081060352.0, - "58": 3081060352.0, - "59": 3081060352.0, - "60": 3081060352.0, - "61": 3081060352.0, - "62": 3081060352.0, - "63": 3081060352.0, - "64": 3081060352.0, - "65": 3081060352.0, - "66": 3081060352.0, - "67": 3081060352.0, - "68": 3081060352.0, - "69": 3081060352.0, - "70": 3081060352.0, - "71": 3081060352.0, - "72": 3081060352.0, - "73": 3081060352.0, - "74": 3081060352.0, - "75": 3081060352.0, - "76": 3081060352.0, - "77": 3081060352.0, - "78": 3081060352.0, - "79": 3081060352.0, - "80": 3081060352.0, - "81": 3081060352.0, - "82": 3081060352.0, - "83": 3081060352.0, - "84": 3081060352.0, - "85": 3081060352.0, - "86": 3081060352.0, - "87": 3081060352.0, - "88": 3081060352.0, - "89": 3081060352.0, - "90": 3081060352.0, - "91": 3081060352.0, - "92": 3081060352.0, - "93": 3081060352.0, - "94": 3081060352.0, - "95": 3081060352.0, - "96": 3081060352.0, - "97": 3081060352.0, - "98": 3081060352.0, - "99": 3081060352.0, - "100": 3081060352.0 + "51": 3095263744.0, + "52": 3095265792.0, + "53": 3095265792.0, + "54": 3095265792.0, + "55": 3095265792.0, + "56": 3095265792.0, + "57": 3095265792.0, + "58": 3095265792.0, + "59": 3095265792.0, + "60": 3095265792.0, + "61": 3095265792.0, + "62": 3095265792.0, + "63": 3095265792.0, + "64": 3095265792.0, + "65": 3095265792.0, + "66": 3095265792.0, + "67": 3095265792.0, + "68": 3095265792.0, + "69": 3095265792.0, + "70": 3095265792.0, + "71": 3095265792.0, + "72": 3095265792.0, + "73": 3095265792.0, + "74": 3095265792.0, + "75": 3095265792.0, + "76": 3095265792.0, + "77": 3095265792.0, + "78": 3095265792.0, + "79": 3095265792.0, + "80": 3095265792.0, + "81": 3095265792.0, + "82": 3095265792.0, + "83": 3095265792.0, + "84": 3095265792.0, + "85": 3095265792.0, + "86": 3095265792.0, + "87": 3095265792.0, + "88": 3095265792.0, + "89": 3095265792.0, + "90": 3095265792.0, + "91": 3095265792.0, + "92": 3095265792.0, + "93": 3095265792.0, + "94": 3095265792.0, + "95": 3095265792.0, + "96": 3095265792.0, + "97": 3095265792.0, + "98": 3095265792.0, + "99": 3095265792.0, + "100": 3095265792.0 } }, "iteration-time": { @@ -482,56 +482,56 @@ "48": "nan", "49": "nan", "50": "nan", - "51": 10.15551, - "52": 0.8598, - "53": 0.74904, - "54": 0.7512, - "55": 0.75011, - "56": 0.7593, - "57": 1.36317, - "58": 1.3678, - "59": 0.75114, - "60": 0.74624, - "61": 0.74824, - "62": 0.75285, - "63": 0.75097, - "64": 0.7539, - "65": 1.11179, - "66": 0.7482, - "67": 0.75224, - "68": 0.75225, - "69": 0.73791, - "70": 0.74141, - "71": 0.74372, - "72": 0.74097, - "73": 1.17879, - "74": 1.13369, - "75": 0.75135, - "76": 0.74737, - "77": 0.7455, - "78": 0.74472, - "79": 1.10005, - "80": 0.74804, - "81": 0.75235, - "82": 2.07286, - "83": 0.74595, - "84": 0.75659, - "85": 0.74796, - "86": 0.73902, - "87": 0.73952, - "88": 0.73743, - "89": 0.74161, - "90": 0.94861, - "91": 0.94405, - "92": 1.05613, - "93": 1.27634, - "94": 0.80928, - "95": 0.77886, - "96": 1.11223, - "97": 0.73925, - "98": 0.773, - "99": 0.74424, - "100": 0.78256 + "51": "nan", + "52": 5.54763, + "53": 0.78509, + "54": 0.82468, + "55": 0.81369, + "56": 0.75437, + "57": 0.74407, + "58": 0.7419, + "59": 0.7391, + "60": 0.77097, + "61": 0.77143, + "62": 0.76937, + "63": 0.78765, + "64": 0.76073, + "65": 0.75718, + "66": 0.74836, + "67": 0.80073, + "68": 0.75902, + "69": 0.75444, + "70": 0.75408, + "71": 0.76149, + "72": 0.75382, + "73": 0.75147, + "74": 0.75327, + "75": 0.75088, + "76": 0.7521, + "77": 0.74451, + "78": 1.32888, + "79": 1.28358, + "80": 1.03676, + "81": 0.74719, + "82": 0.7467, + "83": 0.74458, + "84": 0.74539, + "85": 0.74679, + "86": 0.74474, + "87": 0.74542, + "88": 0.74593, + "89": 0.74508, + "90": 0.74431, + "91": 0.75502, + "92": 0.75679, + "93": 0.75329, + "94": 0.75712, + "95": 0.75665, + "96": 0.75426, + "97": 0.7519, + "98": 0.75175, + "99": 0.79031, + "100": 0.7512 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json index 15e0ee3f6e7..07bdf6ec962 100644 --- a/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/bert/bert_mcore_tp4_pp1/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 10.51806, - "2": 10.49565, - "3": 10.51267, - "4": 10.51141, - "5": 10.49369, - "6": 10.48608, - "7": 10.49117, - "8": 10.50241, - "9": 10.50043, - "10": 10.49509, - "11": 10.49799, - "12": 10.51384, - "13": 10.49269, - "14": 10.48489, - "15": 10.502, - "16": 10.48076, - "17": 10.47767, - "18": 10.4852, - "19": 10.47833, - "20": 10.47379, - "21": 10.47299, - "22": 10.42889, - "23": 10.41776, - "24": 10.41531, - "25": 10.40697, - "26": 10.38423, - "27": 10.37494, - "28": 10.37653, - "29": 10.32604, - "30": 10.24422, - "31": 10.23408, - "32": 10.19383, - "33": 10.20522, - "34": 10.16587, - "35": 10.15852, + "1": 10.51743, + "2": 10.49513, + "3": 10.5132, + "4": 10.51121, + "5": 10.4955, + "6": 10.48512, + "7": 10.49231, + "8": 10.5034, + "9": 10.50164, + "10": 10.49536, + "11": 10.49842, + "12": 10.51305, + "13": 10.49258, + "14": 10.48431, + "15": 10.50204, + "16": 10.48055, + "17": 10.47595, + "18": 10.4865, + "19": 10.47861, + "20": 10.47296, + "21": 10.47182, + "22": 10.42873, + "23": 10.41909, + "24": 10.41489, + "25": 10.40724, + "26": 10.38212, + "27": 10.37497, + "28": 10.37657, + "29": 10.32619, + "30": 10.24238, + "31": 10.23351, + "32": 10.19375, + "33": 10.20557, + "34": 10.1657, + "35": 10.1577, "36": 10.12269, - "37": 10.11497, - "38": 10.10705, - "39": 10.06641, - "40": 10.02632, - "41": 9.98556, - "42": 9.92428, - "43": 9.90962, - "44": 9.88341, - "45": 9.84923, - "46": 9.81212, - "47": 9.79588, - "48": 9.76846, - "49": 9.82614, - "50": 9.78525 + "37": 10.11498, + "38": 10.10674, + "39": 10.06513, + "40": 10.02612, + "41": 9.98667, + "42": 9.92406, + "43": 9.90661, + "44": 9.88446, + "45": 9.84896, + "46": 9.81122, + "47": 9.79568, + "48": 9.76837, + "49": 9.8272, + "50": 9.78593 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 3606.0, - "2": 3061.0, - "3": 2908.0, - "4": 3502.0, - "5": 3205.0, - "6": 2982.0, - "7": 3777.0, - "8": 3489.0, - "9": 3413.0, - "10": 2253.0, - "11": 3940.0, - "12": 2933.0, - "13": 3430.0, - "14": 4218.0, - "15": 3298.0, - "16": 4000.0, - "17": 3603.0, - "18": 3495.0, - "19": 3808.0, - "20": 3820.0, - "21": 3255.0, - "22": 3972.0, - "23": 3917.0, - "24": 2882.0, - "25": 2803.0, - "26": 4001.0, - "27": 3883.0, - "28": 3910.0, - "29": 2547.0, - "30": 3964.0, - "31": 3511.0, - "32": 3717.0, - "33": 3377.0, - "34": 3548.0, - "35": 3571.0, - "36": 4212.0, - "37": 4317.0, - "38": 4005.0, - "39": 4202.0, - "40": 4472.0, - "41": 2706.0, - "42": 2542.0, - "43": 2678.0, - "44": 3338.0, - "45": 4117.0, - "46": 3984.0, - "47": 3750.0, - "48": 4219.0, - "49": 3967.0, - "50": 3325.0 + "1": 3674.0, + "2": 2960.0, + "3": 2798.0, + "4": 3448.0, + "5": 3155.0, + "6": 2875.0, + "7": 2759.0, + "8": 3669.0, + "9": 3353.0, + "10": 3297.0, + "11": 3758.0, + "12": 1885.0, + "13": 3403.0, + "14": 4114.0, + "15": 3223.0, + "16": 3964.0, + "17": 3670.0, + "18": 3539.0, + "19": 3721.0, + "20": 3920.0, + "21": 3315.0, + "22": 3897.0, + "23": 3904.0, + "24": 3874.0, + "25": 3725.0, + "26": 4052.0, + "27": 3789.0, + "28": 3950.0, + "29": 2640.0, + "30": 3866.0, + "31": 3375.0, + "32": 3781.0, + "33": 3509.0, + "34": 3580.0, + "35": 3608.0, + "36": 4191.0, + "37": 3316.0, + "38": 3937.0, + "39": 4268.0, + "40": 4421.0, + "41": 2656.0, + "42": 2483.0, + "43": 2624.0, + "44": 3301.0, + "45": 4088.0, + "46": 3918.0, + "47": 3748.0, + "48": 4223.0, + "49": 2937.0, + "50": 3324.0 } }, "mem-allocated-bytes": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 5.99604, - "3": 1.63944, - "4": 1.61469, - "5": 1.62145, - "6": 1.62065, - "7": 1.65639, - "8": 1.62144, - "9": 1.61901, - "10": 1.61769, - "11": 1.61883, - "12": 1.66045, - "13": 1.68762, - "14": 1.61513, - "15": 1.61681, - "16": 1.61577, - "17": 1.61337, - "18": 1.60921, - "19": 1.63599, - "20": 1.6134, - "21": 1.6127, - "22": 1.74027, - "23": 1.6175, - "24": 1.82766, - "25": 2.13333, - "26": 1.60883, - "27": 1.61034, - "28": 1.61171, - "29": 1.61548, - "30": 2.01691, - "31": 2.01164, - "32": 1.61379, - "33": 2.00557, - "34": 1.61478, - "35": 2.00811, - "36": 1.619, - "37": 1.62046, - "38": 2.05026, - "39": 2.03159, - "40": 1.61554, - "41": 1.6173, - "42": 1.99606, - "43": 1.61893, - "44": 1.61472, - "45": 1.61889, - "46": 1.6138, - "47": 1.62029, - "48": 1.61753, - "49": 1.61676, - "50": 1.6175 + "2": 5.96883, + "3": 1.7825, + "4": 1.69326, + "5": 1.63773, + "6": 1.63977, + "7": 1.6358, + "8": 1.62832, + "9": 1.63456, + "10": 1.6282, + "11": 1.62868, + "12": 1.63242, + "13": 1.62615, + "14": 1.63154, + "15": 1.62604, + "16": 1.62905, + "17": 1.63221, + "18": 1.62813, + "19": 1.62981, + "20": 1.62696, + "21": 1.62845, + "22": 1.6382, + "23": 1.67234, + "24": 1.7115, + "25": 1.87968, + "26": 1.62748, + "27": 1.6249, + "28": 1.62342, + "29": 1.63496, + "30": 1.62748, + "31": 1.62647, + "32": 1.62546, + "33": 2.01873, + "34": 1.62357, + "35": 1.62843, + "36": 1.6279, + "37": 1.6229, + "38": 2.4129, + "39": 2.0304, + "40": 1.63267, + "41": 2.87889, + "42": 1.63237, + "43": 1.63351, + "44": 1.63499, + "45": 1.63633, + "46": 1.632, + "47": 1.65424, + "48": 1.64003, + "49": 1.6378, + "50": 1.64605 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json index 0bcdf051fe8..82393aa2372 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph/golden_values_dev_dgx_gb200.json @@ -4,106 +4,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.94839, - "2": 10.94024, - "3": 10.95883, - "4": 10.95945, - "5": 10.93922, - "6": 10.95356, - "7": 10.95188, - "8": 10.94008, - "9": 10.94584, - "10": 10.94155, - "11": 10.94418, - "12": 10.93363, - "13": 10.92884, - "14": 10.93169, - "15": 10.87633, - "16": 10.88177, - "17": 10.87371, - "18": 10.86872, - "19": 10.86381, - "20": 10.7867, - "21": 10.73203, - "22": 10.62158, - "23": 10.72338, - "24": 10.61801, - "25": 10.54715, - "26": 10.64147, - "27": 10.63358, - "28": 10.5896, - "29": 10.59904, - "30": 10.36872, - "31": 10.11579, - "32": 10.45756, - "33": 10.45241, - "34": 10.18915, - "35": 10.2439, - "36": 10.20736, - "37": 10.32093, - "38": 10.17157, - "39": 10.39578, - "40": 10.03339, - "41": 10.08583, - "42": 10.17461, - "43": 9.7269, - "44": 9.88264, - "45": 9.73977, - "46": 9.72083, - "47": 10.08359, - "48": 9.75242, - "49": 9.39338, - "50": 9.83766, - "51": 9.76205, - "52": 9.65359, - "53": 10.01542, - "54": 9.86969, - "55": 9.79359, - "56": 9.53523, - "57": 9.36504, - "58": 9.75196, - "59": 9.47483, - "60": 9.40403, - "61": 9.59091, - "62": 9.91004, - "63": 9.24114, - "64": 9.67715, - "65": 8.7955, - "66": 9.60545, - "67": 9.24275, - "68": 9.70506, - "69": 9.70768, - "70": 9.65927, - "71": 9.50665, - "72": 9.45743, - "73": 9.38631, - "74": 8.79319, - "75": 9.32104, - "76": 8.92761, - "77": 9.9944, - "78": 9.63108, - "79": 9.26663, - "80": 9.29169, - "81": 9.37916, - "82": 9.60951, - "83": 9.21123, - "84": 9.3369, - "85": 9.52732, - "86": 8.94678, - "87": 9.49933, - "88": 9.67822, - "89": 9.49501, - "90": 9.75097, - "91": 9.22906, - "92": 9.25338, - "93": 8.96161, - "94": 8.68814, - "95": 9.43597, - "96": 9.3988, - "97": 9.19106, - "98": 9.57241, - "99": 8.7571, - "100": 9.29259 + "1": 10.94823, + "2": 10.94033, + "3": 10.95961, + "4": 10.95909, + "5": 10.93919, + "6": 10.95318, + "7": 10.95231, + "8": 10.94016, + "9": 10.9455, + "10": 10.94117, + "11": 10.94389, + "12": 10.93412, + "13": 10.92925, + "14": 10.93231, + "15": 10.87628, + "16": 10.88222, + "17": 10.87414, + "18": 10.86908, + "19": 10.86312, + "20": 10.78641, + "21": 10.73242, + "22": 10.6219, + "23": 10.72353, + "24": 10.61734, + "25": 10.54742, + "26": 10.64162, + "27": 10.63386, + "28": 10.59055, + "29": 10.59939, + "30": 10.36903, + "31": 10.11654, + "32": 10.45705, + "33": 10.45267, + "34": 10.18921, + "35": 10.24458, + "36": 10.20703, + "37": 10.32121, + "38": 10.17099, + "39": 10.39598, + "40": 10.03279, + "41": 10.08578, + "42": 10.17477, + "43": 9.72808, + "44": 9.88259, + "45": 9.73928, + "46": 9.72089, + "47": 10.08331, + "48": 9.75236, + "49": 9.39319, + "50": 9.83763, + "51": 9.76227, + "52": 9.65392, + "53": 10.01615, + "54": 9.86954, + "55": 9.79741, + "56": 9.53493, + "57": 9.36542, + "58": 9.75262, + "59": 9.47646, + "60": 9.40405, + "61": 9.59177, + "62": 9.91025, + "63": 9.24076, + "64": 9.67743, + "65": 8.79642, + "66": 9.60596, + "67": 9.24293, + "68": 9.70484, + "69": 9.70761, + "70": 9.6593, + "71": 9.50656, + "72": 9.45783, + "73": 9.38618, + "74": 8.79419, + "75": 9.32199, + "76": 8.92809, + "77": 9.99487, + "78": 9.63099, + "79": 9.26681, + "80": 9.29137, + "81": 9.3784, + "82": 9.61005, + "83": 9.21151, + "84": 9.33734, + "85": 9.52725, + "86": 8.94554, + "87": 9.49814, + "88": 9.67805, + "89": 9.49474, + "90": 9.75072, + "91": 9.22892, + "92": 9.2536, + "93": 8.96171, + "94": 8.68904, + "95": 9.43512, + "96": 9.39887, + "97": 9.19156, + "98": 9.57198, + "99": 8.75689, + "100": 9.29231 } }, "num-zeros": { @@ -111,106 +111,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 22750380.0, - "2": 22952988.0, - "3": 22604404.0, + "1": 22750416.0, + "2": 22953082.0, + "3": 22604428.0, "4": 23266260.0, - "5": 22735464.0, - "6": 23061776.0, - "7": 22793284.0, - "8": 22960752.0, - "9": 22865586.0, - "10": 22950244.0, - "11": 22499508.0, - "12": 22455914.0, + "5": 22735586.0, + "6": 23061766.0, + "7": 22793244.0, + "8": 22960824.0, + "9": 22865530.0, + "10": 22950258.0, + "11": 22499560.0, + "12": 22456028.0, "13": 22948128.0, - "14": 22384492.0, - "15": 22846200.0, - "16": 22856740.0, - "17": 22836348.0, - "18": 22590092.0, - "19": 22626980.0, - "20": 22712346.0, - "21": 22762584.0, - "22": 22816872.0, - "23": 22545200.0, - "24": 22794424.0, - "25": 22841840.0, - "26": 22549600.0, - "27": 22464668.0, - "28": 22453636.0, - "29": 22534564.0, - "30": 22636080.0, - "31": 22989416.0, - "32": 22593964.0, - "33": 22565836.0, - "34": 22855482.0, - "35": 22813636.0, - "36": 22595464.0, - "37": 22499300.0, - "38": 22926232.0, - "39": 22825020.0, - "40": 22675796.0, - "41": 22671596.0, - "42": 22682664.0, - "43": 23014302.0, - "44": 22765298.0, - "45": 22680068.0, - "46": 22911564.0, - "47": 23691886.0, - "48": 24003214.0, - "49": 23786576.0, - "50": 22930692.0, - "51": 23866304.0, - "52": 23807258.0, - "53": 24007416.0, - "54": 23916884.0, - "55": 23571264.0, - "56": 23954262.0, - "57": 24211706.0, - "58": 23914568.0, - "59": 23771788.0, - "60": 23813668.0, - "61": 23795444.0, - "62": 23739602.0, - "63": 23917932.0, - "64": 23895652.0, - "65": 24147024.0, - "66": 23795416.0, - "67": 23983764.0, - "68": 23671776.0, - "69": 23647440.0, - "70": 23907516.0, - "71": 23816848.0, - "72": 23723142.0, - "73": 22754780.0, - "74": 25180576.0, - "75": 24143882.0, - "76": 23976376.0, - "77": 22260324.0, - "78": 23862220.0, - "79": 23806848.0, - "80": 23773316.0, - "81": 25020610.0, - "82": 23858356.0, - "83": 23911948.0, - "84": 25143954.0, - "85": 23774768.0, - "86": 24201996.0, - "87": 23801536.0, - "88": 23701450.0, - "89": 22506106.0, - "90": 23014912.0, - "91": 22789294.0, - "92": 24923556.0, - "93": 23721120.0, - "94": 23993696.0, - "95": 24140284.0, - "96": 23909332.0, - "97": 23668404.0, - "98": 24881320.0, - "99": 23981986.0, - "100": 24101576.0 + "14": 22384386.0, + "15": 22846164.0, + "16": 22856836.0, + "17": 22836302.0, + "18": 22590230.0, + "19": 22627008.0, + "20": 22712328.0, + "21": 22762572.0, + "22": 22816968.0, + "23": 22545116.0, + "24": 22794412.0, + "25": 22841892.0, + "26": 22549748.0, + "27": 22464736.0, + "28": 22453558.0, + "29": 22534574.0, + "30": 22636124.0, + "31": 22989468.0, + "32": 22594006.0, + "33": 22565844.0, + "34": 22855548.0, + "35": 22813680.0, + "36": 22595384.0, + "37": 22499264.0, + "38": 22926248.0, + "39": 22825060.0, + "40": 22675740.0, + "41": 22671636.0, + "42": 22682170.0, + "43": 23014410.0, + "44": 22765956.0, + "45": 22679616.0, + "46": 22915056.0, + "47": 23691902.0, + "48": 24003302.0, + "49": 23786600.0, + "50": 22930610.0, + "51": 23866302.0, + "52": 23807132.0, + "53": 24007534.0, + "54": 23916916.0, + "55": 23571376.0, + "56": 23954154.0, + "57": 23162648.0, + "58": 23914496.0, + "59": 23772014.0, + "60": 23813700.0, + "61": 23795584.0, + "62": 23739556.0, + "63": 23917516.0, + "64": 23895568.0, + "65": 24147486.0, + "66": 23794728.0, + "67": 25032894.0, + "68": 23674160.0, + "69": 23647148.0, + "70": 23907500.0, + "71": 23816812.0, + "72": 23723560.0, + "73": 22754164.0, + "74": 25181308.0, + "75": 24145352.0, + "76": 23976550.0, + "77": 22260572.0, + "78": 23868556.0, + "79": 23806816.0, + "80": 23773118.0, + "81": 25020394.0, + "82": 23862088.0, + "83": 23911856.0, + "84": 25143888.0, + "85": 23774598.0, + "86": 24201752.0, + "87": 24850632.0, + "88": 23700648.0, + "89": 22504018.0, + "90": 23011328.0, + "91": 22791520.0, + "92": 23874620.0, + "93": 23720454.0, + "94": 23993140.0, + "95": 24140296.0, + "96": 23909576.0, + "97": 23668372.0, + "98": 24881608.0, + "99": 23981058.0, + "100": 24101548.0 } }, "mem-allocated-bytes": { @@ -218,106 +218,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 815727104.0, - "2": 804506624.0, - "3": 768707584.0, - "4": 935311360.0, - "5": 938555392.0, - "6": 935311360.0, - "7": 935311360.0, - "8": 938555392.0, - "9": 935311360.0, - "10": 935311360.0, - "11": 935311360.0, - "12": 938380288.0, - "13": 941193216.0, - "14": 935311360.0, - "15": 938637312.0, - "16": 941229056.0, - "17": 935311360.0, - "18": 935311360.0, - "19": 938677248.0, - "20": 935311360.0, - "21": 935311360.0, - "22": 938555392.0, - "23": 935311360.0, - "24": 935311360.0, - "25": 938298368.0, - "26": 942405632.0, - "27": 935311360.0, - "28": 935311360.0, - "29": 941799424.0, - "30": 935311360.0, - "31": 938982400.0, - "32": 941586432.0, - "33": 935311360.0, - "34": 935311360.0, - "35": 941193216.0, - "36": 935311360.0, - "37": 938044416.0, - "38": 935311360.0, - "39": 935311360.0, - "40": 938053632.0, - "41": 941635584.0, - "42": 935311360.0, - "43": 941193216.0, - "44": 935311360.0, - "45": 935311360.0, - "46": 938274816.0, - "47": 935311360.0, - "48": 935311360.0, - "49": 935311360.0, - "50": 938555392.0, - "51": 937949184.0, - "52": 941804544.0, - "53": 935311360.0, - "54": 941193216.0, - "55": 935311360.0, - "56": 935311360.0, - "57": 937949184.0, - "58": 935311360.0, - "59": 935311360.0, - "60": 938555392.0, - "61": 935311360.0, - "62": 937949184.0, - "63": 938555392.0, - "64": 935311360.0, - "65": 935311360.0, - "66": 937949184.0, - "67": 941193216.0, - "68": 935311360.0, - "69": 935311360.0, - "70": 937949184.0, - "71": 941193216.0, - "72": 935311360.0, - "73": 935311360.0, - "74": 935311360.0, - "75": 937949184.0, - "76": 938555392.0, - "77": 935311360.0, - "78": 935311360.0, - "79": 937949184.0, - "80": 941326336.0, - "81": 935311360.0, - "82": 937949184.0, - "83": 941193216.0, - "84": 935311360.0, - "85": 935311360.0, - "86": 937949184.0, - "87": 935311360.0, - "88": 935311360.0, - "89": 937949184.0, - "90": 941193216.0, - "91": 935311360.0, - "92": 935311360.0, - "93": 935311360.0, - "94": 937949184.0, - "95": 941193216.0, - "96": 935311360.0, - "97": 935311360.0, - "98": 938555392.0, - "99": 935311360.0, - "100": 935311360.0 + "1": 811164160.0, + "2": 782060544.0, + "3": 815729664.0, + "4": 934957056.0, + "5": 934957056.0, + "6": 937594880.0, + "7": 934957056.0, + "8": 937594880.0, + "9": 934957056.0, + "10": 934957056.0, + "11": 941778944.0, + "12": 934957056.0, + "13": 938604544.0, + "14": 934957056.0, + "15": 937836544.0, + "16": 934957056.0, + "17": 934957056.0, + "18": 938348544.0, + "19": 934957056.0, + "20": 934957056.0, + "21": 940920832.0, + "22": 934957056.0, + "23": 941666304.0, + "24": 934957056.0, + "25": 938201088.0, + "26": 934957056.0, + "27": 937594880.0, + "28": 934957056.0, + "29": 934957056.0, + "30": 941882368.0, + "31": 934957056.0, + "32": 934957056.0, + "33": 941342720.0, + "34": 934957056.0, + "35": 941571072.0, + "36": 937594880.0, + "37": 934957056.0, + "38": 938250240.0, + "39": 934957056.0, + "40": 937873408.0, + "41": 941204480.0, + "42": 934957056.0, + "43": 934957056.0, + "44": 934957056.0, + "45": 934957056.0, + "46": 934957056.0, + "47": 940838912.0, + "48": 934957056.0, + "49": 937786368.0, + "50": 934957056.0, + "51": 934957056.0, + "52": 934957056.0, + "53": 934957056.0, + "54": 938201088.0, + "55": 934957056.0, + "56": 937594880.0, + "57": 934957056.0, + "58": 934957056.0, + "59": 940838912.0, + "60": 934957056.0, + "61": 938201088.0, + "62": 934957056.0, + "63": 938528768.0, + "64": 934957056.0, + "65": 934957056.0, + "66": 940838912.0, + "67": 934957056.0, + "68": 934957056.0, + "69": 940838912.0, + "70": 934957056.0, + "71": 934957056.0, + "72": 941314048.0, + "73": 934957056.0, + "74": 937594880.0, + "75": 934957056.0, + "76": 934957056.0, + "77": 940838912.0, + "78": 934957056.0, + "79": 937594880.0, + "80": 940838912.0, + "81": 934957056.0, + "82": 937594880.0, + "83": 934957056.0, + "84": 934957056.0, + "85": 940838912.0, + "86": 934957056.0, + "87": 934957056.0, + "88": 940838912.0, + "89": 934957056.0, + "90": 937594880.0, + "91": 940838912.0, + "92": 934957056.0, + "93": 938201088.0, + "94": 934957056.0, + "95": 934957056.0, + "96": 940838912.0, + "97": 934957056.0, + "98": 934957056.0, + "99": 940838912.0, + "100": 934957056.0 } }, "mem-max-allocated-bytes": { @@ -325,106 +325,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 966314496.0, - "2": 1175641088.0, - "3": 1179495424.0, - "4": 1250092544.0, - "5": 1250092544.0, - "6": 1250357248.0, - "7": 1250357248.0, - "8": 1250946048.0, - "9": 1250946048.0, - "10": 1250946048.0, - "11": 1250946048.0, - "12": 1250946048.0, - "13": 1250946048.0, - "14": 1250946048.0, - "15": 1250946048.0, - "16": 1250946048.0, - "17": 1250946048.0, - "18": 1250946048.0, - "19": 1250946048.0, - "20": 1250946048.0, - "21": 1250946048.0, - "22": 1251286016.0, - "23": 1251286016.0, - "24": 1251655168.0, - "25": 1251655168.0, - "26": 1251655168.0, - "27": 1251655168.0, - "28": 1251655168.0, - "29": 1251655168.0, - "30": 1251655168.0, - "31": 1251655168.0, - "32": 1251655168.0, - "33": 1251655168.0, - "34": 1251655168.0, - "35": 1251655168.0, - "36": 1251655168.0, - "37": 1251655168.0, - "38": 1251655168.0, - "39": 1251655168.0, - "40": 1251655168.0, - "41": 1251655168.0, - "42": 1251655168.0, - "43": 1251655168.0, - "44": 1251655168.0, - "45": 1251655168.0, - "46": 1251655168.0, - "47": 1251655168.0, - "48": 1251655168.0, - "49": 1251655168.0, - "50": 1251655168.0, - "51": 1251655168.0, - "52": 1251655168.0, - "53": 1251655168.0, - "54": 1251655168.0, - "55": 1251655168.0, - "56": 1251655168.0, - "57": 1251655168.0, - "58": 1251655168.0, - "59": 1251655168.0, - "60": 1251655168.0, - "61": 1251655168.0, - "62": 1251655168.0, - "63": 1251655168.0, - "64": 1251655168.0, - "65": 1251655168.0, - "66": 1251655168.0, - "67": 1251655168.0, - "68": 1251655168.0, - "69": 1251655168.0, - "70": 1251655168.0, - "71": 1251655168.0, - "72": 1251655168.0, - "73": 1251655168.0, - "74": 1251655168.0, - "75": 1251655168.0, - "76": 1251655168.0, - "77": 1251655168.0, - "78": 1251655168.0, - "79": 1251655168.0, - "80": 1251655168.0, - "81": 1251655168.0, - "82": 1251655168.0, - "83": 1251655168.0, - "84": 1251655168.0, - "85": 1251655168.0, - "86": 1251655168.0, - "87": 1251655168.0, - "88": 1251655168.0, - "89": 1251655168.0, - "90": 1251655168.0, - "91": 1251655168.0, - "92": 1251655168.0, - "93": 1251655168.0, - "94": 1251655168.0, - "95": 1251655168.0, - "96": 1251655168.0, - "97": 1251655168.0, - "98": 1251655168.0, - "99": 1251655168.0, - "100": 1251655168.0 + "1": 952047616.0, + "2": 1164506112.0, + "3": 1172587008.0, + "4": 1247328256.0, + "5": 1249284608.0, + "6": 1249284608.0, + "7": 1249828864.0, + "8": 1250309632.0, + "9": 1250309632.0, + "10": 1252186624.0, + "11": 1252186624.0, + "12": 1252186624.0, + "13": 1252186624.0, + "14": 1252186624.0, + "15": 1252186624.0, + "16": 1252186624.0, + "17": 1252186624.0, + "18": 1252186624.0, + "19": 1252186624.0, + "20": 1252186624.0, + "21": 1252186624.0, + "22": 1252186624.0, + "23": 1252186624.0, + "24": 1252186624.0, + "25": 1252821504.0, + "26": 1252821504.0, + "27": 1252821504.0, + "28": 1252821504.0, + "29": 1252821504.0, + "30": 1252821504.0, + "31": 1252821504.0, + "32": 1252821504.0, + "33": 1252821504.0, + "34": 1252821504.0, + "35": 1252821504.0, + "36": 1252821504.0, + "37": 1252821504.0, + "38": 1252821504.0, + "39": 1252821504.0, + "40": 1252821504.0, + "41": 1252821504.0, + "42": 1252821504.0, + "43": 1252821504.0, + "44": 1252821504.0, + "45": 1252821504.0, + "46": 1252821504.0, + "47": 1252821504.0, + "48": 1252821504.0, + "49": 1252821504.0, + "50": 1252821504.0, + "51": 1252821504.0, + "52": 1252821504.0, + "53": 1252821504.0, + "54": 1252821504.0, + "55": 1252821504.0, + "56": 1252821504.0, + "57": 1252821504.0, + "58": 1252821504.0, + "59": 1252821504.0, + "60": 1252821504.0, + "61": 1252821504.0, + "62": 1252821504.0, + "63": 1252821504.0, + "64": 1252821504.0, + "65": 1252821504.0, + "66": 1252821504.0, + "67": 1252821504.0, + "68": 1252821504.0, + "69": 1252821504.0, + "70": 1252821504.0, + "71": 1252821504.0, + "72": 1252821504.0, + "73": 1252821504.0, + "74": 1252821504.0, + "75": 1252821504.0, + "76": 1252821504.0, + "77": 1252821504.0, + "78": 1252821504.0, + "79": 1252821504.0, + "80": 1252821504.0, + "81": 1252821504.0, + "82": 1252821504.0, + "83": 1252821504.0, + "84": 1252821504.0, + "85": 1252821504.0, + "86": 1252821504.0, + "87": 1252821504.0, + "88": 1252821504.0, + "89": 1252821504.0, + "90": 1252821504.0, + "91": 1252821504.0, + "92": 1252821504.0, + "93": 1252821504.0, + "94": 1252821504.0, + "95": 1252821504.0, + "96": 1252821504.0, + "97": 1252821504.0, + "98": 1252821504.0, + "99": 1252821504.0, + "100": 1252821504.0 } }, "mtp_1 loss": { @@ -432,106 +432,106 @@ "end_step": 100, "step_interval": 1, "values": { - "1": 10.89507, - "2": 10.90521, - "3": 10.90891, - "4": 10.86405, - "5": 10.91795, - "6": 10.92308, - "7": 10.90092, - "8": 10.8905, - "9": 10.90564, - "10": 10.88627, - "11": 10.93361, - "12": 10.91588, - "13": 10.90939, - "14": 10.9234, - "15": 10.89846, - "16": 10.90396, - "17": 10.88447, - "18": 10.90582, - "19": 10.90022, - "20": 10.88707, - "21": 10.88163, - "22": 10.85466, - "23": 10.89315, - "24": 10.87384, - "25": 10.862, - "26": 10.87173, - "27": 10.88833, - "28": 10.87871, - "29": 10.888, - "30": 10.87291, - "31": 10.80089, - "32": 10.87349, - "33": 10.88255, - "34": 10.83851, - "35": 10.85278, - "36": 10.84955, - "37": 10.85859, - "38": 10.8318, - "39": 10.86269, - "40": 10.82201, - "41": 10.8292, - "42": 10.84807, - "43": 10.79058, - "44": 10.8144, - "45": 10.78763, - "46": 10.77993, - "47": 10.83153, - "48": 10.78853, - "49": 10.7101, - "50": 10.7689, - "51": 10.76353, - "52": 10.7394, - "53": 10.8014, - "54": 10.77742, - "55": 10.76695, - "56": 10.71545, - "57": 10.67279, - "58": 10.75064, - "59": 10.69451, - "60": 10.66153, - "61": 10.69547, - "62": 10.77153, - "63": 10.61306, - "64": 10.70871, - "65": 10.49004, - "66": 10.66861, - "67": 10.58123, - "68": 10.6819, - "69": 10.6866, - "70": 10.6736, - "71": 10.64397, - "72": 10.6101, - "73": 10.56737, - "74": 10.38578, - "75": 10.53646, - "76": 10.40232, - "77": 10.75426, - "78": 10.62498, - "79": 10.47825, - "80": 10.47326, - "81": 10.51209, - "82": 10.58547, - "83": 10.43904, - "84": 10.45482, - "85": 10.55079, - "86": 10.2791, - "87": 10.51114, + "1": 10.89522, + "2": 10.9055, + "3": 10.90926, + "4": 10.86359, + "5": 10.91792, + "6": 10.92358, + "7": 10.90096, + "8": 10.89059, + "9": 10.90472, + "10": 10.88636, + "11": 10.93327, + "12": 10.9162, + "13": 10.90929, + "14": 10.92312, + "15": 10.89847, + "16": 10.90354, + "17": 10.88436, + "18": 10.90591, + "19": 10.90018, + "20": 10.88683, + "21": 10.88144, + "22": 10.85482, + "23": 10.89296, + "24": 10.87381, + "25": 10.86183, + "26": 10.87181, + "27": 10.88852, + "28": 10.87809, + "29": 10.88756, + "30": 10.87302, + "31": 10.80231, + "32": 10.8733, + "33": 10.88183, + "34": 10.83887, + "35": 10.85285, + "36": 10.84921, + "37": 10.8585, + "38": 10.83132, + "39": 10.86308, + "40": 10.82192, + "41": 10.82924, + "42": 10.84808, + "43": 10.79031, + "44": 10.81434, + "45": 10.7876, + "46": 10.78025, + "47": 10.83109, + "48": 10.78877, + "49": 10.71082, + "50": 10.76875, + "51": 10.76425, + "52": 10.73967, + "53": 10.80158, + "54": 10.77723, + "55": 10.76509, + "56": 10.71616, + "57": 10.67333, + "58": 10.75073, + "59": 10.69598, + "60": 10.66191, + "61": 10.69602, + "62": 10.77175, + "63": 10.61376, + "64": 10.70915, + "65": 10.49096, + "66": 10.66855, + "67": 10.58058, + "68": 10.68188, + "69": 10.68673, + "70": 10.67269, + "71": 10.64391, + "72": 10.60976, + "73": 10.56733, + "74": 10.38542, + "75": 10.5362, + "76": 10.40357, + "77": 10.75442, + "78": 10.6257, + "79": 10.47852, + "80": 10.47397, + "81": 10.51354, + "82": 10.58546, + "83": 10.43887, + "84": 10.4543, + "85": 10.55056, + "86": 10.27842, + "87": 10.51128, "88": 10.60507, - "89": 10.50871, - "90": 10.60312, - "91": 10.38605, - "92": 10.38222, - "93": 10.2369, - "94": 10.08056, - "95": 10.42693, - "96": 10.44672, - "97": 10.31582, - "98": 10.49658, - "99": 10.04836, - "100": 10.32882 + "89": 10.50847, + "90": 10.60254, + "91": 10.38502, + "92": 10.38116, + "93": 10.23534, + "94": 10.07761, + "95": 10.42745, + "96": 10.4469, + "97": 10.31557, + "98": 10.49618, + "99": 10.04971, + "100": 10.32835 } }, "iteration-time": { @@ -540,105 +540,105 @@ "step_interval": 1, "values": { "1": "nan", - "2": 25.3001, - "3": 2.37449, - "4": 6.45526, - "5": 1.25943, - "6": 1.2406, - "7": 1.2439, - "8": 1.23826, - "9": 1.23638, - "10": 1.23645, - "11": 1.28556, - "12": 1.24755, - "13": 1.24428, - "14": 1.23765, - "15": 1.23841, - "16": 1.23701, - "17": 1.23629, - "18": 1.23686, - "19": 1.2385, - "20": 1.24373, - "21": 1.23809, - "22": 1.24616, - "23": 1.23693, - "24": 1.23357, - "25": 1.23233, - "26": 1.23893, - "27": 1.23811, - "28": 1.23473, - "29": 1.23994, - "30": 1.24448, - "31": 1.23919, - "32": 1.23674, - "33": 1.23353, - "34": 1.24176, - "35": 1.2521, - "36": 1.25339, - "37": 1.23958, - "38": 1.2423, - "39": 1.2407, - "40": 1.23768, - "41": 1.23529, - "42": 1.26123, - "43": 1.24517, - "44": 1.22842, - "45": 1.22883, - "46": 1.24531, - "47": 1.23707, - "48": 1.23315, - "49": 1.23288, - "50": 1.23004, - "51": 1.28252, - "52": 1.22628, - "53": 1.22943, - "54": 1.28301, - "55": 1.22868, - "56": 1.23725, - "57": 1.23032, - "58": 1.23924, - "59": 1.24181, - "60": 1.24927, - "61": 1.24821, - "62": 1.25233, - "63": 1.2192, - "64": 1.23724, - "65": 1.24262, - "66": 1.23425, - "67": 1.22341, - "68": 1.22875, - "69": 1.2369, - "70": 1.23637, - "71": 1.22418, - "72": 1.23267, - "73": 1.22945, - "74": 1.22797, - "75": 1.21647, - "76": 1.22687, - "77": 1.23339, - "78": 1.23518, - "79": 1.22748, - "80": 1.24985, - "81": 1.23345, - "82": 1.24687, - "83": 1.23815, - "84": 1.21836, - "85": 1.23899, - "86": 1.23973, - "87": 1.2356, - "88": 1.23475, - "89": 1.23433, - "90": 1.23924, - "91": 1.22203, - "92": 1.22429, - "93": 1.23054, - "94": 1.23476, - "95": 1.23033, - "96": 1.23182, - "97": 1.2322, - "98": 1.2363, - "99": 1.2319, - "100": 1.2316 + "2": 26.77445, + "3": 2.3938, + "4": 6.40417, + "5": 1.26885, + "6": 1.24907, + "7": 1.25723, + "8": 1.25657, + "9": 1.33889, + "10": 1.25683, + "11": 1.25267, + "12": 1.25561, + "13": 1.25246, + "14": 1.24761, + "15": 1.24643, + "16": 1.24727, + "17": 1.24292, + "18": 1.23753, + "19": 1.23993, + "20": 1.24596, + "21": 1.26164, + "22": 1.24772, + "23": 1.25285, + "24": 1.2468, + "25": 1.26172, + "26": 1.24828, + "27": 1.257, + "28": 1.24659, + "29": 1.24457, + "30": 1.2407, + "31": 1.23668, + "32": 1.24315, + "33": 1.24716, + "34": 1.25855, + "35": 1.26046, + "36": 1.38644, + "37": 1.25328, + "38": 1.25515, + "39": 1.25384, + "40": 1.24552, + "41": 1.2485, + "42": 1.262, + "43": 1.25617, + "44": 1.25919, + "45": 1.26199, + "46": 1.25428, + "47": 1.24471, + "48": 1.24307, + "49": 1.24679, + "50": 1.24891, + "51": 1.28817, + "52": 1.25521, + "53": 1.24527, + "54": 1.24518, + "55": 1.2468, + "56": 1.24621, + "57": 1.25047, + "58": 1.24867, + "59": 1.26079, + "60": 1.24775, + "61": 1.24346, + "62": 1.25131, + "63": 1.25566, + "64": 1.24589, + "65": 1.2553, + "66": 1.2401, + "67": 1.22827, + "68": 1.24522, + "69": 1.24119, + "70": 1.24697, + "71": 1.23528, + "72": 1.24446, + "73": 1.24667, + "74": 1.24742, + "75": 1.24336, + "76": 1.24368, + "77": 1.24445, + "78": 1.24629, + "79": 1.24259, + "80": 1.24021, + "81": 1.24641, + "82": 1.24965, + "83": 1.25036, + "84": 1.24484, + "85": 1.2362, + "86": 1.24268, + "87": 1.24052, + "88": 1.24879, + "89": 1.24131, + "90": 1.24121, + "91": 1.24084, + "92": 1.24705, + "93": 1.24233, + "94": 1.24355, + "95": 1.24335, + "96": 1.2445, + "97": 1.23885, + "98": 1.24016, + "99": 1.23994, + "100": 1.24761 } } } \ No newline at end of file From febfaea9adb7b7c44d2a6c80c624d8d5e19b1ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 4 Mar 2026 12:45:43 +0000 Subject: [PATCH 10/12] update mr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 548 +++++++++--------- .../golden_values_dev_dgx_h100.json | 376 ++++++------ .../golden_values_dev_dgx_h100.json | 487 ++++++++-------- .../golden_values_dev_dgx_h100.json | 2 +- .../golden_values_dev_dgx_h100.json | 2 +- .../golden_values_dev_dgx_h100.json | 2 +- 6 files changed, 710 insertions(+), 707 deletions(-) diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json index 1f980642e72..6b0ff4810d1 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -5,55 +5,55 @@ "step_interval": 1, "values": { "1": 11.06701, - "2": 11.06021, - "3": 10.20005, - "4": 9.75939, - "5": 10.37343, - "6": 10.19088, - "7": 11.31502, - "8": 12.43285, - "9": 12.01458, - "10": 11.45726, - "11": 11.58577, - "12": 11.42409, - "13": 12.09212, - "14": 12.17488, - "15": 11.97062, - "16": 11.88313, - "17": 11.93875, - "18": 12.32174, - "19": 11.67153, - "20": 12.19814, - "21": 11.54287, - "22": 9.80848, - "23": 9.97818, - "24": 9.71264, - "25": 10.50518, - "26": 9.97363, - "27": 10.00439, - "28": 10.19053, - "29": 10.46412, - "30": 10.66579, - "31": 10.85612, - "32": 10.74726, - "33": 10.76846, - "34": 10.76858, - "35": 10.76094, - "36": 10.53668, - "37": 10.77826, - "38": 10.71275, - "39": 10.82689, - "40": 10.72817, - "41": 10.68451, - "42": 10.18244, - "43": 10.79574, - "44": 10.8553, - "45": 10.43881, - "46": 10.73617, - "47": 10.80059, - "48": 10.8248, - "49": 10.79974, - "50": 10.54592 + "2": 11.06037, + "3": 10.16112, + "4": 9.78383, + "5": 10.39023, + "6": 9.45732, + "7": 10.35644, + "8": 9.6318, + "9": 9.53442, + "10": 9.31949, + "11": 10.37146, + "12": 10.36963, + "13": 9.89309, + "14": 10.08171, + "15": 10.1507, + "16": 10.24205, + "17": 10.23159, + "18": 10.01535, + "19": 10.25412, + "20": 10.185, + "21": 10.23852, + "22": 9.98018, + "23": 10.31047, + "24": 10.18149, + "25": 10.4785, + "26": 10.22954, + "27": 10.2621, + "28": 10.33422, + "29": 10.28186, + "30": 10.33095, + "31": 10.33642, + "32": 9.88287, + "33": 10.12143, + "34": 10.41079, + "35": 10.31677, + "36": 9.9553, + "37": 10.49378, + "38": 10.31351, + "39": 10.42098, + "40": NaN, + "41": NaN, + "42": NaN, + "43": NaN, + "44": NaN, + "45": NaN, + "46": NaN, + "47": NaN, + "48": NaN, + "49": NaN, + "50": NaN } }, "num-zeros": { @@ -61,12 +61,12 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47165260.0, - "2": 46897544.0, - "3": 1815580672.0, + "1": 47165224.0, + "2": 46897548.0, + "3": 52696416.0, "4": 1815580672.0, - "5": 135092176.0, - "6": 64130344.0, + "5": 1815580672.0, + "6": 1815580672.0, "7": 1815580672.0, "8": 1815580672.0, "9": 1815580672.0, @@ -81,36 +81,36 @@ "18": 1815580672.0, "19": 1815580672.0, "20": 1815580672.0, - "21": 386908928.0, + "21": 1815580672.0, "22": 1815580672.0, "23": 1815580672.0, "24": 1815580672.0, - "25": 534944416.0, - "26": 616509824.0, - "27": 914967936.0, - "28": 937646336.0, - "29": 924778048.0, + "25": 1815580672.0, + "26": 1815580672.0, + "27": 1815580672.0, + "28": 1815580672.0, + "29": 1815580672.0, "30": 1815580672.0, - "31": 867456256.0, - "32": 838028800.0, - "33": 969112000.0, - "34": 503505536.0, - "35": 562247616.0, - "36": 658155200.0, - "37": 542371712.0, + "31": 1815580672.0, + "32": 1815580672.0, + "33": 1815580672.0, + "34": 1815580672.0, + "35": 1815580672.0, + "36": 1815580672.0, + "37": 1815580672.0, "38": 1815580672.0, - "39": 1815580672.0, - "40": 1815580672.0, - "41": 1815580672.0, - "42": 1815580672.0, - "43": 1815580672.0, - "44": 1815580672.0, - "45": 1815580672.0, - "46": 1815580672.0, - "47": 1815580672.0, - "48": 1815580672.0, - "49": 490314144.0, - "50": 667091136.0 + "39": 522244320.0, + "40": 1191764992.0, + "41": 1192089600.0, + "42": 1193279488.0, + "43": 1191941120.0, + "44": 1193036800.0, + "45": 1191983104.0, + "46": 1192171520.0, + "47": 1192939520.0, + "48": 1192237056.0, + "49": 1191771136.0, + "50": 1192407040.0 } }, "mem-allocated-bytes": { @@ -121,53 +121,53 @@ "1": 5283603968.0, "2": 5283808768.0, "3": 5284011520.0, - "4": 5284214272.0, - "5": 5284417024.0, - "6": 5283504640.0, - "7": 5283707392.0, - "8": 5283910144.0, - "9": 5284112896.0, - "10": 5284315648.0, - "11": 5284518400.0, - "12": 5283504640.0, - "13": 5283707392.0, - "14": 5283910144.0, - "15": 5284112896.0, - "16": 5284315648.0, - "17": 5284518400.0, - "18": 5284721152.0, - "19": 5284923904.0, - "20": 5285126656.0, - "21": 5285329408.0, - "22": 5285532160.0, - "23": 5285734912.0, - "24": 5285937664.0, - "25": 5286140416.0, - "26": 5286343168.0, - "27": 5286545920.0, - "28": 5286748672.0, - "29": 5286951424.0, - "30": 5287154176.0, - "31": 5287356928.0, - "32": 5287559680.0, - "33": 5287762432.0, - "34": 5287965184.0, - "35": 5288167936.0, - "36": 5288370688.0, - "37": 5288573440.0, - "38": 5288776192.0, - "39": 5288978944.0, - "40": 5289181696.0, - "41": 5289384448.0, - "42": 5289587200.0, - "43": 5289789952.0, - "44": 5289992704.0, - "45": 5290195456.0, - "46": 5290398208.0, - "47": 5290600960.0, - "48": 5290803712.0, - "49": 5291006464.0, - "50": 5291209216.0 + "4": 5283606016.0, + "5": 5283808768.0, + "6": 5284011520.0, + "7": 5283504640.0, + "8": 5283707392.0, + "9": 5283910144.0, + "10": 5284112896.0, + "11": 5284315648.0, + "12": 5284518400.0, + "13": 5284721152.0, + "14": 5284923904.0, + "15": 5285126656.0, + "16": 5285329408.0, + "17": 5285532160.0, + "18": 5285734912.0, + "19": 5285937664.0, + "20": 5286140416.0, + "21": 5286343168.0, + "22": 5286545920.0, + "23": 5286748672.0, + "24": 5286951424.0, + "25": 5287154176.0, + "26": 5287356928.0, + "27": 5287559680.0, + "28": 5287762432.0, + "29": 5287965184.0, + "30": 5288167936.0, + "31": 5288370688.0, + "32": 5288573440.0, + "33": 5288776192.0, + "34": 5288978944.0, + "35": 5289181696.0, + "36": 5289384448.0, + "37": 5289587200.0, + "38": 5289789952.0, + "39": 5289992704.0, + "40": 5290195456.0, + "41": 5290398208.0, + "42": 5290600960.0, + "43": 5290803712.0, + "44": 5291006464.0, + "45": 5291209216.0, + "46": 5291411968.0, + "47": 5291614720.0, + "48": 5291817472.0, + "49": 5292020224.0, + "50": 5292222976.0 } }, "mem-max-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 5283606528.0, - "2": 8252578304.0, - "3": 8252578304.0, - "4": 8252578304.0, - "5": 8253250048.0, - "6": 8253250048.0, - "7": 8253250048.0, - "8": 8253250048.0, - "9": 8253250048.0, - "10": 8253250048.0, - "11": 8253250048.0, - "12": 8253250048.0, - "13": 8253250048.0, - "14": 8253250048.0, - "15": 8253250048.0, - "16": 8253250048.0, - "17": 8253250048.0, - "18": 8253250048.0, - "19": 8253250048.0, - "20": 8253250048.0, - "21": 8253250048.0, - "22": 8259284480.0, - "23": 8319884800.0, - "24": 8358525440.0, - "25": 8362294784.0, - "26": 8362294784.0, - "27": 8362294784.0, - "28": 8362294784.0, - "29": 8362294784.0, - "30": 8364191232.0, - "31": 8364191232.0, - "32": 8364191232.0, - "33": 8364191232.0, - "34": 8364191232.0, - "35": 8364191232.0, - "36": 8364191232.0, - "37": 8364191232.0, - "38": 8377395712.0, - "39": 8377395712.0, - "40": 8377395712.0, - "41": 8377395712.0, - "42": 8377395712.0, - "43": 8377395712.0, - "44": 8377395712.0, - "45": 8377395712.0, - "46": 8377395712.0, - "47": 8377395712.0, - "48": 8377395712.0, - "49": 8377395712.0, - "50": 8377395712.0 + "2": 8252500480.0, + "3": 8252500480.0, + "4": 8252500480.0, + "5": 8252500480.0, + "6": 8252500480.0, + "7": 8252500480.0, + "8": 8252500480.0, + "9": 8252500480.0, + "10": 8252500480.0, + "11": 8252500480.0, + "12": 8252500480.0, + "13": 8252500480.0, + "14": 8252500480.0, + "15": 8252500480.0, + "16": 8252500480.0, + "17": 8252500480.0, + "18": 8252500480.0, + "19": 8252500480.0, + "20": 8252500480.0, + "21": 8252500480.0, + "22": 8252500480.0, + "23": 8252500480.0, + "24": 8252500480.0, + "25": 8252500480.0, + "26": 8252500480.0, + "27": 8252500480.0, + "28": 8252500480.0, + "29": 8252500480.0, + "30": 8252500480.0, + "31": 8252500480.0, + "32": 8252500480.0, + "33": 8252500480.0, + "34": 8252500480.0, + "35": 8252500480.0, + "36": 8252500480.0, + "37": 8252500480.0, + "38": 8252500480.0, + "39": 8252500480.0, + "40": 8252500480.0, + "41": 8252500480.0, + "42": 8252500480.0, + "43": 8252500480.0, + "44": 8252500480.0, + "45": 8252500480.0, + "46": 8252500480.0, + "47": 8252500480.0, + "48": 8252500480.0, + "49": 8252500480.0, + "50": 8252500480.0 } }, "mtp_1 loss": { @@ -232,56 +232,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.07397, - "2": 11.09265, - "3": 10.8306, - "4": 10.58342, - "5": 10.83769, - "6": 9.90459, - "7": 11.15094, - "8": 10.15193, - "9": 9.65551, - "10": 8.91334, - "11": 11.06242, - "12": 11.10352, - "13": 9.87343, - "14": 10.20717, - "15": 10.70026, - "16": 10.7834, - "17": 10.5832, - "18": 10.1899, - "19": 10.78252, - "20": 10.26742, - "21": 10.90867, - "22": 9.68739, - "23": 10.17903, - "24": 9.56283, - "25": 10.62358, - "26": 10.16437, - "27": 10.10325, - "28": 10.15042, - "29": 10.31854, - "30": 10.20663, - "31": 10.59866, - "32": 8.87951, - "33": 9.7619, - "34": 10.59317, - "35": 10.64661, - "36": 9.3348, - "37": 10.63017, - "38": 10.3672, - "39": 10.74352, - "40": 10.44713, - "41": 10.25471, - "42": 8.50737, - "43": 10.63395, - "44": 10.69426, - "45": 9.43185, - "46": 10.42516, - "47": 10.75514, - "48": 10.75581, - "49": 10.5988, - "50": 9.66264 + "1": 11.07409, + "2": 11.09257, + "3": 10.82808, + "4": 10.57952, + "5": 10.85266, + "6": 9.7486, + "7": 10.82034, + "8": 9.69336, + "9": 9.26195, + "10": 8.62357, + "11": 10.67211, + "12": 10.74251, + "13": 9.55087, + "14": 9.83348, + "15": 10.22948, + "16": 10.34842, + "17": 10.17443, + "18": 9.7341, + "19": 10.37609, + "20": 9.84685, + "21": 10.50319, + "22": 9.63799, + "23": 10.23636, + "24": 9.61855, + "25": 10.69239, + "26": 10.24419, + "27": 10.21012, + "28": 10.19063, + "29": 10.31516, + "30": 10.15036, + "31": 10.46955, + "32": 8.7988, + "33": 9.66792, + "34": 10.46876, + "35": 10.48208, + "36": 9.21407, + "37": 10.51651, + "38": 10.20751, + "39": 10.61554, + "40": NaN, + "41": NaN, + "42": NaN, + "43": NaN, + "44": NaN, + "45": NaN, + "46": NaN, + "47": NaN, + "48": NaN, + "49": NaN, + "50": NaN } }, "iteration-time": { @@ -290,55 +290,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 34.13201, - "3": 2.31389, - "4": 2.16978, - "5": 2.33783, - "6": 2.3455, - "7": 3.06265, - "8": 1.86983, - "9": 3.03958, - "10": 1.88627, - "11": 1.86417, - "12": 2.8165, - "13": 1.87594, - "14": 1.88355, - "15": 1.86624, - "16": 1.87284, - "17": 1.85967, - "18": 1.88925, - "19": 1.87467, - "20": 1.86371, - "21": 1.88039, - "22": 1.91965, - "23": 1.8694, - "24": 2.45414, - "25": 2.28676, - "26": 1.88528, - "27": 1.86545, - "28": 1.87283, - "29": 1.87365, - "30": 1.88033, - "31": 1.87038, - "32": 1.86674, - "33": 1.86959, - "34": 1.86841, - "35": 1.86552, - "36": 1.84979, - "37": 1.87455, - "38": 1.86713, - "39": 1.86261, - "40": 1.86376, - "41": 1.86336, - "42": 1.85522, - "43": 1.86429, - "44": 1.87047, - "45": 1.86475, - "46": 1.86422, - "47": 1.86044, - "48": 1.86089, - "49": 1.86676, - "50": 1.8645 + "2": 34.71722, + "3": 3.7158, + "4": 3.6007, + "5": 2.33234, + "6": 1.89037, + "7": 3.07449, + "8": 1.85828, + "9": 1.88095, + "10": 1.8882, + "11": 1.88039, + "12": 1.8594, + "13": 2.39001, + "14": 1.88477, + "15": 2.5685, + "16": 1.878, + "17": 1.86827, + "18": 1.88334, + "19": 1.85819, + "20": 1.85484, + "21": 1.87178, + "22": 1.87318, + "23": 1.85835, + "24": 1.90084, + "25": 1.87532, + "26": 1.87949, + "27": 1.87055, + "28": 1.85986, + "29": 1.87076, + "30": 1.88513, + "31": 1.87442, + "32": 1.87231, + "33": 1.86876, + "34": 1.8705, + "35": 1.8854, + "36": 1.86393, + "37": 1.86552, + "38": 1.86884, + "39": 1.87594, + "40": 2.11434, + "41": 1.87568, + "42": 1.89142, + "43": 1.89628, + "44": 1.89607, + "45": 1.90113, + "46": 1.89323, + "47": 1.89337, + "48": 1.89245, + "49": 1.89572, + "50": 1.89658 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json index e826ff174f4..fb0b106380f 100644 --- a/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading/golden_values_dev_dgx_h100.json @@ -4,56 +4,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 11.01695, - "2": 11.0627, - "3": 10.08254, - "4": 9.77341, - "5": 10.42209, - "6": 9.50215, - "7": 10.51122, - "8": 9.93361, - "9": 9.96346, - "10": 9.78405, - "11": 10.83484, - "12": 10.9711, - "13": 9.37879, - "14": 9.60221, - "15": 9.74747, - "16": 9.85611, - "17": 9.91366, - "18": 9.67222, - "19": 10.12837, - "20": 10.07093, - "21": 10.22833, - "22": 10.00916, - "23": 10.3176, - "24": 10.249, - "25": 10.57076, - "26": 10.3543, - "27": 10.38498, - "28": 10.44036, - "29": 10.42095, - "30": 10.39991, - "31": 10.47396, - "32": 9.9548, - "33": 10.16366, - "34": 10.45178, - "35": 10.38486, - "36": 9.7646, - "37": 10.43313, - "38": 10.24672, - "39": 10.43639, - "40": 10.27337, - "41": 10.2693, - "42": 9.41843, - "43": 10.39381, - "44": 10.49775, - "45": 9.91264, - "46": 10.30142, - "47": 10.50805, - "48": 10.49407, - "49": 10.46356, - "50": 10.05295 + "1": 11.01686, + "2": 11.06267, + "3": 10.08229, + "4": 10.16147, + "5": 10.72064, + "6": 11.70153, + "7": 11.79282, + "8": 12.20877, + "9": 12.25091, + "10": 12.3107, + "11": 11.67129, + "12": 11.64285, + "13": 12.04005, + "14": 11.9213, + "15": 11.81231, + "16": 11.69915, + "17": 11.61381, + "18": 11.65945, + "19": 11.20363, + "20": 11.3173, + "21": 10.90675, + "22": 10.9599, + "23": 10.84703, + "24": 10.85095, + "25": 10.80788, + "26": 10.72602, + "27": 10.63593, + "28": 10.64153, + "29": 10.53218, + "30": 10.51077, + "31": 10.49637, + "32": 9.839, + "33": 10.09746, + "34": 10.38982, + "35": 10.52629, + "36": 9.57043, + "37": 10.53631, + "38": 10.3185, + "39": 10.63808, + "40": 10.39071, + "41": 10.28877, + "42": 8.9125, + "43": 10.56359, + "44": 10.61974, + "45": 9.66825, + "46": 10.38419, + "47": 10.55452, + "48": 10.67664, + "49": 10.50799, + "50": 9.82198 } }, "num-zeros": { @@ -61,56 +61,56 @@ "end_step": 50, "step_interval": 1, "values": { - "1": 47167740.0, - "2": 46899720.0, - "3": 58977560.0, - "4": 108338976.0, - "5": 179138592.0, - "6": 1689646080.0, + "1": 47167824.0, + "2": 46899460.0, + "3": 55839716.0, + "4": 92622768.0, + "5": 216911504.0, + "6": 196269344.0, "7": 1689646080.0, "8": 1689646080.0, - "9": 1689646080.0, - "10": 233872368.0, + "9": 349655776.0, + "10": 246500032.0, "11": 1689646080.0, - "12": 411751104.0, + "12": 298420448.0, "13": 1689646080.0, - "14": 612875008.0, - "15": 596709440.0, - "16": 546873536.0, - "17": 522787808.0, - "18": 742742272.0, - "19": 1689646080.0, - "20": 1689646080.0, - "21": 1689646080.0, + "14": 376943744.0, + "15": 1689646080.0, + "16": 1689646080.0, + "17": 245958224.0, + "18": 371545856.0, + "19": 534120512.0, + "20": 444030048.0, + "21": 534764864.0, "22": 1689646080.0, "23": 1689646080.0, "24": 1689646080.0, - "25": 437427808.0, - "26": 949940032.0, - "27": 1689646080.0, - "28": 553880064.0, - "29": 673135104.0, - "30": 921992576.0, - "31": 578061184.0, - "32": 731112064.0, - "33": 937648896.0, - "34": 648171264.0, - "35": 845326400.0, - "36": 1689646080.0, + "25": 333597536.0, + "26": 581907520.0, + "27": 704208704.0, + "28": 531856576.0, + "29": 736041216.0, + "30": 623146624.0, + "31": 543458496.0, + "32": 1689646080.0, + "33": 597941248.0, + "34": 478297856.0, + "35": 700648256.0, + "36": 651863552.0, "37": 1689646080.0, - "38": 557866752.0, - "39": 953183360.0, - "40": 657044800.0, - "41": 780078464.0, - "42": 639694272.0, + "38": 1689646080.0, + "39": 1689646080.0, + "40": 528034976.0, + "41": 682519360.0, + "42": 869272768.0, "43": 1689646080.0, "44": 1689646080.0, - "45": 597456384.0, - "46": 635425088.0, - "47": 538688640.0, - "48": 525364640.0, - "49": 795455232.0, - "50": 890439424.0 + "45": 462216256.0, + "46": 1689646080.0, + "47": 658198400.0, + "48": 1689646080.0, + "49": 1689646080.0, + "50": 903011072.0 } }, "mem-allocated-bytes": { @@ -176,55 +176,55 @@ "step_interval": 1, "values": { "1": 4313449472.0, - "2": 7108321792.0, - "3": 7108321792.0, - "4": 7108321792.0, - "5": 7108321792.0, - "6": 7108321792.0, - "7": 7108321792.0, - "8": 7108321792.0, - "9": 7108321792.0, - "10": 7125704192.0, - "11": 7125704192.0, - "12": 7125704192.0, - "13": 7125704192.0, - "14": 7125704192.0, - "15": 7125704192.0, - "16": 7125704192.0, - "17": 7125704192.0, - "18": 7125704192.0, - "19": 7125704192.0, - "20": 7125704192.0, - "21": 7125704192.0, - "22": 7125704192.0, - "23": 7125704192.0, - "24": 7125704192.0, - "25": 7125704192.0, - "26": 7125704192.0, - "27": 7125704192.0, - "28": 7125704192.0, - "29": 7125704192.0, - "30": 7125704192.0, - "31": 7125704192.0, - "32": 7125704192.0, - "33": 7125704192.0, - "34": 7125704192.0, - "35": 7125704192.0, - "36": 7125704192.0, - "37": 7125704192.0, - "38": 7125704192.0, - "39": 7125704192.0, - "40": 7125704192.0, - "41": 7125704192.0, - "42": 7125704192.0, - "43": 7125704192.0, - "44": 7125704192.0, - "45": 7125704192.0, - "46": 7125704192.0, - "47": 7125704192.0, - "48": 7125704192.0, - "49": 7125704192.0, - "50": 7125704192.0 + "2": 7108299264.0, + "3": 7108299264.0, + "4": 7108299264.0, + "5": 7108299264.0, + "6": 7153934336.0, + "7": 7161392128.0, + "8": 7161392128.0, + "9": 7161392128.0, + "10": 7161392128.0, + "11": 7161392128.0, + "12": 7161392128.0, + "13": 7161392128.0, + "14": 7161392128.0, + "15": 7161392128.0, + "16": 7161392128.0, + "17": 7161392128.0, + "18": 7161392128.0, + "19": 7161392128.0, + "20": 7161392128.0, + "21": 7162702336.0, + "22": 7162702336.0, + "23": 7162702336.0, + "24": 7162702336.0, + "25": 7162702336.0, + "26": 7162702336.0, + "27": 7162702336.0, + "28": 7162702336.0, + "29": 7162702336.0, + "30": 7162702336.0, + "31": 7162702336.0, + "32": 7162702336.0, + "33": 7162702336.0, + "34": 7162702336.0, + "35": 7162702336.0, + "36": 7162702336.0, + "37": 7162702336.0, + "38": 7162702336.0, + "39": 7162702336.0, + "40": 7162702336.0, + "41": 7162702336.0, + "42": 7162702336.0, + "43": 7162702336.0, + "44": 7162702336.0, + "45": 7162702336.0, + "46": 7162702336.0, + "47": 7162702336.0, + "48": 7162702336.0, + "49": 7162702336.0, + "50": 7162702336.0 } }, "iteration-time": { @@ -233,55 +233,55 @@ "step_interval": 1, "values": { "1": "nan", - "2": 34.30859, - "3": 2.40715, - "4": 2.25303, - "5": 2.10844, - "6": 3.04985, - "7": 2.57498, - "8": 1.96238, - "9": 2.51648, - "10": 1.96358, - "11": 3.22098, - "12": 2.58112, - "13": 2.58339, - "14": 1.93058, - "15": 2.00134, - "16": 1.93882, - "17": 2.4041, - "18": 1.92447, - "19": 1.95655, - "20": 1.92675, - "21": 1.93097, - "22": 1.94756, - "23": 1.93203, - "24": 1.92845, - "25": 1.92893, - "26": 1.93383, - "27": 1.92816, - "28": 1.97235, - "29": 1.92972, - "30": 1.92497, - "31": 1.92667, - "32": 1.92464, - "33": 1.92813, - "34": 1.93393, - "35": 1.92662, - "36": 1.92665, - "37": 1.93412, - "38": 1.92479, - "39": 1.92275, - "40": 1.93356, - "41": 1.93707, - "42": 1.92834, - "43": 1.92982, - "44": 1.93937, - "45": 1.93285, - "46": 1.93985, - "47": 1.93106, - "48": 1.93834, - "49": 1.92661, - "50": 1.9285 + "2": 34.64984, + "3": 2.50781, + "4": 2.43857, + "5": 2.43534, + "6": 3.09095, + "7": 2.07495, + "8": 2.74748, + "9": 2.51536, + "10": 1.98706, + "11": 2.55414, + "12": 1.95505, + "13": 1.94961, + "14": 2.69445, + "15": 3.35077, + "16": 1.94869, + "17": 1.96319, + "18": 1.93962, + "19": 1.96843, + "20": 2.70084, + "21": 1.92797, + "22": 1.93803, + "23": 1.93307, + "24": 1.9409, + "25": 1.92707, + "26": 1.92992, + "27": 1.92868, + "28": 1.96449, + "29": 1.92915, + "30": 1.92054, + "31": 1.92827, + "32": 1.93675, + "33": 1.93486, + "34": 1.96731, + "35": 1.93337, + "36": 1.93408, + "37": 1.98678, + "38": 1.94233, + "39": 1.93036, + "40": 1.93195, + "41": 1.92976, + "42": 1.93562, + "43": 1.93004, + "44": 1.93477, + "45": 1.93099, + "46": 1.93373, + "47": 1.93461, + "48": 1.9316, + "49": 1.92839, + "50": 1.92819 } } } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json index 447e404810b..dcff21e09e0 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json @@ -34,7 +34,8 @@ 1394, 1636 ], - "latency": 0.6956486701965332, + "latency": 0.6967978477478027, + "ttft": 0.17612504959106445, "cuda_graph_request_count_map": { "8": 29 }, @@ -43,253 +44,255 @@ "prompt_top_n_logprobs": null, "prompt_logprobs": [ -10.737512588500977, - -3.724862575531006, - -2.833397388458252, - -1.2464861869812012, - -0.2549239993095398, - -1.7607988119125366, - -2.419379711151123, - -1.9533929824829102, - -2.1014301776885986, - -6.169030666351318, - -0.8734959363937378, - -2.4733574390411377, - -3.4822516441345215, - -4.180896759033203, - -1.9767613410949707, - -1.8347630500793457, - -2.2581257820129395, - -7.180149078369141, - -0.0453881211578846, - -1.9841610193252563, - -5.015386581420898, - -8.827117919921875, - -9.885746002197266, - -0.8498678207397461, - -4.770059585571289, - -0.855280339717865, - -2.2494924068450928, - -0.017164958640933037, - -0.03715415671467781, - -3.4830124378204346, - -8.635110855102539, - -1.2520610094070435, - -6.62324857711792, - -3.639960765838623, - -3.664339542388916, - -4.182392597198486, - -2.1796066761016846, - -1.0725229978561401, - -0.26311880350112915, - -0.8036076426506042, - -4.6958818435668945, - -9.042495727539062, - -0.013647346757352352, - -3.1747794151306152, - -1.322129487991333, - -3.949110746383667, - -0.7829495072364807, - -0.002083513652905822, - -2.970266580581665, - -10.56244945526123, - -3.2369167804718018, - -1.1530492305755615, - -4.917466163635254, - -0.21241025626659393, - -0.06490474194288254, - -1.372581124305725, - -2.224682092666626, - -4.3847503662109375, - -0.36867555975914, - -4.035493850708008, - -0.39869019389152527, - -0.14373983442783356, - -2.716118812561035, - -10.687016487121582, - -0.04773370549082756, - -3.398231267929077, - -0.8646175265312195, - -4.74052619934082, - -0.23649944365024567, - -2.6610701084136963, - -0.8428961634635925, - -1.614527940750122, - -5.793307781219482, - -16.929147720336914, - -2.6586406230926514, - -0.1385982781648636, - -7.435610771179199, - -1.0483647584915161, - -2.1261863708496094, - -1.5261307954788208, - -0.27082547545433044, - -5.859070777893066, - -0.00648513063788414, - -7.732051849365234, - -2.712515354156494, - -2.9137418270111084, - -3.041210651397705 + -3.6886487007141113, + -2.8194005489349365, + -1.2396225929260254, + -0.22920642793178558, + -1.8583638668060303, + -2.384000778198242, + -2.008981227874756, + -2.09975528717041, + -6.182888031005859, + -0.890110433101654, + -2.478799343109131, + -3.502744436264038, + -4.090683937072754, + -1.964805006980896, + -1.8054677248001099, + -2.321495294570923, + -7.160285949707031, + -0.04007242992520332, + -1.9924155473709106, + -5.093112468719482, + -8.779500007629395, + -9.904478073120117, + -0.8523460626602173, + -4.82073974609375, + -0.86553555727005, + -2.36330509185791, + -0.01946748048067093, + -0.034191977232694626, + -3.3692376613616943, + -8.743470191955566, + -1.3306764364242554, + -6.645841598510742, + -3.7794108390808105, + -3.7756881713867188, + -4.187170028686523, + -2.2124571800231934, + -1.0734034776687622, + -0.22135400772094727, + -0.7782289981842041, + -4.716646671295166, + -9.09740161895752, + -0.013706612400710583, + -3.1654152870178223, + -1.3161238431930542, + -4.0115814208984375, + -0.7715368866920471, + -0.002078041434288025, + -2.9644386768341064, + -10.690834045410156, + -3.2352819442749023, + -1.1544448137283325, + -4.971055030822754, + -0.2275625467300415, + -0.05549970641732216, + -1.2946704626083374, + -2.20631742477417, + -4.362645149230957, + -0.37027108669281006, + -4.025010108947754, + -0.3656681478023529, + -0.13922274112701416, + -2.720149040222168, + -10.679438591003418, + -0.04889172688126564, + -3.483541965484619, + -0.9704694151878357, + -4.714925289154053, + -0.2623435854911804, + -2.60113525390625, + -0.8295360207557678, + -1.671109676361084, + -5.792396068572998, + -16.96857261657715, + -2.647408962249756, + -0.13962996006011963, + -7.453517436981201, + -1.0938549041748047, + -2.134143829345703, + -1.5537410974502563, + -0.2869631052017212, + -5.857534408569336, + -0.006919232662767172, + -7.747158527374268, + -2.7227447032928467, + -3.037987232208252, + -3.0312116146087646 ], "generated_logprobs": [ - -2.3559694290161133, - -0.3973437249660492, - -1.4338903427124023, - -2.2967660427093506, - -0.6096595525741577, - -1.310807704925537, - -1.9799187183380127, - -1.710689663887024, - -0.8325198888778687, - -0.4943186938762665, - -1.2886956930160522, - -1.585263729095459, - -1.101692795753479, - -0.44188663363456726, - -0.4740143418312073, - -0.042198192328214645, - -1.2899682521820068, - -2.1242613792419434, - -2.7151529788970947, - -0.8274281024932861, - -0.39647114276885986, - -2.7846553325653076, - -1.5348155498504639, - -1.626529335975647, - -0.047930192202329636, - -1.3622726202011108, - -1.3274445533752441, - -1.2834383249282837, - -1.3211638927459717, - -0.48530423641204834 + -2.342064619064331, + -0.39744827151298523, + -1.516434907913208, + -2.3665950298309326, + -0.6091187000274658, + -1.3131166696548462, + -1.9402395486831665, + -1.6906054019927979, + -0.8380135297775269, + -0.4860585927963257, + -1.2984997034072876, + -1.5767009258270264, + -0.9797608852386475, + -0.40544381737709045, + -0.46793943643569946, + -0.041146114468574524, + -1.2806015014648438, + -2.1299755573272705, + -2.7230143547058105, + -0.7635340690612793, + -0.43544360995292664, + -2.7788889408111572, + -1.5222126245498657, + -1.6613693237304688, + -0.05091489478945732, + -1.3707530498504639, + -1.3513561487197876, + -1.2351378202438354, + -1.2675105333328247, + -0.4767935872077942 ], "logprobs": [ -10.737512588500977, - -3.724862575531006, - -2.833397388458252, - -1.2464861869812012, - -0.2549239993095398, - -1.7607988119125366, - -2.419379711151123, - -1.9533929824829102, - -2.1014301776885986, - -6.169030666351318, - -0.8734959363937378, - -2.4733574390411377, - -3.4822516441345215, - -4.180896759033203, - -1.9767613410949707, - -1.8347630500793457, - -2.2581257820129395, - -7.180149078369141, - -0.0453881211578846, - -1.9841610193252563, - -5.015386581420898, - -8.827117919921875, - -9.885746002197266, - -0.8498678207397461, - -4.770059585571289, - -0.855280339717865, - -2.2494924068450928, - -0.017164958640933037, - -0.03715415671467781, - -3.4830124378204346, - -8.635110855102539, - -1.2520610094070435, - -6.62324857711792, - -3.639960765838623, - -3.664339542388916, - -4.182392597198486, - -2.1796066761016846, - -1.0725229978561401, - -0.26311880350112915, - -0.8036076426506042, - -4.6958818435668945, - -9.042495727539062, - -0.013647346757352352, - -3.1747794151306152, - -1.322129487991333, - -3.949110746383667, - -0.7829495072364807, - -0.002083513652905822, - -2.970266580581665, - -10.56244945526123, - -3.2369167804718018, - -1.1530492305755615, - -4.917466163635254, - -0.21241025626659393, - -0.06490474194288254, - -1.372581124305725, - -2.224682092666626, - -4.3847503662109375, - -0.36867555975914, - -4.035493850708008, - -0.39869019389152527, - -0.14373983442783356, - -2.716118812561035, - -10.687016487121582, - -0.04773370549082756, - -3.398231267929077, - -0.8646175265312195, - -4.74052619934082, - -0.23649944365024567, - -2.6610701084136963, - -0.8428961634635925, - -1.614527940750122, - -5.793307781219482, - -16.929147720336914, - -2.6586406230926514, - -0.1385982781648636, - -7.435610771179199, - -1.0483647584915161, - -2.1261863708496094, - -1.5261307954788208, - -0.27082547545433044, - -5.859070777893066, - -0.00648513063788414, - -7.732051849365234, - -2.712515354156494, - -2.9137418270111084, - -3.041210651397705, - -2.3559694290161133, - -0.3973437249660492, - -1.4338903427124023, - -2.2967660427093506, - -0.6096595525741577, - -1.310807704925537, - -1.9799187183380127, - -1.710689663887024, - -0.8325198888778687, - -0.4943186938762665, - -1.2886956930160522, - -1.585263729095459, - -1.101692795753479, - -0.44188663363456726, - -0.4740143418312073, - -0.042198192328214645, - -1.2899682521820068, - -2.1242613792419434, - -2.7151529788970947, - -0.8274281024932861, - -0.39647114276885986, - -2.7846553325653076, - -1.5348155498504639, - -1.626529335975647, - -0.047930192202329636, - -1.3622726202011108, - -1.3274445533752441, - -1.2834383249282837, - -1.3211638927459717, - -0.48530423641204834 + -3.6886487007141113, + -2.8194005489349365, + -1.2396225929260254, + -0.22920642793178558, + -1.8583638668060303, + -2.384000778198242, + -2.008981227874756, + -2.09975528717041, + -6.182888031005859, + -0.890110433101654, + -2.478799343109131, + -3.502744436264038, + -4.090683937072754, + -1.964805006980896, + -1.8054677248001099, + -2.321495294570923, + -7.160285949707031, + -0.04007242992520332, + -1.9924155473709106, + -5.093112468719482, + -8.779500007629395, + -9.904478073120117, + -0.8523460626602173, + -4.82073974609375, + -0.86553555727005, + -2.36330509185791, + -0.01946748048067093, + -0.034191977232694626, + -3.3692376613616943, + -8.743470191955566, + -1.3306764364242554, + -6.645841598510742, + -3.7794108390808105, + -3.7756881713867188, + -4.187170028686523, + -2.2124571800231934, + -1.0734034776687622, + -0.22135400772094727, + -0.7782289981842041, + -4.716646671295166, + -9.09740161895752, + -0.013706612400710583, + -3.1654152870178223, + -1.3161238431930542, + -4.0115814208984375, + -0.7715368866920471, + -0.002078041434288025, + -2.9644386768341064, + -10.690834045410156, + -3.2352819442749023, + -1.1544448137283325, + -4.971055030822754, + -0.2275625467300415, + -0.05549970641732216, + -1.2946704626083374, + -2.20631742477417, + -4.362645149230957, + -0.37027108669281006, + -4.025010108947754, + -0.3656681478023529, + -0.13922274112701416, + -2.720149040222168, + -10.679438591003418, + -0.04889172688126564, + -3.483541965484619, + -0.9704694151878357, + -4.714925289154053, + -0.2623435854911804, + -2.60113525390625, + -0.8295360207557678, + -1.671109676361084, + -5.792396068572998, + -16.96857261657715, + -2.647408962249756, + -0.13962996006011963, + -7.453517436981201, + -1.0938549041748047, + -2.134143829345703, + -1.5537410974502563, + -0.2869631052017212, + -5.857534408569336, + -0.006919232662767172, + -7.747158527374268, + -2.7227447032928467, + -3.037987232208252, + -3.0312116146087646, + -2.342064619064331, + -0.39744827151298523, + -1.516434907913208, + -2.3665950298309326, + -0.6091187000274658, + -1.3131166696548462, + -1.9402395486831665, + -1.6906054019927979, + -0.8380135297775269, + -0.4860585927963257, + -1.2984997034072876, + -1.5767009258270264, + -0.9797608852386475, + -0.40544381737709045, + -0.46793943643569946, + -0.041146114468574524, + -1.2806015014648438, + -2.1299755573272705, + -2.7230143547058105, + -0.7635340690612793, + -0.43544360995292664, + -2.7788889408111572, + -1.5222126245498657, + -1.6613693237304688, + -0.05091489478945732, + -1.3707530498504639, + -1.3513561487197876, + -1.2351378202438354, + -1.2675105333328247, + -0.4767935872077942 ] }, "throughput": [ - 1.9407774475980641, - 40.8327035151158, - 42.82685786577602, - 42.814063580843204, - 42.88001112304976, - 42.90077111461981, - 42.81151438072744, - 42.89731946981911 - ] + 7.60023464673007, + 40.7758053869289, + 42.60669610069648, + 42.688712195052446, + 42.85349098496967, + 42.693752716516315, + 42.66967613818249, + 42.795818000013604 + ], + "mem-max-allocated-bytes": 30516649984, + "lifetime_prefill_token_count": 88 } \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json index 0a32a2c875c..9b105e4fa2d 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_cuda_graphs_pad_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json @@ -1 +1 @@ -{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [0.4429202973842621, 0.017771262675523758, 0.01790841668844223, 0.016864703968167305, 0.016803359612822533, 0.016811424866318703, 0.016884256154298782, 0.016778528690338135, 0.016986047849059105, 0.016792479902505875, 0.016880257055163383, 0.016891231760382652, 0.016854560002684593, 0.01682988740503788, 0.016822686418890953, 0.01692502386868, 0.016792958602309227, 0.0169671680778265, 0.01693049632012844, 0.0170868169516325, 0.01686326414346695, 0.017535584047436714, 0.0174525436013937, 0.017324192449450493, 0.016914688050746918, 0.01683216169476509, 0.016909919679164886, 0.016884224489331245, 0.017001086845993996, 0.016849694773554802], "latency": 0.9421291537582874, "logprobs": [-10.737512588500977, -3.6907906532287598, -2.8354406356811523, -1.241209626197815, -0.23157396912574768, -1.8544931411743164, -2.305788278579712, -1.9660029411315918, -2.139838695526123, -6.157798767089844, -0.8562318086624146, -2.4214887619018555, -3.5043883323669434, -4.133810043334961, -2.009009838104248, -1.8133208751678467, -2.3323073387145996, -7.160175323486328, -0.040603119879961014, -1.9747259616851807, -5.076613903045654, -8.853288650512695, -9.848663330078125, -0.783089280128479, -4.775578022003174, -0.8501623868942261, -2.353159189224243, -0.019392186775803566, -0.035699184983968735, -3.369636058807373, -8.736637115478516, -1.2479770183563232, -6.668802261352539, -3.84025239944458, -3.774880886077881, -4.180184364318848, -2.2127902507781982, -1.07676362991333, -0.2321961224079132, -0.8445965647697449, -4.720583915710449, -9.144975662231445, -0.013737889938056469, -3.1713855266571045, -1.316046953201294, -3.976555824279785, -0.7929940223693848, -0.0020036876667290926, -2.9234514236450195, -10.630117416381836, -3.2423582077026367, -1.1527093648910522, -4.902451515197754, -0.20881010591983795, -0.06518254429101944, -1.3553434610366821, -2.205620765686035, -4.443068981170654, -0.3349221646785736, -4.0811614990234375, -0.40434733033180237, -0.14260707795619965, -2.7138302326202393, -10.61572551727295, -0.05091002210974693, -3.3788461685180664, -0.8990436792373657, -4.757172584533691, -0.2625967562198639, -2.6857080459594727, -0.8338347673416138, -1.5987446308135986, -5.796599388122559, -17.023239135742188, -2.5919642448425293, -0.1391627937555313, -7.425058841705322, -1.0969927310943604, -2.1373608112335205, -1.5555475950241089, -0.29913192987442017, -5.805688381195068, -0.006563534028828144, -7.741588592529297, -2.729809284210205, -2.989825487136841, -2.937342643737793, -2.452791690826416, -0.39692243933677673, -1.4191737174987793, -2.281113862991333, -0.6101264357566833, -1.3127052783966064, -1.93826162815094, -1.759519100189209, -0.8280774354934692, -0.48737525939941406, -1.2929327487945557, -1.4731515645980835, -1.0149478912353516, -0.402925580739975, -0.4662020802497864, -0.04289804771542549, -1.2809830904006958, -2.1367523670196533, -2.672316074371338, -0.832058310508728, -0.3975365459918976, -2.8649744987487793, -1.5586214065551758, -1.6164027452468872, -0.048774562776088715, -1.3553334474563599, -1.374987006187439, -1.2671791315078735, -1.29192054271698, -0.49132436513900757]}} \ No newline at end of file +{"1": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [0.9536803364753723, 0.016934143379330635, 0.016783935949206352, 0.016745438799262047, 0.0167548805475235, 0.01675168052315712, 0.016725631430745125, 0.016744224354624748, 0.016771871596574783, 0.01673772744834423, 0.01671331189572811, 0.016742143779993057, 0.016752831637859344, 0.016735071316361427, 0.016708416864275932, 0.01676086336374283, 0.01672108843922615, 0.01675747148692608, 0.01672768034040928, 0.01673593558371067, 0.01673644781112671, 0.016768928617239, 0.016721663996577263, 0.01674390397965908, 0.016715550795197487, 0.01673286408185959, 0.016974497586488724, 0.016759486868977547, 0.01675366424024105, 0.017026912420988083], "latency": 1.4477943610399961, "logprobs": [-10.737512588500977, -3.6907906532287598, -2.8354406356811523, -1.241209626197815, -0.23157396912574768, -1.8544931411743164, -2.3058578968048096, -1.965078592300415, -2.131650447845459, -6.1445770263671875, -0.8567591905593872, -2.4118287563323975, -3.49544095993042, -4.07368278503418, -1.9927215576171875, -1.8116189241409302, -2.220911979675293, -7.154321193695068, -0.041270580142736435, -1.897834300994873, -5.07651424407959, -8.796056747436523, -9.88559341430664, -0.7177769541740417, -4.775661468505859, -0.8519912362098694, -2.3280630111694336, -0.018436847254633904, -0.038715627044439316, -3.4831454753875732, -8.691339492797852, -1.2537559270858765, -6.647421836853027, -3.785327911376953, -3.7754249572753906, -4.1729302406311035, -2.268207550048828, -1.0762473344802856, -0.22336173057556152, -0.7773433923721313, -4.659046173095703, -9.07835865020752, -0.01364840567111969, -3.1569409370422363, -1.3136285543441772, -3.999577760696411, -0.8146029710769653, -0.0020667400676757097, -2.9257936477661133, -10.559369087219238, -3.301023483276367, -1.1468515396118164, -4.866663455963135, -0.20965954661369324, -0.06276518106460571, -1.3678232431411743, -2.2146267890930176, -4.369752883911133, -0.35328271985054016, -4.076470851898193, -0.39479735493659973, -0.14124885201454163, -2.7151336669921875, -10.645881652832031, -0.05100790411233902, -3.277879238128662, -0.859420657157898, -4.694356918334961, -0.26331964135169983, -2.601013660430908, -0.8340632319450378, -1.5944981575012207, -5.802148818969727, -16.992801666259766, -2.9630136489868164, -0.12174151837825775, -7.421735763549805, -1.090034008026123, -2.115244150161743, -1.592454195022583, -0.297377347946167, -5.607227325439453, -0.006619194056838751, -7.817281723022461, -2.701261043548584, -2.988292932510376, -2.9706215858459473, -2.4563350677490234, -0.3993130624294281, -1.5173310041427612, -2.269473075866699, -0.6111201047897339, -1.313757300376892, -1.9383023977279663, -1.6797527074813843, -0.7714957594871521, -0.49417543411254883, -1.2875804901123047, -1.5885818004608154, -1.018824577331543, -0.4005858302116394, -0.46394026279449463, -0.04470847547054291, -1.2914193868637085, -2.2410547733306885, -2.740159273147583, -0.7651359438896179, -0.4316181242465973, -2.7719383239746094, -1.5367236137390137, -1.652032732963562, -0.051836322993040085, -1.3689777851104736, -1.342658519744873, -1.26646089553833, -1.3113347291946411, -0.5160548686981201]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json index 0a088a3b4ed..16ce3cb46a0 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp1_pp1_ep1_16B_logitsmatch/golden_values_dev_dgx_h100.json @@ -1 +1 @@ -{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [2.715181350708008, 11.115615844726562, 0.08171491324901581, 0.08067888021469116, 0.08026569336652756, 0.08201305568218231, 0.08297079056501389, 0.07990239560604095, 0.07923731207847595, 0.08088396489620209, 0.08342364430427551, 0.07902496308088303, 0.07883366197347641, 0.0821300819516182, 0.07879510521888733, 0.07889033854007721, 0.08096816390752792, 0.07890790700912476, 0.07898931205272675, 0.07891136407852173, 0.07872992008924484, 0.07891993969678879, 0.07908882945775986, 0.08219005167484283, 0.07928377389907837, 0.0791754499077797, 0.08204790204763412, 0.07909702509641647, 0.07994850724935532, 0.0790436789393425], "latency": 16.090769955015276, "logprobs": [-10.748703956604004, -3.675847053527832, -2.8152527809143066, -1.2499192953109741, -0.2585306465625763, -1.7650476694107056, -2.4413700103759766, -1.9855635166168213, -2.1556897163391113, -6.126346588134766, -0.8885424733161926, -2.466485023498535, -3.53129506111145, -4.1022443771362305, -1.973730444908142, -1.8129527568817139, -2.3135061264038086, -7.073224067687988, -0.0406799241900444, -1.9924827814102173, -5.044793128967285, -8.79849910736084, -9.896184921264648, -0.9244536757469177, -4.819119453430176, -0.8409886360168457, -2.3493337631225586, -0.019546041265130043, -0.03429899737238884, -3.486131429672241, -8.708669662475586, -1.2524677515029907, -6.648501396179199, -3.6543850898742676, -3.5817432403564453, -4.293689250946045, -2.213235855102539, -1.026153802871704, -0.22022850811481476, -0.7749938368797302, -4.7083001136779785, -9.260919570922852, -0.013350849971175194, -3.177624464035034, -1.3237272500991821, -3.991711139678955, -0.7711713314056396, -0.0020787552930414677, -2.9259750843048096, -10.556608200073242, -3.0338008403778076, -1.165448546409607, -4.884476184844971, -0.22491267323493958, -0.06299388408660889, -1.2974224090576172, -2.228250503540039, -4.375787258148193, -0.3615659773349762, -4.020719528198242, -0.3728649318218231, -0.16031591594219208, -2.7166409492492676, -10.650144577026367, -0.057426948100328445, -3.3819196224212646, -0.8289875388145447, -4.716109752655029, -0.2623739540576935, -2.6586318016052246, -0.846296489238739, -1.6911215782165527, -5.863524436950684, -17.074047088623047, -2.9786670207977295, -0.12697581946849823, -7.423051834106445, -1.1104215383529663, -2.125497579574585, -1.481943130493164, -0.26388564705848694, -5.852108001708984, -0.006604391150176525, -7.682407379150391, -2.7386088371276855, -2.9692039489746094, -3.0358991622924805, -2.434255838394165, -0.4008456766605377, -1.4501973390579224, -2.3068716526031494, -0.5563173294067383, -1.3114793300628662, -1.9436699151992798, -1.6950371265411377, -0.7694160342216492, -0.504065215587616, -1.2403564453125, -1.5687276124954224, -1.0141794681549072, -0.4076817035675049, -0.48037511110305786, -0.04258028045296669, -1.3669413328170776, -2.1299216747283936, -2.6870312690734863, -0.7604206800460815, -0.39242351055145264, -2.869314432144165, -1.464285135269165, -1.643430471420288, -0.04816753789782524, -1.4055166244506836, -1.3622899055480957, -1.2863339185714722, -1.2075212001800537, -0.5359172821044922]}} \ No newline at end of file +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [2.545225143432617, 0.8161460161209106, 0.08185821026563644, 0.08545549213886261, 0.0806993618607521, 0.08108274638652802, 0.08104605227708817, 0.08051212877035141, 0.08013814687728882, 0.08076290786266327, 0.08076870441436768, 0.08029650896787643, 0.07956188917160034, 0.07908589392900467, 0.07883257418870926, 0.0789686068892479, 0.07868800312280655, 0.0788155198097229, 0.07876496016979218, 0.07869625836610794, 0.07890035212039948, 0.08047452569007874, 0.08083260804414749, 0.08091676980257034, 0.08101955056190491, 0.08092495799064636, 0.08079087734222412, 0.08215625584125519, 0.08282729238271713, 0.08078553527593613], "latency": 5.632539719343185, "logprobs": [-10.748703956604004, -3.6647238731384277, -2.829481363296509, -1.2498102188110352, -0.2529161274433136, -1.8659480810165405, -2.381495237350464, -1.9656293392181396, -2.1487326622009277, -6.079752445220947, -0.854481041431427, -2.4210457801818848, -3.5035147666931152, -4.105377674102783, -1.9736212491989136, -1.812272310256958, -2.2293882369995117, -7.158683776855469, -0.04035309702157974, -1.8912277221679688, -5.030168056488037, -8.83056640625, -9.887261390686035, -0.783703088760376, -4.775330066680908, -0.8503050208091736, -2.3185184001922607, -0.019133294001221657, -0.035927604883909225, -3.367396831512451, -8.677278518676758, -1.249742031097412, -6.646797180175781, -3.7622885704040527, -3.5719683170318604, -4.217543125152588, -2.1486034393310547, -1.0877041816711426, -0.21807751059532166, -0.779091477394104, -4.703271865844727, -9.254538536071777, -0.013438244350254536, -3.1536498069763184, -1.347169041633606, -3.951521873474121, -0.7524824142456055, -0.0020295039284974337, -2.977275848388672, -10.547475814819336, -3.2425384521484375, -1.1805994510650635, -4.853910446166992, -0.20160463452339172, -0.06345974653959274, -1.3051351308822632, -2.185082197189331, -4.38629150390625, -0.36718395352363586, -4.047024726867676, -0.35912153124809265, -0.14624275267124176, -2.715745687484741, -10.57785415649414, -0.057624127715826035, -3.278026819229126, -0.8596221804618835, -4.68856954574585, -0.2627037465572357, -2.6609294414520264, -0.8223639726638794, -1.599161148071289, -5.80098295211792, -16.973386764526367, -2.616450548171997, -0.13072170317173004, -7.462261199951172, -1.082613229751587, -2.122760534286499, -1.5544897317886353, -0.2857922911643982, -5.874238967895508, -0.006397482007741928, -7.670827865600586, -2.7104744911193848, -2.9125661849975586, -3.0234169960021973, -2.3567330837249756, -0.39910370111465454, -1.4856780767440796, -2.293515920639038, -0.6103871464729309, -1.3627440929412842, -1.9288791418075562, -1.683215618133545, -0.7717607021331787, -0.5037432312965393, -1.2485487461090088, -1.559265375137329, -1.0295413732528687, -0.4404847025871277, -0.46411699056625366, -0.04514020308852196, -1.3507211208343506, -2.1256327629089355, -2.687516927719116, -0.759257435798645, -0.41867509484291077, -2.854245185852051, -1.5189608335494995, -1.5710458755493164, -0.05560074374079704, -1.357395052909851, -1.3819431066513062, -1.2458440065383911, -1.2788543701171875, -0.5129148364067078]}} \ No newline at end of file diff --git a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json index 46a451d0b64..9374ead29ab 100644 --- a/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_static_inference_tp4_pp1_ep4_16B_logitsmatch/golden_values_dev_dgx_h100.json @@ -1 +1 @@ -{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [32.35166549682617, 0.833970844745636, 0.09404217451810837, 0.09242991358041763, 0.09451283514499664, 0.09116563200950623, 0.0916728675365448, 0.09169203042984009, 0.0920996442437172, 0.09247440099716187, 0.09316505491733551, 0.09183433651924133, 0.09311366081237793, 0.0922863706946373, 0.09139427542686462, 0.0916166678071022, 0.09881363064050674, 0.09086793661117554, 0.09085418283939362, 0.0913468450307846, 0.0913306251168251, 0.09422652423381805, 0.09134646505117416, 0.09283513575792313, 0.09112297743558884, 0.09120230376720428, 0.09097100794315338, 0.09246265143156052, 0.09317846596240997, 0.09015017747879028], "latency": 35.78577698091976, "logprobs": [-10.737512588500977, -3.6907906532287598, -2.8354406356811523, -1.241209626197815, -0.23157396912574768, -1.8544931411743164, -2.305788278579712, -1.9660029411315918, -2.139838695526123, -6.157798767089844, -0.8562318086624146, -2.4214887619018555, -3.5043883323669434, -4.133810043334961, -2.009009838104248, -1.8133208751678467, -2.3323073387145996, -7.160175323486328, -0.040603119879961014, -1.9747259616851807, -5.076613903045654, -8.853288650512695, -9.848663330078125, -0.783089280128479, -4.775578022003174, -0.8501623868942261, -2.353159189224243, -0.019392186775803566, -0.035699184983968735, -3.369636058807373, -8.736637115478516, -1.2479770183563232, -6.668802261352539, -3.84025239944458, -3.774880886077881, -4.180184364318848, -2.2127902507781982, -1.07676362991333, -0.2321961224079132, -0.8445965647697449, -4.720583915710449, -9.144975662231445, -0.013737889938056469, -3.1713855266571045, -1.316046953201294, -3.976555824279785, -0.7929940223693848, -0.0020036876667290926, -2.9234514236450195, -10.630117416381836, -3.2423582077026367, -1.1527093648910522, -4.902451515197754, -0.20881010591983795, -0.06518254429101944, -1.3553434610366821, -2.205620765686035, -4.443068981170654, -0.3349221646785736, -4.0811614990234375, -0.40434733033180237, -0.14260707795619965, -2.7138302326202393, -10.61572551727295, -0.05091002210974693, -3.3788461685180664, -0.8990436792373657, -4.757172584533691, -0.2625967562198639, -2.6857080459594727, -0.8338347673416138, -1.5987446308135986, -5.796599388122559, -17.023239135742188, -2.5919642448425293, -0.1391627937555313, -7.425058841705322, -1.0969927310943604, -2.1373608112335205, -1.5555475950241089, -0.29913192987442017, -5.805688381195068, -0.006563534028828144, -7.741588592529297, -2.729809284210205, -2.989825487136841, -2.937342643737793, -2.452791690826416, -0.39692243933677673, -1.4191737174987793, -2.281113862991333, -0.6101264357566833, -1.3127052783966064, -1.93826162815094, -1.759519100189209, -0.8280774354934692, -0.48737525939941406, -1.2929327487945557, -1.4731515645980835, -1.0149478912353516, -0.402925580739975, -0.4662020802497864, -0.04289804771542549, -1.2809830904006958, -2.1367523670196533, -2.672316074371338, -0.832058310508728, -0.3975365459918976, -2.8649744987487793, -1.5586214065551758, -1.6164027452468872, -0.048774562776088715, -1.3553334474563599, -1.374987006187439, -1.2671791315078735, -1.29192054271698, -0.49132436513900757]}} \ No newline at end of file +{"0": {"input_prompt": "Time travel to 2008, and go to a bar or a club or one of the myriad disco-basements on the Lower East Side that does not quite know which of those it is. Dance awkwardly in a room full of other glittered-up nerds, and wait for something to happen, buoyed on the feeling that this is the big swollen heart of life, that this is New York like the movies.", "generated_text": " Wait for the moment when the music stops, and the lights come up, and the DJ says, \"I'm going to play a song for you", "generated_tokens": [32844, 1394, 1278, 4735, 2200, 1278, 7146, 30774, 1044, 1321, 1278, 26466, 3930, 2015, 1044, 1321, 1278, 30245, 8223, 1044, 1429, 1073, 4525, 4670, 1317, 3354, 1261, 6947, 1394, 1636], "tpot": [13.711557388305664, 0.9041259288787842, 0.09681683033704758, 0.09153660386800766, 0.09111235290765762, 0.09335695952177048, 0.09111097455024719, 0.09149472415447235, 0.09076278656721115, 0.09109959006309509, 0.09105174243450165, 0.09161145985126495, 0.09026294946670532, 0.09116349369287491, 0.09077664464712143, 0.09046704322099686, 0.09124940633773804, 0.09130454063415527, 0.09034591913223267, 0.09066786617040634, 0.0906708836555481, 0.09116076678037643, 0.09075567871332169, 0.09169074892997742, 0.0911671370267868, 0.09112399816513062, 0.09118761122226715, 0.09077664464712143, 0.09082793444395065, 0.09155046194791794], "latency": 17.191568877082318, "logprobs": [-10.737512588500977, -3.6907906532287598, -2.8354406356811523, -1.241209626197815, -0.23157396912574768, -1.8544931411743164, -2.3058578968048096, -1.965078592300415, -2.131650447845459, -6.1445770263671875, -0.8567591905593872, -2.4118287563323975, -3.49544095993042, -4.07368278503418, -1.9927215576171875, -1.8116189241409302, -2.220911979675293, -7.154321193695068, -0.041270580142736435, -1.897834300994873, -5.07651424407959, -8.796056747436523, -9.88559341430664, -0.7177769541740417, -4.775661468505859, -0.8519912362098694, -2.3280630111694336, -0.018436847254633904, -0.038715627044439316, -3.4831454753875732, -8.691339492797852, -1.2537559270858765, -6.647421836853027, -3.785327911376953, -3.7754249572753906, -4.1729302406311035, -2.268207550048828, -1.0762473344802856, -0.22336173057556152, -0.7773433923721313, -4.659046173095703, -9.07835865020752, -0.01364840567111969, -3.1569409370422363, -1.3136285543441772, -3.999577760696411, -0.8146029710769653, -0.0020667400676757097, -2.9257936477661133, -10.559369087219238, -3.301023483276367, -1.1468515396118164, -4.866663455963135, -0.20965954661369324, -0.06276518106460571, -1.3678232431411743, -2.2146267890930176, -4.369752883911133, -0.35328271985054016, -4.076470851898193, -0.39479735493659973, -0.14124885201454163, -2.7151336669921875, -10.645881652832031, -0.05100790411233902, -3.277879238128662, -0.859420657157898, -4.694356918334961, -0.26331964135169983, -2.601013660430908, -0.8340632319450378, -1.5944981575012207, -5.802148818969727, -16.992801666259766, -2.9630136489868164, -0.12174151837825775, -7.421735763549805, -1.090034008026123, -2.115244150161743, -1.592454195022583, -0.297377347946167, -5.607227325439453, -0.006619194056838751, -7.817281723022461, -2.701261043548584, -2.988292932510376, -2.9706215858459473, -2.4563350677490234, -0.3993130624294281, -1.5173310041427612, -2.269473075866699, -0.6111201047897339, -1.313757300376892, -1.9383023977279663, -1.6797527074813843, -0.7714957594871521, -0.49417543411254883, -1.2875804901123047, -1.5885818004608154, -1.018824577331543, -0.4005858302116394, -0.46394026279449463, -0.04470847547054291, -1.2914193868637085, -2.2410547733306885, -2.740159273147583, -0.7651359438896179, -0.4316181242465973, -2.7719383239746094, -1.5367236137390137, -1.652032732963562, -0.051836322993040085, -1.3689777851104736, -1.342658519744873, -1.26646089553833, -1.3113347291946411, -0.5160548686981201]}} \ No newline at end of file From 2e77eaeb2183d2e89e5a90bc7bf5c54cdcf568ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 4 Mar 2026 12:48:13 +0000 Subject: [PATCH 11/12] install mock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- pyproject.toml | 8 +- uv.lock | 351 ++++++------------------------------------------- 2 files changed, 46 insertions(+), 313 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6a752a98d59..0037ce3be76 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,12 @@ build-backend = "setuptools.build_meta" include-package-data = true [tool.setuptools.packages.find] -include = ["megatron.core", "megatron.core.*", "megatron.training", "megatron.training.*"] +include = [ + "megatron.core", + "megatron.core.*", + "megatron.training", + "megatron.training.*", +] [tool.setuptools.dynamic] version = { attr = "megatron.core.package_info.__version__" } @@ -136,6 +141,7 @@ test = [ "wrapt", "pytest==8.3.5", "pytest-mock", + "mock", "pytest-cov", "pytest-random-order", "pytest-asyncio", diff --git a/uv.lock b/uv.lock index 48c71a36f75..5f66c2d5233 100644 --- a/uv.lock +++ b/uv.lock @@ -1211,74 +1211,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/fa/d3c15189f7c52aaefbaea76fb012119b04b9013f4bf446cb4eb4c26c4e6b/cython-3.2.4-py3-none-any.whl", hash = "sha256:732fc93bc33ae4b14f6afaca663b916c2fdd5dcbfad7114e17fb2434eeaea45c", size = 1257078, upload-time = "2026-01-04T14:14:12.373Z" }, ] -[[package]] -name = "datasets" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "aiohttp", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "fsspec", extra = ["http"], marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "huggingface-hub", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "multiprocess", version = "0.70.19", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "packaging", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "pyarrow", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "requests", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "responses", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "tqdm", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "xxhash", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/31/64/1e6fb2a0eb6b0d55117233cf33279ba6d680c0f031ebae81281a47c92760/datasets-2.2.1.tar.gz", hash = "sha256:d362717c4394589b516c8f397ff20a6fe720454aed877ab61d06f3bc05df9544", size = 302132, upload-time = "2022-05-11T17:02:29.543Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/2d/41e8aec8d4bad6f07adfcbc89cf743e0d31c876371d453b2936bcfa7fe34/datasets-2.2.1-py3-none-any.whl", hash = "sha256:1938f3e99599422de50b9b54fe802aca854ed130382dab0b3820c821f7ae6d5e", size = 342193, upload-time = "2022-05-11T17:02:27.047Z" }, -] - [[package]] name = "datasets" version = "4.6.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "filelock", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "fsspec", extra = ["http"], marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or extra == 'extra-13-megatron-core-dev'" }, - { name = "httpx", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "huggingface-hub", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "multiprocess", version = "0.70.18", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" } }, + { name = "filelock" }, + { name = "fsspec", extra = ["http"], marker = "extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "multiprocess" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "packaging", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "packaging" }, { name = "pandas", version = "2.3.3", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "pyarrow", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "pyyaml", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "requests", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "tqdm", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, - { name = "xxhash", marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "pandas", version = "3.0.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "pyarrow" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "tqdm" }, + { name = "xxhash" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d7/94/eb81c6fe32e9b6ef92223141b5a553aeff2e9456968424a8533cbe88f476/datasets-4.6.1.tar.gz", hash = "sha256:140ce500bc41939ff6ce995702d66b1f4b2ee7f117bb9b07512fab6804d4070a", size = 593865, upload-time = "2026-02-27T23:26:49.482Z" } wheels = [ @@ -1320,6 +1273,7 @@ name = "dill" version = "0.4.0" source = { registry = "https://pypi.org/simple" } resolution-markers = [ + "python_full_version >= '3.14' and sys_platform == 'linux'", "python_full_version == '3.13.*' and sys_platform == 'linux'", "python_full_version == '3.12.*' and sys_platform == 'linux'", "python_full_version >= '3.14' and sys_platform == 'win32'", @@ -1337,7 +1291,6 @@ resolution-markers = [ "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version < '3.11' and sys_platform == 'linux'", "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", ] sdist = { url = "https://files.pythonhosted.org/packages/12/80/630b4b88364e9a8c8c5797f4602d0f76ef820909ee32f0bacb9f90654042/dill-0.4.0.tar.gz", hash = "sha256:0633f1d2df477324f53a895b02c901fb961bdbf65a17122586ea7019292cbcf0", size = 186976, upload-time = "2025-04-16T00:41:48.867Z" } wheels = [ @@ -2641,7 +2594,7 @@ dependencies = [ dev = [ { name = "av" }, { name = "causal-conv1d" }, - { name = "datasets", version = "4.6.1", source = { registry = "https://pypi.org/simple" } }, + { name = "datasets" }, { name = "einops" }, { name = "emerging-optimizers" }, { name = "fastapi" }, @@ -2668,8 +2621,7 @@ dev = [ lts = [ { name = "av" }, { name = "causal-conv1d" }, - { name = "datasets", version = "2.2.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra == 'extra-13-megatron-core-lts') or (python_full_version < '3.14' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "datasets", version = "4.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.14' and extra == 'extra-13-megatron-core-lts') or (sys_platform != 'linux' and extra == 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "datasets" }, { name = "einops" }, { name = "emerging-optimizers" }, { name = "fastapi" }, @@ -2744,6 +2696,7 @@ no-pypi-wheels = [ ] test = [ { name = "coverage" }, + { name = "mock" }, { name = "nemo-run" }, { name = "nltk" }, { name = "pydantic" }, @@ -2855,6 +2808,7 @@ no-pypi-wheels = [ ] test = [ { name = "coverage" }, + { name = "mock" }, { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run.git?rev=01a9a8ba360f7b2908728ad0516e0ad9d936966d" }, { name = "nltk" }, { name = "pydantic" }, @@ -2947,6 +2901,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/3f/3d42e9a78fe5edf792a83c074b13b9b770092a4fbf3462872f4303135f09/ml_dtypes-0.5.4-cp314-cp314t-win_arm64.whl", hash = "sha256:11942cbf2cf92157db91e5022633c0d9474d4dfd813a909383bd23ce828a4b7d", size = 168825, upload-time = "2025-11-17T22:32:23.766Z" }, ] +[[package]] +name = "mock" +version = "5.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/07/8c/14c2ae915e5f9dca5a22edd68b35be94400719ccfa068a03e0fb63d0f6f6/mock-5.2.0.tar.gz", hash = "sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0", size = 92796, upload-time = "2025-03-03T12:31:42.911Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/d9/617e6af809bf3a1d468e0d58c3997b1dc219a9a9202e650d30c2fc85d481/mock-5.2.0-py3-none-any.whl", hash = "sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f", size = 31617, upload-time = "2025-03-03T12:31:41.518Z" }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -3193,28 +3156,8 @@ wheels = [ name = "multiprocess" version = "0.70.18" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version == '3.13.*' and sys_platform == 'linux'", - "python_full_version == '3.12.*' and sys_platform == 'linux'", - "python_full_version >= '3.14' and sys_platform == 'win32'", - "python_full_version >= '3.14' and sys_platform == 'emscripten'", - "python_full_version >= '3.14' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'win32'", - "python_full_version == '3.12.*' and sys_platform == 'win32'", - "python_full_version == '3.13.*' and sys_platform == 'emscripten'", - "python_full_version == '3.12.*' and sys_platform == 'emscripten'", - "python_full_version == '3.13.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.12.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'linux'", - "python_full_version == '3.11.*' and sys_platform == 'win32'", - "python_full_version == '3.11.*' and sys_platform == 'emscripten'", - "python_full_version == '3.11.*' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'", - "python_full_version < '3.11' and sys_platform == 'linux'", - "python_full_version < '3.11' and sys_platform != 'linux'", - "python_full_version >= '3.14' and sys_platform == 'linux'", -] dependencies = [ - { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux'" }, + { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" } }, ] sdist = { url = "https://files.pythonhosted.org/packages/72/fd/2ae3826f5be24c6ed87266bc4e59c46ea5b059a103f3d7e7eb76a52aeecb/multiprocess-0.70.18.tar.gz", hash = "sha256:f9597128e6b3e67b23956da07cf3d2e5cba79e2f4e0fba8d7903636663ec6d0d", size = 1798503, upload-time = "2025-04-17T03:11:27.742Z" } wheels = [ @@ -3232,32 +3175,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6c/28/dd72947e59a6a8c856448a5e74da6201cb5502ddff644fbc790e4bd40b9a/multiprocess-0.70.18-py39-none-any.whl", hash = "sha256:e78ca805a72b1b810c690b6b4cc32579eba34f403094bbbae962b7b5bf9dfcb8", size = 133478, upload-time = "2025-04-17T03:11:26.253Z" }, ] -[[package]] -name = "multiprocess" -version = "0.70.19" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14' and sys_platform == 'linux'", -] -dependencies = [ - { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a2/f2/e783ac7f2aeeed14e9e12801f22529cc7e6b7ab80928d6dcce4e9f00922d/multiprocess-0.70.19.tar.gz", hash = "sha256:952021e0e6c55a4a9fe4cd787895b86e239a40e76802a789d6305398d3975897", size = 2079989, upload-time = "2026-01-19T06:47:39.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/b6/10832f96b499690854e574360be342a282f5f7dba58eff791299ff6c0637/multiprocess-0.70.19-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:02e5c35d7d6cd2bdc89c1858867f7bde4012837411023a4696c148c1bdd7c80e", size = 135131, upload-time = "2026-01-19T06:47:20.479Z" }, - { url = "https://files.pythonhosted.org/packages/99/50/faef2d8106534b0dc4a0b772668a1a99682696ebf17d3c0f13f2ed6a656a/multiprocess-0.70.19-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:79576c02d1207ec405b00cabf2c643c36070800cca433860e14539df7818b2aa", size = 135131, upload-time = "2026-01-19T06:47:21.879Z" }, - { url = "https://files.pythonhosted.org/packages/94/b1/0b71d18b76bf423c2e8ee00b31db37d17297ab3b4db44e188692afdca628/multiprocess-0.70.19-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c6b6d78d43a03b68014ca1f0b7937d965393a670c5de7c29026beb2258f2f896", size = 135134, upload-time = "2026-01-19T06:47:23.262Z" }, - { url = "https://files.pythonhosted.org/packages/7e/aa/714635c727dbfc251139226fa4eaf1b07f00dc12d9cd2eb25f931adaf873/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1bbf1b69af1cf64cd05f65337d9215b88079ec819cd0ea7bac4dab84e162efe7", size = 144743, upload-time = "2026-01-19T06:47:24.562Z" }, - { url = "https://files.pythonhosted.org/packages/0f/e1/155f6abf5e6b5d9cef29b6d0167c180846157a4aca9b9bee1a217f67c959/multiprocess-0.70.19-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:5be9ec7f0c1c49a4f4a6fd20d5dda4aeabc2d39a50f4ad53720f1cd02b3a7c2e", size = 144738, upload-time = "2026-01-19T06:47:26.636Z" }, - { url = "https://files.pythonhosted.org/packages/af/cb/f421c2869d75750a4f32301cc20c4b63fab6376e9a75c8e5e655bdeb3d9b/multiprocess-0.70.19-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1c3dce098845a0db43b32a0b76a228ca059a668071cfeaa0f40c36c0b1585d45", size = 144741, upload-time = "2026-01-19T06:47:27.985Z" }, - { url = "https://files.pythonhosted.org/packages/e3/45/8004d1e6b9185c1a444d6b55ac5682acf9d98035e54386d967366035a03a/multiprocess-0.70.19-py310-none-any.whl", hash = "sha256:97404393419dcb2a8385910864eedf47a3cadf82c66345b44f036420eb0b5d87", size = 134948, upload-time = "2026-01-19T06:47:32.325Z" }, - { url = "https://files.pythonhosted.org/packages/86/c2/dec9722dc3474c164a0b6bcd9a7ed7da542c98af8cabce05374abab35edd/multiprocess-0.70.19-py311-none-any.whl", hash = "sha256:928851ae7973aea4ce0eaf330bbdafb2e01398a91518d5c8818802845564f45c", size = 144457, upload-time = "2026-01-19T06:47:33.711Z" }, - { url = "https://files.pythonhosted.org/packages/71/70/38998b950a97ea279e6bd657575d22d1a2047256caf707d9a10fbce4f065/multiprocess-0.70.19-py312-none-any.whl", hash = "sha256:3a56c0e85dd5025161bac5ce138dcac1e49174c7d8e74596537e729fd5c53c28", size = 150281, upload-time = "2026-01-19T06:47:35.037Z" }, - { url = "https://files.pythonhosted.org/packages/7f/74/d2c27e03cb84251dfe7249b8e82923643c6d48fa4883b9476b025e7dc7eb/multiprocess-0.70.19-py313-none-any.whl", hash = "sha256:8d5eb4ec5017ba2fab4e34a747c6d2c2b6fecfe9e7236e77988db91580ada952", size = 156414, upload-time = "2026-01-19T06:47:35.915Z" }, - { url = "https://files.pythonhosted.org/packages/a0/61/af9115673a5870fd885247e2f1b68c4f1197737da315b520a91c757a861a/multiprocess-0.70.19-py314-none-any.whl", hash = "sha256:e8cc7fbdff15c0613f0a1f1f8744bef961b0a164c0ca29bdff53e9d2d93c5e5f", size = 160318, upload-time = "2026-01-19T06:47:37.497Z" }, - { url = "https://files.pythonhosted.org/packages/7e/82/69e539c4c2027f1e1697e09aaa2449243085a0edf81ae2c6341e84d769b6/multiprocess-0.70.19-py39-none-any.whl", hash = "sha256:0d4b4397ed669d371c81dcd1ef33fd384a44d6c3de1bd0ca7ac06d837720d3c5", size = 133477, upload-time = "2026-01-19T06:47:38.619Z" }, -] - [[package]] name = "mypy-extensions" version = "1.1.0" @@ -3644,59 +3561,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8a/86/94188e03e5d4dd7b73c390b0cddcde5618b3799c18e327b2bf15763f6137/nvdlfw_inspect-0.2.2-py3-none-any.whl", hash = "sha256:8a4dc2814c5a4cd19ae304170b9bfa514538ef3c3eb243a45a82404ec3cb279d", size = 30964, upload-time = "2025-12-03T10:52:01.933Z" }, ] -[[package]] -name = "nvidia-cublas-cu12" -version = "12.8.4.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" }, - { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" }, - { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" }, -] - -[[package]] -name = "nvidia-cuda-cupti-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" }, - { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" }, -] - -[[package]] -name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" }, - { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" }, -] - -[[package]] -name = "nvidia-cuda-runtime-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" }, - { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" }, - { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" }, -] - -[[package]] -name = "nvidia-cudnn-cu12" -version = "9.10.2.21" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" }, - { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, - { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" }, -] - [[package]] name = "nvidia-cudnn-frontend" version = "1.18.0" @@ -3719,76 +3583,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/53/df2810b56d259ef96fa6beaa1381bd14c29fbe82836b409516e864c5e177/nvidia_cudnn_frontend-1.18.0-cp314-cp314-win_amd64.whl", hash = "sha256:5053b473fa74168b5fbf35934cd6187f88aa03b8447b9f2cd417332d5e5c9569", size = 1592759, upload-time = "2026-02-11T21:32:33.87Z" }, ] -[[package]] -name = "nvidia-cufft-cu12" -version = "11.3.3.83" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" }, - { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" }, - { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" }, -] - -[[package]] -name = "nvidia-cufile-cu12" -version = "1.13.1.3" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" }, - { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" }, -] - -[[package]] -name = "nvidia-curand-cu12" -version = "10.3.9.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" }, - { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" }, - { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" }, -] - -[[package]] -name = "nvidia-cusolver-cu12" -version = "11.7.3.90" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" }, - { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" }, - { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" }, -] - -[[package]] -name = "nvidia-cusparse-cu12" -version = "12.5.8.93" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" }, - { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" }, - { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" }, -] - -[[package]] -name = "nvidia-cusparselt-cu12" -version = "0.7.1" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" }, - { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, - { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" }, -] - [[package]] name = "nvidia-cutlass-dsl" version = "4.4.1" @@ -3862,44 +3656,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/09/30147ab0d0409d3492f1d37469fe0586c82aeec6eec9a907f59d24094516/nvidia_modelopt-0.41.0-py3-none-any.whl", hash = "sha256:ffa5f903d22653649318831a470550ae55ee04716c068d5ade61c3176fdc1d7d", size = 934582, upload-time = "2026-01-20T17:21:28.494Z" }, ] -[[package]] -name = "nvidia-nccl-cu12" -version = "2.27.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bb/1c/857979db0ef194ca5e21478a0612bcdbbe59458d7694361882279947b349/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:31432ad4d1fb1004eb0c56203dc9bc2178a1ba69d1d9e02d64a6938ab5e40e7a", size = 322400625, upload-time = "2025-06-26T04:11:04.496Z" }, - { url = "https://files.pythonhosted.org/packages/6e/89/f7a07dc961b60645dbbf42e80f2bc85ade7feb9a491b11a1e973aa00071f/nvidia_nccl_cu12-2.27.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ad730cf15cb5d25fe849c6e6ca9eb5b76db16a80f13f425ac68d8e2e55624457", size = 322348229, upload-time = "2025-06-26T04:11:28.385Z" }, -] - -[[package]] -name = "nvidia-nvjitlink-cu12" -version = "12.8.93" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" }, - { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" }, - { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" }, -] - -[[package]] -name = "nvidia-nvshmem-cu12" -version = "3.4.5" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/6a/03aa43cc9bd3ad91553a88b5f6fb25ed6a3752ae86ce2180221962bc2aa5/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0b48363fc6964dede448029434c6abed6c5e37f823cb43c3bcde7ecfc0457e15", size = 138936938, upload-time = "2025-09-06T00:32:05.589Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/6ea3ea725f82e1e76684f0708bbedd871fc96da89945adeba65c3835a64c/nvidia_nvshmem_cu12-3.4.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:042f2500f24c021db8a06c5eec2539027d57460e1c1a762055a6554f72c369bd", size = 139103095, upload-time = "2025-09-06T00:32:31.266Z" }, -] - -[[package]] -name = "nvidia-nvtx-cu12" -version = "12.8.90" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" }, - { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" }, - { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" }, -] - [[package]] name = "nvidia-resiliency-ext" version = "0.5.0" @@ -5245,8 +5001,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astroid" }, { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev'" }, - { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "dill", version = "0.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.14' or sys_platform != 'linux' or extra == 'extra-13-megatron-core-dev' or extra == 'extra-13-megatron-core-lts'" }, + { name = "dill", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.14' and sys_platform == 'linux' and extra != 'extra-13-megatron-core-dev' and extra != 'extra-13-megatron-core-lts') or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, { name = "isort" }, { name = "mccabe" }, { name = "platformdirs" }, @@ -5755,19 +5511,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" }, ] -[[package]] -name = "responses" -version = "0.18.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "requests", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, - { name = "urllib3", marker = "python_full_version >= '3.14' and sys_platform == 'linux'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/03/a5/186653e51cb20fe3ac793403334d4d077fbb7bb18a9c5c2fce8304d5a2e2/responses-0.18.0.tar.gz", hash = "sha256:380cad4c1c1dc942e5e8a8eaae0b4d4edf708f4f010db8b7bcfafad1fcd254ff", size = 45885, upload-time = "2022-02-02T19:59:52.834Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/f3/2b3a6dc5986303b3dd1bbbcf482022acb2583c428cd23f0b6d37b1a1a519/responses-0.18.0-py3-none-any.whl", hash = "sha256:15c63ad16de13ee8e7182d99c9334f64fd81f1ee79f90748d527c28f7ca9dd51", size = 38735, upload-time = "2022-02-02T19:59:52.833Z" }, -] - [[package]] name = "rich" version = "14.3.3" @@ -6921,31 +6664,15 @@ name = "torch" version = "2.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cuda-bindings", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' or (extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, - { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvshmem-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "python_full_version >= '3.12'" }, - { name = "sympy" }, + { name = "filelock", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "fsspec", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "jinja2", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and sys_platform != 'linux') or (python_full_version >= '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "networkx", version = "3.6.1", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32') or (python_full_version < '3.11' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'emscripten' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'linux' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts') or (sys_platform == 'win32' and extra == 'extra-13-megatron-core-dev' and extra == 'extra-13-megatron-core-lts')" }, + { name = "setuptools", marker = "python_full_version >= '3.12' and sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32'" }, + { name = "sympy", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, { name = "triton", marker = "sys_platform == 'never'" }, - { name = "typing-extensions" }, + { name = "typing-extensions", marker = "(python_full_version < '3.11' and sys_platform == 'emscripten') or (python_full_version < '3.11' and sys_platform == 'win32') or (sys_platform != 'emscripten' and sys_platform != 'linux' and sys_platform != 'win32')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/5b/30/bfebdd8ec77db9a79775121789992d6b3b75ee5494971294d7b4b7c999bc/torch-2.10.0-2-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:2b980edd8d7c0a68c4e951ee1856334a43193f98730d97408fbd148c1a933313", size = 79411457, upload-time = "2026-02-10T21:44:59.189Z" }, From a0fc3b49f05c46964285e9bbf541f59446acce45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?oliver=20k=C3=B6nig?= Date: Wed, 4 Mar 2026 16:41:32 +0000 Subject: [PATCH 12/12] update MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: oliver könig --- .../golden_values_dev_dgx_h100.json | 1774 ++++++++--------- 1 file changed, 887 insertions(+), 887 deletions(-) diff --git a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/golden_values_dev_dgx_h100.json b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/golden_values_dev_dgx_h100.json index deca014648f..73be1228f97 100644 --- a/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/golden_values_dev_dgx_h100.json +++ b/tests/functional_tests/test_cases/moe/gpt_dynamic_inference_tp4_etp1_pp1_ep8_16B_logitsmatch_cudagraph_zmq/golden_values_dev_dgx_h100.json @@ -34,125 +34,125 @@ 1394, 1636 ], - "latency": 2.8673329539597034, + "latency": 2.0598746850009775, "logprobs": [ -10.737512588500977, - -3.724862575531006, - -2.833397388458252, - -1.2464861869812012, - -0.2549239993095398, - -1.7607988119125366, - -2.419379711151123, - -1.9533929824829102, - -2.1014301776885986, - -6.169030666351318, - -0.8734959363937378, - -2.4733574390411377, - -3.4822516441345215, - -4.180896759033203, - -1.9767613410949707, - -1.8347630500793457, - -2.2581257820129395, - -7.180149078369141, - -0.0453881211578846, - -1.9841610193252563, - -5.015386581420898, - -8.827117919921875, - -9.885746002197266, - -0.8498678207397461, - -4.770059585571289, - -0.855280339717865, - -2.2494924068450928, - -0.017164958640933037, - -0.03715415671467781, - -3.4830124378204346, - -8.635110855102539, - -1.2520610094070435, - -6.62324857711792, - -3.639960765838623, - -3.664339542388916, - -4.182392597198486, - -2.1796066761016846, - -1.0725229978561401, - -0.26311880350112915, - -0.8036076426506042, - -4.6958818435668945, - -9.042495727539062, - -0.013647346757352352, - -3.1747794151306152, - -1.322129487991333, - -3.949110746383667, - -0.7829495072364807, - -0.002083513652905822, - -2.970266580581665, - -10.56244945526123, - -3.2369167804718018, - -1.1530492305755615, - -4.917466163635254, - -0.21241025626659393, - -0.06490474194288254, - -1.372581124305725, - -2.224682092666626, - -4.3847503662109375, - -0.36867555975914, - -4.035493850708008, - -0.39869019389152527, - -0.14373983442783356, - -2.716118812561035, - -10.687016487121582, - -0.04773370549082756, - -3.398231267929077, - -0.8646175265312195, - -4.74052619934082, - -0.23649944365024567, - -2.6610701084136963, - -0.8428961634635925, - -1.614527940750122, - -5.793307781219482, - -16.929147720336914, - -2.6586406230926514, - -0.1385982781648636, - -7.435610771179199, - -1.0483647584915161, - -2.1261863708496094, - -1.5261307954788208, - -0.27082547545433044, - -5.859070777893066, - -0.00648513063788414, - -7.732051849365234, - -2.712515354156494, - -2.9137418270111084, - -3.041210651397705, - -2.3559694290161133, - -0.3973437249660492, - -1.4338903427124023, - -2.2967660427093506, - -0.6096595525741577, - -1.310807704925537, - -1.9799187183380127, - -1.710689663887024, - -0.8325198888778687, - -0.4943186938762665, - -1.2886956930160522, - -1.585263729095459, - -1.101692795753479, - -0.44188663363456726, - -0.4740143418312073, - -0.042198192328214645, - -1.2899682521820068, - -2.1242613792419434, - -2.7151529788970947, - -0.8274281024932861, - -0.39647114276885986, - -2.7846553325653076, - -1.5348155498504639, - -1.626529335975647, - -0.047930192202329636, - -1.3622726202011108, - -1.3274445533752441, - -1.2834383249282837, - -1.3211638927459717, - -0.48530423641204834 + -3.6886487007141113, + -2.8194005489349365, + -1.2396225929260254, + -0.22920642793178558, + -1.8583638668060303, + -2.384000778198242, + -2.008981227874756, + -2.09975528717041, + -6.182888031005859, + -0.890110433101654, + -2.478799343109131, + -3.502744436264038, + -4.090683937072754, + -1.964805006980896, + -1.8054677248001099, + -2.321495294570923, + -7.160285949707031, + -0.04007242992520332, + -1.9924155473709106, + -5.093112468719482, + -8.779500007629395, + -9.904478073120117, + -0.8523460626602173, + -4.82073974609375, + -0.86553555727005, + -2.36330509185791, + -0.01946748048067093, + -0.034191977232694626, + -3.3692376613616943, + -8.743470191955566, + -1.3306764364242554, + -6.645841598510742, + -3.7794108390808105, + -3.7756881713867188, + -4.187170028686523, + -2.2124571800231934, + -1.0734034776687622, + -0.22135400772094727, + -0.7782289981842041, + -4.716646671295166, + -9.09740161895752, + -0.013706612400710583, + -3.1654152870178223, + -1.3161238431930542, + -4.0115814208984375, + -0.7715368866920471, + -0.002078041434288025, + -2.9644386768341064, + -10.690834045410156, + -3.2352819442749023, + -1.1544448137283325, + -4.971055030822754, + -0.2275625467300415, + -0.05549970641732216, + -1.2946704626083374, + -2.20631742477417, + -4.362645149230957, + -0.37027108669281006, + -4.025010108947754, + -0.3656681478023529, + -0.13922274112701416, + -2.720149040222168, + -10.679438591003418, + -0.04889172688126564, + -3.483541965484619, + -0.9704694151878357, + -4.714925289154053, + -0.2623435854911804, + -2.60113525390625, + -0.8295360207557678, + -1.671109676361084, + -5.792396068572998, + -16.96857261657715, + -2.647408962249756, + -0.13962996006011963, + -7.453517436981201, + -1.0938549041748047, + -2.134143829345703, + -1.5537410974502563, + -0.2869631052017212, + -5.857534408569336, + -0.006919232662767172, + -7.747158527374268, + -2.7227447032928467, + -3.037987232208252, + -3.0312116146087646, + -2.342064619064331, + -0.39744827151298523, + -1.516434907913208, + -2.3665950298309326, + -0.6091187000274658, + -1.3131166696548462, + -1.9402395486831665, + -1.6906054019927979, + -0.8380135297775269, + -0.4860585927963257, + -1.2984997034072876, + -1.5767009258270264, + -0.9797608852386475, + -0.40544381737709045, + -0.46793943643569946, + -0.041146114468574524, + -1.2806015014648438, + -2.1299755573272705, + -2.7230143547058105, + -0.7635340690612793, + -0.43544360995292664, + -2.7788889408111572, + -1.5222126245498657, + -1.6613693237304688, + -0.05091489478945732, + -1.3707530498504639, + -1.3513561487197876, + -1.2351378202438354, + -1.2675105333328247, + -0.4767935872077942 ], "routing_indices": [ [ @@ -492,15 +492,15 @@ 32, 56, 63, - 3 + 14 ], [ 36, 55, - 35, 17, 32, - 44 + 44, + 35 ], [ 59, @@ -516,11 +516,11 @@ 61, 43, 30, - 22 + 16 ], [ - 32, 2, + 32, 5, 39, 11, @@ -540,7 +540,7 @@ 45, 0, 7, - 5 + 41 ], [ 48, @@ -568,8 +568,8 @@ ], [ 58, - 39, 36, + 39, 47, 29, 37 @@ -583,8 +583,8 @@ 17 ], [ - 13, 22, + 13, 20, 52, 24, @@ -683,8 +683,8 @@ [ 36, 8, - 16, 37, + 16, 10, 14 ], @@ -757,8 +757,8 @@ 7, 3, 38, - 59, - 54 + 54, + 59 ], [ 31, @@ -774,7 +774,7 @@ 18, 53, 40, - 52 + 0 ], [ 27, @@ -798,7 +798,7 @@ 51, 30, 18, - 48 + 3 ], [ 0, @@ -880,7 +880,7 @@ 44, 27, 4, - 41 + 62 ], [ 51, @@ -892,19 +892,19 @@ ], [ 8, - 50, - 5, 19, + 5, 16, - 22 + 22, + 50 ], [ 36, 49, 60, - 44, 15, - 33 + 44, + 25 ], [ 13, @@ -917,16 +917,16 @@ [ 26, 22, - 14, 7, + 14, 32, 17 ], [ 26, 60, - 2, 58, + 2, 54, 10 ], @@ -951,8 +951,8 @@ 23, 27, 46, - 56, - 55 + 55, + 56 ], [ 3, @@ -987,12 +987,12 @@ 32 ], [ - 51, 27, + 51, 20, 50, - 16, - 55 + 55, + 16 ], [ 63, @@ -1024,7 +1024,7 @@ 37, 14, 44, - 36 + 0 ] ], [ @@ -1081,8 +1081,8 @@ 15, 47, 17, - 35, - 24 + 24, + 35 ], [ 3, @@ -1113,8 +1113,8 @@ 19, 50, 16, - 32, - 22 + 22, + 32 ], [ 36, @@ -1170,7 +1170,7 @@ 41, 1, 55, - 18 + 11 ], [ 59, @@ -1178,15 +1178,15 @@ 16, 23, 42, - 11 + 12 ], [ 17, 30, 46, - 55, 25, - 0 + 55, + 36 ], [ 54, @@ -1194,15 +1194,15 @@ 45, 0, 6, - 19 + 56 ], [ 51, 9, 22, 23, - 16, - 25 + 31, + 16 ], [ 62, @@ -1223,26 +1223,26 @@ [ 61, 26, - 1, 17, - 32, - 63 + 1, + 63, + 32 ], [ 37, 46, 63, 20, - 24, - 4 + 9, + 24 ], [ 63, 11, 12, 61, - 31, - 22 + 22, + 31 ] ], [ @@ -1372,7 +1372,7 @@ 51, 47, 62, - 60 + 26 ], [ 16, @@ -1427,23 +1427,23 @@ 27, 14, 51, - 8, - 34 + 18, + 19 ], [ 12, 13, 33, 1, - 5, - 43 + 43, + 5 ], [ 26, 32, 1, - 50, 37, + 50, 57 ], [ @@ -1451,8 +1451,8 @@ 47, 63, 46, - 4, - 5 + 5, + 4 ], [ 63, @@ -1550,7 +1550,7 @@ 19, 48, 16, - 54 + 20 ], [ 13, @@ -1578,8 +1578,8 @@ ], [ 26, - 40, 60, + 40, 2, 52, 7 @@ -1589,8 +1589,8 @@ 51, 17, 46, - 13, - 62 + 62, + 13 ], [ 38, @@ -1643,8 +1643,8 @@ [ 50, 51, - 19, 7, + 19, 48, 53 ], @@ -1831,8 +1831,8 @@ 53, 32, 12, - 9, - 38 + 38, + 48 ], [ 30, @@ -1845,8 +1845,8 @@ [ 40, 49, - 14, 28, + 14, 23, 55 ], @@ -1872,12 +1872,12 @@ 59, 3, 55, - 10 + 8 ], [ 1, - 32, 53, + 32, 26, 47, 3 @@ -2037,28 +2037,28 @@ 39 ], [ - 23, 25, + 23, 4, 14, 46, - 53 + 60 ], [ 8, 45, 32, 53, - 10, - 54 + 54, + 10 ], [ 15, 38, 53, 55, - 7, - 30 + 30, + 7 ], [ 41, @@ -2079,8 +2079,8 @@ [ 50, 0, - 53, 51, + 53, 34, 55 ], @@ -2090,7 +2090,7 @@ 55, 25, 8, - 3 + 27 ], [ 11, @@ -2098,7 +2098,7 @@ 1, 9, 0, - 3 + 23 ], [ 50, @@ -2112,9 +2112,9 @@ 17, 37, 31, - 40, 5, - 36 + 36, + 40 ] ], [ @@ -2204,7 +2204,7 @@ 14, 59, 29, - 4 + 40 ], [ 36, @@ -2266,8 +2266,8 @@ 35, 32, 46, - 10, 31, + 10, 45 ], [ @@ -2406,14 +2406,14 @@ 4, 18, 31, - 29 + 35 ], [ 54, 62, 47, - 38, 4, + 38, 32 ], [ @@ -2421,23 +2421,23 @@ 14, 15, 22, - 59, - 38 + 38, + 59 ], [ 16, 36, 42, - 55, - 15, - 18 + 18, + 0, + 55 ], [ 49, 8, 20, - 14, 0, + 14, 33 ], [ @@ -2445,16 +2445,16 @@ 39, 25, 2, - 62, - 22 + 22, + 62 ], [ 62, 5, 58, 37, - 7, - 32 + 32, + 2 ], [ 43, @@ -2477,23 +2477,23 @@ 0, 7, 16, - 32, - 13 + 13, + 32 ], [ 12, 39, 32, - 61, 16, + 61, 45 ], [ 52, 34, + 62, 15, 18, - 62, 30 ], [ @@ -2502,23 +2502,23 @@ 46, 40, 6, - 38 + 14 ], [ 1, 19, 17, - 20, 4, - 21 + 20, + 40 ], [ 41, 40, 4, 53, - 55, - 19 + 19, + 55 ], [ 25, @@ -2533,8 +2533,8 @@ 29, 52, 44, - 13, - 35 + 35, + 13 ], [ 50, @@ -2656,7 +2656,7 @@ 0, 3, 43, - 8 + 17 ], [ 39, @@ -2675,8 +2675,8 @@ 56 ], [ - 19, 42, + 19, 55, 43, 11, @@ -2686,8 +2686,8 @@ 9, 47, 43, - 52, 18, + 52, 50 ], [ @@ -2703,16 +2703,16 @@ 32, 61, 3, - 21, - 43 + 43, + 21 ], [ 36, 13, 40, 7, - 62, - 16 + 16, + 62 ], [ 14, @@ -2760,7 +2760,7 @@ 26, 48, 8, - 16 + 43 ], [ 46, @@ -2850,14 +2850,14 @@ 38, 20, 58, - 17 + 40 ], [ 19, 1, 46, - 26, 22, + 26, 63 ], [ @@ -2871,18 +2871,18 @@ [ 17, 7, + 16, 49, 14, - 16, 26 ], [ 45, 47, 22, - 62, 0, - 58 + 58, + 62 ], [ 58, @@ -2912,8 +2912,8 @@ 32, 57, 42, - 25, 63, + 25, 43 ], [ @@ -2921,8 +2921,8 @@ 32, 49, 61, - 21, - 12 + 12, + 21 ], [ 5, @@ -2976,8 +2976,8 @@ 14, 26, 3, - 48, 16, + 48, 21 ], [ @@ -3333,8 +3333,8 @@ 18, 28, 55, - 5, - 37 + 37, + 5 ], [ 13, @@ -3615,8 +3615,8 @@ 8, 53, 41, - 11, - 4 + 4, + 11 ], [ 40, @@ -3754,7 +3754,7 @@ 33, 38, 28, - 52 + 19 ], [ 58, @@ -3767,14 +3767,14 @@ [ 55, 59, - 8, 13, - 41, - 43 + 8, + 3, + 41 ], [ - 51, 3, + 51, 15, 46, 57, @@ -3782,8 +3782,8 @@ ], [ 32, - 10, 30, + 10, 12, 25, 18 @@ -3794,7 +3794,7 @@ 27, 61, 53, - 39 + 59 ], [ 42, @@ -3818,13 +3818,13 @@ 48, 47, 16, - 61 + 17 ], [ 4, 0, - 53, 25, + 53, 24, 11 ], @@ -3832,8 +3832,8 @@ 1, 44, 11, - 34, 45, + 34, 51 ], [ @@ -3956,7 +3956,7 @@ 36, 63, 15, - 52 + 23 ], [ 29, @@ -3979,8 +3979,8 @@ 44, 58, 61, - 37, - 38 + 38, + 37 ], [ 23, @@ -3988,15 +3988,15 @@ 17, 57, 13, - 5 + 40 ], [ 13, 9, 19, 37, - 50, - 15 + 15, + 50 ], [ 32, @@ -4020,7 +4020,7 @@ 34, 18, 22, - 7 + 49 ], [ 53, @@ -4035,8 +4035,8 @@ 47, 18, 41, - 30, - 62 + 62, + 30 ], [ 33, @@ -4044,7 +4044,7 @@ 19, 5, 0, - 34 + 44 ], [ 51, @@ -4052,7 +4052,7 @@ 25, 53, 27, - 55 + 16 ], [ 40, @@ -4076,7 +4076,7 @@ 36, 9, 5, - 33 + 12 ] ], [ @@ -4213,8 +4213,8 @@ 59, 6, 9, - 16, - 43 + 43, + 16 ], [ 41, @@ -4230,7 +4230,7 @@ 3, 10, 34, - 59 + 40 ], [ 4, @@ -4251,10 +4251,10 @@ [ 55, 9, - 61, 18, - 60, - 3 + 61, + 45, + 60 ], [ 60, @@ -4286,7 +4286,7 @@ 9, 58, 21, - 12 + 63 ], [ 63, @@ -4320,7 +4320,7 @@ 21, 7, 59, - 9 + 8 ], [ 1, @@ -4391,8 +4391,8 @@ 33, 36, 60, - 49, - 0 + 0, + 49 ], [ 29, @@ -4406,16 +4406,16 @@ 6, 0, 61, - 50, 48, + 50, 3 ], [ 8, 6, 58, - 37, 29, + 37, 19 ], [ @@ -4658,7 +4658,7 @@ 48, 42, 29, - 39 + 63 ], [ 42, @@ -4697,8 +4697,8 @@ 33, 28, 5, - 44, - 8 + 8, + 44 ], [ 51, @@ -4827,8 +4827,8 @@ 59, 10, 36, - 58, - 60 + 60, + 58 ], [ 17, @@ -4836,7 +4836,7 @@ 28, 31, 29, - 27 + 44 ], [ 47, @@ -4849,15 +4849,15 @@ [ 39, 58, - 56, 37, - 18, - 59 + 59, + 56, + 18 ], [ 60, - 18, 57, + 18, 9, 55, 23 @@ -4871,8 +4871,8 @@ 19 ], [ - 1, 32, + 1, 42, 57, 35, @@ -4908,7 +4908,7 @@ 30, 18, 14, - 17 + 8 ], [ 33, @@ -4916,7 +4916,7 @@ 8, 5, 0, - 19 + 56 ], [ 51, @@ -4928,8 +4928,8 @@ ], [ 40, - 4, 27, + 4, 6, 9, 16 @@ -4947,8 +4947,8 @@ 14, 12, 54, - 20, - 35 + 35, + 53 ] ], [ @@ -5070,7 +5070,7 @@ 56, 50, 3, - 58 + 23 ], [ 3, @@ -5094,7 +5094,7 @@ 57, 42, 52, - 19 + 1 ], [ 2, @@ -5117,14 +5117,14 @@ 7, 56, 25, - 60, - 13 + 13, + 60 ], [ 17, 39, - 53, 14, + 53, 30, 25 ], @@ -5140,8 +5140,8 @@ 51, 4, 11, - 58, 57, + 58, 28 ], [ @@ -5272,11 +5272,11 @@ 4, 11, 7, - 6 + 39 ], [ - 35, 34, + 35, 4, 42, 62, @@ -5319,8 +5319,8 @@ 63, 61, 28, - 56, - 12 + 24, + 56 ], [ 33, @@ -5341,8 +5341,8 @@ [ 12, 54, - 4, 27, + 4, 50, 14 ], @@ -5383,7 +5383,7 @@ 13, 59, 51, - 58, + 26, 23 ] ], @@ -5490,7 +5490,7 @@ 46, 59, 41, - 15 + 13 ], [ 23, @@ -5511,23 +5511,23 @@ [ 9, 22, - 36, 46, + 36, 26, 41 ], [ 3, 51, - 40, 56, + 40, 46, 8 ], [ 12, - 25, 21, + 25, 50, 17, 62 @@ -5553,8 +5553,8 @@ 37, 12, 24, - 25, - 63 + 63, + 25 ], [ 45, @@ -5562,14 +5562,14 @@ 18, 26, 17, - 1 + 43 ], [ 4, 25, 32, - 48, 1, + 48, 53 ], [ @@ -5578,7 +5578,7 @@ 63, 4, 62, - 44 + 34 ], [ 6, @@ -5586,7 +5586,7 @@ 62, 40, 46, - 23 + 54 ], [ 10, @@ -5746,8 +5746,8 @@ 18, 20, 57, - 45, 32, + 45, 1 ], [ @@ -6020,8 +6020,8 @@ 40, 19, 16, - 9, 46, + 9, 47 ], [ @@ -6036,8 +6036,8 @@ 35, 60, 54, - 1, 5, + 1, 40 ] ], @@ -6157,8 +6157,8 @@ [ 3, 7, - 46, 42, + 46, 33, 35 ], @@ -6199,8 +6199,8 @@ 23, 54, 36, - 28, - 7 + 7, + 28 ], [ 40, @@ -6231,8 +6231,8 @@ 29, 51, 4, - 32, - 40 + 40, + 32 ], [ 40, @@ -6401,8 +6401,8 @@ 0, 54, 52, - 17, - 39 + 39, + 17 ], [ 25, @@ -6612,7 +6612,7 @@ 3, 51, 22, - 57 + 25 ], [ 12, @@ -6628,31 +6628,31 @@ 27, 43, 14, - 29 + 13 ], [ 56, 53, 44, - 43, + 24, 60, - 24 + 43 ], [ 12, 53, 40, - 2, 49, + 2, 62 ], [ 18, 39, + 44, 61, 26, - 23, - 44 + 23 ], [ 0, @@ -6902,13 +6902,13 @@ 7, 60, 33, - 32 + 3 ], [ 45, 52, - 4, 36, + 4, 21, 9 ] @@ -7047,8 +7047,8 @@ 54, 5, 27, - 18, - 31 + 31, + 18 ], [ 50, @@ -7071,8 +7071,8 @@ 0, 61, 10, - 30, - 5 + 5, + 30 ], [ 29, @@ -7085,15 +7085,15 @@ [ 29, 17, - 21, 30, + 21, 14, 40 ], [ 5, - 17, 33, + 17, 32, 18, 28 @@ -7118,15 +7118,15 @@ 3, 26, 12, - 59, 2, - 53 + 59, + 48 ], [ 46, 43, - 20, 18, + 20, 9, 53 ] @@ -7201,8 +7201,8 @@ 44, 14, 16, - 33, - 39 + 39, + 33 ], [ 42, @@ -7387,8 +7387,8 @@ 7, 20, 15, - 16, - 33 + 33, + 16 ], [ 45, @@ -7428,7 +7428,7 @@ 44, 24, 43, - 28 + 47 ], [ 42, @@ -7456,8 +7456,8 @@ ], [ 43, - 2, 25, + 2, 5, 3, 49 @@ -7476,7 +7476,7 @@ 44, 41, 7, - 49 + 63 ], [ 48, @@ -7546,8 +7546,8 @@ 21, 62, 11, - 31, 46, + 31, 33 ], [ @@ -7670,7 +7670,7 @@ 17, 56, 32, - 57 + 55 ], [ 55, @@ -7755,8 +7755,8 @@ [ 38, 54, - 53, 48, + 53, 21, 36 ], @@ -7771,8 +7771,8 @@ [ 38, 27, - 6, 19, + 6, 44, 3 ], @@ -7926,8 +7926,8 @@ 48, 17, 37, - 21, 32, + 21, 57 ], [ @@ -7981,8 +7981,8 @@ [ 47, 19, - 43, 48, + 43, 4, 58 ], @@ -8104,8 +8104,8 @@ 17, 31, 4, - 5, 56, + 5, 9 ], [ @@ -8134,8 +8134,8 @@ ], [ 56, - 30, 22, + 30, 10, 5, 55 @@ -8146,7 +8146,7 @@ 26, 1, 61, - 37 + 45 ], [ 8, @@ -8160,9 +8160,9 @@ 45, 23, 51, + 17, 4, - 13, - 14 + 13 ], [ 17, @@ -8170,15 +8170,15 @@ 61, 14, 29, - 4 + 55 ], [ 14, 27, 43, 21, - 42, - 53 + 56, + 57 ], [ 52, @@ -8194,31 +8194,31 @@ 49, 14, 21, - 52 + 34 ], [ - 9, 4, - 13, + 9, 47, + 13, 8, - 36 + 61 ], [ 27, 3, 16, + 43, 31, - 47, - 33 + 47 ], [ 59, 43, - 29, 61, + 29, 0, - 8 + 18 ] ], [ @@ -8264,8 +8264,8 @@ ], [ 46, - 6, 51, + 6, 29, 58, 4 @@ -8322,8 +8322,8 @@ 17, 56, 7, - 53, 5, + 53, 36 ], [ @@ -8371,8 +8371,8 @@ 42, 5, 2, - 48, - 61 + 61, + 48 ], [ 37, @@ -8509,8 +8509,8 @@ 20, 31, 54, - 38, - 21 + 21, + 38 ], [ 43, @@ -8550,7 +8550,7 @@ 29, 5, 55, - 41 + 6 ], [ 23, @@ -8558,7 +8558,7 @@ 36, 22, 15, - 54 + 30 ], [ 12, @@ -8573,8 +8573,8 @@ 7, 41, 11, - 53, - 1 + 1, + 53 ], [ 6, @@ -8597,8 +8597,8 @@ 6, 32, 21, - 47, - 0 + 0, + 47 ], [ 38, @@ -8606,7 +8606,7 @@ 36, 53, 31, - 17 + 61 ], [ 3, @@ -8622,7 +8622,7 @@ 46, 12, 35, - 48 + 3 ], [ 20, @@ -8643,8 +8643,8 @@ [ 33, 34, - 45, 60, + 45, 7, 32 ], @@ -8773,8 +8773,8 @@ [ 23, 19, - 22, 62, + 22, 11, 9 ], @@ -8797,8 +8797,8 @@ [ 21, 2, - 6, 36, + 6, 50, 56 ], @@ -8807,8 +8807,8 @@ 10, 48, 53, - 19, - 61 + 61, + 19 ], [ 47, @@ -8823,8 +8823,8 @@ 12, 3, 55, - 53, - 41 + 41, + 53 ], [ 44, @@ -8840,7 +8840,7 @@ 53, 11, 39, - 19 + 38 ], [ 1, @@ -8848,7 +8848,7 @@ 38, 3, 37, - 63 + 42 ], [ 45, @@ -8864,14 +8864,14 @@ 42, 49, 11, - 3 + 62 ], [ 30, 57, 15, - 16, 56, + 16, 41 ] ], @@ -9025,8 +9025,8 @@ 31, 10, 45, - 35, - 30 + 30, + 35 ], [ 15, @@ -9203,8 +9203,8 @@ 9, 15, 13, - 28, - 63 + 63, + 28 ], [ 62, @@ -9260,7 +9260,7 @@ 53, 41, 13, - 23 + 0 ], [ 14, @@ -9386,8 +9386,8 @@ ], [ 5, - 54, 43, + 54, 16, 12, 53 @@ -9395,8 +9395,8 @@ [ 18, 6, - 35, 3, + 35, 21, 1 ], @@ -9485,8 +9485,8 @@ 42, 6, 16, - 15, - 22 + 22, + 15 ], [ 33, @@ -9500,8 +9500,8 @@ 4, 58, 48, - 11, 33, + 11, 28 ], [ @@ -9576,20 +9576,20 @@ 20, 22, 43, - 3 + 33 ], [ 42, - 5, 30, + 5, 25, 19, 34 ], [ 33, - 35, 18, + 35, 51, 7, 57 @@ -9612,8 +9612,8 @@ ], [ 18, - 3, 21, + 3, 6, 39, 53 @@ -9669,8 +9669,8 @@ [ 0, 25, - 59, 62, + 59, 21, 13 ], @@ -9897,8 +9897,8 @@ 12, 61, 26, - 1, - 39 + 32, + 1 ], [ 25, @@ -10023,8 +10023,8 @@ 35 ], [ - 50, 23, + 50, 12, 8, 9, @@ -10039,8 +10039,8 @@ 4 ], [ - 55, 38, + 55, 0, 40, 20, @@ -10253,8 +10253,8 @@ 29, 3, 2, - 46, - 10 + 10, + 46 ], [ 38, @@ -10292,9 +10292,9 @@ 35, 38, 33, - 51, 25, - 22 + 51, + 52 ], [ 58, @@ -10310,7 +10310,7 @@ 53, 3, 54, - 35 + 7 ], [ 3, @@ -10323,8 +10323,8 @@ [ 32, 5, - 57, 54, + 57, 52, 30 ], @@ -10333,8 +10333,8 @@ 61, 40, 0, - 48, - 3 + 3, + 48 ], [ 22, @@ -10364,8 +10364,8 @@ 1, 4, 13, - 11, 39, + 11, 33 ], [ @@ -10373,8 +10373,8 @@ 10, 51, 44, - 4, - 55 + 55, + 4 ], [ 35, @@ -10396,8 +10396,8 @@ 27, 41, 32, - 45, 10, + 45, 47 ] ], @@ -10455,8 +10455,8 @@ 63, 43, 59, - 42, - 25 + 25, + 42 ], [ 16, @@ -10583,8 +10583,8 @@ 13, 16, 11, - 2, - 24 + 24, + 2 ], [ 44, @@ -10599,8 +10599,8 @@ 5, 49, 33, - 63, - 31 + 31, + 63 ], [ 49, @@ -10608,7 +10608,7 @@ 1, 57, 4, - 53 + 44 ], [ 2, @@ -10690,20 +10690,20 @@ 42, 6, 17, - 23 + 14 ], [ 60, 10, 22, + 50, 48, - 38, - 50 + 38 ], [ 31, - 19, 35, + 19, 27, 38, 53 @@ -10734,8 +10734,8 @@ ], [ 18, - 39, 58, + 39, 21, 50, 36 @@ -10746,7 +10746,7 @@ 49, 19, 53, - 54 + 23 ], [ 6, @@ -10770,7 +10770,7 @@ 7, 0, 61, - 50 + 32 ], [ 39, @@ -10782,8 +10782,8 @@ ], [ 51, - 50, 30, + 50, 7, 3, 61 @@ -10799,8 +10799,8 @@ [ 33, 19, - 58, 56, + 58, 11, 15 ], @@ -10825,8 +10825,8 @@ 52, 31, 16, - 26, - 39 + 39, + 26 ], [ 4, @@ -10891,15 +10891,15 @@ 59, 4, 16, - 11, - 33 + 33, + 11 ], [ 12, 51, 30, - 56, 40, + 56, 27 ], [ @@ -10916,7 +10916,7 @@ 38, 6, 21, - 20 + 50 ], [ 11, @@ -10938,23 +10938,23 @@ 9, 28, 45, - 57, 55, - 53 + 57, + 16 ], [ 59, 1, 25, - 6, 47, + 6, 3 ], [ 9, 39, - 18, 50, + 18, 58, 21 ], @@ -10964,7 +10964,7 @@ 38, 53, 50, - 54 + 35 ], [ 41, @@ -10987,8 +10987,8 @@ 40, 7, 42, - 62, - 32 + 32, + 62 ], [ 4, @@ -11008,8 +11008,8 @@ ], [ 9, - 58, 14, + 58, 30, 42, 8 @@ -11051,8 +11051,8 @@ 1, 10, 61, - 0, - 12 + 12, + 0 ] ], [ @@ -11181,8 +11181,8 @@ 3, 35, 45, - 32, - 4 + 4, + 32 ], [ 46, @@ -11221,8 +11221,8 @@ 54, 53, 56, - 0, - 63 + 63, + 0 ], [ 34, @@ -11342,8 +11342,8 @@ 17, 50, 41, - 16, 13, + 16, 51 ], [ @@ -11404,8 +11404,8 @@ ], [ 24, - 14, 42, + 14, 17, 32, 62 @@ -11544,8 +11544,8 @@ 28, 25, 42, - 29, 30, + 29, 46 ], [ @@ -11574,8 +11574,8 @@ ], [ 39, - 31, 35, + 31, 3, 36, 0 @@ -11642,15 +11642,15 @@ 7, 62, 32, - 42 + 50 ], [ 37, 14, - 2, 10, - 17, - 57 + 2, + 57, + 17 ], [ 7, @@ -11690,7 +11690,7 @@ 47, 9, 2, - 16 + 48 ], [ 3, @@ -11748,7 +11748,7 @@ 39, 20, 10, - 61 + 3 ], [ 39, @@ -11771,14 +11771,14 @@ 41, 62, 30, - 52, - 37 + 37, + 52 ], [ 21, 15, - 48, 28, + 48, 26, 27 ], @@ -11791,8 +11791,8 @@ 41 ], [ - 43, 35, + 43, 28, 31, 60, @@ -11819,8 +11819,8 @@ 33, 7, 6, - 14, - 51 + 51, + 14 ], [ 51, @@ -11864,8 +11864,8 @@ ], [ 42, - 46, 48, + 46, 9, 17, 37 @@ -11899,8 +11899,8 @@ 63, 7, 42, - 40, - 49 + 49, + 40 ], [ 59, @@ -11908,7 +11908,7 @@ 38, 62, 44, - 25 + 41 ], [ 23, @@ -12036,8 +12036,8 @@ 20, 9, 19, - 22, 6, + 22, 15 ], [ @@ -12054,7 +12054,7 @@ 13, 38, 60, - 26 + 20 ], [ 46, @@ -12084,8 +12084,8 @@ 42, 37, 48, - 33, 3, + 33, 31 ], [ @@ -12101,8 +12101,8 @@ 14, 26, 13, - 12, - 22 + 22, + 12 ], [ 25, @@ -12118,7 +12118,7 @@ 26, 41, 4, - 57 + 10 ], [ 37, @@ -12207,8 +12207,8 @@ 30, 26, 62, - 53, - 57 + 57, + 53 ], [ 3, @@ -12328,20 +12328,20 @@ 25, 58, 35, - 13 + 45 ], [ 4, 9, 10, - 39, 37, - 40 + 39, + 16 ], [ 22, - 21, 11, + 21, 48, 45, 47 @@ -12488,9 +12488,9 @@ 5, 8, 62, - 38, 24, - 61 + 38, + 57 ], [ 13, @@ -12512,8 +12512,8 @@ 1, 10, 20, - 8, 53, + 8, 4 ], [ @@ -12536,8 +12536,8 @@ 11, 62, 41, - 14, 46, + 14, 44 ], [ @@ -12561,8 +12561,8 @@ 21, 45, 47, - 61, - 18 + 18, + 61 ], [ 10, @@ -12618,8 +12618,8 @@ 14, 61, 59, - 29, 1, + 29, 49 ], [ @@ -12651,8 +12651,8 @@ 25, 4, 1, - 0, - 29 + 29, + 0 ], [ 54, @@ -12688,8 +12688,8 @@ ], [ 57, - 13, 40, + 13, 22, 60, 6 @@ -12698,8 +12698,8 @@ 37, 44, 58, - 8, 5, + 8, 50 ], [ @@ -12756,7 +12756,7 @@ 16, 29, 6, - 46 + 19 ], [ 46, @@ -12768,8 +12768,8 @@ ], [ 4, - 47, 51, + 47, 1, 16, 41 @@ -12917,8 +12917,8 @@ 62, 10, 33, - 26, - 5 + 5, + 26 ], [ 48, @@ -12958,7 +12958,7 @@ 15, 27, 42, - 3 + 34 ], [ 43, @@ -13087,8 +13087,8 @@ 4, 6, 12, - 47, - 60 + 60, + 47 ], [ 54, @@ -13231,7 +13231,7 @@ 18, 8, 4, - 29, + 40, 50 ] ], @@ -13290,7 +13290,7 @@ 33, 9, 56, - 35 + 25 ], [ 12, @@ -13298,7 +13298,7 @@ 14, 36, 25, - 61 + 2 ], [ 23, @@ -13359,10 +13359,10 @@ [ 55, 2, - 57, 54, - 56, - 22 + 57, + 22, + 56 ], [ 53, @@ -13426,7 +13426,7 @@ 31, 10, 4, - 43 + 40 ], [ 29, @@ -13528,8 +13528,8 @@ ], [ 42, - 5, 43, + 5, 25, 21, 6 @@ -13844,9 +13844,9 @@ 51, 1, 35, - 34, + 48, 44, - 48 + 34 ], [ 32, @@ -13877,8 +13877,8 @@ 55, 15, 1, - 49, - 8 + 8, + 49 ], [ 30, @@ -13924,8 +13924,8 @@ ], [ 17, - 35, 2, + 35, 48, 44, 62 @@ -14036,8 +14036,8 @@ ], [ 8, - 45, 10, + 45, 1, 31, 53 @@ -14079,8 +14079,8 @@ 14, 12, 10, - 54, - 51 + 51, + 54 ], [ 11, @@ -14175,8 +14175,8 @@ [ 50, 48, - 37, 57, + 37, 38, 10 ], @@ -14270,8 +14270,8 @@ ], [ 22, - 60, 11, + 60, 39, 1, 49 @@ -14384,8 +14384,8 @@ ], [ 33, - 56, 35, + 56, 42, 38, 45 @@ -14434,8 +14434,8 @@ 56, 15, 51, - 55, 50, + 55, 13 ], [ @@ -14646,7 +14646,7 @@ 42, 58, 57, - 26 + 15 ], [ 30, @@ -14659,8 +14659,8 @@ [ 55, 40, - 62, 12, + 62, 13, 30 ], @@ -14713,8 +14713,8 @@ 11 ], [ - 6, 56, + 6, 32, 14, 10, @@ -14839,8 +14839,8 @@ 10, 0, 55, - 37, - 40 + 40, + 37 ], [ 54, @@ -15145,15 +15145,15 @@ 42, 24, 37, - 8, - 4 + 4, + 8 ], [ 34, 16, 8, 46, - 6, + 56, 17 ], [ @@ -15169,8 +15169,8 @@ 0, 42, 56, - 31, - 30 + 30, + 31 ], [ 6, @@ -15236,7 +15236,7 @@ 39, 31, 63, - 0 + 9 ], [ 58, @@ -15386,9 +15386,9 @@ 10, 38, 1, - 57, 4, - 31 + 31, + 57 ], [ 8, @@ -15396,7 +15396,7 @@ 54, 33, 3, - 48 + 50 ], [ 6, @@ -15409,8 +15409,8 @@ [ 55, 43, - 4, 5, + 4, 25, 8 ] @@ -15549,8 +15549,8 @@ 60, 2, 17, - 18, - 27 + 27, + 18 ], [ 51, @@ -15598,7 +15598,7 @@ 60, 47, 15, - 53 + 52 ], [ 20, @@ -15614,15 +15614,15 @@ 47, 41, 60, - 1 + 40 ], [ 33, 3, - 29, 49, - 59, - 14 + 29, + 14, + 59 ], [ 52, @@ -15672,15 +15672,15 @@ 13, 20, 44, - 30 + 40 ], [ 33, 45, 27, 53, - 63, - 52 + 52, + 63 ], [ 28, @@ -15728,12 +15728,12 @@ 22, 19, 45, - 23 + 6 ], [ 6, - 20, 26, + 20, 30, 5, 25 @@ -15958,8 +15958,8 @@ ], [ 47, - 27, 3, + 27, 6, 24, 22 @@ -16038,8 +16038,8 @@ ], [ 11, - 19, 4, + 19, 8, 58, 52 @@ -16137,8 +16137,8 @@ [ 48, 26, - 17, 35, + 17, 55, 6 ], @@ -16236,15 +16236,15 @@ 7, 53, 21, - 13 + 58 ], [ 49, 31, 14, 8, - 22, - 19 + 19, + 22 ], [ 15, @@ -16268,7 +16268,7 @@ 24, 12, 9, - 42 + 20 ], [ 58, @@ -16340,8 +16340,8 @@ 29, 42, 25, - 30, 47, + 30, 17 ], [ @@ -16470,7 +16470,7 @@ 33, 17, 51, - 44 + 13 ], [ 41, @@ -16773,8 +16773,8 @@ 0 ], [ - 36, 20, + 36, 63, 28, 60, @@ -16866,7 +16866,7 @@ 1, 31, 17, - 43 + 54 ], [ 50, @@ -16889,16 +16889,16 @@ 23, 19, 13, - 7, - 50 + 5, + 7 ], [ 30, - 46, 18, + 46, 14, 1, - 15 + 0 ], [ 15, @@ -16906,13 +16906,13 @@ 52, 0, 51, - 27 + 30 ], [ 4, 21, - 41, 53, + 41, 10, 14 ], @@ -16928,8 +16928,8 @@ 3, 19, 16, - 47, 30, + 47, 27 ], [ @@ -16995,8 +16995,8 @@ 31, 49, 52, - 15, - 25 + 25, + 15 ], [ 11, @@ -17033,8 +17033,8 @@ [ 50, 21, - 19, 36, + 19, 4, 42 ], @@ -17064,8 +17064,8 @@ ], [ 27, - 57, 22, + 57, 8, 54, 37 @@ -17148,7 +17148,7 @@ 28, 55, 2, - 24 + 34 ], [ 57, @@ -17229,8 +17229,8 @@ 34, 30, 48, - 26, - 7 + 7, + 26 ], [ 27, @@ -17246,20 +17246,20 @@ 30, 59, 6, - 18 + 52 ], [ 31, 62, 25, 42, - 32, - 36 + 21, + 32 ], [ 34, - 19, 32, + 19, 2, 62, 14 @@ -17269,8 +17269,8 @@ 40, 24, 31, - 57, - 13 + 13, + 48 ], [ 44, @@ -17286,28 +17286,28 @@ 7, 57, 50, - 25 + 47 ], [ 13, 60, 45, 22, - 52, - 25 + 25, + 52 ], [ - 23, 4, + 23, 33, 46, 58, - 19 + 34 ], [ 8, - 56, 33, + 56, 45, 51, 59 @@ -17317,23 +17317,23 @@ 53, 39, 26, - 31, - 35 + 35, + 31 ], [ 41, 46, + 11, 53, 59, - 11, 8 ], [ 11, 44, 33, - 55, 52, + 55, 53 ], [ @@ -17350,29 +17350,29 @@ 14, 10, 44, - 4 + 7 ], [ 45, 6, - 18, + 62, 12, - 24, - 28 + 18, + 24 ], [ 10, 50, 42, 54, - 3, - 24 + 24, + 3 ], [ 17, 31, - 5, 37, + 5, 10, 41 ] @@ -17491,8 +17491,8 @@ 33 ], [ - 8, 44, + 8, 16, 30, 2, @@ -17511,8 +17511,8 @@ 30, 60, 61, - 10, - 18 + 18, + 10 ], [ 6, @@ -17543,8 +17543,8 @@ 53, 59, 23, - 46, - 6 + 6, + 46 ], [ 36, @@ -17558,8 +17558,8 @@ 10, 15, 16, - 4, 52, + 4, 38 ], [ @@ -17665,8 +17665,8 @@ 47, 39, 44, - 56, - 38 + 38, + 56 ], [ 40, @@ -17698,7 +17698,7 @@ 59, 29, 25, - 12 + 36 ], [ 8, @@ -17706,22 +17706,22 @@ 1, 47, 33, - 4 + 24 ], [ 24, 30, - 56, 38, - 54, + 56, + 4, 7 ], [ 46, 22, 44, - 12, 57, + 12, 40 ], [ @@ -17761,8 +17761,8 @@ 36, 45, 53, - 56, - 21 + 21, + 56 ], [ 60, @@ -17873,8 +17873,8 @@ [ 24, 13, - 3, 47, + 3, 45, 50 ], @@ -17899,8 +17899,8 @@ 63, 23, 25, - 17, - 34 + 34, + 17 ], [ 14, @@ -17931,15 +17931,15 @@ 59, 2, 10, - 8, - 55 + 54, + 26 ], [ 45, 61, 19, - 42, 57, + 42, 17 ], [ @@ -17947,8 +17947,8 @@ 60, 41, 34, - 53, - 35 + 35, + 53 ], [ 36, @@ -17959,8 +17959,8 @@ 5 ], [ - 36, 45, + 36, 16, 38, 51, @@ -17984,9 +17984,9 @@ ], [ 52, + 28, 46, 33, - 28, 18, 55 ], @@ -17994,16 +17994,16 @@ 1, 15, 28, - 13, 18, + 13, 52 ], [ 10, 21, 4, - 23, 2, + 23, 31 ], [ @@ -18085,8 +18085,8 @@ 24, 47, 52, - 49, - 17 + 17, + 49 ], [ 34, @@ -18106,15 +18106,15 @@ ], [ 23, - 43, 36, + 43, 20, 10, 21 ], [ - 63, 48, + 63, 51, 10, 62, @@ -18173,8 +18173,8 @@ 34, 43, 39, - 19, - 17 + 17, + 19 ], [ 23, @@ -18190,7 +18190,7 @@ 51, 16, 30, - 49 + 60 ], [ 25, @@ -18230,7 +18230,7 @@ 23, 16, 40, - 18 + 37 ], [ 9, @@ -18358,8 +18358,8 @@ 41, 19, 31, - 48, 6, + 48, 24 ], [ @@ -18610,7 +18610,7 @@ 17, 2, 37, - 32 + 55 ], [ 13, @@ -18658,7 +18658,7 @@ 10, 31, 14, - 52 + 62 ], [ 9, @@ -18682,7 +18682,7 @@ 43, 40, 46, - 18 + 45 ] ], [ @@ -18716,15 +18716,15 @@ 2, 41, 48, - 62 + 16 ], [ 6, 18, 37, + 27, 41, - 30, - 27 + 30 ], [ 59, @@ -18768,8 +18768,8 @@ ], [ 61, - 9, 63, + 9, 8, 17, 39 @@ -18882,8 +18882,8 @@ 4, 9, 16, - 58, 40, + 58, 60 ], [ @@ -19482,15 +19482,15 @@ 34, 43, 3, - 25 + 6 ], [ 58, 38, 54, 47, - 59, - 11 + 11, + 59 ], [ 55, @@ -19505,8 +19505,8 @@ 41, 1, 51, - 29, - 5 + 5, + 29 ], [ 49, @@ -19538,7 +19538,7 @@ 52, 2, 8, - 48 + 29 ], [ 23, @@ -19553,8 +19553,8 @@ 23, 62, 50, - 34, - 51 + 51, + 34 ] ], [ @@ -19691,8 +19691,8 @@ 5, 52, 24, - 17, - 18 + 18, + 17 ], [ 44, @@ -19761,8 +19761,8 @@ [ 19, 3, - 31, 27, + 31, 43, 2 ], @@ -19834,8 +19834,8 @@ ], [ 12, - 24, 14, + 24, 25, 58, 7 @@ -19860,8 +19860,8 @@ 9, 8, 24, - 5, 25, + 5, 63 ], [ @@ -19916,8 +19916,8 @@ 48, 57, 38, - 39, 63, + 39, 43 ], [ @@ -19958,14 +19958,14 @@ 15, 28, 23, - 5 + 44 ], [ 4, 41, 9, - 11, 8, + 11, 51 ], [ @@ -19979,8 +19979,8 @@ [ 19, 3, - 58, 26, + 58, 12, 61 ], @@ -20024,20 +20024,20 @@ 5, 50, 8, - 53 + 42 ], [ 35, 52, 9, 43, - 27, - 0 + 0, + 27 ], [ 16, - 29, 60, + 29, 61, 28, 58 @@ -20060,8 +20060,8 @@ ], [ 48, - 4, 35, + 4, 32, 8, 60 @@ -20078,8 +20078,8 @@ 9, 3, 19, - 61, 20, + 61, 24 ], [ @@ -20159,16 +20159,16 @@ 42, 48, 37, - 52, - 8 + 8, + 52 ], [ 46, 45, 57, 36, - 32, - 51 + 51, + 32 ], [ 62, @@ -20255,8 +20255,8 @@ [ 9, 58, - 0, 60, + 0, 17, 38 ], @@ -20317,8 +20317,8 @@ 60 ], [ - 13, 10, + 13, 29, 22, 58, @@ -20327,8 +20327,8 @@ [ 44, 8, - 2, 58, + 2, 6, 5 ], @@ -20344,8 +20344,8 @@ 5, 61, 30, - 17, 1, + 17, 27 ], [ @@ -20410,15 +20410,15 @@ 33, 9, 47, - 48 + 20 ], [ 3, 43, 6, 19, - 2, - 30 + 30, + 2 ], [ 55, @@ -20635,13 +20635,13 @@ 3, 26, 12, - 53, - 21 + 21, + 53 ], [ 53, - 1, 35, + 1, 54, 60, 20 @@ -20724,8 +20724,8 @@ 16, 22, 55, - 41, 32, + 41, 26 ], [ @@ -20733,7 +20733,7 @@ 41, 56, 45, - 10, + 38, 34 ], [ @@ -20763,8 +20763,8 @@ [ 56, 2, - 23, 24, + 23, 9, 5 ], @@ -20981,8 +20981,8 @@ [ 9, 5, - 33, 28, + 33, 58, 49 ], @@ -21037,10 +21037,10 @@ [ 43, 1, - 12, 55, + 12, 4, - 40 + 21 ], [ 41, @@ -21056,7 +21056,7 @@ 1, 22, 30, - 0 + 42 ], [ 5, @@ -21070,16 +21070,16 @@ 10, 28, 55, - 0, 23, + 0, 53 ], [ 30, 57, 8, - 17, 29, + 17, 5 ] ], @@ -21146,7 +21146,7 @@ 42, 35, 3, - 6 + 54 ], [ 4, @@ -21158,8 +21158,8 @@ ], [ 54, - 25, 55, + 25, 38, 21, 27 @@ -21201,16 +21201,16 @@ 44, 58, 5, - 16, - 1 + 1, + 16 ], [ 8, 53, 59, 25, - 24, - 52 + 52, + 24 ], [ 13, @@ -21218,15 +21218,15 @@ 45, 0, 42, - 8 + 37 ], [ 23, 44, 55, - 33, 38, - 7 + 7, + 33 ], [ 53, @@ -21257,15 +21257,15 @@ 62, 55, 16, - 10, - 41 + 41, + 10 ], [ 31, 20, 46, - 37, 34, + 37, 41 ], [ @@ -21280,23 +21280,23 @@ 11, 45, 47, - 54, 48, - 36 + 54, + 20 ], [ 50, - 10, 23, - 51, + 10, + 18, 3, - 18 + 2 ], [ 17, 31, - 5, 36, + 5, 4, 20 ] @@ -21338,8 +21338,8 @@ 14, 59, 61, - 1, 16, + 1, 19 ], [ @@ -21428,7 +21428,7 @@ 24, 2, 54, - 59 + 37 ], [ 13, @@ -21457,8 +21457,8 @@ [ 15, 13, - 45, 63, + 45, 55, 9 ], @@ -21468,7 +21468,7 @@ 23, 6, 35, - 0 + 63 ], [ 36, @@ -21484,23 +21484,23 @@ 41, 20, 5, - 46 + 23 ], [ - 47, 4, + 47, 26, 55, - 10, - 49 + 49, + 10 ], [ 4, 11, 8, - 48, 36, - 33 + 48, + 9 ], [ 18, @@ -21558,7 +21558,7 @@ 14, 16, 37, - 19 + 44 ], [ 59, @@ -21580,9 +21580,9 @@ 12, 24, 20, + 61, 58, - 28, - 61 + 28 ], [ 47, @@ -21636,8 +21636,8 @@ 24, 5, 46, - 44, 2, + 44, 38 ], [ @@ -21821,10 +21821,10 @@ [ 3, 45, - 37, 41, + 37, 21, - 47 + 33 ], [ 21, @@ -21853,8 +21853,8 @@ [ 2, 5, - 43, 20, + 43, 24, 29 ], @@ -21891,8 +21891,8 @@ 37 ], [ - 33, 0, + 33, 15, 32, 6, @@ -21917,16 +21917,16 @@ [ 41, 5, - 40, 46, + 40, 4, 32 ], [ 55, 20, - 44, 26, + 44, 4, 40 ], @@ -22058,15 +22058,15 @@ 24, 32, 25, - 36 + 42 ], [ 30, 21, 63, + 51, 49, - 16, - 51 + 16 ], [ 62, @@ -22074,15 +22074,15 @@ 13, 5, 29, - 56 + 40 ], [ 53, 2, 9, 63, - 46, - 54 + 54, + 46 ], [ 27, @@ -22098,7 +22098,7 @@ 43, 44, 0, - 12 + 50 ], [ 38, @@ -22130,7 +22130,7 @@ 34, 4, 21, - 50 + 22 ], [ 41, @@ -22300,15 +22300,15 @@ 59, 55, 2, - 15 + 50 ], [ 0, 47, 51, 45, - 14, - 37 + 37, + 14 ], [ 0, @@ -22322,8 +22322,8 @@ 38, 53, 60, - 34, 36, + 34, 9 ], [ @@ -22369,8 +22369,8 @@ [ 5, 35, - 42, 25, + 42, 2, 22 ], @@ -22388,7 +22388,7 @@ 6, 57, 39, - 33 + 51 ] ], [ @@ -22445,8 +22445,8 @@ 61, 29, 38, - 50, - 62 + 62, + 50 ], [ 20, @@ -22460,8 +22460,8 @@ 4, 10, 7, - 47, 25, + 47, 34 ], [ @@ -22490,8 +22490,8 @@ ], [ 34, - 2, 10, + 2, 26, 11, 47 @@ -22499,8 +22499,8 @@ [ 16, 40, - 34, 23, + 34, 9, 33 ], @@ -22537,8 +22537,8 @@ 25 ], [ - 8, 53, + 8, 38, 11, 24, @@ -22550,31 +22550,31 @@ 0, 11, 21, - 8 + 60 ], [ 27, 23, 62, - 41, 8, + 41, 42 ], [ 55, 11, 10, - 41, 16, + 41, 21 ], [ 31, 21, - 41, 54, 34, - 9 + 41, + 46 ], [ 55, @@ -22596,8 +22596,8 @@ 50, 10, 47, - 35, 53, + 35, 57 ], [ @@ -22767,8 +22767,8 @@ 13, 45, 0, - 51, - 60 + 60, + 51 ], [ 15, @@ -22776,7 +22776,7 @@ 23, 50, 6, - 13 + 51 ], [ 57, @@ -22789,16 +22789,16 @@ [ 10, 23, - 41, 33, - 30, - 54 + 41, + 54, + 30 ], [ 33, 26, - 10, 4, + 10, 49, 55 ], @@ -22814,8 +22814,8 @@ 43, 6, 3, - 50, 18, + 50, 38 ], [ @@ -23220,7 +23220,7 @@ 16, 28, 41, - 3 + 14 ], [ 5, @@ -23244,15 +23244,15 @@ 27, 62, 33, - 7 + 48 ], [ 33, - 7, 54, + 7, 3, - 32, - 12 + 12, + 32 ], [ 58, @@ -23306,8 +23306,8 @@ ], [ 30, - 9, 2, + 9, 57, 11, 43 @@ -23317,8 +23317,8 @@ 19, 57, 42, - 51, - 27 + 27, + 51 ], [ 46, @@ -23326,15 +23326,15 @@ 47, 7, 32, - 23 + 0 ], [ 12, 4, 35, 10, - 50, - 47 + 47, + 50 ], [ 15, @@ -23342,7 +23342,7 @@ 54, 25, 38, - 11 + 51 ], [ 18, @@ -23357,16 +23357,16 @@ 22, 8, 16, - 7, - 5 + 5, + 7 ], [ 5, 3, - 35, 24, - 30, - 4 + 35, + 4, + 30 ], [ 17, @@ -23437,8 +23437,8 @@ 23, 28, 57, - 21, - 16 + 16, + 21 ], [ 51, @@ -23550,8 +23550,8 @@ 15, 12, 35, - 45, 52, + 45, 4 ], [ @@ -23566,8 +23566,8 @@ 60, 5, 59, - 50, 29, + 50, 18 ], [ @@ -23607,8 +23607,8 @@ 50, 14, 6, - 37, - 16 + 16, + 37 ], [ 26, @@ -23624,7 +23624,7 @@ 23, 43, 60, - 41 + 34 ], [ 45, @@ -23638,8 +23638,8 @@ 4, 38, 53, - 58, 15, + 58, 5 ], [ @@ -23680,7 +23680,7 @@ 49, 45, 56, - 57 + 53 ], [ 51, @@ -23964,7 +23964,7 @@ 23, 39, 57, - 51 + 60 ], [ 9, @@ -24020,7 +24020,7 @@ 4, 52, 30, - 26 + 33 ], [ 34, @@ -24044,13 +24044,13 @@ 2, 5, 54, - 37 + 44 ], [ 12, 34, - 7, 47, + 7, 54, 59 ], @@ -24060,7 +24060,7 @@ 2, 43, 33, - 7 + 50 ], [ 8, @@ -24072,16 +24072,16 @@ ], [ 17, - 38, 6, + 38, 29, 49, 41 ], [ 38, - 52, 45, + 52, 57, 63, 27 @@ -24090,8 +24090,8 @@ 26, 28, 3, - 5, 40, + 5, 47 ], [ @@ -24100,13 +24100,13 @@ 35, 34, 38, - 3 + 55 ], [ 61, 24, - 55, 4, + 55, 45, 16 ], @@ -24114,8 +24114,8 @@ 46, 11, 27, - 56, 0, + 56, 48 ], [ @@ -24286,14 +24286,14 @@ 41, 10, 32, - 29 + 18 ], [ 47, 6, 49, 15, - 13, + 34, 7 ], [ @@ -24305,9 +24305,9 @@ 53 ], [ - 57, 47, 28, + 57, 26, 19, 22 @@ -24318,11 +24318,11 @@ 18, 5, 46, - 53 + 22 ], [ - 55, 9, + 55, 4, 3, 25, @@ -24339,8 +24339,8 @@ [ 42, 10, - 43, 3, + 43, 49, 0 ], @@ -24350,7 +24350,7 @@ 17, 41, 10, - 8 + 3 ] ], [ @@ -24408,7 +24408,7 @@ 19, 48, 15, - 6 + 10 ], [ 14, @@ -24416,7 +24416,7 @@ 54, 8, 57, - 28 + 31 ], [ 6, @@ -24431,23 +24431,23 @@ 38, 23, 53, - 25, - 17 + 17, + 20 ], [ 26, - 22, 25, + 22, 5, - 42, - 33 + 33, + 42 ], [ 5, 22, 16, - 9, 61, + 9, 55 ], [ @@ -24456,7 +24456,7 @@ 30, 25, 20, - 22 + 4 ], [ 18, @@ -24464,7 +24464,7 @@ 25, 63, 39, - 2 + 11 ], [ 62, @@ -24480,29 +24480,29 @@ 5, 24, 37, - 26 + 53 ], [ 47, 34, - 27, 10, + 27, 33, 22 ], [ 60, - 44, 43, + 44, 52, 7, 37 ], [ 12, + 39, 32, 38, - 39, 1, 20 ], @@ -24511,7 +24511,7 @@ 52, 49, 15, - 28, + 40, 63 ], [ @@ -24520,7 +24520,7 @@ 3, 26, 22, - 13 + 30 ], [ 29, @@ -24532,19 +24532,19 @@ ], [ 41, - 5, 53, + 5, + 4, 33, - 26, - 39 + 21 ], [ 25, 38, 4, 34, - 49, - 51 + 51, + 41 ], [ 29, @@ -24552,14 +24552,14 @@ 48, 47, 33, - 35 + 20 ], [ 50, 3, - 38, 16, 53, + 38, 15 ], [ @@ -24567,8 +24567,8 @@ 62, 6, 23, - 10, - 36 + 36, + 10 ] ], [ @@ -24633,8 +24633,8 @@ 59, 14, 52, - 28, - 7 + 7, + 28 ], [ 6, @@ -24657,8 +24657,8 @@ 42, 5, 22, - 58, - 19 + 19, + 58 ], [ 22, @@ -24681,8 +24681,8 @@ 2, 39, 13, - 63, - 44 + 44, + 63 ], [ 10, @@ -24876,15 +24876,15 @@ 22, 15, 5, - 46 + 0 ], [ 7, 22, - 24, 63, - 53, - 5 + 24, + 10, + 53 ], [ 11, @@ -24899,22 +24899,22 @@ 18, 13, 39, - 0, - 17 + 17, + 0 ], [ 62, 54, - 5, - 18, 6, + 18, + 5, 14 ], [ 19, 43, - 31, 30, + 31, 57, 42 ], @@ -24969,8 +24969,8 @@ [ 5, 53, - 51, 35, + 51, 22, 38 ], @@ -24979,8 +24979,8 @@ 14, 44, 41, - 11, - 51 + 51, + 57 ], [ 47, @@ -25004,7 +25004,7 @@ 59, 62, 18, - 50 + 40 ] ], [ @@ -25078,15 +25078,15 @@ 23, 19, 62, - 27 + 52 ], [ 53, 32, 51, - 38, + 58, 41, - 58 + 38 ], [ 18, @@ -25102,15 +25102,15 @@ 60, 11, 12, - 19 + 5 ], [ 24, 7, 11, - 17, 30, - 37 + 17, + 47 ], [ 47, @@ -25132,17 +25132,17 @@ 56, 30, 0, - 55, 51, + 55, 10 ], [ 12, 5, - 21, 34, - 29, - 63 + 21, + 63, + 29 ], [ 42, @@ -25150,7 +25150,7 @@ 38, 57, 41, - 44 + 43 ], [ 32, @@ -25182,7 +25182,7 @@ 30, 60, 39, - 14 + 42 ], [ 5, @@ -25205,8 +25205,8 @@ 47, 61, 28, - 60, - 27 + 27, + 60 ], [ 14, @@ -25312,23 +25312,23 @@ 22, 3, 34, - 6 + 45 ], [ 21, 53, - 28, - 22, 5, - 9 + 22, + 9, + 28 ], [ 11, - 28, 24, + 28, 16, - 35, - 8 + 8, + 35 ], [ 38, @@ -25351,15 +25351,15 @@ 55, 43, 30, - 35, - 51 + 51, + 35 ], [ 51, 3, 14, - 5, 60, + 5, 26 ], [ @@ -25397,8 +25397,8 @@ [ 40, 45, - 44, 22, + 44, 1, 34 ], @@ -25420,11 +25420,11 @@ ], [ 31, - 5, 52, - 48, + 5, 58, - 35 + 48, + 43 ], [ 23, @@ -25536,8 +25536,8 @@ 57, 37, 5, - 7, 22, + 7, 53 ], [ @@ -25560,8 +25560,8 @@ 54, 21, 58, - 55, 6, + 55, 14 ], [ @@ -25593,8 +25593,8 @@ 25, 40, 32, - 54, - 58 + 58, + 54 ], [ 49, @@ -25641,8 +25641,8 @@ 13, 8, 61, - 47, - 9 + 9, + 47 ], [ 31,