From a745826e83e446eac1312b5f40b845bc3a99f3b9 Mon Sep 17 00:00:00 2001
From: Sanju C Sudhakaran
Date: Wed, 4 Mar 2026 16:54:18 +0530
Subject: [PATCH] Set moe_router_force_load_balancing for Nemotron3 Nano

Signed-off-by: Sanju C Sudhakaran
---
 .../configs/nemotronh/nemotron_3_nano_llm_pretrain.py          | 2 ++
 .../configs/nemotronh/nemotron_3_nano_workload_base_configs.py | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
index 4f23420451..3ecccd6f54 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_llm_pretrain.py
@@ -30,6 +30,8 @@ def set_nemotron_3_nano_common_configs(cfg: ConfigContainer) -> None:
     cfg.mixed_precision.grad_reduce_in_fp32 = False
     cfg.ddp.grad_reduce_in_fp32 = False
 
+    cfg.model.moe_router_force_load_balancing = True
+
 
 def nemotron_3_nano_pretrain_config_gb300(
     precision: str = "bf16", mock: bool = True, config_variant: str = "v1"
diff --git a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
index 7c85f88543..31310b3fea 100644
--- a/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
+++ b/scripts/performance/configs/nemotronh/nemotron_3_nano_workload_base_configs.py
@@ -41,6 +41,9 @@
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1 = replace(
     BASE_NEMOTRON_3_NANO_CONFIG,
     tensor_model_parallel_size=1,
+    micro_batch_size=4,
+    cuda_graph_impl="transformer_engine",
+    cuda_graph_scope=["attn", "moe_router", "moe_preprocess", "mamba"],
 )
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_FP8_MX_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1
 NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_NVFP4_V1 = NEMOTRON_3_NANO_PRETRAIN_CONFIG_GB300_BF16_V1