From 32c8c8a3688214a8472cba97e486da17b31441ff Mon Sep 17 00:00:00 2001 From: jaeminh Date: Thu, 5 Mar 2026 15:46:00 +0900 Subject: [PATCH] Override moe_token_dispatcher_type to "alltoall" when exporting a Megatron checkpoint --- src/megatron/bridge/models/conversion/auto_bridge.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/megatron/bridge/models/conversion/auto_bridge.py b/src/megatron/bridge/models/conversion/auto_bridge.py index 3e0ac4e02a..915aa7d423 100644 --- a/src/megatron/bridge/models/conversion/auto_bridge.py +++ b/src/megatron/bridge/models/conversion/auto_bridge.py @@ -835,7 +835,12 @@ def export_ckpt( # Export ckpt performs on CPU with temporary_distributed_context(backend="gloo"): # Load the Megatron model - megatron_model = self.load_megatron_model(megatron_path, wrap_with_ddp=False) + # 'flex' dispatcher requires TPxEP > 1; fall back to 'alltoall' + megatron_model = self.load_megatron_model( + megatron_path, + wrap_with_ddp=False, + mp_overrides={"moe_token_dispatcher_type": "alltoall"}, + ) # Save in HuggingFace format self.save_hf_pretrained(