diff --git a/src/megatron/bridge/models/conversion/auto_bridge.py b/src/megatron/bridge/models/conversion/auto_bridge.py index 3e0ac4e02a..915aa7d423 100644 --- a/src/megatron/bridge/models/conversion/auto_bridge.py +++ b/src/megatron/bridge/models/conversion/auto_bridge.py @@ -835,7 +835,12 @@ def export_ckpt( # Export ckpt performs on CPU with temporary_distributed_context(backend="gloo"): # Load the Megatron model - megatron_model = self.load_megatron_model(megatron_path, wrap_with_ddp=False) + # 'flex' dispatcher requires TPxEP > 1; fall back to 'alltoall' + megatron_model = self.load_megatron_model( + megatron_path, + wrap_with_ddp=False, + mp_overrides={"moe_token_dispatcher_type": "alltoall"}, + ) # Save in HuggingFace format self.save_hf_pretrained(