diff --git a/README.md b/README.md
index 447ebf87..7ddd0070 100644
--- a/README.md
+++ b/README.md
@@ -69,6 +69,7 @@ pip install -e .
 | --------------------------------- | --------------- | ------------------------------------------------- |
 | FSDP finetuning | transformers | [Script](cookbook/transformers/fsdp2.py) |
 | FSDP MoE finetuning | transformers | [Script](cookbook/transformers/fsdp2_moe.py) |
+| ep/sp FSDP MoE finetuning | transformers | [Script](cookbook/transformers/ep_fsdp_qwen3_moe.py) |
 | EP MoE finetuning | transformers | [Script](cookbook/transformers/ep_fsdp_qwen3_moe.py) |
 | pp/tp/cp finetuning | megatron | [Script](cookbook/megatron/tp.py) |
 | pp/tp/cp MoE finetuning | megatron | [Script](cookbook/megatron/tp_moe.py) |
diff --git a/cookbook/transformers/ep_fsdp_qwen3_moe.py b/cookbook/transformers/ep_fsdp_qwen3_moe.py
index 16706eae..6473dc63 100644
--- a/cookbook/transformers/ep_fsdp_qwen3_moe.py
+++ b/cookbook/transformers/ep_fsdp_qwen3_moe.py
@@ -21,11 +21,13 @@
 # 4 gpus, dp=2, ep=2
 dp_size = 2
 ep_size = 2
+ulysses_size = 2
 
 device_mesh = DeviceMesh(
     device_type=Platform.get_platform().device_prefix(),
     mesh=np.arange(dp_size * ep_size).reshape(dp_size, ep_size),
     mesh_dim_names=('dp', 'ep'),
+    ulysses_size=ulysses_size,  # enable sp
 )
 
 twinkle.initialize(