diff --git a/README.md b/README.md index 7ddd0070..b615cc0b 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,8 @@ pip install -e . | --------------------------------- | --------------- | ------------------------------------------------- | | FSDP finetuning | transformers | [Script](cookbook/transformers/fsdp2.py) | | FSDP MoE finetuning | transformers | [Script](cookbook/transformers/fsdp2_moe.py) | -| ep/sp FSDP MoE finetuning | transformers | [Script](cookbook/transformers/ep_fsdp_qwen3_moe.py) | +| ep FSDP MoE finetuning | transformers | [Script](cookbook/transformers/ep_fsdp_qwen3_moe.py) | +| sp FSDP finetuning | transformers | [Script](cookbook/transformers/sp_fsdp_dense.py) | | EP MoE finetuning | transformers | [Script](cookbook/transformers/ep_fsdp_qwen3_moe.py) | | pp/tp/cp finetuning | megatron | [Script](cookbook/megatron/tp.py) | | pp/tp/cp MoE finetuning | megatron | [Script](cookbook/megatron/tp_moe.py) | diff --git a/cookbook/transformers/ep_fsdp_qwen3_moe.py b/cookbook/transformers/ep_fsdp_qwen3_moe.py index 6473dc63..16706eae 100644 --- a/cookbook/transformers/ep_fsdp_qwen3_moe.py +++ b/cookbook/transformers/ep_fsdp_qwen3_moe.py @@ -21,13 +21,11 @@ # 4 gpus, dp=2, ep=2 dp_size = 2 ep_size = 2 -ulysses_size = 2 device_mesh = DeviceMesh( device_type=Platform.get_platform().device_prefix(), mesh=np.arange(dp_size * ep_size).reshape(dp_size, ep_size), mesh_dim_names=('dp', 'ep'), - ulysses_size=ulysses_size, # enable sp ) twinkle.initialize(