From c6ace7fd88556e945c3bce61f8e982dff0a891ef Mon Sep 17 00:00:00 2001 From: meichangsu1 <1484603386@qq.com> Date: Wed, 11 Feb 2026 15:41:47 +0800 Subject: [PATCH 1/2] refactor(tests): replace manual sp_group retrieval with module attribute Replace calls to `_get_sp_group_from_device_mesh` with direct access to `sequence_parallel._sp_group` in sequence parallel attention tests. This simplifies the test setup by using the already initialized group stored in the module, improving code clarity and reducing redundancy. --- .../test_sequence_parallel_single_attention.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/sequence_parallel/test_sequence_parallel_single_attention.py b/tests/sequence_parallel/test_sequence_parallel_single_attention.py index dde6b387..32e01aaa 100644 --- a/tests/sequence_parallel/test_sequence_parallel_single_attention.py +++ b/tests/sequence_parallel/test_sequence_parallel_single_attention.py @@ -181,7 +181,7 @@ def _run_worker_single_attn(rank: int, world_size: int, port: int, padding: bool sp_size = world_size device_mesh = DeviceMesh.from_sizes(dp_size=world_size, ulysses_size=sp_size, device_type="cuda") _setup_sp(device_mesh, sp_size) - sp_group = _get_sp_group_from_device_mesh(device_mesh, sp_size) + sp_group = sequence_parallel._sp_group batch_size = 2 unpad_seq_len = 127 if padding else 128 @@ -271,7 +271,7 @@ def _run_worker_single_attn_fsdp(rank: int, world_size: int, port: int): # For FSDP+SP, SP is derived from dp/fsdp ranks. Use fsdp=world, dp=1. 
device_mesh = DeviceMesh.from_sizes(fsdp_size=world_size, dp_size=1, ulysses_size=sp_size, device_type="cuda") _setup_sp(device_mesh, sp_size) - sp_group = _get_sp_group_from_device_mesh(device_mesh, sp_size) + sp_group = sequence_parallel._sp_group batch_size = 2 unpad_seq_len = 128 From bcfb465894913cf2e4ce4898712ce7f131950361 Mon Sep 17 00:00:00 2001 From: meichangsu1 <1484603386@qq.com> Date: Wed, 11 Feb 2026 16:18:07 +0800 Subject: [PATCH 2/2] refactor(tests): remove unused import in sequence parallel test Remove the `_get_sp_group_from_device_mesh` import from the test file, as it is no longer used, cleaning up imports and improving code clarity. --- .../sequence_parallel/test_sequence_parallel_single_attention.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/sequence_parallel/test_sequence_parallel_single_attention.py b/tests/sequence_parallel/test_sequence_parallel_single_attention.py index 32e01aaa..ef4c29fc 100644 --- a/tests/sequence_parallel/test_sequence_parallel_single_attention.py +++ b/tests/sequence_parallel/test_sequence_parallel_single_attention.py @@ -19,7 +19,6 @@ from twinkle.model.transformers.strategy.sequence_parallel import ( DistributedAttention, - _get_sp_group_from_device_mesh, sequence_parallel, ) from twinkle.model.transformers.strategy import NativeFSDPStrategy