diff --git a/src/twinkle/processor/base.py b/src/twinkle/processor/base.py index fe9733f1..ff0fbabf 100644 --- a/src/twinkle/processor/base.py +++ b/src/twinkle/processor/base.py @@ -105,6 +105,9 @@ def to_tensor(_input): def pad_cp(self, inputs: List[InputFeature], **kwargs) -> List[InputFeature]: + if self.device_mesh is None: + return inputs + def _pad_cp(_input: InputFeature) -> InputFeature: # Pad sequence for parallel compatibility # 1. For CP > 1: Megatron's RoPE requires seq_len % (2 * cp_size) == 0