From 01aaad15c11bbffc1e0eefb25b843e6189eb5bbc Mon Sep 17 00:00:00 2001
From: root
Date: Mon, 2 Mar 2026 16:21:33 +0800
Subject: [PATCH] fix_contiguous

---
 src/twinkle/model/megatron/model/gpts/qwen3_next.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/twinkle/model/megatron/model/gpts/qwen3_next.py b/src/twinkle/model/megatron/model/gpts/qwen3_next.py
index b589a0f2..94c26470 100644
--- a/src/twinkle/model/megatron/model/gpts/qwen3_next.py
+++ b/src/twinkle/model/megatron/model/gpts/qwen3_next.py
@@ -361,7 +361,7 @@ def _gated_delta_net_forward(self, hidden_states: torch.Tensor, **kwargs):
         res = res[attention_mask][:, None]
         res = torch.concat([res, res.new_zeros(seq_len - res.shape[0], 1, res.shape[2])])
     else:
-        res = res.transpose(0, 1)
+        res = res.transpose(0, 1).contiguous()
     if args.sequence_parallel and args.tensor_model_parallel_size > 1:
         res = reduce_scatter_to_sequence_parallel_region(res) / args.tensor_model_parallel_size
     return res, None