From d84dd8404b91cf93396f0704c3b456582d1ed6d7 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Wed, 8 Apr 2026 17:49:14 +0800
Subject: [PATCH 1/2] fix gptq_bridge

---
 src/mcore_bridge/model/gpts/qwen3_next.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mcore_bridge/model/gpts/qwen3_next.py b/src/mcore_bridge/model/gpts/qwen3_next.py
index ec87ddb..099e75e 100644
--- a/src/mcore_bridge/model/gpts/qwen3_next.py
+++ b/src/mcore_bridge/model/gpts/qwen3_next.py
@@ -516,12 +516,12 @@ def _set_layer_attn(self, mg_layer, hf_state_dict, layer_idx: int, to_mcore: boo
         self._set_state_dict(mg_layer, 'input_layernorm.weight', hf_state_dict, 'input_layernorm.weight', to_mcore)
         return hf_state_dict
 
-    def _set_layer_mlp(self, mg_layer, hf_state_dict, layer_idx: int, to_mcore: bool):
+    def _set_layer_mlp(self, mg_layer, hf_state_dict, layer_idx: int, to_mcore: bool, is_mtp: bool = False):
         if self.model_type != 'qwen3_5':
-            return super()._set_layer_mlp(mg_layer, hf_state_dict, layer_idx, to_mcore)
+            return super()._set_layer_mlp(mg_layer, hf_state_dict, layer_idx, to_mcore, is_mtp=is_mtp)
         # dense
         mg_mlp = None if mg_layer is None else mg_layer.mlp
-        hf_state_dict.update(self._set_mlp_state(mg_mlp, hf_state_dict, f'{self.hf_mlp_prefix}.', layer_idx, to_mcore))
+        hf_state_dict.update(self._set_mlp_state(mg_mlp, hf_state_dict, f'{self.hf_mlp_prefix}.', layer_idx, to_mcore, is_mtp=is_mtp))
         self._set_state_dict(mg_layer, 'pre_mlp_layernorm.weight', hf_state_dict, 'post_attention_layernorm.weight', to_mcore)
         return hf_state_dict
 

From 79eb4ac40b5c24822a984c0883ac489284aa22e2 Mon Sep 17 00:00:00 2001
From: Jintao Huang
Date: Wed, 8 Apr 2026 23:23:09 +0800
Subject: [PATCH 2/2] revert qwen3_5 save_weight

---
 src/mcore_bridge/bridge/gpt_bridge.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mcore_bridge/bridge/gpt_bridge.py b/src/mcore_bridge/bridge/gpt_bridge.py
index 2716993..7039a91 100644
--- a/src/mcore_bridge/bridge/gpt_bridge.py
+++ b/src/mcore_bridge/bridge/gpt_bridge.py
@@ -700,8 +700,6 @@ def _set_moe_state(
 
     def _get_hf_experts_attr(self, is_mtp: bool = False):
         # return hf_grouped, is_gate_up
-        if self.model_type == 'qwen3_5_moe' and not is_mtp:
-            return True, True
         if self.model_type in {'glm4v_moe', 'kimi_vl', 'qwen3_omni_moe'} or self.llm_model_type in {
             'qwen2_moe', 'qwen3_moe', 'deepseek_v2', 'deepseek_v3', 'kimi_k2', 'dots1', 'ernie4_5_moe', 'glm4_moe',
             'glm4_moe_lite', 'minimax_m2', 'olmoe', 'qwen3_next', 'qwen3_5_moe', 'glm_moe_dsa', 'deepseek_v32'
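
Reviewer note on PATCH 1/2: the diff suggests a signature-mismatch bug. The base-class
_set_layer_mlp in gpt_bridge.py takes an is_mtp keyword that the qwen3_next override
neither accepted nor forwarded, so a caller passing is_mtp=True would hit a TypeError
(or, if the keyword was dropped, MTP layers would silently be handled as regular layers).
The self-contained sketch below reproduces that pattern; Base, BrokenOverride, and
FixedOverride are hypothetical stand-ins, not the actual mcore_bridge classes, whose
methods carry far more state.

# Hypothetical minimal reproduction of the override-signature bug fixed by
# PATCH 1/2. These classes are illustrative stand-ins, not mcore_bridge code.

class Base:
    def _set_layer_mlp(self, layer_idx: int, to_mcore: bool, is_mtp: bool = False):
        # The base class threads is_mtp down into the layer-conversion logic.
        return ('mlp', layer_idx, to_mcore, is_mtp)

class BrokenOverride(Base):
    # Pre-patch shape: is_mtp is missing, so callers passing it fail.
    def _set_layer_mlp(self, layer_idx: int, to_mcore: bool):
        return super()._set_layer_mlp(layer_idx, to_mcore)

class FixedOverride(Base):
    # Post-patch shape: accept is_mtp and forward it explicitly.
    def _set_layer_mlp(self, layer_idx: int, to_mcore: bool, is_mtp: bool = False):
        return super()._set_layer_mlp(layer_idx, to_mcore, is_mtp=is_mtp)

try:
    BrokenOverride()._set_layer_mlp(0, True, is_mtp=True)
except TypeError as exc:
    print('pre-patch:', exc)   # ... got an unexpected keyword argument 'is_mtp'

print('post-patch:', FixedOverride()._set_layer_mlp(0, True, is_mtp=True))

PATCH 2/2 is a plain revert: with the qwen3_5_moe special case deleted,
_get_hf_experts_attr falls through to the shared membership test, which already lists
'qwen3_5_moe' in the llm_model_type set, so whatever that branch returns (its body lies
outside this hunk's context) now applies to both the MTP and non-MTP paths.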