packed_module_mapping = {
"q_proj": ('q_proj', 'q'),
"k_proj": ('k_proj', 'k'),
"v_proj": ('v_proj', 'v'),
"gate_up": ('gate_up_proj', '0'),
"gate_down": ('gate_down_proj', '1'),
}
In class Qwen3ForCausalLM(nn.Module), I think these entries are wrong: ('q_proj', 'q'), ('k_proj', 'k'), ('v_proj', 'v').
The weight name is "qkv_projection", so I think the mapping should be:
packed_module_mapping = {
"q_proj": ('qkv_projection', 'q'),
"k_proj": ('qkv_projection', 'k'),
"v_proj": ('qkv_projection', 'v'),
"gate_up": ('gate_up', '0'),
"gate_down": ('gate_up', '1'),
}
packed_module_mapping = {
"q_proj": ('q_proj', 'q'),
"k_proj": ('k_proj', 'k'),
"v_proj": ('v_proj', 'v'),
"gate_up": ('gate_up_proj', '0'),
"gate_down": ('gate_down_proj', '1'),
}
In class Qwen3ForCausalLM(nn.Module), I think these entries are wrong: ('q_proj', 'q'), ('k_proj', 'k'), ('v_proj', 'v').
The weight name is "qkv_projection", so I think the mapping should be:
packed_module_mapping = {
"q_proj": ('qkv_projection', 'q'),
"k_proj": ('qkv_projection', 'k'),
"v_proj": ('qkv_projection', 'v'),
"gate_up": ('gate_up', '0'),
"gate_down": ('gate_up', '1'),
}