diff --git a/src/twinkle/model/megatron/megatron.py b/src/twinkle/model/megatron/megatron.py index 9b485f55..4160f591 100644 --- a/src/twinkle/model/megatron/megatron.py +++ b/src/twinkle/model/megatron/megatron.py @@ -1178,7 +1178,10 @@ def _save_megatron_format(self, output_dir: str, adapter_name: str, lora_convert cpu_state_dict = {} for k, v in state_dict.items(): if lora_converter is not None: - k, v = lora_converter(k, v) + kv = lora_converter(k, v) + if kv is None: + continue + k, v = kv if k is not None and v is not None: cpu_state_dict[k] = v.cpu() diff --git a/src/twinkle/model/multi_lora.py b/src/twinkle/model/multi_lora.py index 38b253ab..df60e9f4 100644 --- a/src/twinkle/model/multi_lora.py +++ b/src/twinkle/model/multi_lora.py @@ -494,7 +494,7 @@ def save_lora_converter(self, name, parameter, adapter_name): name = name.replace(f'.{_lora.adapter_name}.', '.') return name, _param else: - return None, None + return None def set_state_dict(self, tenant_adapter_name, state_dict): _lora = self.find_lora_by_tenant(tenant_adapter_name)