From 7689583693b5b4dee88c1b836570258e6ba20be2 Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Apr 2026 17:39:11 +0800 Subject: [PATCH 1/2] lint --- cookbook/rl/grpo_mm.py | 7 ++----- src/twinkle/model/megatron/megatron.py | 9 ++++++--- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cookbook/rl/grpo_mm.py b/cookbook/rl/grpo_mm.py index d6f934d5..130540bc 100644 --- a/cookbook/rl/grpo_mm.py +++ b/cookbook/rl/grpo_mm.py @@ -138,11 +138,7 @@ def main(): # LoRA configuration lora_config = LoraConfig( - target_modules=[ - 'q_proj', 'k_proj', 'v_proj', 'o_proj', - 'gate_proj', 'up_proj', 'down_proj', - 'in_proj_qkv', 'in_proj_z', 'in_proj_a', 'in_proj_b', 'out_proj', - ], + target_modules=['all-linear'], # including ViT and Merger/Connector r=16, lora_alpha=32, lora_dropout=0.05, @@ -186,6 +182,7 @@ def main(): 'max_lora_rank': 32, 'enable_lora': True, 'limit_mm_per_prompt': {'image': 9}, # OlympiadBench has up to 9 images + 'enable_tower_connector_lora': True, # enable ViT(tower) and Merger(connector) LoRA on vLLM side }, device_mesh=sampler_mesh, remote_group='sampler', diff --git a/src/twinkle/model/megatron/megatron.py b/src/twinkle/model/megatron/megatron.py index 9b485f55..c134e41c 100644 --- a/src/twinkle/model/megatron/megatron.py +++ b/src/twinkle/model/megatron/megatron.py @@ -1178,7 +1178,10 @@ def _save_megatron_format(self, output_dir: str, adapter_name: str, lora_convert cpu_state_dict = {} for k, v in state_dict.items(): if lora_converter is not None: - k, v = lora_converter(k, v) + kv = lora_converter(k, v) + if kv is None: + continue + k, v = kv if k is not None and v is not None: cpu_state_dict[k] = v.cpu() @@ -1414,11 +1417,11 @@ def _print_weight_example(names): def _add_base_layer_suffix(name): if name.endswith('.weight'): base_layer_name = f'{name[:-7]}.base_layer.weight' - if base_layer_name in model_keys or not model_keys: + if not model_keys or base_layer_name in model_keys: name = base_layer_name elif name.endswith('.bias'): base_layer_name = f'{name[:-5]}.base_layer.bias' - if base_layer_name in model_keys or not model_keys: + if not model_keys or base_layer_name in model_keys: name = base_layer_name return name From b873e2fe6d1785e3904dedf53c863c263065705f Mon Sep 17 00:00:00 2001 From: root Date: Thu, 9 Apr 2026 17:46:25 +0800 Subject: [PATCH 2/2] more comment --- cookbook/rl/grpo.py | 1 + cookbook/rl/short_math_grpo.py | 1 + 2 files changed, 2 insertions(+) diff --git a/cookbook/rl/grpo.py b/cookbook/rl/grpo.py index 30d5d898..7fc3f2fd 100644 --- a/cookbook/rl/grpo.py +++ b/cookbook/rl/grpo.py @@ -70,6 +70,7 @@ def main(): twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False) # lora_config = LoraConfig(target_modules='all-linear', r=32, lora_alpha=64, lora_dropout=0.05) + # Since we are training on text-only data, we avoid using 'all-linear' which would include the ViT layers. lora_config = LoraConfig( target_modules=[ 'q_proj', 'k_proj', 'v_proj', 'o_proj', diff --git a/cookbook/rl/short_math_grpo.py b/cookbook/rl/short_math_grpo.py index 8f498923..bbfda68b 100644 --- a/cookbook/rl/short_math_grpo.py +++ b/cookbook/rl/short_math_grpo.py @@ -116,6 +116,7 @@ def main(): sampler_mesh = DeviceMesh.from_sizes(world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS) twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False) + # Since we are training on text-only data, we avoid using 'all-linear' which would include the ViT layers. lora_config = LoraConfig( target_modules=[ 'q_proj', 'k_proj', 'v_proj', 'o_proj',