From 7689583693b5b4dee88c1b836570258e6ba20be2 Mon Sep 17 00:00:00 2001
From: root <hujinghan.hjh@alibaba-inc.com>
Date: Thu, 9 Apr 2026 17:39:11 +0800
Subject: [PATCH 1/2] lint: use all-linear LoRA in grpo_mm, skip None lora_converter results

---
 cookbook/rl/grpo_mm.py                 | 7 ++-----
 src/twinkle/model/megatron/megatron.py | 9 ++++++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/cookbook/rl/grpo_mm.py b/cookbook/rl/grpo_mm.py
index d6f934d5..130540bc 100644
--- a/cookbook/rl/grpo_mm.py
+++ b/cookbook/rl/grpo_mm.py
@@ -138,11 +138,7 @@ def main():
 
     # LoRA configuration
     lora_config = LoraConfig(
-        target_modules=[
-            'q_proj', 'k_proj', 'v_proj', 'o_proj',
-            'gate_proj', 'up_proj', 'down_proj',
-            'in_proj_qkv', 'in_proj_z', 'in_proj_a', 'in_proj_b', 'out_proj',
-        ],
+        target_modules=['all-linear'],  # 'all-linear' also covers the ViT and Merger/Connector layers
         r=16,
         lora_alpha=32,
         lora_dropout=0.05,
@@ -186,6 +182,7 @@ def main():
             'max_lora_rank': 32,
             'enable_lora': True,
             'limit_mm_per_prompt': {'image': 9},  # OlympiadBench has up to 9 images
+            'enable_tower_connector_lora': True,  # enable LoRA for the ViT (tower) and Merger (connector) on the vLLM side
         },
         device_mesh=sampler_mesh,
         remote_group='sampler',
diff --git a/src/twinkle/model/megatron/megatron.py b/src/twinkle/model/megatron/megatron.py
index 9b485f55..c134e41c 100644
--- a/src/twinkle/model/megatron/megatron.py
+++ b/src/twinkle/model/megatron/megatron.py
@@ -1178,7 +1178,10 @@ def _save_megatron_format(self, output_dir: str, adapter_name: str, lora_convert
         cpu_state_dict = {}
         for k, v in state_dict.items():
             if lora_converter is not None:
-                k, v = lora_converter(k, v)
+                kv = lora_converter(k, v)
+                if kv is None:
+                    continue
+                k, v = kv
             if k is not None and v is not None:
                 cpu_state_dict[k] = v.cpu()
 
@@ -1414,11 +1417,11 @@ def _print_weight_example(names):
         def _add_base_layer_suffix(name):
             if name.endswith('.weight'):
                 base_layer_name = f'{name[:-7]}.base_layer.weight'
-                if base_layer_name in model_keys or not model_keys:
+                if not model_keys or base_layer_name in model_keys:
                     name = base_layer_name
             elif name.endswith('.bias'):
                 base_layer_name = f'{name[:-5]}.base_layer.bias'
-                if base_layer_name in model_keys or not model_keys:
+                if not model_keys or base_layer_name in model_keys:
                     name = base_layer_name
             return name
 

From b873e2fe6d1785e3904dedf53c863c263065705f Mon Sep 17 00:00:00 2001
From: root <hujinghan.hjh@alibaba-inc.com>
Date: Thu, 9 Apr 2026 17:46:25 +0800
Subject: [PATCH 2/2] Add comments explaining why text-only recipes avoid 'all-linear'

---
 cookbook/rl/grpo.py            | 1 +
 cookbook/rl/short_math_grpo.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/cookbook/rl/grpo.py b/cookbook/rl/grpo.py
index 30d5d898..7fc3f2fd 100644
--- a/cookbook/rl/grpo.py
+++ b/cookbook/rl/grpo.py
@@ -70,6 +70,7 @@ def main():
     twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False)
 
     # lora_config = LoraConfig(target_modules='all-linear', r=32, lora_alpha=64, lora_dropout=0.05)
+    # We train on text-only data here, so we avoid 'all-linear', which would also apply LoRA to the ViT layers.
     lora_config = LoraConfig(
         target_modules=[
             'q_proj', 'k_proj', 'v_proj', 'o_proj',
diff --git a/cookbook/rl/short_math_grpo.py b/cookbook/rl/short_math_grpo.py
index 8f498923..bbfda68b 100644
--- a/cookbook/rl/short_math_grpo.py
+++ b/cookbook/rl/short_math_grpo.py
@@ -116,6 +116,7 @@ def main():
     sampler_mesh = DeviceMesh.from_sizes(world_size=SAMPLER_GPUS, dp_size=SAMPLER_GPUS)
     twinkle.initialize(mode='ray', nproc_per_node=NUM_GPUS, groups=device_groups, lazy_collect=False)
 
+    # We train on text-only data here, so we avoid 'all-linear', which would also apply LoRA to the ViT layers.
     lora_config = LoraConfig(
         target_modules=[
             'q_proj', 'k_proj', 'v_proj', 'o_proj',