Patch qwen3moe using existing code

vx120 · vx120 · commit 2860e2e23b74 · 2026-03-30T17:27:27.000+08:00
diff --git a/src/twinkle/sampler/vllm_sampler/vllm_sampler.py b/src/twinkle/sampler/vllm_sampler/vllm_sampler.py
@@ -122,8 +122,7 @@ def __init__(self, model_id: str, engine_args: Dict[str, Any] = None, device_mes
         # fix: On NPU, monkey_patch_model can trigger Triton compatibility errors and abort sampler init.
         # fix: Explicitly skip this patch on NPU and keep it for non-NPU paths only.
         # NPU platform may trigger triton errors with monkey_patch_model
-        if Platform.get_platform().device_prefix() != 'npu':
-            self._run_in_loop(self.engine.engine.collective_rpc('monkey_patch_model'))
+        self._run_in_loop(self.engine.engine.collective_rpc('monkey_patch_model'))
 
         VLLMLoraWeights()(self)
 
diff --git a/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py b/src/twinkle/sampler/vllm_sampler/vllm_worker_extension.py
@@ -131,11 +131,6 @@ def update_weights_from_ipc(
 
         if peft_config and base_sync_done:
             self.remove_lora(VLLM_LORA_INT_ID)
-        else:
-            try:
-                self.monkey_patch_model()
-            except Exception as e:
-                logger.warning(f'Failed to apply MoE weight_loader patch before load_weights: {e}')
 
         # Detect TP rank — vLLM sets self.rank on each worker.
         tp_rank = getattr(self, 'rank', 0)
@@ -358,12 +353,6 @@ def load_synced_weights(
             # fix: Keep device resolution consistent with update_weights_from_ipc to avoid path divergence.
             self.device = torch.device(Torch.get_device(getattr(self, 'local_rank', None)))
 
-        if not (peft_config and base_sync_done):
-            try:
-                self.monkey_patch_model()
-            except Exception as e:
-                logger.warning(f'Failed to apply MoE weight_loader patch before load_weights: {e}')
-
         weight_list = list(weights.items())
         self._load_weights(weight_list, peft_config=peft_config, base_sync_done=base_sync_done)