fix: remove function-local torch imports from _get_full_state_dict

kevssim · kevssim · commit eadaea7549a7 · 2026-03-17T09:52:35.000+08:00
diff --git a/src/twinkle/model/transformers/transformers.py b/src/twinkle/model/transformers/transformers.py
@@ -955,9 +955,6 @@ def _get_full_state_dict(self) -> dict:
         the local expert shards across the EP group to reconstruct the
         full expert tensor (all num_experts on dim-0).
         """
-        import torch
-        import torch.distributed as dist
-
         model = self.strategy.unwrap_model(self.model)
         state_dict = {}