
Commit 1b2ab01

Merge branch 'main' into npu_adapt_doc
2 parents 09c6202 + 4d6ebeb commit 1b2ab01

116 files changed

Lines changed: 3348 additions & 1337 deletions


README.md

Lines changed: 5 additions & 5 deletions
```diff
@@ -19,7 +19,7 @@ by <a href="https://modelscope.cn/home">ModelScope</a>
 </p>
 
 <p align="center">
-<a href="https://twinkle-kit.readthedocs.io/en/latest/">English Documentation</a> &nbsp | &nbsp <a href="https://twinkle-kit.readthedocs.io/zh-cn/latest/">中文文档</a> &nbsp
+<a href="https://twinkle-kit.readthedocs.io/en/latest/">English Documentation</a> &nbsp | &nbsp <a href="https://twinkle-kit.readthedocs.io/zh-cn/latest/">中文文档</a> &nbsp | &nbsp <a href="https://modelscope.github.io/twinkle-web/">Twinkle Web</a> &nbsp
 </p>
 
 ## ✨ What is Twinkle?
@@ -101,9 +101,9 @@ Or use ModelScope's [official image](https://www.modelscope.cn/docs/intro/enviro
 
 ## Changelog
 
+- 🎉2026-03-24 Twinkle Web site is now live at https://modelscope.github.io/twinkle-web/
+- 🎉2026-03-19 Support GKD training, please refer to this [cookbook](cookbook/rl/gkd_on_policy.py).
 - 🎉2026-02-13 Initial version of Twinkle✨ released, including SFT/PT/RL support for text models.
-We also made available serverless training capabilities on [ModelScope](https://modelscope.cn) via
-Tinker-compatible APIs.
 
 ## Training as a Service on ModelScope
 
@@ -130,7 +130,7 @@ supported on Twinkle✨ framework.
 > For serverless training service accessed via `base_url=https://www.modelscope.cn/twinkle`, it
 > is currently provided via the Tinker-compatible APIs. We will be rolling out services that support
 > both Tinker APIs, as well as the full-fledged Twinkle✨ native APIs. The serverless endpoint is backed
-> by one training base at a time, and currently it is [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507).
+> by one training base at a time, and currently it is [Qwen3.5-4B](https://modelscope.cn/models/Qwen/Qwen3.5-4B).
 
 | Model Type | Model ID on [ModelScope](https://modelscope.cn) | Model Size | Requires | Support Megatron | HF Model ID |
 |---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
@@ -235,7 +235,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum
 
-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='your-base-url'
 api_key='your-api-key'
```
README_ZH.md

Lines changed: 5 additions & 3 deletions
```diff
@@ -19,7 +19,7 @@
 </p>
 
 <p align="center">
-<a href="https://twinkle-kit.readthedocs.io/en/latest/">英文文档</a> &nbsp | &nbsp <a href="https://twinkle-kit.readthedocs.io/zh-cn/latest/">中文文档</a> &nbsp
+<a href="https://twinkle-kit.readthedocs.io/en/latest/">英文文档</a> &nbsp | &nbsp <a href="https://twinkle-kit.readthedocs.io/zh-cn/latest/">中文文档</a> &nbsp | &nbsp <a href="https://modelscope.github.io/twinkle-web/">Twinkle 站点</a> &nbsp
 </p>
 
 ## ✨ Twinkle 是什么?
@@ -91,6 +91,8 @@ Twinkle✨支持相同的算法接口运行在单GPU、torchrun多机、Ray、Cl
 
 ## 更新日志
 
+🎉2026-03-24 Twinkle 站点上线,访问地址 https://modelscope.github.io/twinkle-web/
+🎉2026-03-19 支持GKD蒸馏能力,参考[cookbook](cookbook/rl/gkd_on_policy.py)
 🎉2026-02-13 Twinkle✨ 初始版本发布,支持文本模型的SFT/PT/RL训练。我们还通过兼容Tinker的API,在魔搭社区上提供了无服务器训练功能。
 
 ## ModelScope 的训练服务
@@ -111,7 +113,7 @@ Twinkle✨支持相同的算法接口运行在单GPU、torchrun多机、Ray、Cl
 随着新模型的发布,我们将添加对更多模型的支持。下表列出了 Twinkle✨ 框架当前支持的模型。
 
 >[!Note]
-> 通过 `base_url=https://www.modelscope.cn/twinkle` 访问的无服务器训练服务,目前是通过兼容Tinker的API提供的。我们将陆续推出同时支持Tinker API和完整Twinkle✨原生 API的服务。无服务器端点每次由一个训练基座支持,目前使用的是[Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)
+> 通过 `base_url=https://www.modelscope.cn/twinkle` 访问的无服务器训练服务,目前是通过兼容Tinker的API提供的。我们将陆续推出同时支持Tinker API和完整Twinkle✨原生 API的服务。无服务器端点每次由一个训练基座支持,目前使用的是[Qwen3.5-4B](https://modelscope.cn/models/Qwen/Qwen3.5-4B)
 
 | Model Type | Model ID 举例 | Model Size | Requires | Support Megatron | HF Model ID |
 |---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
@@ -215,7 +217,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum
 
-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='your-base-url'
 api_key='your-api-key'
```

assets/slogan.png

206 KB

client_tools/client_generator.py

Lines changed: 10 additions & 2 deletions
```diff
@@ -768,7 +768,7 @@ def sample(
         adapter_name: str = '',
         adapter_uri: Optional[str] = None,
         num_samples: int = 1,
-    ) -> SampleResponseModel:
+    ) -> List[SampleResponseModel]:
         """Sample from the model.
 
         Args:
@@ -795,7 +795,7 @@ def sample(
             json_data=json_data
         )
         response.raise_for_status()
-        return SampleResponseModel(**response.json())
+        return [SampleResponseModel(**r) for r in response.json()['samples']]
 
     def set_template(self, template_cls: str, adapter_name: str = '', **kwargs) -> SetTemplateResponse:
         """Set the template for encoding trajectories."""
@@ -805,6 +805,14 @@ def set_template(self, template_cls: str, adapter_name: str = '', **kwargs) -> S
         )
         response.raise_for_status()
         return SetTemplateResponse(**response.json())
+
+    def apply_patch(self, patch_cls: str, **kwargs) -> None:
+        """Apply a patch to the model."""
+        response = http_post(
+            url=f'{self.server_url}/apply_patch',
+            json_data={'patch_cls': patch_cls, 'adapter_name': self.adapter_name, **kwargs}
+        )
+        response.raise_for_status()
 '''
 
 # Write the sampler client file
```
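The `sample` change alters the client's return contract: callers that previously received a single `SampleResponseModel` now get a list built from the response's `samples` key. A minimal sketch of the new parsing behavior, using a hypothetical stand-in for the generated response model (the real class lives in the generated client file):

```python
from dataclasses import dataclass
from typing import List


@dataclass
class SampleResponseModel:
    # Hypothetical stand-in; the real model is generated into the client code.
    text: str


def parse_sample_response(payload: dict) -> List[SampleResponseModel]:
    # The endpoint now returns a batch under the 'samples' key, so the client
    # constructs one response model per sample instead of a single object.
    return [SampleResponseModel(**r) for r in payload['samples']]


responses = parse_sample_response({'samples': [{'text': 'a'}, {'text': 'b'}]})
```

Existing code that read attributes off the old single return value would need to index or iterate the returned list after this change.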
Lines changed: 6 additions & 0 deletions
```diff
@@ -0,0 +1,6 @@
+export RAY_ROTATION_MAX_BYTES=1024
+export RAY_ROTATION_BACKUP_COUNT=1
+CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false
+CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4
+CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0
+python server.py
```
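This new script pins GPUs 0-3 to the Ray head node, GPUs 4-7 to a second worker, and joins a CPU-only node before launching `server.py`. The per-node `CUDA_VISIBLE_DEVICES` values are a straight partition of the available GPU ids, sketched here with a hypothetical helper (not part of the repo):

```python
def gpu_groups(total_gpus: int, gpus_per_node: int) -> list:
    """Partition GPU indices into per-node CUDA_VISIBLE_DEVICES strings."""
    ids = list(range(total_gpus))
    return [
        ','.join(str(i) for i in ids[start:start + gpus_per_node])
        for start in range(0, total_gpus, gpus_per_node)
    ]


# Mirrors the script above: head node gets '0,1,2,3', the worker '4,5,6,7'.
groups = gpu_groups(8, 4)
```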

cookbook/client/server/megatron/server_config.yaml

Lines changed: 6 additions & 6 deletions
```diff
@@ -36,11 +36,11 @@ applications:
 
   # 3. Sampler Service - Runs inference / sampling using vLLM engine
   # Used for generating text from the model (e.g., evaluating LoRA results).
-  - name: sampler-Qwen3-30B-A3B-Instruct-2507
-    route_prefix: /api/v1/sampler/Qwen/Qwen3-30B-A3B-Instruct-2507
+  - name: sampler-Qwen3.5-4B
+    route_prefix: /api/v1/sampler/Qwen/Qwen3.5-4B
     import_path: sampler
     args:
-      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
+      model_id: "ms://Qwen/Qwen3.5-4B" # ModelScope model identifier
       nproc_per_node: 4 # Number of GPU processes per node
       sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
       engine_args: # vLLM engine-specific settings
@@ -73,12 +73,12 @@ applications:
 
   # 2. Model Service (commented out) - Would host the base model for training.
   # Uncomment and configure if you need a training model worker.
-  - name: models-Qwen3-30B-A3B-Instruct-2507
-    route_prefix: /api/v1/model/Qwen/Qwen3-30B-A3B-Instruct-2507
+  - name: models-Qwen3.5-4B
+    route_prefix: /api/v1/model/Qwen/Qwen3.5-4B
     import_path: model
     args:
       use_megatron: true # Use HuggingFace Transformers backend
-      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
+      model_id: "ms://Qwen/Qwen3.5-4B" # ModelScope model identifier
       max_length: 16000 # model max length
       max_loras: 5 # model max loras
       nproc_per_node: 4 # Number of GPU processes per node
```
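Each application block in this config pairs a service name and a `route_prefix` derived from the same ModelScope model id. A hypothetical sketch of that naming pattern (the real config is written by hand; this only illustrates the derivation):

```python
def service_routes(model_id: str) -> dict:
    # model_id looks like 'ms://Qwen/Qwen3.5-4B': strip the scheme, keep 'org/name'.
    org_and_name = model_id.removeprefix('ms://')
    short_name = org_and_name.split('/')[-1]
    return {
        'sampler_name': f'sampler-{short_name}',
        'sampler_route': f'/api/v1/sampler/{org_and_name}',
        'model_name': f'models-{short_name}',
        'model_route': f'/api/v1/model/{org_and_name}',
    }


routes = service_routes('ms://Qwen/Qwen3.5-4B')
```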

cookbook/client/server/megatron/server_config_4b.yaml

Lines changed: 1 addition & 0 deletions
```diff
@@ -39,6 +39,7 @@ applications:
     import_path: model
     args:
       use_megatron: true
+      model_cls: Qwen3_5ForConditionalGeneration
       model_id: "ms://Qwen/Qwen3.5-4B" # ModelScope model identifier
       max_length: 10240
       nproc_per_node: 2 # Number of GPU processes per node
```
Lines changed: 6 additions & 0 deletions
```diff
@@ -0,0 +1,6 @@
+export RAY_ROTATION_MAX_BYTES=1024
+export RAY_ROTATION_BACKUP_COUNT=1
+CUDA_VISIBLE_DEVICES=0,1,2,3 ray start --head --port=6379 --num-gpus=4 --disable-usage-stats --include-dashboard=false
+CUDA_VISIBLE_DEVICES=4,5,6,7 ray start --address=127.0.0.1:6379 --num-gpus=4
+CUDA_VISIBLE_DEVICES="" ray start --address=127.0.0.1:6379 --num-gpus=0
+python server.py
```

cookbook/client/tinker/modelscope/sample.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -16,7 +16,7 @@
 
 from tinker import ServiceClient
 
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
 # Step 2: Define the base model and connect to the server
@@ -29,7 +29,7 @@
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
```
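In the `twinkle://` checkpoint URIs in this diff, the segment between the `xxx-` placeholders embeds the base model id with `/` flattened to `_` (the `xxx` run identifiers are elided in the example and come from the training service). A sketch of that observed convention:

```python
base_model = 'Qwen/Qwen3.5-4B'

# Observed convention in the example URIs: '/' in the model id becomes '_'.
model_segment = base_model.replace('/', '_')
```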

cookbook/client/tinker/modelscope/self_cognition.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -23,7 +23,7 @@
 from tinker import ServiceClient
 
 # The base model to fine-tune / evaluate
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
 
```
2929
