"""Sample from a LoRA checkpoint served by a twinkle/tinker-compatible server.

The server must be running first (see server.py and server_config.yaml).
Requires the MODELSCOPE_SDK_TOKEN environment variable for authentication.
"""

import os  # required: api_key below reads os.environ (was missing -> NameError)

from tinker import types

from twinkle.data_format import Message, Trajectory
from twinkle.template import Template
from twinkle_client import init_tinker_compat_client
from modelscope import AutoTokenizer  # NOTE(review): unused in this script; kept in case other tooling imports it from here — confirm before removing

# Step 1: Define the base model and connect to the server.
base_model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
service_client = init_tinker_compat_client(
    base_url='http://www.modelscope.cn/twinkle',
    api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'),  # None if unset; the server will reject unauthenticated requests
)

# Step 2: Create a sampling client by loading weights from a saved checkpoint.
# The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
# The server will load the base model and apply the LoRA adapter weights.
sampling_client = service_client.create_sampling_client(
    model_path="twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1",
    base_model=base_model)

# Step 3: Encode the chat prompt locally with the model's template
# (replaces direct tokenizer usage: the template applies the chat format
# and the generation prompt for us).
print(f"Using model {base_model}")
template = Template(model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507')

trajectory = Trajectory(
    messages=[
        Message(role='system', content='You are a helpful assistant'),
        Message(role='user', content="你是谁?"),
    ]
)

# batch_encode returns one feature dict per trajectory; we only sent one.
input_features = template.batch_encode([trajectory], add_generation_prompt=True)
input_ids = input_features[0]['input_ids']

# Step 4: Prepare the prompt and sampling parameters.
prompt = types.ModelInput.from_ints(list(input_ids))
params = types.SamplingParams(
    max_tokens=128,    # Maximum number of tokens to generate
    temperature=0.0,   # Greedy sampling (deterministic, always pick the top token)
    stop=["\n"]        # Stop generation when a newline character is produced
)

# Step 5: Send the sampling request to the server.
# num_samples controls how many independent completions are generated per prompt.
print("Sampling...")
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1)
result = future.result()

# Step 6: Decode and print the generated responses.
print("Responses:")
for i, seq in enumerate(result.sequences):
    print(f"{i}: {repr(template.decode(seq.tokens))}")