Skip to content

Commit c944a4b

Browse files
committed
Merge remote-tracking branch 'origin/dev' into dev-wkw
2 parents 3c7b18e + 688557d commit c944a4b

File tree

102 files changed

+704
-190
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

102 files changed

+704
-190
lines changed

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,13 @@ repos:
3636
- repo: https://github.com/pre-commit/pre-commit-hooks.git
3737
rev: v6.0.0
3838
hooks:
39+
- id: trailing-whitespace
40+
- id: check-yaml
41+
- id: end-of-file-fixer
42+
- id: requirements-txt-fixer
43+
- id: double-quote-string-fixer
44+
- id: check-merge-conflict
45+
- id: fix-encoding-pragma
46+
args: [ "--remove" ]
47+
- id: mixed-line-ending
48+
args: [ "--fix=lf" ]

.pre-commit-config_local.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,13 @@ repos:
3535
- repo: /home/admin/pre-commit/pre-commit-hooks
3636
rev: v3.1.0
3737
hooks:
38+
- id: trailing-whitespace
39+
- id: check-yaml
40+
- id: end-of-file-fixer
41+
- id: requirements-txt-fixer
42+
- id: double-quote-string-fixer
43+
- id: check-merge-conflict
44+
- id: fix-encoding-pragma
45+
args: [ "--remove" ]
46+
- id: mixed-line-ending
47+
args: [ "--fix=lf" ]

README.md

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ pip install -e .
7777

7878
- 🎉2026-02-10 Initial version of Twinkle✨ released, including SFT/PT/RL for text models and serverless training capabilities on [ModelScope](https://modelscope.cn).
7979

80+
# ModelScope Community
81+
82+
## ModelScope Official Environment
83+
84+
The ModelScope community provides an official environment for running Twinkle. The API endpoint is: [base_url](https://www.modelscope.cn/twinkle). Developers can refer to our [documentation](docs/source_en/Usage%20Guide/ModelScope-Official-Resources.md) for usage instructions.
85+
8086
## Supported Hardware
8187

8288
| Hardware Environment | Notes |
@@ -181,6 +187,7 @@ if __name__ == '__main__':
181187
### Tinker-Like Remote API
182188

183189
```python
190+
import os
184191
from tqdm import tqdm
185192
from tinker import types
186193
from twinkle_client import init_tinker_compat_client
@@ -191,21 +198,20 @@ from twinkle.server.tinker.common import input_feature_to_datum
191198

192199
base_model = "Qwen/Qwen2.5-0.5B-Instruct"
193200

194-
# 使用 Twinkle 的 Dataset 组件加载和预处理数据
201+
# Use twinkle dataset to load the data
195202
dataset = Dataset(dataset_meta=DatasetMeta('ms://swift/self-cognition', data_slice=range(500)))
196203
dataset.set_template('Template', model_id=f'ms://{base_model}', max_length=256)
197-
dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_cache_file=False)
204+
dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_cache_file=False)
198205
dataset.encode(batched=True, load_from_cache_file=False)
199206
dataloader = DataLoader(dataset=dataset, batch_size=8)
200207

201-
# 初始化 Tinker 兼容客户端
202-
service_client = init_tinker_compat_client(base_url='http://localhost:8000')
208+
# Initialize tinker client
209+
service_client = init_tinker_compat_client(base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
203210
training_client = service_client.create_lora_training_client(base_model=base_model, rank=16)
204211

205-
# 训练循环:使用 input_feature_to_datum 转换数据格式
212+
# Training loop: use input_feature_to_datum to convert the input format
206213
for epoch in range(3):
207214
for step, batch in tqdm(enumerate(dataloader)):
208-
# 将 Twinkle 的 InputFeature 转换为 Tinker 的 Datum
209215
input_datum = [input_feature_to_datum(input_feature) for input_feature in batch]
210216

211217
fwdbwd_future = training_client.forward_backward(input_datum, "cross_entropy")

README_ZH.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ pip install -e .
6767

6868
- 🎉2026-02-10 Twinkle✨ 初始版本发布,包含文本模型的 SFT/PT/RL 以及在 [ModelScope](https://modelscope.cn) 上的无服务器训练能力。
6969

70+
## 魔搭社区官方环境
71+
72+
魔搭社区提供了Twinkle运行的官方环境,调用端点为:[base_url](https://www.modelscope.cn/twinkle),开发者可以参考我们的[文档](docs/source_zh/使用指引/魔搭官方环境.md)来进行使用。
73+
7074
## 支持的硬件
7175

7276
| 硬件环境 | 备注 |
@@ -169,6 +173,7 @@ if __name__ == '__main__':
169173
### Tinker兼容的远程训练
170174

171175
```python
176+
import os
172177
from tqdm import tqdm
173178
from tinker import types
174179
from twinkle_client import init_tinker_compat_client
@@ -187,7 +192,7 @@ dataset.encode(batched=True, load_from_cache_file=False)
187192
dataloader = DataLoader(dataset=dataset, batch_size=8)
188193

189194
# 初始化 Tinker 兼容客户端
190-
service_client = init_tinker_compat_client(base_url='http://localhost:8000')
195+
service_client = init_tinker_compat_client(base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
191196
training_client = service_client.create_lora_training_client(base_model=base_model, rank=16)
192197

193198
# 训练循环:使用 input_feature_to_datum 转换数据格式

cookbook/client/tinker/grpo.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
# Requires both model and sampler services to be configured.
2020

2121
import gc
22+
import os
23+
2224
import numpy as np
2325
from typing import List, Tuple
2426

@@ -34,7 +36,7 @@
3436
logger = get_logger()
3537

3638
# ========== Configuration ==========
37-
BASE_MODEL = 'Qwen/Qwen2.5-7B-Instruct'
39+
BASE_MODEL = "Qwen/Qwen3-30B-A3B-Instruct-2507"
3840
NUM_GENERATIONS = 4
3941
MAX_NEW_TOKENS = 1024
4042
LEARNING_RATE = 1e-5
@@ -84,8 +86,8 @@ def main():
8486

8587
# Step 2: Initialize the Tinker-compatible client
8688
logger.info("Connecting to Tinker server...")
87-
service_client = init_tinker_compat_client(
88-
base_url='http://localhost:8000')
89+
service_client = init_tinker_compat_client(base_url='http://www.modelscope.cn/twinkle',
90+
api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
8991

9092
logger.info("Creating LoRA training client...")
9193
# Create a LoRA training client for GRPO

cookbook/client/tinker/lora.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
# Step 2: Initialize the Tinker-compatible client to communicate with the server.
1717
# - base_url: the address of the running server
1818
# - api_key: authentication token (loaded from environment variable)
19-
service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
19+
service_client = init_tinker_compat_client(base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
2020

2121
# Step 3: List models available on the server to verify the connection
2222
print("Available models:")

cookbook/client/tinker/megatron/server_config.yaml

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,17 @@ applications:
4545
nproc_per_node: 4 # Number of GPU processes per node
4646
sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
4747
engine_args: # vLLM engine-specific settings
48-
max_model_len: 4096 # Maximum sequence length the engine supports
49-
gpu_memory_utilization: 0.7 # Fraction of GPU memory to use (0.0-1.0)
48+
max_model_len: 8192 # Maximum sequence length the engine supports
49+
gpu_memory_utilization: 0.85 # Fraction of GPU memory to use (0.0-1.0)
5050
enable_lora: true # Allow loading LoRA adapters during inference
5151
device_group: # Logical device group for the sampler
5252
name: sampler
53-
gpus_per_worker: 2
54-
ranks: [0,1,2,3,4,5,6,7] # GPU rank indices to use
53+
gpus_per_worker: 1
54+
ranks: [0,1,2,3] # GPU rank indices to use
5555
device_type: cuda
5656
device_mesh:
5757
device_type: cuda
5858
dp_size: 4
59-
tp_size: 2
6059
deployments:
6160
- name: SamplerManagement
6261
autoscaling_config:
@@ -68,7 +67,7 @@ applications:
6867
runtime_env:
6968
env_vars:
7069
TWINKLE_TRUST_REMOTE_CODE: "0"
71-
DEVICE_COUNT_PER_PHYSICAL_NODE: "16"
70+
DEVICE_COUNT_PER_PHYSICAL_NODE: "8"
7271

7372
# 2. Model Service (commented out) - Would host the base model for training.
7473
# Uncomment and configure if you need a training model worker.
@@ -81,18 +80,16 @@ applications:
8180
nproc_per_node: 4 # Number of GPU processes per node
8281
device_group:
8382
name: model
84-
ranks: [8,9,10,11,12,13,14,15] # GPU rank indices
83+
ranks: [4,5,6,7] # GPU rank indices
8584
device_type: cuda
8685
device_mesh:
8786
device_type: cuda
88-
dp_size: 2
89-
tp_size: 2
90-
pp:size: 2
87+
dp_size: 4
9188
ep_size: 2
9289

9390
queue_config:
9491
rps_limit: 100 # Max requests per second
95-
tps_limit: 10000 # Max tokens per second
92+
tps_limit: 100000 # Max tokens per second
9693
adapter_config:
9794
per_token_adapter_limit: 30 # Max concurrent LoRA adapters
9895
adapter_timeout: 1800 # Seconds before idle adapter unload
@@ -101,10 +98,10 @@ applications:
10198
autoscaling_config:
10299
min_replicas: 1
103100
max_replicas: 1
104-
target_ongoing_requests: 16
101+
target_ongoing_requests: 8
105102
ray_actor_options:
106103
num_cpus: 0.1
107104
runtime_env:
108105
env_vars:
109106
TWINKLE_TRUST_REMOTE_CODE: "0"
110-
DEVICE_COUNT_PER_PHYSICAL_NODE: "16"
107+
DEVICE_COUNT_PER_PHYSICAL_NODE: "8"

cookbook/client/tinker/sample.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,53 @@
55
# The server must be running first (see server.py and server_config.yaml).
66

77
from tinker import types
8+
9+
from twinkle.data_format import Message, Trajectory
10+
from twinkle.template import Template
811
from twinkle_client import init_tinker_compat_client
912
from modelscope import AutoTokenizer
1013

1114
# Step 1: Define the base model and connect to the server
12-
base_model = "Qwen/Qwen2.5-7B-Instruct"
13-
service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key="tml-EMPTY_TOKEN")
15+
base_model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
16+
service_client = init_tinker_compat_client(base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
1417

1518
# Step 2: Create a sampling client by loading weights from a saved checkpoint.
1619
# The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
1720
# The server will load the base model and apply the LoRA adapter weights.
1821
sampling_client = service_client.create_sampling_client(
19-
model_path="twinkle://20260130_133245-Qwen_Qwen2_5-0_5B-Instruct-ffebd239/weights/pig-latin-lora-epoch-1",
22+
model_path="twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1",
2023
base_model=base_model)
2124

2225
# Step 3: Load the tokenizer locally to encode the prompt and decode the results
2326
print(f"Using model {base_model}")
24-
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
27+
template = Template(model_id='ms://Qwen/Qwen3-30B-A3B-Instruct-2507')
28+
29+
trajectory = Trajectory(
30+
messages=[
31+
Message(role='system', content='You are a helpful assistant'),
32+
Message(role='user', content="你是谁?"),
33+
]
34+
)
35+
36+
input_features = template.batch_encode([trajectory], add_generation_prompt=True)
37+
38+
input_ids = input_features[0]['input_ids']
2539

2640
# Step 4: Prepare the prompt and sampling parameters
27-
prompt = types.ModelInput.from_ints(tokenizer.encode("English: coffee break\nPig Latin:"))
41+
prompt = types.ModelInput.from_ints(list(input_ids))
2842
params = types.SamplingParams(
29-
max_tokens=20, # Maximum number of tokens to generate
43+
max_tokens=128, # Maximum number of tokens to generate
3044
temperature=0.0, # Greedy sampling (deterministic, always pick the top token)
3145
stop=["\n"] # Stop generation when a newline character is produced
3246
)
3347

3448
# Step 5: Send the sampling request to the server.
3549
# num_samples=8 generates 8 independent completions for the same prompt.
3650
print("Sampling...")
37-
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=8)
51+
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1)
3852
result = future.result()
3953

4054
# Step 6: Decode and print the generated responses
4155
print("Responses:")
4256
for i, seq in enumerate(result.sequences):
43-
print(f"{i}: {repr(tokenizer.decode(seq.tokens))}")
57+
print(f"{i}: {repr(template.decode(seq.tokens))}")

cookbook/client/tinker/self_congnition.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# 2. eval(): Load a trained checkpoint and sample from it to verify
77
# that the model has learned the custom identity.
88
# The server must be running first (see server.py and server_config.yaml).
9+
import os
910

1011
import numpy as np
1112
from tqdm import tqdm
@@ -18,7 +19,7 @@
1819
from modelscope import AutoTokenizer
1920

2021
# The base model to fine-tune / evaluate
21-
base_model = "Qwen/Qwen2.5-7B-Instruct"
22+
base_model = "Qwen/Qwen3-30B-A3B-Instruct-2507"
2223

2324

2425
def train():
@@ -42,7 +43,8 @@ def train():
4243
# Step 2: Initialize the training client
4344

4445
# Connect to the Twinkle server running locally
45-
service_client = init_tinker_compat_client(base_url='http://localhost:8000')
46+
service_client = init_tinker_compat_client(base_url='http://www.modelscope.cn/twinkle',
47+
api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
4648

4749
# Create a LoRA training client for the base model (rank=16 for the LoRA adapter)
4850
training_client = service_client.create_lora_training_client(

0 commit comments

Comments
 (0)