Skip to content

Commit 1b4dcc9

Browse files
committed
Merge branch 'main' into release/0.1
# Conflicts:
#   pyproject.toml
#   src/twinkle/version.py
2 parents c30d544 + 96d1354 commit 1b4dcc9

File tree

43 files changed

+2210
-614
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

43 files changed

+2210
-614
lines changed

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,6 @@ supported on Twinkle✨ framework.
135135
| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | - | transformers>=4.39.3 || [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
136136
| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1.5B/7B/14B/32B | transformers>=4.37 || [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) |
137137

138-
For more detailed model support list 👉 [Quick Start](docs/source_en/Usage%20Guide/Quick-Start.md)
139-
140138
## Sample Code
141139

142140
Below are some of the capabilities demonstrated in the example code. For a complete introduction to training capabilities,

README_ZH.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,6 @@ Twinkle✨支持相同的算法接口运行在单GPU、torchrun多机、Ray、Cl
116116
| | [deepseek-ai/DeepSeek-R1](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1) | - | transformers>=4.39.3 || [deepseek-ai/DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1) |
117117
| deepSeek-r1-distill | [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://modelscope.cn/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) | 1.5B/7B/14B/32B | transformers>=4.37 || [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) |
118118

119-
更详细的模型支持列表 👉 [快速开始.md](docs/source_zh/使用指引/快速开始.md)
120-
121119
## 示例代码
122120

123121
下面列出了示例代码的一部分能力。完整的训练能力介绍请参考[快速开始](docs/source_zh/使用指引/快速开始.md)以及[cookbook](cookbook)

cookbook/client/tinker/custom_service/short_math_grpo.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@
6060

6161
class MathPreprocessor(Preprocessor):
6262

63-
def __call__(self, sample):
63+
def __call__(self, rows):
64+
rows = self.map_col_to_row(rows)
65+
rows = [self.preprocess(row) for row in rows]
66+
rows = self.map_row_to_col(rows)
67+
return rows
68+
69+
def preprocess(self, sample):
6470
if sample['level'] not in ('Level 4', 'Level 5'):
6571
return Trajectory(messages=[], user_data=[])
6672

cookbook/client/tinker/modelscope_service/short_math_grpo.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,13 @@
6060

6161
class MathPreprocessor(Preprocessor):
6262

63-
def __call__(self, sample):
63+
def __call__(self, rows):
64+
rows = self.map_col_to_row(rows)
65+
rows = [self.preprocess(row) for row in rows]
66+
rows = self.map_row_to_col(rows)
67+
return rows
68+
69+
def preprocess(self, sample):
6470
if sample['level'] not in ('Level 4', 'Level 5'):
6571
return Trajectory(messages=[], user_data=[])
6672

cookbook/client/twinkle/self_congnition.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,7 @@
2121

2222
logger = get_logger()
2323

24-
# Whether to use Megatron for training
25-
use_megatron = True
24+
2625
# Step 2: Initialize the Twinkle client to communicate with the remote server.
2726
# - base_url: the address of the running Twinkle server
2827
# - api_key: authentication token (loaded from environment variable)
@@ -88,8 +87,7 @@ def train():
8887
model.set_optimizer('Adam', lr=1e-4)
8988

9089
# Use a linear learning rate scheduler (the LR scheduler is not supported when the server uses Megatron)
91-
if not use_megatron:
92-
model.set_lr_scheduler('LinearLR')
90+
model.set_lr_scheduler('LinearLR')
9391

9492
# Step 6: Optionally resume from a previous checkpoint
9593
if resume_path:

cookbook/mm/fsdp2.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
from peft import LoraConfig
2+
from tqdm import tqdm
3+
4+
import twinkle
5+
from twinkle import DeviceMesh, get_device_placement, get_logger
6+
from twinkle.data_format import Trajectory, Message
7+
from twinkle.dataloader import DataLoader
8+
from twinkle.dataset import LazyDataset, DatasetMeta
9+
from twinkle.model import TransformersModel
10+
from twinkle.preprocessor import Preprocessor
11+
12+
# Build a device mesh that shards parameters with FSDP across 2 ranks.
device_mesh = DeviceMesh.from_sizes(fsdp_size=2)

# Run in local (torchrun) mode; every rank joins the same global mesh.
twinkle.initialize(mode='local', global_device_mesh=device_mesh)

logger = get_logger()
18+
19+
20+
class LatexOCRProcessor(Preprocessor):
    """Convert raw LaTeX-OCR dataset rows into chat-style trajectories."""

    def __call__(self, rows):
        # Columnar batch -> per-row dicts, preprocess each row, then back to columnar.
        as_rows = self.map_col_to_row(rows)
        trajectories = [self.preprocess(r) for r in as_rows]
        return self.map_row_to_col(trajectories)

    def preprocess(self, row) -> Trajectory:
        # One user turn carrying the image, one assistant turn with the LaTeX text.
        user_turn = Message(
            role='user',
            content='<image>Using LaTeX to perform OCR on the image.',
            images=[row['image']],
        )
        assistant_turn = Message(role='assistant', content=row['text'])
        return Trajectory(messages=[user_turn, assistant_turn])
35+
36+
37+
# NOTE(review): the name shadows the builtin `eval`; kept unchanged because train() calls it.
def eval(model):
    """Run a forward-only pass over 100 held-out samples and return eval metrics."""
    eval_ds = LazyDataset(dataset_meta=DatasetMeta('ms://AI-ModelScope/LaTeX_OCR', data_slice=range(100)))
    eval_ds.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
    eval_ds.map(LatexOCRProcessor)
    eval_ds.encode()
    loader = DataLoader(dataset=eval_ds, batch_size=8)
    for _, batch in tqdm(enumerate(loader)):
        model.forward_only(inputs=batch)
        model.calculate_loss()
    # Aggregate metrics once after the whole pass.
    return model.calculate_metric(is_training=False)
49+
50+
51+
def train():
    """Fine-tune Qwen3.5-4B with a LoRA adapter on LaTeX-OCR under FSDP2.

    Periodically evaluates, keeps the checkpoint with the best eval loss,
    and always saves a final `last-checkpoint`.
    """
    # 2000 samples
    dataset = LazyDataset(dataset_meta=DatasetMeta('ms://AI-ModelScope/LaTeX_OCR', data_slice=range(2000)))
    # Set template to prepare encoding
    dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B', max_length=1024)
    # Preprocess the dataset to the standard trajectory format
    dataset.map(LatexOCRProcessor)
    # Encode dataset
    dataset.encode()
    # Global batch size = 4 across 2 GPUs (see fsdp2.sh), so 2 samples per GPU
    dataloader = DataLoader(dataset=dataset, batch_size=4)
    # Use a TransformersModel
    from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5ForConditionalGeneration
    model = TransformersModel(model_id='ms://Qwen/Qwen3.5-4B', model_cls=Qwen3_5ForConditionalGeneration)
    model.model._no_split_modules = {'Qwen3_5DecoderLayer'}

    lora_config = LoraConfig(r=8, lora_alpha=32, target_modules='all-linear')

    # Add a LoRA adapter named `default`; comment this out for full-parameter training.
    model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2)
    model.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
    # Optimizer for the `default` adapter
    model.set_optimizer(optimizer_cls='AdamW', lr=1e-4)
    # LR scheduler for the `default` adapter
    model.set_lr_scheduler(
        scheduler_cls='CosineWarmupScheduler', num_warmup_steps=5, num_training_steps=len(dataloader))
    logger.info(get_device_placement())
    # Print the training config
    logger.info(model.get_train_configs())
    logger.info(f'Total steps: {len(dataloader)}')
    # Sentinel larger than any realistic starting loss; tracks the best eval loss seen.
    best_loss = 99.0
    for step, batch in enumerate(dataloader):
        # Do forward and backward
        model.forward_backward(inputs=batch)
        # Optimizer step (with gradient clipping)
        model.clip_grad_and_step()
        if step % 20 == 0:
            # Print training metric
            metric = model.calculate_metric(is_training=True)
            logger.info(f'Current is step {step} of {len(dataloader)}, metric: {metric}')
        if step > 0 and step % 40 == 0:
            metrics = eval(model)
            logger.info(f'Eval metric: {metrics}')
            metrics['step'] = step
            # Save a checkpoint only when the eval loss improves.
            if best_loss > float(metrics['loss']):
                model.save(f'checkpoint-{step}')
                best_loss = float(metrics['loss'])
    model.save('last-checkpoint')  # fix: was an f-string with no placeholders (F541)
100+
101+
102+
# Script entry point: launched per-rank by torchrun (see fsdp2.sh).
if __name__ == '__main__':
    train()

cookbook/mm/fsdp2.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Launch the FSDP2 training script on 2 GPUs (one torchrun worker per GPU).
CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node=2 fsdp2.py

docs/source_en/Components/Checkpoint Engine/HCCLCheckpointEngine.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,11 @@ HCCLCheckpointEngine is specifically designed for Ascend NPU environments:
2525
- Synchronizing model weights between NPUs
2626
- Large-scale NPU cluster deployment
2727

28+
## Environment Variables
29+
30+
- `TWINKLE_CKPT_HCCL_META_TIMEOUT_S`:
31+
Controls the timeout (in seconds) for the HCCL CheckpointEngine
32+
metadata handshake channel (ZMQ REQ/REP).
33+
Default is `300`. This value should be an integer greater than `0`.
34+
2835
> In Ascend NPU environments, HCCLCheckpointEngine provides performance comparable to NCCL.

docs/source_en/Components/Preprocessor and Filter/Preprocessor.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@ The base class of Preprocessor:
77
```python
88
class Preprocessor:
99

10-
def __call__(self, row) -> Trajectory:
10+
def __call__(self, rows: List[Dict]) -> List[Trajectory]:
1111
...
1212
```
1313

14-
The format is to pass in a raw sample and output a `Trajectory`. If the sample cannot be used, you can directly return None.
14+
The format is to pass in a list of samples and output a list of `Trajectory`. If a sample cannot be used, you can directly ignore it.
1515

1616
We provide some basic Preprocessors, such as `SelfCognitionProcessor`:
1717

@@ -22,7 +22,7 @@ dataset.map('SelfCognitionProcessor', model_name='some-model', model_author='som
2222
Preprocessor contains the __call__ method, which means you can use a function to replace the class:
2323

2424
```python
25-
def self_cognition_preprocessor(row):
25+
def self_cognition_preprocessor(rows):
2626
...
27-
return Trajectory(...)
27+
return [Trajectory(...), ...]
2828
```

docs/source_en/Components/Sampler/vLLMSampler.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,4 +69,11 @@ sampler = vLLMSampler(
6969
response = sampler.sample(trajectories, sampling_params=params)
7070
```
7171

72+
## Environment Variables
73+
74+
- `TWINKLE_VLLM_IPC_TIMEOUT_S`:
75+
Controls the timeout (in seconds) for the IPC channel (ZMQ REQ/REP)
76+
between `vLLMSampler` and the vLLM worker extension.
77+
Default is `300`. This value must be greater than `0`.
78+
7279
> In RLHF training, vLLMSampler is typically separated from the Actor model, using different hardware resources to avoid interference between inference and training.

0 commit comments

Comments
 (0)