Skip to content

Commit 58cca7c

Browse files
authored
Fix compat tinker and update doc (#73)
* update * update * update doc * update doc * update
1 parent 317f9b8 commit 58cca7c

File tree

20 files changed

+271
-189
lines changed

20 files changed

+271
-189
lines changed

README.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ if __name__ == '__main__':
203203
import os
204204
from tqdm import tqdm
205205
from tinker import types
206-
from twinkle_client import init_tinker_compat_client
206+
from twinkle_client import init_tinker_client
207207
from twinkle.dataloader import DataLoader
208208
from twinkle.dataset import Dataset, DatasetMeta
209209
from twinkle.preprocessor import SelfCognitionProcessor
@@ -220,8 +220,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c
220220
dataset.encode(batched=True, load_from_cache_file=False)
221221
dataloader = DataLoader(dataset=dataset, batch_size=8)
222222

223-
# Initialize tinker client
224-
service_client = init_tinker_compat_client(base_url, api_key)
223+
# Initialize Tinker client before importing ServiceClient
224+
init_tinker_client()
225+
from tinker import ServiceClient
226+
227+
service_client = ServiceClient(base_url=base_url, api_key=api_key)
225228
training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16)
226229

227230
# Training loop: use input_feature_to_datum to transfer the input format

README_ZH.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ if __name__ == '__main__':
186186
import os
187187
from tqdm import tqdm
188188
from tinker import types
189-
from twinkle_client import init_tinker_compat_client
189+
from twinkle_client import init_tinker_client
190190
from twinkle.dataloader import DataLoader
191191
from twinkle.dataset import Dataset, DatasetMeta
192192
from twinkle.preprocessor import SelfCognitionProcessor
@@ -203,8 +203,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c
203203
dataset.encode(batched=True, load_from_cache_file=False)
204204
dataloader = DataLoader(dataset=dataset, batch_size=8)
205205

206-
# Initialize tinker client
207-
service_client = init_tinker_compat_client(base_url, api_key)
206+
# Initialize Tinker client before importing ServiceClient
207+
init_tinker_client()
208+
from tinker import ServiceClient
209+
210+
service_client = ServiceClient(base_url=base_url, api_key=api_key)
208211
training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16)
209212

210213
# Training loop: use input_feature_to_datum to transfer the input format

cookbook/client/tinker/lora.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,25 @@
1313

1414
import os
1515

16-
from twinkle_client import init_tinker_compat_client
16+
# Step 2: Initialize Tinker client before importing ServiceClient
17+
from twinkle_client import init_tinker_client
1718

18-
# Step 2: Initialize the Tinker-compatible client to communicate with the server.
19-
# - base_url: the address of the running server
20-
# - api_key: authentication token (loaded from environment variable)
21-
service_client = init_tinker_compat_client(
22-
base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN'))
19+
init_tinker_client()
2320

24-
# Step 3: List models available on the server to verify the connection
21+
# Step 3: Use ServiceClient directly from tinker
22+
from tinker import ServiceClient
23+
24+
service_client = ServiceClient(
25+
base_url='http://www.modelscope.cn/twinkle',
26+
api_key=os.environ.get('MODELSCOPE_TOKEN')
27+
)
28+
29+
# Step 4: List models available on the server to verify the connection
2530
print('Available models:')
2631
for item in service_client.get_server_capabilities().supported_models:
2732
print('- ' + item.model_name)
2833

29-
# Step 4: Create a REST client for querying training runs and checkpoints.
34+
# Step 5: Create a REST client for querying training runs and checkpoints.
3035
# This is useful for inspecting previous training sessions or resuming training.
3136
rest_client = service_client.create_rest_client()
3237

@@ -51,7 +56,7 @@
5156
# Uncomment the line below to resume from the last checkpoint:
5257
# resume_path = chpt.tinker_path
5358

54-
# Step 5: Create or resume a training client.
59+
# Step 6: Create or resume a training client.
5560
# If resume_path is set, it restores both model weights and optimizer state.
5661
base_model = 'Qwen/Qwen2.5-7B-Instruct'
5762
if not resume_path:
@@ -60,7 +65,7 @@
6065
print('Resuming from ' + resume_path)
6166
training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path)
6267

63-
# Step 6: Prepare training data manually
68+
# Step 7: Prepare training data manually
6469
#
6570
# This example teaches the model to translate English into Pig Latin.
6671
# Each example has an "input" (English phrase) and "output" (Pig Latin).
@@ -146,7 +151,7 @@ def process_example(example: dict, tokenizer) -> types.Datum:
146151
datum0.loss_fn_inputs['weights'].tolist())):
147152
print(f'{repr(tokenizer.decode([inp])):<20} {repr(tokenizer.decode([tgt])):<20} {wgt:<10}')
148153

149-
# Step 7: Run the training loop
154+
# Step 8: Run the training loop
150155
#
151156
# For each epoch, iterate over multiple batches:
152157
# - forward_backward: sends data to the server, computes loss & gradients
@@ -174,7 +179,7 @@ def process_example(example: dict, tokenizer) -> types.Datum:
174179
save_result = save_future.result()
175180
print(f'Saved checkpoint for epoch {epoch} to {save_result.path}')
176181

177-
# Step 8: Publish the final checkpoint to ModelScope Hub.
182+
# Step 9: Publish the final checkpoint to ModelScope Hub.
178183
# NOTE: Requires a valid ModelScope token set as api_key when initializing the client.
179184
# The published model name will be: {run_id}_{checkpoint_name}
180185
rest_client.publish_checkpoint_from_tinker_path(save_result.path).result()

cookbook/client/tinker/megatron/server_config_7b.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ applications:
2121
route_prefix: /api/v1 # API endpoint prefix (Tinker-compatible)
2222
import_path: server # Python module to import
2323
args:
24-
24+
supported_models:
25+
- Qwen/Qwen2.5-7B-Instruct
2526
deployments:
2627
- name: TinkerCompatServer
2728
autoscaling_config:

cookbook/client/tinker/sample.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,27 +4,34 @@
44
# for text generation (sampling) via the Tinker-compatible client API.
55
# The server must be running first (see server.py and server_config.yaml).
66

7+
import os
78
from tinker import types
89

910
from twinkle.data_format import Message, Trajectory
1011
from twinkle.template import Template
11-
from twinkle_client import init_tinker_compat_client
12+
from twinkle_client import init_tinker_client
1213

13-
# Step 1: Define the base model and connect to the server
14+
# Step 1: Initialize Tinker client
15+
init_tinker_client()
16+
17+
from tinker import ServiceClient
18+
19+
# Step 2: Define the base model and connect to the server
1420
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
15-
service_client = init_tinker_compat_client(
21+
service_client = ServiceClient(
1622
base_url='http://www.modelscope.cn/twinkle',
1723
api_key=os.environ.get('MODELSCOPE_TOKEN')
1824
)
19-
# Step 2: Create a sampling client by loading weights from a saved checkpoint.
25+
26+
# Step 3: Create a sampling client by loading weights from a saved checkpoint.
2027
# The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
2128
# The server will load the base model and apply the LoRA adapter weights.
22-
service_client.create_sampling_client(
29+
sampling_client = service_client.create_sampling_client(
2330
model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
2431
base_model=base_model
2532
)
2633

27-
# Step 3: Load the tokenizer locally to encode the prompt and decode the results
34+
# Step 4: Load the tokenizer locally to encode the prompt and decode the results
2835
print(f'Using model {base_model}')
2936

3037
template = Template(model_id=f'ms://{base_model}')
@@ -40,21 +47,21 @@
4047

4148
input_ids = input_feature['input_ids'].tolist()
4249

43-
# Step 4: Prepare the prompt and sampling parameters
50+
# Step 5: Prepare the prompt and sampling parameters
4451
prompt = types.ModelInput.from_ints(input_ids)
4552
params = types.SamplingParams(
4653
max_tokens=128, # Maximum number of tokens to generate
4754
temperature=0.7,
4855
stop=['\n'] # Stop generation when a newline character is produced
4956
)
5057

51-
# Step 5: Send the sampling request to the server.
58+
# Step 6: Send the sampling request to the server.
5259
# num_samples=8 generates 8 independent completions for the same prompt.
5360
print('Sampling...')
5461
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1)
5562
result = future.result()
5663

57-
# Step 6: Decode and print the generated responses
64+
# Step 7: Decode and print the generated responses
5865
print('Responses:')
5966
for i, seq in enumerate(result.sequences):
6067
print(f'{i}: {repr(template.decode(seq.tokens))}')

cookbook/client/tinker/self_congnition.py

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,23 @@
1010
import os
1111
from tqdm import tqdm
1212
from tinker import types
13-
from twinkle_client import init_tinker_compat_client
13+
from twinkle_client import init_tinker_client
1414
from twinkle.data_format import Message, Trajectory
1515
from twinkle.template import Template
1616
from twinkle.dataloader import DataLoader
1717
from twinkle.dataset import Dataset, DatasetMeta
1818
from twinkle.preprocessor import SelfCognitionProcessor
1919
from twinkle.server.tinker.common import input_feature_to_datum
2020

21+
# Initialize the Tinker client before importing ServiceClient
22+
init_tinker_client()
23+
24+
from tinker import ServiceClient
25+
2126
# The base model to fine-tune / evaluate
22-
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
27+
# base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
28+
base_model = 'Qwen/Qwen2.5-7B-Instruct'
29+
base_url = 'http://localhost:8000'
2330

2431

2532
def train():
@@ -42,9 +49,11 @@ def train():
4249

4350
# Step 2: Initialize the training client
4451

45-
# Connect to the Twinkle server running locally
46-
service_client = init_tinker_compat_client(
47-
base_url='localhost:9000', api_key=os.environ.get('MODELSCOPE_TOKEN'))
52+
53+
service_client = ServiceClient(
54+
base_url=base_url,
55+
api_key=os.environ.get('MODELSCOPE_TOKEN')
56+
)
4857

4958
# Create a LoRA training client for the base model (rank=16 for the LoRA adapter)
5059
training_client = service_client.create_lora_training_client(base_model=base_model, rank=16)
@@ -85,8 +94,7 @@ def eval():
8594
# Path to a previously saved LoRA checkpoint (twinkle:// URI)
8695
weight_path = 'twinkle://20260212_174205-Qwen_Qwen2_5-7B-Instruct-51edc9ed/weights/twinkle-lora-2'
8796

88-
# Connect to the server and create a sampling client with the trained weights
89-
service_client = init_tinker_compat_client(base_url='http://localhost:9000')
97+
service_client = ServiceClient(base_url=base_url, api_key=os.environ.get('MODELSCOPE_TOKEN'))
9098
sampling_client = service_client.create_sampling_client(model_path=weight_path, base_model=base_model)
9199

92100
# Step 2: Prepare the chat prompt

cookbook/client/tinker/short_math_grpo.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from tinker import types
2525
from typing import List, Tuple
2626

27-
from twinkle_client import init_tinker_compat_client
27+
from twinkle_client import init_tinker_client
2828
from twinkle import get_logger
2929
from twinkle.advantage import GRPOAdvantage
3030
from twinkle.data_format import Message, Trajectory
@@ -206,8 +206,13 @@ def main():
206206

207207
# Step 2: Initialize the Tinker-compatible client
208208
logger.info('Connecting to Tinker server...')
209-
service_client = init_tinker_compat_client(
210-
base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN'))
209+
init_tinker_client()
210+
211+
from tinker import ServiceClient
212+
service_client = ServiceClient(
213+
base_url='http://www.modelscope.cn/twinkle',
214+
api_key=os.environ.get('MODELSCOPE_TOKEN')
215+
)
211216

212217
logger.info('Creating LoRA training client...')
213218
# Create a LoRA training client for GRPO

docs/source_en/Usage Guide/Server and Client/Overview.md

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Server and Client
1+
# Overview
22

33
Twinkle provides a complete HTTP Server/Client architecture that supports deploying models as services and remotely calling them through clients to complete training, inference, and other tasks. This architecture decouples **model hosting (Server side)** and **training logic (Client side)**, allowing multiple users to share the same base model for training.
44

@@ -14,7 +14,7 @@ Twinkle Server supports two protocol modes:
1414
| Mode | server_type | Description |
1515
|------|------------|------|
1616
| **Twinkle Server** | `twinkle` | Native Twinkle protocol, used with `twinkle_client`, simpler API |
17-
| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, used with `init_tinker_compat_client`, can reuse existing Tinker training code |
17+
| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, can reuse existing Tinker training code |
1818

1919
### Two Model Backends
2020

@@ -30,7 +30,7 @@ Regardless of Server mode, model loading supports two backends:
3030
| Client | Initialization Method | Description |
3131
|--------|---------|------|
3232
| **Twinkle Client** | `init_twinkle_client` | Native client, simply change `from twinkle import` to `from twinkle_client import` to migrate local training code to remote calls |
33-
| **Tinker Compatible Client** | `init_tinker_compat_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused |
33+
| **Tinker Client** | `init_tinker_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused |
3434

3535
## How to Choose
3636

@@ -47,7 +47,7 @@ Regardless of Server mode, model loading supports two backends:
4747
| Scenario | Recommendation |
4848
|------|------|
4949
| Existing Twinkle local training code, want to switch to remote | Twinkle Client — only need to change import paths |
50-
| Existing Tinker training code, want to reuse | Tinker Compatible Client — only need to initialize patch |
50+
| Existing Tinker training code, want to reuse | Tinker Client — only need to initialize patch |
5151
| New project | Twinkle Client — simpler API |
5252

5353
### Model Backend Selection
@@ -65,33 +65,35 @@ Complete runnable examples are located in the `cookbook/client/` directory:
6565
```
6666
cookbook/client/
6767
├── twinkle/ # Twinkle native protocol examples
68-
│ ├── transformer/ # Transformers backend
68+
│ ├── transformer/ # Transformers backend server config
6969
│ │ ├── server.py # Startup script
70-
│ │ ├── server_config.yaml # Configuration file
71-
│ │ └── lora.py # LoRA training client
72-
│ └── megatron/ # Megatron backend
73-
│ ├── server.py
74-
│ ├── server_config.yaml
75-
│ └── lora.py
70+
│ │ └── server_config.yaml # Configuration file
71+
│ ├── megatron/ # Megatron backend server config
72+
│ │ ├── server.py
73+
│ │ └── server_config.yaml
74+
│ ├── grpo.py # GRPO training client
75+
│ ├── sample.py # Inference sampling client
76+
│ └── self_congnition.py # Self-cognition training client
7677
└── tinker/ # Tinker compatible protocol examples
77-
├── transformer/ # Transformers backend
78+
├── transformer/ # Transformers backend server config
79+
│ ├── server.py
80+
│ └── server_config.yaml
81+
├── megatron/ # Megatron backend server config
7882
│ ├── server.py
7983
│ ├── server_config.yaml
80-
│ ├── lora.py # LoRA training
81-
│ ├── sample.py # Inference sampling
82-
│ └── self_congnition.py # Self-cognition training+evaluation
83-
└── megatron/ # Megatron backend
84-
├── server.py
85-
├── server_config.yaml
86-
└── lora.py
84+
│ └── server_config_7b.yaml
85+
├── lora.py # LoRA training client
86+
├── sample.py # Inference sampling client
87+
├── self_congnition.py # Self-cognition training+evaluation
88+
└── short_math_grpo.py # GRPO math training client
8789
```
8890

8991
Running steps:
9092

9193
```bash
9294
# 1. Start Server first
93-
python cookbook/client/twinkle/transformer/server.py
95+
python cookbook/client/tinker/transformer/server.py
9496

9597
# 2. Run Client in another terminal
96-
python cookbook/client/twinkle/transformer/lora.py
98+
python cookbook/client/tinker/lora.py
9799
```

docs/source_en/Usage Guide/Server and Client/Server.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,6 @@ CLI supported parameters:
210210
| `-c, --config` | YAML configuration file path (required) | — |
211211
| `-t, --server-type` | Server mode: `twinkle` or `tinker` | `twinkle` |
212212
| `--namespace` | Ray namespace | tinker mode defaults to `twinkle_cluster` |
213-
| `--no-wait` | Do not block and wait (daemon mode) | `False` |
214213
| `--log-level` | Log level | `INFO` |
215214

216215
## YAML Configuration Details

0 commit comments

Comments (0)