Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ if __name__ == '__main__':
import os
from tqdm import tqdm
from tinker import types
from twinkle_client import init_tinker_compat_client
from twinkle_client import init_tinker_client
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.preprocessor import SelfCognitionProcessor
Expand All @@ -220,8 +220,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c
dataset.encode(batched=True, load_from_cache_file=False)
dataloader = DataLoader(dataset=dataset, batch_size=8)

# Initialize tinker client
service_client = init_tinker_compat_client(base_url, api_key)
# Initialize Tinker client before importing ServiceClient
init_tinker_client()
from tinker import ServiceClient

service_client = ServiceClient(base_url=base_url, api_key=api_key)
training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16)

# Training loop: use input_feature_to_datum to transfer the input format
Expand Down
9 changes: 6 additions & 3 deletions README_ZH.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ if __name__ == '__main__':
import os
from tqdm import tqdm
from tinker import types
from twinkle_client import init_tinker_compat_client
from twinkle_client import init_tinker_client
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.preprocessor import SelfCognitionProcessor
Expand All @@ -203,8 +203,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c
dataset.encode(batched=True, load_from_cache_file=False)
dataloader = DataLoader(dataset=dataset, batch_size=8)

# Initialize tinker client
service_client = init_tinker_compat_client(base_url, api_key)
# Initialize Tinker client before importing ServiceClient
init_tinker_client()
from tinker import ServiceClient

service_client = ServiceClient(base_url=base_url, api_key=api_key)
training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16)

# Training loop: use input_feature_to_datum to transfer the input format
Expand Down
29 changes: 17 additions & 12 deletions cookbook/client/tinker/lora.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,25 @@

import os

from twinkle_client import init_tinker_compat_client
# Step 2: Initialize Tinker client before importing ServiceClient
from twinkle_client import init_tinker_client

# Step 2: Initialize the Tinker-compatible client to communicate with the server.
# - base_url: the address of the running server
# - api_key: authentication token (loaded from environment variable)
service_client = init_tinker_compat_client(
base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN'))
init_tinker_client()

# Step 3: List models available on the server to verify the connection
# Step 3: Use ServiceClient directly from tinker
from tinker import ServiceClient

service_client = ServiceClient(
base_url='http://www.modelscope.cn/twinkle',
api_key=os.environ.get('MODELSCOPE_TOKEN')
)

# Step 4: List models available on the server to verify the connection
print('Available models:')
for item in service_client.get_server_capabilities().supported_models:
print('- ' + item.model_name)

# Step 4: Create a REST client for querying training runs and checkpoints.
# Step 5: Create a REST client for querying training runs and checkpoints.
# This is useful for inspecting previous training sessions or resuming training.
rest_client = service_client.create_rest_client()

Expand All @@ -51,7 +56,7 @@
# Uncomment the line below to resume from the last checkpoint:
# resume_path = chpt.tinker_path

# Step 5: Create or resume a training client.
# Step 6: Create or resume a training client.
# If resume_path is set, it restores both model weights and optimizer state.
base_model = 'Qwen/Qwen2.5-7B-Instruct'
if not resume_path:
Expand All @@ -60,7 +65,7 @@
print('Resuming from ' + resume_path)
training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path)

# Step 6: Prepare training data manually
# Step 7: Prepare training data manually
#
# This example teaches the model to translate English into Pig Latin.
# Each example has an "input" (English phrase) and "output" (Pig Latin).
Expand Down Expand Up @@ -146,7 +151,7 @@ def process_example(example: dict, tokenizer) -> types.Datum:
datum0.loss_fn_inputs['weights'].tolist())):
print(f'{repr(tokenizer.decode([inp])):<20} {repr(tokenizer.decode([tgt])):<20} {wgt:<10}')

# Step 7: Run the training loop
# Step 8: Run the training loop
#
# For each epoch, iterate over multiple batches:
# - forward_backward: sends data to the server, computes loss & gradients
Expand Down Expand Up @@ -174,7 +179,7 @@ def process_example(example: dict, tokenizer) -> types.Datum:
save_result = save_future.result()
print(f'Saved checkpoint for epoch {epoch} to {save_result.path}')

# Step 8: Publish the final checkpoint to ModelScope Hub.
# Step 9: Publish the final checkpoint to ModelScope Hub.
# NOTE: Requires a valid ModelScope token set as api_key when initializing the client.
# The published model name will be: {run_id}_{checkpoint_name}
rest_client.publish_checkpoint_from_tinker_path(save_result.path).result()
Expand Down
3 changes: 2 additions & 1 deletion cookbook/client/tinker/megatron/server_config_7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ applications:
route_prefix: /api/v1 # API endpoint prefix (Tinker-compatible)
import_path: server # Python module to import
args:

supported_models:
- Qwen/Qwen2.5-7B-Instruct
deployments:
- name: TinkerCompatServer
autoscaling_config:
Expand Down
25 changes: 16 additions & 9 deletions cookbook/client/tinker/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,34 @@
# for text generation (sampling) via the Tinker-compatible client API.
# The server must be running first (see server.py and server_config.yaml).

import os
from tinker import types

from twinkle.data_format import Message, Trajectory
from twinkle.template import Template
from twinkle_client import init_tinker_compat_client
from twinkle_client import init_tinker_client

# Step 1: Define the base model and connect to the server
# Step 1: Initialize Tinker client
init_tinker_client()

from tinker import ServiceClient

# Step 2: Define the base model and connect to the server
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
service_client = init_tinker_compat_client(
service_client = ServiceClient(
base_url='http://www.modelscope.cn/twinkle',
api_key=os.environ.get('MODELSCOPE_TOKEN')
)
# Step 2: Create a sampling client by loading weights from a saved checkpoint.

# Step 3: Create a sampling client by loading weights from a saved checkpoint.
# The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
# The server will load the base model and apply the LoRA adapter weights.
service_client.create_sampling_client(
sampling_client = service_client.create_sampling_client(
model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
base_model=base_model
)

# Step 3: Load the tokenizer locally to encode the prompt and decode the results
# Step 4: Load the tokenizer locally to encode the prompt and decode the results
print(f'Using model {base_model}')

template = Template(model_id=f'ms://{base_model}')
Expand All @@ -40,21 +47,21 @@

input_ids = input_feature['input_ids'].tolist()

# Step 4: Prepare the prompt and sampling parameters
# Step 5: Prepare the prompt and sampling parameters
prompt = types.ModelInput.from_ints(input_ids)
params = types.SamplingParams(
max_tokens=128, # Maximum number of tokens to generate
temperature=0.7,
stop=['\n'] # Stop generation when a newline character is produced
)

# Step 5: Send the sampling request to the server.
# Step 6: Send the sampling request to the server.
# num_samples controls how many independent completions are generated for the same prompt (1 here).
print('Sampling...')
future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1)
result = future.result()

# Step 6: Decode and print the generated responses
# Step 7: Decode and print the generated responses
print('Responses:')
for i, seq in enumerate(result.sequences):
print(f'{i}: {repr(template.decode(seq.tokens))}')
22 changes: 15 additions & 7 deletions cookbook/client/tinker/self_congnition.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,23 @@
import os
from tqdm import tqdm
from tinker import types
from twinkle_client import init_tinker_compat_client
from twinkle_client import init_tinker_client
from twinkle.data_format import Message, Trajectory
from twinkle.template import Template
from twinkle.dataloader import DataLoader
from twinkle.dataset import Dataset, DatasetMeta
from twinkle.preprocessor import SelfCognitionProcessor
from twinkle.server.tinker.common import input_feature_to_datum

# Initialize the Tinker client before importing ServiceClient
init_tinker_client()

from tinker import ServiceClient

# The base model to fine-tune / evaluate
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
# base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
base_model = 'Qwen/Qwen2.5-7B-Instruct'
base_url = 'http://localhost:8000'


def train():
Expand All @@ -42,9 +49,11 @@ def train():

# Step 2: Initialize the training client

# Connect to the Twinkle server running locally
service_client = init_tinker_compat_client(
base_url='localhost:9000', api_key=os.environ.get('MODELSCOPE_TOKEN'))

service_client = ServiceClient(
base_url=base_url,
api_key=os.environ.get('MODELSCOPE_TOKEN')
)

# Create a LoRA training client for the base model (rank=16 for the LoRA adapter)
training_client = service_client.create_lora_training_client(base_model=base_model, rank=16)
Expand Down Expand Up @@ -85,8 +94,7 @@ def eval():
# Path to a previously saved LoRA checkpoint (twinkle:// URI)
weight_path = 'twinkle://20260212_174205-Qwen_Qwen2_5-7B-Instruct-51edc9ed/weights/twinkle-lora-2'

# Connect to the server and create a sampling client with the trained weights
service_client = init_tinker_compat_client(base_url='http://localhost:9000')
service_client = ServiceClient(base_url=base_url, api_key=os.environ.get('MODELSCOPE_TOKEN'))
sampling_client = service_client.create_sampling_client(model_path=weight_path, base_model=base_model)

# Step 2: Prepare the chat prompt
Expand Down
11 changes: 8 additions & 3 deletions cookbook/client/tinker/short_math_grpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from tinker import types
from typing import List, Tuple

from twinkle_client import init_tinker_compat_client
from twinkle_client import init_tinker_client
from twinkle import get_logger
from twinkle.advantage import GRPOAdvantage
from twinkle.data_format import Message, Trajectory
Expand Down Expand Up @@ -206,8 +206,13 @@ def main():

# Step 2: Initialize the Tinker-compatible client
logger.info('Connecting to Tinker server...')
service_client = init_tinker_compat_client(
base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN'))
init_tinker_client()

from tinker import ServiceClient
service_client = ServiceClient(
base_url='http://www.modelscope.cn/twinkle',
api_key=os.environ.get('MODELSCOPE_TOKEN')
)

logger.info('Creating LoRA training client...')
# Create a LoRA training client for GRPO
Expand Down
44 changes: 23 additions & 21 deletions docs/source_en/Usage Guide/Server and Client/Overview.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Server and Client
# Overview

Twinkle provides a complete HTTP Server/Client architecture that supports deploying models as services and remotely calling them through clients to complete training, inference, and other tasks. This architecture decouples **model hosting (Server side)** and **training logic (Client side)**, allowing multiple users to share the same base model for training.

Expand All @@ -14,7 +14,7 @@ Twinkle Server supports two protocol modes:
| Mode | server_type | Description |
|------|------------|------|
| **Twinkle Server** | `twinkle` | Native Twinkle protocol, used with `twinkle_client`, simpler API |
| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, used with `init_tinker_compat_client`, can reuse existing Tinker training code |
| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, can reuse existing Tinker training code |

### Two Model Backends

Expand All @@ -30,7 +30,7 @@ Regardless of Server mode, model loading supports two backends:
| Client | Initialization Method | Description |
|--------|---------|------|
| **Twinkle Client** | `init_twinkle_client` | Native client, simply change `from twinkle import` to `from twinkle_client import` to migrate local training code to remote calls |
| **Tinker Compatible Client** | `init_tinker_compat_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused |
| **Tinker Client** | `init_tinker_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused |

## How to Choose

Expand All @@ -47,7 +47,7 @@ Regardless of Server mode, model loading supports two backends:
| Scenario | Recommendation |
|------|------|
| Existing Twinkle local training code, want to switch to remote | Twinkle Client — only need to change import paths |
| Existing Tinker training code, want to reuse | Tinker Compatible Client — only need to initialize patch |
| Existing Tinker training code, want to reuse | Tinker Client — only need to initialize patch |
| New project | Twinkle Client — simpler API |

### Model Backend Selection
Expand All @@ -65,33 +65,35 @@ Complete runnable examples are located in the `cookbook/client/` directory:
```
cookbook/client/
├── twinkle/ # Twinkle native protocol examples
│ ├── transformer/ # Transformers backend
│ ├── transformer/ # Transformers backend server config
│ │ ├── server.py # Startup script
│ │ ├── server_config.yaml # Configuration file
│ │ └── lora.py # LoRA training client
│ └── megatron/ # Megatron backend
│ ├── server.py
│ ├── server_config.yaml
│ └── lora.py
│ │ └── server_config.yaml # Configuration file
│ ├── megatron/ # Megatron backend server config
│ │ ├── server.py
│ │ └── server_config.yaml
│ ├── grpo.py # GRPO training client
│ ├── sample.py # Inference sampling client
│ └── self_congnition.py # Self-cognition training client
└── tinker/ # Tinker compatible protocol examples
├── transformer/ # Transformers backend
├── transformer/ # Transformers backend server config
│ ├── server.py
│ └── server_config.yaml
├── megatron/ # Megatron backend server config
│ ├── server.py
│ ├── server_config.yaml
│ ├── lora.py # LoRA training
│ ├── sample.py # Inference sampling
│ └── self_congnition.py # Self-cognition training+evaluation
└── megatron/ # Megatron backend
├── server.py
├── server_config.yaml
└── lora.py
│ └── server_config_7b.yaml
├── lora.py # LoRA training client
├── sample.py # Inference sampling client
├── self_congnition.py # Self-cognition training+evaluation
└── short_math_grpo.py # GRPO math training client
```

Running steps:

```bash
# 1. Start Server first
python cookbook/client/twinkle/transformer/server.py
python cookbook/client/tinker/transformer/server.py

# 2. Run Client in another terminal
python cookbook/client/twinkle/transformer/lora.py
python cookbook/client/tinker/lora.py
```
1 change: 0 additions & 1 deletion docs/source_en/Usage Guide/Server and Client/Server.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@ CLI supported parameters:
| `-c, --config` | YAML configuration file path (required) | — |
| `-t, --server-type` | Server mode: `twinkle` or `tinker` | `twinkle` |
| `--namespace` | Ray namespace | tinker mode defaults to `twinkle_cluster` |
| `--no-wait` | Do not block and wait (daemon mode) | `False` |
| `--log-level` | Log level | `INFO` |

## YAML Configuration Details
Expand Down
Loading
Loading