diff --git a/README.md b/README.md index 7df948cb..39cb0832 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ if __name__ == '__main__': import os from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor @@ -220,8 +220,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# Initialize tinker client -service_client = init_tinker_compat_client(base_url, api_key) +# Initialize Tinker client before importing ServiceClient +init_tinker_client() +from tinker import ServiceClient + +service_client = ServiceClient(base_url=base_url, api_key=api_key) training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16) # Training loop: use input_feature_to_datum to transfer the input format diff --git a/README_ZH.md b/README_ZH.md index 132fc244..65edf58d 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -186,7 +186,7 @@ if __name__ == '__main__': import os from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor @@ -203,8 +203,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# Initialize tinker client -service_client = init_tinker_compat_client(base_url, api_key) +# Initialize Tinker client before importing ServiceClient +init_tinker_client() +from tinker import 
ServiceClient + +service_client = ServiceClient(base_url=base_url, api_key=api_key) training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16) # Training loop: use input_feature_to_datum to transfer the input format diff --git a/cookbook/client/tinker/lora.py b/cookbook/client/tinker/lora.py index 2714e0af..e94719bc 100644 --- a/cookbook/client/tinker/lora.py +++ b/cookbook/client/tinker/lora.py @@ -13,20 +13,25 @@ import os -from twinkle_client import init_tinker_compat_client +# Step 2: Initialize Tinker client before importing ServiceClient +from twinkle_client import init_tinker_client -# Step 2: Initialize the Tinker-compatible client to communicate with the server. -# - base_url: the address of the running server -# - api_key: authentication token (loaded from environment variable) -service_client = init_tinker_compat_client( - base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN')) +init_tinker_client() -# Step 3: List models available on the server to verify the connection +# Step 3: Use ServiceClient directly from tinker +from tinker import ServiceClient + +service_client = ServiceClient( + base_url='http://www.modelscope.cn/twinkle', + api_key=os.environ.get('MODELSCOPE_TOKEN') +) + +# Step 4: List models available on the server to verify the connection print('Available models:') for item in service_client.get_server_capabilities().supported_models: print('- ' + item.model_name) -# Step 4: Create a REST client for querying training runs and checkpoints. +# Step 5: Create a REST client for querying training runs and checkpoints. # This is useful for inspecting previous training sessions or resuming training. rest_client = service_client.create_rest_client() @@ -51,7 +56,7 @@ # Uncomment the line below to resume from the last checkpoint: # resume_path = chpt.tinker_path -# Step 5: Create or resume a training client. +# Step 6: Create or resume a training client. 
# If resume_path is set, it restores both model weights and optimizer state. base_model = 'Qwen/Qwen2.5-7B-Instruct' if not resume_path: @@ -60,7 +65,7 @@ print('Resuming from ' + resume_path) training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path) -# Step 6: Prepare training data manually +# Step 7: Prepare training data manually # # This example teaches the model to translate English into Pig Latin. # Each example has an "input" (English phrase) and "output" (Pig Latin). @@ -146,7 +151,7 @@ def process_example(example: dict, tokenizer) -> types.Datum: datum0.loss_fn_inputs['weights'].tolist())): print(f'{repr(tokenizer.decode([inp])):<20} {repr(tokenizer.decode([tgt])):<20} {wgt:<10}') -# Step 7: Run the training loop +# Step 8: Run the training loop # # For each epoch, iterate over multiple batches: # - forward_backward: sends data to the server, computes loss & gradients @@ -174,7 +179,7 @@ def process_example(example: dict, tokenizer) -> types.Datum: save_result = save_future.result() print(f'Saved checkpoint for epoch {epoch} to {save_result.path}') -# Step 8: Publish the final checkpoint to ModelScope Hub. +# Step 9: Publish the final checkpoint to ModelScope Hub. # NOTE: Requires a valid ModelScope token set as api_key when initializing the client. 
# The published model name will be: {run_id}_{checkpoint_name} rest_client.publish_checkpoint_from_tinker_path(save_result.path).result() diff --git a/cookbook/client/tinker/megatron/server_config_7b.yaml b/cookbook/client/tinker/megatron/server_config_7b.yaml index cdac55f7..0c8c0550 100644 --- a/cookbook/client/tinker/megatron/server_config_7b.yaml +++ b/cookbook/client/tinker/megatron/server_config_7b.yaml @@ -21,7 +21,8 @@ applications: route_prefix: /api/v1 # API endpoint prefix (Tinker-compatible) import_path: server # Python module to import args: - + supported_models: + - Qwen/Qwen2.5-7B-Instruct deployments: - name: TinkerCompatServer autoscaling_config: diff --git a/cookbook/client/tinker/sample.py b/cookbook/client/tinker/sample.py index eacd043b..84931a59 100644 --- a/cookbook/client/tinker/sample.py +++ b/cookbook/client/tinker/sample.py @@ -4,27 +4,34 @@ # for text generation (sampling) via the Tinker-compatible client API. # The server must be running first (see server.py and server_config.yaml). +import os from tinker import types from twinkle.data_format import Message, Trajectory from twinkle.template import Template -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client -# Step 1: Define the base model and connect to the server +# Step 1: Initialize Tinker client +init_tinker_client() + +from tinker import ServiceClient + +# Step 2: Define the base model and connect to the server base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' -service_client = init_tinker_compat_client( +service_client = ServiceClient( base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN') ) -# Step 2: Create a sampling client by loading weights from a saved checkpoint. + +# Step 3: Create a sampling client by loading weights from a saved checkpoint. # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint. 
# The server will load the base model and apply the LoRA adapter weights. -service_client.create_sampling_client( +sampling_client = service_client.create_sampling_client( model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1', base_model=base_model ) -# Step 3: Load the tokenizer locally to encode the prompt and decode the results +# Step 4: Load the tokenizer locally to encode the prompt and decode the results print(f'Using model {base_model}') template = Template(model_id=f'ms://{base_model}') @@ -40,7 +47,7 @@ input_ids = input_feature['input_ids'].tolist() -# Step 4: Prepare the prompt and sampling parameters +# Step 5: Prepare the prompt and sampling parameters prompt = types.ModelInput.from_ints(input_ids) params = types.SamplingParams( max_tokens=128, # Maximum number of tokens to generate @@ -48,13 +55,13 @@ stop=['\n'] # Stop generation when a newline character is produced ) -# Step 5: Send the sampling request to the server. +# Step 6: Send the sampling request to the server. # num_samples=8 generates 8 independent completions for the same prompt. 
print('Sampling...') future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1) result = future.result() -# Step 6: Decode and print the generated responses +# Step 7: Decode and print the generated responses print('Responses:') for i, seq in enumerate(result.sequences): print(f'{i}: {repr(template.decode(seq.tokens))}') diff --git a/cookbook/client/tinker/self_congnition.py b/cookbook/client/tinker/self_congnition.py index 13a462b4..240c25c6 100644 --- a/cookbook/client/tinker/self_congnition.py +++ b/cookbook/client/tinker/self_congnition.py @@ -10,7 +10,7 @@ import os from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.data_format import Message, Trajectory from twinkle.template import Template from twinkle.dataloader import DataLoader @@ -18,8 +18,15 @@ from twinkle.preprocessor import SelfCognitionProcessor from twinkle.server.tinker.common import input_feature_to_datum +# Initialize the Tinker client before importing ServiceClient +init_tinker_client() + +from tinker import ServiceClient + # The base model to fine-tune / evaluate -base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' +# base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' +base_model = 'Qwen/Qwen2.5-7B-Instruct' +base_url = 'http://localhost:8000' def train(): @@ -42,9 +49,11 @@ def train(): # Step 2: Initialize the training client - # Connect to the Twinkle server running locally - service_client = init_tinker_compat_client( - base_url='localhost:9000', api_key=os.environ.get('MODELSCOPE_TOKEN')) + + service_client = ServiceClient( + base_url=base_url, + api_key=os.environ.get('MODELSCOPE_TOKEN') + ) # Create a LoRA training client for the base model (rank=16 for the LoRA adapter) training_client = service_client.create_lora_training_client(base_model=base_model, rank=16) @@ -85,8 +94,7 @@ def eval(): # Path to a previously saved LoRA checkpoint (twinkle:// URI) 
weight_path = 'twinkle://20260212_174205-Qwen_Qwen2_5-7B-Instruct-51edc9ed/weights/twinkle-lora-2' - # Connect to the server and create a sampling client with the trained weights - service_client = init_tinker_compat_client(base_url='http://localhost:9000') + service_client = ServiceClient(base_url=base_url, api_key=os.environ.get('MODELSCOPE_TOKEN')) sampling_client = service_client.create_sampling_client(model_path=weight_path, base_model=base_model) # Step 2: Prepare the chat prompt diff --git a/cookbook/client/tinker/short_math_grpo.py b/cookbook/client/tinker/short_math_grpo.py index d843322b..43647ab3 100644 --- a/cookbook/client/tinker/short_math_grpo.py +++ b/cookbook/client/tinker/short_math_grpo.py @@ -24,7 +24,7 @@ from tinker import types from typing import List, Tuple -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle import get_logger from twinkle.advantage import GRPOAdvantage from twinkle.data_format import Message, Trajectory @@ -206,8 +206,13 @@ def main(): # Step 2: Initialize the Tinker-compatible client logger.info('Connecting to Tinker server...') - service_client = init_tinker_compat_client( - base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN')) + init_tinker_client() + + from tinker import ServiceClient + service_client = ServiceClient( + base_url='http://www.modelscope.cn/twinkle', + api_key=os.environ.get('MODELSCOPE_TOKEN') + ) logger.info('Creating LoRA training client...') # Create a LoRA training client for GRPO diff --git a/docs/source_en/Usage Guide/Server and Client/Overview.md b/docs/source_en/Usage Guide/Server and Client/Overview.md index a91ccfbf..478c8fbc 100644 --- a/docs/source_en/Usage Guide/Server and Client/Overview.md +++ b/docs/source_en/Usage Guide/Server and Client/Overview.md @@ -1,4 +1,4 @@ -# Server and Client +# Overview Twinkle provides a complete HTTP Server/Client architecture that supports deploying models as 
services and remotely calling them through clients to complete training, inference, and other tasks. This architecture decouples **model hosting (Server side)** and **training logic (Client side)**, allowing multiple users to share the same base model for training. @@ -14,7 +14,7 @@ Twinkle Server supports two protocol modes: | Mode | server_type | Description | |------|------------|------| | **Twinkle Server** | `twinkle` | Native Twinkle protocol, used with `twinkle_client`, simpler API | -| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, used with `init_tinker_compat_client`, can reuse existing Tinker training code | +| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, can reuse existing Tinker training code | ### Two Model Backends @@ -30,7 +30,7 @@ Regardless of Server mode, model loading supports two backends: | Client | Initialization Method | Description | |--------|---------|------| | **Twinkle Client** | `init_twinkle_client` | Native client, simply change `from twinkle import` to `from twinkle_client import` to migrate local training code to remote calls | -| **Tinker Compatible Client** | `init_tinker_compat_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused | +| **Tinker Client** | `init_tinker_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused | ## How to Choose @@ -47,7 +47,7 @@ Regardless of Server mode, model loading supports two backends: | Scenario | Recommendation | |------|------| | Existing Twinkle local training code, want to switch to remote | Twinkle Client — only need to change import paths | -| Existing Tinker training code, want to reuse | Tinker Compatible Client — only need to initialize patch | +| Existing Tinker training code, want to reuse | Tinker Client — only need to initialize patch | | New project | Twinkle Client — simpler API | ### Model Backend Selection @@ -65,33 +65,35 @@ Complete runnable 
examples are located in the `cookbook/client/` directory: ``` cookbook/client/ ├── twinkle/ # Twinkle native protocol examples -│ ├── transformer/ # Transformers backend +│ ├── transformer/ # Transformers backend server config │ │ ├── server.py # Startup script -│ │ ├── server_config.yaml # Configuration file -│ │ └── lora.py # LoRA training client -│ └── megatron/ # Megatron backend -│ ├── server.py -│ ├── server_config.yaml -│ └── lora.py +│ │ └── server_config.yaml # Configuration file +│ ├── megatron/ # Megatron backend server config +│ │ ├── server.py +│ │ └── server_config.yaml +│ ├── grpo.py # GRPO training client +│ ├── sample.py # Inference sampling client +│ └── self_congnition.py # Self-cognition training client └── tinker/ # Tinker compatible protocol examples - ├── transformer/ # Transformers backend + ├── transformer/ # Transformers backend server config + │ ├── server.py + │ └── server_config.yaml + ├── megatron/ # Megatron backend server config │ ├── server.py │ ├── server_config.yaml - │ ├── lora.py # LoRA training - │ ├── sample.py # Inference sampling - │ └── self_congnition.py # Self-cognition training+evaluation - └── megatron/ # Megatron backend - ├── server.py - ├── server_config.yaml - └── lora.py + │ └── server_config_7b.yaml + ├── lora.py # LoRA training client + ├── sample.py # Inference sampling client + ├── self_congnition.py # Self-cognition training+evaluation + └── short_math_grpo.py # GRPO math training client ``` Running steps: ```bash # 1. Start Server first -python cookbook/client/twinkle/transformer/server.py +python cookbook/client/tinker/transformer/server.py # 2. 
Run Client in another terminal -python cookbook/client/twinkle/transformer/lora.py +python cookbook/client/tinker/lora.py ``` diff --git a/docs/source_en/Usage Guide/Server and Client/Server.md b/docs/source_en/Usage Guide/Server and Client/Server.md index 302a5875..a82002a9 100644 --- a/docs/source_en/Usage Guide/Server and Client/Server.md +++ b/docs/source_en/Usage Guide/Server and Client/Server.md @@ -210,7 +210,6 @@ CLI supported parameters: | `-c, --config` | YAML configuration file path (required) | — | | `-t, --server-type` | Server mode: `twinkle` or `tinker` | `twinkle` | | `--namespace` | Ray namespace | tinker mode defaults to `twinkle_cluster` | -| `--no-wait` | Do not block and wait (daemon mode) | `False` | | `--log-level` | Log level | `INFO` | ## YAML Configuration Details diff --git a/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md b/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md index d3cf4a8f..8978e2a3 100644 --- a/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md +++ b/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md @@ -1,18 +1,21 @@ -# Tinker Compatible Client +# Tinker Client -The Tinker Compatible Client is suitable for scenarios with existing Tinker training code. After initializing with `init_tinker_compat_client`, it patches the Tinker SDK to point to the Twinkle Server, **and the rest of the code can directly reuse existing Tinker training code**. +The Tinker Client is suitable for scenarios with existing Tinker training code. After initializing with `init_tinker_client`, it patches the Tinker SDK to point to the Twinkle Server, **and the rest of the code can directly reuse existing Tinker training code**. 
## Initialization ```python -from twinkle_client import init_tinker_compat_client - -# Initialize Tinker compatible client -# init_tinker_compat_client automatically patches the Tinker SDK, -# allowing it to connect to Twinkle Server instead of Tinker Server -service_client = init_tinker_compat_client( - base_url='http://localhost:8000', # Server address - api_key='your-api-key' # Authentication token +# Initialize Tinker client before importing ServiceClient +from twinkle_client import init_tinker_client + +init_tinker_client() + +# Use ServiceClient directly from tinker +from tinker import ServiceClient + +service_client = ServiceClient( + base_url='http://localhost:8000', # Server address + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token ) # Verify connection: List available models on Server @@ -20,15 +23,14 @@ for item in service_client.get_server_capabilities().supported_models: print("- " + item.model_name) ``` -### What does init_tinker_compat_client do? +### What does init_tinker_client do? -When calling `init_tinker_compat_client`, the following operations are automatically executed: +When calling `init_tinker_client`, the following operations are automatically executed: 1. **Patch Tinker SDK**: Bypass Tinker's `tinker://` prefix validation, allowing it to connect to standard HTTP addresses 2. **Set Request Headers**: Inject necessary authentication headers such as `serve_multiplexed_model_id` and `Authorization` -3. **Return `ServiceClient`**: Returns a standard Tinker `ServiceClient` object, subsequent operations are completely identical to native Tinker -This means that after initialization, **all existing Tinker training code can be used directly** without any modifications. +After initialization, simply import `from tinker import ServiceClient` to connect to Twinkle Server, and **all existing Tinker training code can be used directly** without any modifications. 
## Complete Training Example @@ -38,14 +40,16 @@ import numpy as np import dotenv dotenv.load_dotenv('.env') -from tinker import types +# Step 1: Initialize Tinker client before importing ServiceClient +from twinkle_client import init_tinker_client +init_tinker_client() + +from tinker import types, ServiceClient from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client -# Step 1: Initialize client (automatically patches Tinker SDK) -service_client = init_tinker_compat_client( +service_client = ServiceClient( base_url='http://localhost:8000', - api_key=os.environ.get('MODELSCOPE_TOKEN') + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token ) # Step 2: Query existing training runs (optional) @@ -135,12 +139,17 @@ Tinker compatible mode can also leverage Twinkle's dataset components to simplif ```python from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor from twinkle.server.tinker.common import input_feature_to_datum +# Initialize Tinker client before importing ServiceClient +init_tinker_client() + +from tinker import ServiceClient + base_model = "Qwen/Qwen2.5-0.5B-Instruct" # Use Twinkle's Dataset component to load and preprocess data @@ -150,8 +159,11 @@ dataset.map(SelfCognitionProcessor('twinkle model', 'twinkle team'), load_from_c dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# Initialize Tinker compatible client -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +# Initialize client +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token +) training_client = 
service_client.create_lora_training_client(base_model=base_model, rank=16) # Training loop: Use input_feature_to_datum to convert data format @@ -201,14 +213,22 @@ for i, seq in enumerate(result.sequences): You can also load saved checkpoints for inference: ```python +import os from tinker import types from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client + +# Initialize Tinker client before importing ServiceClient +init_tinker_client() + +from tinker import ServiceClient base_model = "Qwen/Qwen2.5-0.5B-Instruct" -# Initialize client -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token +) # Create sampling client from saved checkpoint sampling_client = service_client.create_sampling_client( diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" index 35b39536..7c204f2c 100644 --- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" +++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" @@ -1,18 +1,21 @@ -# Tinker 兼容客户端 +# Tinker 客户端 -Tinker 兼容 Client 适用于已有 Tinker 训练代码的场景。通过 
`init_tinker_compat_client` 初始化后,会对 Tinker SDK 进行 patch,使其指向 Twinkle Server,**其余代码可直接复用已有的 Tinker 训练代码**。 +Tinker Client 适用于已有 Tinker 训练代码的场景。通过 `init_tinker_client` 初始化后,会对 Tinker SDK 进行 patch,使其指向 Twinkle Server,**其余代码可直接复用已有的 Tinker 训练代码**。 ## 初始化 ```python -from twinkle_client import init_tinker_compat_client - -# 初始化 Tinker 兼容客户端 -# init_tinker_compat_client 会自动 patch Tinker SDK, -# 使其可以连接到 Twinkle Server 而非 Tinker Server -service_client = init_tinker_compat_client( - base_url='http://localhost:8000', # Server 地址 - api_key='your-api-key' # 认证令牌 +# 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +from twinkle_client import init_tinker_client + +init_tinker_client() + +# 直接使用 tinker 中的 ServiceClient +from tinker import ServiceClient + +service_client = ServiceClient( + base_url='http://localhost:8000', # Server 地址 + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token ) # 验证连接:列出 Server 上可用的模型 @@ -20,15 +23,14 @@ for item in service_client.get_server_capabilities().supported_models: print("- " + item.model_name) ``` -### init_tinker_compat_client 做了什么? +### init_tinker_client 做了什么? -调用 `init_tinker_compat_client` 时,会自动执行以下操作: +调用 `init_tinker_client` 时,会自动执行以下操作: 1. **Patch Tinker SDK**:绕过 Tinker 的 `tinker://` 前缀校验,使其可以连接到标准 HTTP 地址 2. **设置请求头**:注入 `serve_multiplexed_model_id` 和 `Authorization` 等必要的认证头 -3. 
**返回 `ServiceClient`**:返回一个标准的 Tinker `ServiceClient` 对象,后续操作与原生 Tinker 完全一致 -这意味着在初始化之后,**所有已有的 Tinker 训练代码都可以直接使用**,无需任何修改。 +初始化之后,直接导入 `from tinker import ServiceClient` 即可连接到 Twinkle Server,**所有已有的 Tinker 训练代码都可以直接使用**,无需任何修改。 ## 完整训练示例 @@ -38,14 +40,16 @@ import numpy as np import dotenv dotenv.load_dotenv('.env') -from tinker import types +# Step 1: 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +from twinkle_client import init_tinker_client +init_tinker_client() + +from tinker import types, ServiceClient from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client -# Step 1: 初始化客户端(会自动 patch Tinker SDK) -service_client = init_tinker_compat_client( +service_client = ServiceClient( base_url='http://localhost:8000', - api_key=os.environ.get('MODELSCOPE_TOKEN') + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token ) # Step 2: 查询已有训练运行(可选) @@ -135,12 +139,17 @@ Tinker 兼容模式也可以利用 Twinkle 的数据集组件来简化数据准 ```python from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor from twinkle.server.tinker.common import input_feature_to_datum +# 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +init_tinker_client() + +from tinker import ServiceClient + base_model = "Qwen/Qwen2.5-0.5B-Instruct" # 使用 Twinkle 的 Dataset 组件加载和预处理数据 @@ -150,8 +159,11 @@ dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_ dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# 初始化 Tinker 兼容客户端 -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +# 初始化客户端 +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token +) training_client = 
service_client.create_lora_training_client(base_model=base_model, rank=16) # 训练循环:使用 input_feature_to_datum 转换数据格式 @@ -201,14 +213,22 @@ for i, seq in enumerate(result.sequences): 也可以加载已保存的检查点进行推理: ```python +import os from tinker import types from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client + +# 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +init_tinker_client() + +from tinker import ServiceClient base_model = "Qwen/Qwen2.5-0.5B-Instruct" -# 初始化客户端 -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token +) # 从已保存的检查点创建采样客户端 sampling_client = service_client.create_sampling_client( diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" index a09b81e2..fd1ef94e 100644 --- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" +++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" @@ -153,7 +153,6 @@ CLI 支持的参数: | `-c, --config` | YAML 配置文件路径(必须) | — | | `-t, --server-type` | Server 模式:`twinkle` 或 `tinker` | `twinkle` | | `--namespace` | Ray 命名空间 | tinker 模式默认 `twinkle_cluster` | -| `--no-wait` | 不阻塞等待(守护模式) | `False` | | `--log-level` | 日志级别 | `INFO` | ## YAML 配置详解 diff --git 
"a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" index e4617854..f9a7e1b9 100644 --- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" +++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" @@ -1,4 +1,4 @@ -# 服务端和客户端 +# 概述 Twinkle 提供了完整的 HTTP Server/Client 架构,支持将模型部署为服务,并通过客户端远程调用完成训练、推理等任务。这种架构将**模型承载(Server 端)**和**训练逻辑(Client 端)**解耦,使得多个用户可以共享同一个基座模型进行训练。 @@ -14,7 +14,7 @@ Twinkle Server 支持两种协议模式: | 模式 | server_type | 说明 | |------|------------|------| | **Twinkle Server** | `twinkle` | 原生 Twinkle 协议,搭配 `twinkle_client` 使用,API 更简洁 | -| **Tinker 兼容 Server** | `tinker` | 兼容 Tinker 协议,搭配 `init_tinker_compat_client` 使用,可复用已有 Tinker 训练代码 | +| **Tinker 兼容 Server** | `tinker` | 兼容 Tinker 协议,可复用已有 Tinker 训练代码 | ### 两种模型后端 @@ -30,7 +30,7 @@ Twinkle Server 支持两种协议模式: | Client | 初始化方式 | 说明 | |--------|---------|------| | **Twinkle Client** | `init_twinkle_client` | 原生客户端,将 `from twinkle import` 改为 `from twinkle_client import` 即可将本地训练代码迁移为远端调用 | -| **Tinker 兼容 Client** | `init_tinker_compat_client` | 对 Tinker SDK 进行 patch,使已有 Tinker 训练代码可直接复用 | +| **Tinker Client** | `init_tinker_client` | 对 Tinker SDK 进行 patch,使已有 Tinker 训练代码可直接复用 | ## 如何选择 @@ -47,7 +47,7 @@ Twinkle Server 支持两种协议模式: | 场景 | 推荐 | |------|------| | 已有 Twinkle 本地训练代码,希望改为远端 | Twinkle Client — 仅需改 import 路径 | -| 已有 Tinker 训练代码,希望复用 | Tinker 兼容 Client — 仅需初始化 patch | +| 已有 Tinker 训练代码,希望复用 | Tinker Client — 仅需初始化 patch | | 全新项目 | Twinkle Client — API 
更简洁 | ### 模型后端选择 @@ -65,33 +65,35 @@ Twinkle Server 支持两种协议模式: ``` cookbook/client/ ├── twinkle/ # Twinkle 原生协议示例 -│ ├── transformer/ # Transformers 后端 +│ ├── transformer/ # Transformers 后端服务配置 │ │ ├── server.py # 启动脚本 -│ │ ├── server_config.yaml # 配置文件 -│ │ └── lora.py # LoRA 训练客户端 -│ └── megatron/ # Megatron 后端 -│ ├── server.py -│ ├── server_config.yaml -│ └── lora.py +│ │ └── server_config.yaml # 配置文件 +│ ├── megatron/ # Megatron 后端服务配置 +│ │ ├── server.py +│ │ └── server_config.yaml +│ ├── grpo.py # GRPO 训练客户端 +│ ├── sample.py # 推理采样客户端 +│ └── self_congnition.py # 自我认知训练客户端 └── tinker/ # Tinker 兼容协议示例 - ├── transformer/ # Transformers 后端 + ├── transformer/ # Transformers 后端服务配置 + │ ├── server.py + │ └── server_config.yaml + ├── megatron/ # Megatron 后端服务配置 │ ├── server.py │ ├── server_config.yaml - │ ├── lora.py # LoRA 训练 - │ ├── sample.py # 推理采样 - │ └── self_congnition.py # 自我认知训练+评估 - └── megatron/ # Megatron 后端 - ├── server.py - ├── server_config.yaml - └── lora.py + │ └── server_config_7b.yaml + ├── lora.py # LoRA 训练客户端 + ├── sample.py # 推理采样客户端 + ├── self_congnition.py # 自我认知训练+评估 + └── short_math_grpo.py # GRPO 数学训练客户端 ``` 运行步骤: ```bash # 1. 先启动 Server -python cookbook/client/twinkle/transformer/server.py +python cookbook/client/tinker/transformer/server.py # 2. 
在另一个终端运行 Client -python cookbook/client/twinkle/transformer/lora.py +python cookbook/client/tinker/lora.py ``` diff --git a/src/twinkle/server/__main__.py b/src/twinkle/server/__main__.py index c0c942c5..17fe87fc 100644 --- a/src/twinkle/server/__main__.py +++ b/src/twinkle/server/__main__.py @@ -36,9 +36,6 @@ def create_parser() -> argparse.ArgumentParser: # Start tinker server with specific config python -m twinkle.server -c config.yaml -t tinker - - # Run in background (daemon mode) - python -m twinkle.server -c config.yaml --no-wait """, ) @@ -72,11 +69,6 @@ def create_parser() -> argparse.ArgumentParser: ) # Runtime options - parser.add_argument( - '--no-wait', - action='store_true', - help="Don't block waiting for Enter (daemon mode)", - ) parser.add_argument( '--log-level', type=str, @@ -115,7 +107,6 @@ def main(args: list[str] | None = None) -> int: config_path=config_path, server_type=parsed_args.server_type, ray_namespace=parsed_args.namespace, - wait=not parsed_args.no_wait, ) return 0 diff --git a/src/twinkle/server/launcher.py b/src/twinkle/server/launcher.py index e1af794d..b5b53f6a 100644 --- a/src/twinkle/server/launcher.py +++ b/src/twinkle/server/launcher.py @@ -220,12 +220,9 @@ def _deploy_application(self, app_config: dict[str, Any]) -> None: serve.run(app, name=name, route_prefix=route_prefix) logger.info(f'Deployed {name} at {route_prefix}') - def launch(self, wait: bool = True) -> None: + def launch(self) -> None: """ Launch the server with all configured applications. 
- - Args: - wait: If True, block and wait for Enter to stop the server """ self._init_ray() self._start_serve() @@ -255,9 +252,8 @@ def launch(self, wait: bool = True) -> None: dict) else app_config.route_prefix print(f' - http://{host}:{port}{route_prefix}') - if wait: - while True: - time.sleep(3600) + while True: + time.sleep(3600) @classmethod def from_yaml( @@ -302,7 +298,6 @@ def launch_server( config_path: str | Path | None = None, server_type: str = 'twinkle', ray_namespace: str | None = None, - wait: bool = True, ) -> ServerLauncher: """ Launch a twinkle server with flexible configuration options. @@ -314,7 +309,6 @@ def launch_server( config_path: Path to YAML config file server_type: Server type ('tinker' or 'twinkle'), default is 'twinkle' ray_namespace: Ray namespace - wait: If True, block and wait for Enter to stop the server Returns: The ServerLauncher instance @@ -357,5 +351,5 @@ def launch_server( ray_namespace=ray_namespace, ) - launcher.launch(wait=wait) + launcher.launch() return launcher diff --git a/src/twinkle/server/tinker/common/compat_base.py b/src/twinkle/server/tinker/common/compat_base.py index 1e476bbb..54d665e3 100644 --- a/src/twinkle/server/tinker/common/compat_base.py +++ b/src/twinkle/server/tinker/common/compat_base.py @@ -3,9 +3,8 @@ from tinker import types from typing import List +from twinkle import DeviceMesh from twinkle.template import Template -from twinkle.utils.platform import DeviceMesh -from twinkle.utils.torch_utils import selective_log_softmax def collect_forward_backward_results(results, device_mesh: DeviceMesh): @@ -117,6 +116,8 @@ def get_template(self, adapter_name: str) -> Template: @staticmethod def _get_forward_output(inputs: List[types.Datum], logits: torch.Tensor) -> List[dict]: """Convert raw logits to the expected output format with logprobs and elementwise_loss.""" + from twinkle.utils.torch_utils import selective_log_softmax + results = [] for feature, logit in zip(inputs, logits): # Ensure 1D shape and 
correct device to avoid dimension mismatch and device errors diff --git a/src/twinkle/server/tinker/server.py b/src/twinkle/server/tinker/server.py index 1a706b45..3c9f4493 100644 --- a/src/twinkle/server/tinker/server.py +++ b/src/twinkle/server/tinker/server.py @@ -91,16 +91,17 @@ def __init__(self, def normalize_models(self, supported_models): # Normalize supported_models to objects; passing raw dicts can trigger internal errors # when creating LoRA training clients via the tinker API. - if supported_models: - normalized = [] - for item in supported_models: - if isinstance(item, types.SupportedModel): - normalized.append(item) - elif isinstance(item, dict): - normalized.append(types.SupportedModel(**item)) - else: - normalized.append(types.SupportedModel(name=item)) - return normalized + if not supported_models: + return [] + normalized = [] + for item in supported_models: + if isinstance(item, types.SupportedModel): + normalized.append(item) + elif isinstance(item, dict): + normalized.append(types.SupportedModel(**item)) + elif isinstance(item, str): + normalized.append(types.SupportedModel(model_name=item)) + return normalized def _validate_base_model(self, base_model: str) -> None: """Validate that base_model is in supported_models list. diff --git a/src/twinkle_client/__init__.py b/src/twinkle_client/__init__.py index f236f734..25564306 100644 --- a/src/twinkle_client/__init__.py +++ b/src/twinkle_client/__init__.py @@ -1,41 +1,35 @@ # Copyright (c) ModelScope Contributors. All rights reserved. 
from __future__ import annotations -from typing import TYPE_CHECKING, Optional - from twinkle.utils import requires from .http.utils import get_api_key, get_base_url, set_api_key, set_base_url from .manager import TwinkleClient, TwinkleClientError -if TYPE_CHECKING: - from tinker import ServiceClient - -def init_tinker_compat_client(base_url: str | None = None, api_key: str | None = None, **kwargs) -> ServiceClient: - requires('tinker') - from tinker import ServiceClient - from twinkle_client.http.utils import get_api_key, get_request_id - from twinkle_client.utils.patch_tinker import patch_tinker +def init_tinker_client(**kwargs) -> None: + """Initialize Tinker client with Twinkle-specific headers. - # Apply patch to bypass tinker:// prefix validation - patch_tinker() + After calling this function, users can directly use: + from tinker import ServiceClient + client = ServiceClient(base_url='...', api_key='...') - if not api_key: - api_key = get_api_key() + The ServiceClient will automatically include Twinkle-specific headers. 
- if base_url and not base_url.startswith(('http://', 'https://')): - base_url = f'http://{base_url}' + Args: + **kwargs: Additional keyword arguments (currently unused, reserved for future) - default_headers = { - 'serve_multiplexed_model_id': get_request_id(), - 'Authorization': 'Bearer ' + api_key, - 'Twinkle-Authorization': 'Bearer ' + api_key, # For server compatibility - } | kwargs.pop('default_headers', {}) - - service_client = ServiceClient(base_url=base_url, api_key=api_key, default_headers=default_headers, **kwargs) + Example: + >>> from twinkle_client import init_tinker_client + >>> init_tinker_client() + >>> from tinker import ServiceClient + >>> client = ServiceClient(base_url='http://localhost:8000', api_key='your_token') + """ + requires('tinker') + from twinkle_client.utils.patch_tinker import patch_tinker - return service_client + # Apply patches to tinker library (includes header injection) + patch_tinker() def init_twinkle_client(base_url: str | None = None, api_key: str | None = None, **kwargs) -> TwinkleClient: @@ -55,4 +49,4 @@ def init_twinkle_client(base_url: str | None = None, api_key: str | None = None, return TwinkleClient(base_url=base_url, api_key=api_key, **kwargs) -__all__ = ['TwinkleClient', 'TwinkleClientError', 'init_tinker_compat_client', 'init_twinkle_client'] +__all__ = ['TwinkleClient', 'TwinkleClientError', 'init_tinker_client', 'init_twinkle_client'] diff --git a/src/twinkle_client/utils/patch_tinker.py b/src/twinkle_client/utils/patch_tinker.py index 4f9b2760..73363472 100644 --- a/src/twinkle_client/utils/patch_tinker.py +++ b/src/twinkle_client/utils/patch_tinker.py @@ -123,6 +123,7 @@ def patch_tinker(): 1. InternalClientHolder._create_sampling_session to bypass 'tinker://' prefix validation 2. AsyncTinker.__init__ to bypass 'tml-' prefix validation for api_key 3. ParsedCheckpointTinkerPath.from_tinker_path to support both 'tinker://' and 'twinkle://' prefixes + 4. 
ServiceClient.__init__ to inject Twinkle-specific headers This patch is idempotent - calling it multiple times has no additional effect. """ @@ -143,6 +144,32 @@ def patch_tinker(): from tinker.types.checkpoint import ParsedCheckpointTinkerPath ParsedCheckpointTinkerPath.from_tinker_path = classmethod(_patched_from_tinker_path) + # Patch 4: inject Twinkle-specific headers by patching ServiceClient.__init__. + from tinker.lib.public_interfaces.service_client import ServiceClient + from twinkle_client.http.utils import get_request_id, get_api_key + + _original_service_client_init = ServiceClient.__init__ + + def _patched_service_client_init(self, user_metadata=None, **kwargs): + # Resolve api_key with the same priority order used by AsyncTinker: + # 1. explicit kwarg 2. TINKER_API_KEY env var 3. TWINKLE_SERVER_TOKEN env var + api_key = kwargs.get('api_key') + if api_key is None: + api_key = get_api_key() + + twinkle_headers = { + 'serve_multiplexed_model_id': get_request_id(), + 'Authorization': 'Bearer ' + api_key, + 'Twinkle-Authorization': 'Bearer ' + api_key, + } + # Merge: caller-supplied default_headers take precedence over twinkle_headers + user_default_headers = kwargs.pop('default_headers', {}) + kwargs['default_headers'] = twinkle_headers | user_default_headers + + _original_service_client_init(self, user_metadata=user_metadata, **kwargs) + + ServiceClient.__init__ = _patched_service_client_init + _patched = True except ImportError: # tinker not installed, skip patching diff --git a/tests/DeviceMesh/test_device_mesh.py b/tests/DeviceMesh/test_device_mesh.py index c35ea8fa..5d3f11ac 100644 --- a/tests/DeviceMesh/test_device_mesh.py +++ b/tests/DeviceMesh/test_device_mesh.py @@ -5,7 +5,7 @@ from unittest.mock import patch import twinkle -from twinkle.utils.platform import DeviceMesh, Platform +from twinkle import DeviceMesh, Platform twinkle.initialize(mode='local')