diff --git a/README.md b/README.md index 7df948cb..39cb0832 100644 --- a/README.md +++ b/README.md @@ -203,7 +203,7 @@ if __name__ == '__main__': import os from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor @@ -220,8 +220,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# Initialize tinker client -service_client = init_tinker_compat_client(base_url, api_key) +# Initialize Tinker client before importing ServiceClient +init_tinker_client() +from tinker import ServiceClient + +service_client = ServiceClient(base_url=base_url, api_key=api_key) training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16) # Training loop: use input_feature_to_datum to transfer the input format diff --git a/README_ZH.md b/README_ZH.md index 132fc244..65edf58d 100644 --- a/README_ZH.md +++ b/README_ZH.md @@ -186,7 +186,7 @@ if __name__ == '__main__': import os from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor @@ -203,8 +203,11 @@ dataset.map(SelfCognitionProcessor('twinkle Model', 'twinkle Team'), load_from_c dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# Initialize tinker client -service_client = init_tinker_compat_client(base_url, api_key) +# Initialize Tinker client before importing ServiceClient +init_tinker_client() +from tinker import 
ServiceClient + +service_client = ServiceClient(base_url=base_url, api_key=api_key) training_client = service_client.create_lora_training_client(base_model=base_model[len('ms://'):], rank=16) # Training loop: use input_feature_to_datum to transfer the input format diff --git a/cookbook/client/tinker/lora.py b/cookbook/client/tinker/lora.py index 2714e0af..e94719bc 100644 --- a/cookbook/client/tinker/lora.py +++ b/cookbook/client/tinker/lora.py @@ -13,20 +13,25 @@ import os -from twinkle_client import init_tinker_compat_client +# Step 2: Initialize Tinker client before importing ServiceClient +from twinkle_client import init_tinker_client -# Step 2: Initialize the Tinker-compatible client to communicate with the server. -# - base_url: the address of the running server -# - api_key: authentication token (loaded from environment variable) -service_client = init_tinker_compat_client( - base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN')) +init_tinker_client() -# Step 3: List models available on the server to verify the connection +# Step 3: Use ServiceClient directly from tinker +from tinker import ServiceClient + +service_client = ServiceClient( + base_url='http://www.modelscope.cn/twinkle', + api_key=os.environ.get('MODELSCOPE_TOKEN') +) + +# Step 4: List models available on the server to verify the connection print('Available models:') for item in service_client.get_server_capabilities().supported_models: print('- ' + item.model_name) -# Step 4: Create a REST client for querying training runs and checkpoints. +# Step 5: Create a REST client for querying training runs and checkpoints. # This is useful for inspecting previous training sessions or resuming training. rest_client = service_client.create_rest_client() @@ -51,7 +56,7 @@ # Uncomment the line below to resume from the last checkpoint: # resume_path = chpt.tinker_path -# Step 5: Create or resume a training client. +# Step 6: Create or resume a training client. 
# If resume_path is set, it restores both model weights and optimizer state. base_model = 'Qwen/Qwen2.5-7B-Instruct' if not resume_path: @@ -60,7 +65,7 @@ print('Resuming from ' + resume_path) training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path) -# Step 6: Prepare training data manually +# Step 7: Prepare training data manually # # This example teaches the model to translate English into Pig Latin. # Each example has an "input" (English phrase) and "output" (Pig Latin). @@ -146,7 +151,7 @@ def process_example(example: dict, tokenizer) -> types.Datum: datum0.loss_fn_inputs['weights'].tolist())): print(f'{repr(tokenizer.decode([inp])):<20} {repr(tokenizer.decode([tgt])):<20} {wgt:<10}') -# Step 7: Run the training loop +# Step 8: Run the training loop # # For each epoch, iterate over multiple batches: # - forward_backward: sends data to the server, computes loss & gradients @@ -174,7 +179,7 @@ def process_example(example: dict, tokenizer) -> types.Datum: save_result = save_future.result() print(f'Saved checkpoint for epoch {epoch} to {save_result.path}') -# Step 8: Publish the final checkpoint to ModelScope Hub. +# Step 9: Publish the final checkpoint to ModelScope Hub. # NOTE: Requires a valid ModelScope token set as api_key when initializing the client. 
# The published model name will be: {run_id}_{checkpoint_name} rest_client.publish_checkpoint_from_tinker_path(save_result.path).result() diff --git a/cookbook/client/tinker/megatron/server_config_7b.yaml b/cookbook/client/tinker/megatron/server_config_7b.yaml index cdac55f7..0c8c0550 100644 --- a/cookbook/client/tinker/megatron/server_config_7b.yaml +++ b/cookbook/client/tinker/megatron/server_config_7b.yaml @@ -21,7 +21,8 @@ applications: route_prefix: /api/v1 # API endpoint prefix (Tinker-compatible) import_path: server # Python module to import args: - + supported_models: + - Qwen/Qwen2.5-7B-Instruct deployments: - name: TinkerCompatServer autoscaling_config: diff --git a/cookbook/client/tinker/sample.py b/cookbook/client/tinker/sample.py index eacd043b..84931a59 100644 --- a/cookbook/client/tinker/sample.py +++ b/cookbook/client/tinker/sample.py @@ -4,27 +4,34 @@ # for text generation (sampling) via the Tinker-compatible client API. # The server must be running first (see server.py and server_config.yaml). +import os from tinker import types from twinkle.data_format import Message, Trajectory from twinkle.template import Template -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client -# Step 1: Define the base model and connect to the server +# Step 1: Initialize Tinker client +init_tinker_client() + +from tinker import ServiceClient + +# Step 2: Define the base model and connect to the server base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' -service_client = init_tinker_compat_client( +service_client = ServiceClient( base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN') ) -# Step 2: Create a sampling client by loading weights from a saved checkpoint. + +# Step 3: Create a sampling client by loading weights from a saved checkpoint. # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint. 
# The server will load the base model and apply the LoRA adapter weights. -service_client.create_sampling_client( +sampling_client = service_client.create_sampling_client( model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1', base_model=base_model ) -# Step 3: Load the tokenizer locally to encode the prompt and decode the results +# Step 4: Load the tokenizer locally to encode the prompt and decode the results print(f'Using model {base_model}') template = Template(model_id=f'ms://{base_model}') @@ -40,7 +47,7 @@ input_ids = input_feature['input_ids'].tolist() -# Step 4: Prepare the prompt and sampling parameters +# Step 5: Prepare the prompt and sampling parameters prompt = types.ModelInput.from_ints(input_ids) params = types.SamplingParams( max_tokens=128, # Maximum number of tokens to generate @@ -48,13 +55,13 @@ stop=['\n'] # Stop generation when a newline character is produced ) -# Step 5: Send the sampling request to the server. +# Step 6: Send the sampling request to the server. # num_samples=8 generates 8 independent completions for the same prompt. 
print('Sampling...') future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1) result = future.result() -# Step 6: Decode and print the generated responses +# Step 7: Decode and print the generated responses print('Responses:') for i, seq in enumerate(result.sequences): print(f'{i}: {repr(template.decode(seq.tokens))}') diff --git a/cookbook/client/tinker/self_congnition.py b/cookbook/client/tinker/self_congnition.py index 13a462b4..240c25c6 100644 --- a/cookbook/client/tinker/self_congnition.py +++ b/cookbook/client/tinker/self_congnition.py @@ -10,7 +10,7 @@ import os from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.data_format import Message, Trajectory from twinkle.template import Template from twinkle.dataloader import DataLoader @@ -18,8 +18,15 @@ from twinkle.preprocessor import SelfCognitionProcessor from twinkle.server.tinker.common import input_feature_to_datum +# Initialize the Tinker client before importing ServiceClient +init_tinker_client() + +from tinker import ServiceClient + # The base model to fine-tune / evaluate -base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' +# base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' +base_model = 'Qwen/Qwen2.5-7B-Instruct' +base_url = 'http://localhost:8000' def train(): @@ -42,9 +49,11 @@ def train(): # Step 2: Initialize the training client - # Connect to the Twinkle server running locally - service_client = init_tinker_compat_client( - base_url='localhost:9000', api_key=os.environ.get('MODELSCOPE_TOKEN')) + + service_client = ServiceClient( + base_url=base_url, + api_key=os.environ.get('MODELSCOPE_TOKEN') + ) # Create a LoRA training client for the base model (rank=16 for the LoRA adapter) training_client = service_client.create_lora_training_client(base_model=base_model, rank=16) @@ -85,8 +94,7 @@ def eval(): # Path to a previously saved LoRA checkpoint (twinkle:// URI) 
weight_path = 'twinkle://20260212_174205-Qwen_Qwen2_5-7B-Instruct-51edc9ed/weights/twinkle-lora-2' - # Connect to the server and create a sampling client with the trained weights - service_client = init_tinker_compat_client(base_url='http://localhost:9000') + service_client = ServiceClient(base_url=base_url, api_key=os.environ.get('MODELSCOPE_TOKEN')) sampling_client = service_client.create_sampling_client(model_path=weight_path, base_model=base_model) # Step 2: Prepare the chat prompt diff --git a/cookbook/client/tinker/short_math_grpo.py b/cookbook/client/tinker/short_math_grpo.py index d843322b..43647ab3 100644 --- a/cookbook/client/tinker/short_math_grpo.py +++ b/cookbook/client/tinker/short_math_grpo.py @@ -24,7 +24,7 @@ from tinker import types from typing import List, Tuple -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle import get_logger from twinkle.advantage import GRPOAdvantage from twinkle.data_format import Message, Trajectory @@ -206,8 +206,13 @@ def main(): # Step 2: Initialize the Tinker-compatible client logger.info('Connecting to Tinker server...') - service_client = init_tinker_compat_client( - base_url='http://www.modelscope.cn/twinkle', api_key=os.environ.get('MODELSCOPE_TOKEN')) + init_tinker_client() + + from tinker import ServiceClient + service_client = ServiceClient( + base_url='http://www.modelscope.cn/twinkle', + api_key=os.environ.get('MODELSCOPE_TOKEN') + ) logger.info('Creating LoRA training client...') # Create a LoRA training client for GRPO diff --git a/docs/source_en/Usage Guide/Server and Client/Overview.md b/docs/source_en/Usage Guide/Server and Client/Overview.md index a91ccfbf..478c8fbc 100644 --- a/docs/source_en/Usage Guide/Server and Client/Overview.md +++ b/docs/source_en/Usage Guide/Server and Client/Overview.md @@ -1,4 +1,4 @@ -# Server and Client +# Overview Twinkle provides a complete HTTP Server/Client architecture that supports deploying models as 
services and remotely calling them through clients to complete training, inference, and other tasks. This architecture decouples **model hosting (Server side)** and **training logic (Client side)**, allowing multiple users to share the same base model for training. @@ -14,7 +14,7 @@ Twinkle Server supports two protocol modes: | Mode | server_type | Description | |------|------------|------| | **Twinkle Server** | `twinkle` | Native Twinkle protocol, used with `twinkle_client`, simpler API | -| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, used with `init_tinker_compat_client`, can reuse existing Tinker training code | +| **Tinker Compatible Server** | `tinker` | Compatible with Tinker protocol, can reuse existing Tinker training code | ### Two Model Backends @@ -30,7 +30,7 @@ Regardless of Server mode, model loading supports two backends: | Client | Initialization Method | Description | |--------|---------|------| | **Twinkle Client** | `init_twinkle_client` | Native client, simply change `from twinkle import` to `from twinkle_client import` to migrate local training code to remote calls | -| **Tinker Compatible Client** | `init_tinker_compat_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused | +| **Tinker Client** | `init_tinker_client` | Patches Tinker SDK, allowing existing Tinker training code to be directly reused | ## How to Choose @@ -47,7 +47,7 @@ Regardless of Server mode, model loading supports two backends: | Scenario | Recommendation | |------|------| | Existing Twinkle local training code, want to switch to remote | Twinkle Client — only need to change import paths | -| Existing Tinker training code, want to reuse | Tinker Compatible Client — only need to initialize patch | +| Existing Tinker training code, want to reuse | Tinker Client — only need to initialize patch | | New project | Twinkle Client — simpler API | ### Model Backend Selection @@ -65,33 +65,35 @@ Complete runnable 
examples are located in the `cookbook/client/` directory: ``` cookbook/client/ ├── twinkle/ # Twinkle native protocol examples -│ ├── transformer/ # Transformers backend +│ ├── transformer/ # Transformers backend server config │ │ ├── server.py # Startup script -│ │ ├── server_config.yaml # Configuration file -│ │ └── lora.py # LoRA training client -│ └── megatron/ # Megatron backend -│ ├── server.py -│ ├── server_config.yaml -│ └── lora.py +│ │ └── server_config.yaml # Configuration file +│ ├── megatron/ # Megatron backend server config +│ │ ├── server.py +│ │ └── server_config.yaml +│ ├── grpo.py # GRPO training client +│ ├── sample.py # Inference sampling client +│ └── self_congnition.py # Self-cognition training client └── tinker/ # Tinker compatible protocol examples - ├── transformer/ # Transformers backend + ├── transformer/ # Transformers backend server config + │ ├── server.py + │ └── server_config.yaml + ├── megatron/ # Megatron backend server config │ ├── server.py │ ├── server_config.yaml - │ ├── lora.py # LoRA training - │ ├── sample.py # Inference sampling - │ └── self_congnition.py # Self-cognition training+evaluation - └── megatron/ # Megatron backend - ├── server.py - ├── server_config.yaml - └── lora.py + │ └── server_config_7b.yaml + ├── lora.py # LoRA training client + ├── sample.py # Inference sampling client + ├── self_congnition.py # Self-cognition training+evaluation + └── short_math_grpo.py # GRPO math training client ``` Running steps: ```bash # 1. Start Server first -python cookbook/client/twinkle/transformer/server.py +python cookbook/client/tinker/transformer/server.py # 2. 
Run Client in another terminal -python cookbook/client/twinkle/transformer/lora.py +python cookbook/client/tinker/lora.py ``` diff --git a/docs/source_en/Usage Guide/Server and Client/Server.md b/docs/source_en/Usage Guide/Server and Client/Server.md index 302a5875..a82002a9 100644 --- a/docs/source_en/Usage Guide/Server and Client/Server.md +++ b/docs/source_en/Usage Guide/Server and Client/Server.md @@ -210,7 +210,6 @@ CLI supported parameters: | `-c, --config` | YAML configuration file path (required) | — | | `-t, --server-type` | Server mode: `twinkle` or `tinker` | `twinkle` | | `--namespace` | Ray namespace | tinker mode defaults to `twinkle_cluster` | -| `--no-wait` | Do not block and wait (daemon mode) | `False` | | `--log-level` | Log level | `INFO` | ## YAML Configuration Details diff --git a/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md b/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md index d3cf4a8f..8978e2a3 100644 --- a/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md +++ b/docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md @@ -1,18 +1,21 @@ -# Tinker Compatible Client +# Tinker Client -The Tinker Compatible Client is suitable for scenarios with existing Tinker training code. After initializing with `init_tinker_compat_client`, it patches the Tinker SDK to point to the Twinkle Server, **and the rest of the code can directly reuse existing Tinker training code**. +The Tinker Client is suitable for scenarios with existing Tinker training code. After initializing with `init_tinker_client`, it patches the Tinker SDK to point to the Twinkle Server, **and the rest of the code can directly reuse existing Tinker training code**. 
## Initialization ```python -from twinkle_client import init_tinker_compat_client - -# Initialize Tinker compatible client -# init_tinker_compat_client automatically patches the Tinker SDK, -# allowing it to connect to Twinkle Server instead of Tinker Server -service_client = init_tinker_compat_client( - base_url='http://localhost:8000', # Server address - api_key='your-api-key' # Authentication token +# Initialize Tinker client before importing ServiceClient +from twinkle_client import init_tinker_client + +init_tinker_client() + +# Use ServiceClient directly from tinker +from tinker import ServiceClient + +service_client = ServiceClient( + base_url='http://localhost:8000', # Server address + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token ) # Verify connection: List available models on Server @@ -20,15 +23,14 @@ for item in service_client.get_server_capabilities().supported_models: print("- " + item.model_name) ``` -### What does init_tinker_compat_client do? +### What does init_tinker_client do? -When calling `init_tinker_compat_client`, the following operations are automatically executed: +When calling `init_tinker_client`, the following operations are automatically executed: 1. **Patch Tinker SDK**: Bypass Tinker's `tinker://` prefix validation, allowing it to connect to standard HTTP addresses 2. **Set Request Headers**: Inject necessary authentication headers such as `serve_multiplexed_model_id` and `Authorization` -3. **Return `ServiceClient`**: Returns a standard Tinker `ServiceClient` object, subsequent operations are completely identical to native Tinker -This means that after initialization, **all existing Tinker training code can be used directly** without any modifications. +After initialization, simply import `from tinker import ServiceClient` to connect to Twinkle Server, and **all existing Tinker training code can be used directly** without any modifications. 
## Complete Training Example @@ -38,14 +40,16 @@ import numpy as np import dotenv dotenv.load_dotenv('.env') -from tinker import types +# Step 1: Initialize Tinker client before importing ServiceClient +from twinkle_client import init_tinker_client +init_tinker_client() + +from tinker import types, ServiceClient from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client -# Step 1: Initialize client (automatically patches Tinker SDK) -service_client = init_tinker_compat_client( +service_client = ServiceClient( base_url='http://localhost:8000', - api_key=os.environ.get('MODELSCOPE_TOKEN') + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token ) # Step 2: Query existing training runs (optional) @@ -135,12 +139,17 @@ Tinker compatible mode can also leverage Twinkle's dataset components to simplif ```python from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor from twinkle.server.tinker.common import input_feature_to_datum +# Initialize Tinker client before importing ServiceClient +init_tinker_client() + +from tinker import ServiceClient + base_model = "Qwen/Qwen2.5-0.5B-Instruct" # Use Twinkle's Dataset component to load and preprocess data @@ -150,8 +159,11 @@ dataset.map(SelfCognitionProcessor('twinkle model', 'twinkle team'), load_from_c dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# Initialize Tinker compatible client -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +# Initialize client +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token +) training_client = 
service_client.create_lora_training_client(base_model=base_model, rank=16) # Training loop: Use input_feature_to_datum to convert data format @@ -201,14 +213,22 @@ for i, seq in enumerate(result.sequences): You can also load saved checkpoints for inference: ```python +import os from tinker import types from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client + +# Initialize Tinker client before importing ServiceClient +init_tinker_client() + +from tinker import ServiceClient base_model = "Qwen/Qwen2.5-0.5B-Instruct" -# Initialize client -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # Recommended: set to ModelScope Token +) # Create sampling client from saved checkpoint sampling_client = service_client.create_sampling_client( diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" index 35b39536..7c204f2c 100644 --- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" +++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/Tinker\345\205\274\345\256\271\345\256\242\346\210\267\347\253\257.md" @@ -1,18 +1,21 @@ -# Tinker 兼容客户端 +# Tinker 客户端 -Tinker 兼容 Client 适用于已有 Tinker 训练代码的场景。通过 
`init_tinker_compat_client` 初始化后,会对 Tinker SDK 进行 patch,使其指向 Twinkle Server,**其余代码可直接复用已有的 Tinker 训练代码**。 +Tinker Client 适用于已有 Tinker 训练代码的场景。通过 `init_tinker_client` 初始化后,会对 Tinker SDK 进行 patch,使其指向 Twinkle Server,**其余代码可直接复用已有的 Tinker 训练代码**。 ## 初始化 ```python -from twinkle_client import init_tinker_compat_client - -# 初始化 Tinker 兼容客户端 -# init_tinker_compat_client 会自动 patch Tinker SDK, -# 使其可以连接到 Twinkle Server 而非 Tinker Server -service_client = init_tinker_compat_client( - base_url='http://localhost:8000', # Server 地址 - api_key='your-api-key' # 认证令牌 +# 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +from twinkle_client import init_tinker_client + +init_tinker_client() + +# 直接使用 tinker 中的 ServiceClient +from tinker import ServiceClient + +service_client = ServiceClient( + base_url='http://localhost:8000', # Server 地址 + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token ) # 验证连接:列出 Server 上可用的模型 @@ -20,15 +23,14 @@ for item in service_client.get_server_capabilities().supported_models: print("- " + item.model_name) ``` -### init_tinker_compat_client 做了什么? +### init_tinker_client 做了什么? -调用 `init_tinker_compat_client` 时,会自动执行以下操作: +调用 `init_tinker_client` 时,会自动执行以下操作: 1. **Patch Tinker SDK**:绕过 Tinker 的 `tinker://` 前缀校验,使其可以连接到标准 HTTP 地址 2. **设置请求头**:注入 `serve_multiplexed_model_id` 和 `Authorization` 等必要的认证头 -3. 
**返回 `ServiceClient`**:返回一个标准的 Tinker `ServiceClient` 对象,后续操作与原生 Tinker 完全一致 -这意味着在初始化之后,**所有已有的 Tinker 训练代码都可以直接使用**,无需任何修改。 +初始化之后,直接导入 `from tinker import ServiceClient` 即可连接到 Twinkle Server,**所有已有的 Tinker 训练代码都可以直接使用**,无需任何修改。 ## 完整训练示例 @@ -38,14 +40,16 @@ import numpy as np import dotenv dotenv.load_dotenv('.env') -from tinker import types +# Step 1: 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +from twinkle_client import init_tinker_client +init_tinker_client() + +from tinker import types, ServiceClient from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client -# Step 1: 初始化客户端(会自动 patch Tinker SDK) -service_client = init_tinker_compat_client( +service_client = ServiceClient( base_url='http://localhost:8000', - api_key=os.environ.get('MODELSCOPE_TOKEN') + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token ) # Step 2: 查询已有训练运行(可选) @@ -135,12 +139,17 @@ Tinker 兼容模式也可以利用 Twinkle 的数据集组件来简化数据准 ```python from tqdm import tqdm from tinker import types -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client from twinkle.dataloader import DataLoader from twinkle.dataset import Dataset, DatasetMeta from twinkle.preprocessor import SelfCognitionProcessor from twinkle.server.tinker.common import input_feature_to_datum +# 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +init_tinker_client() + +from tinker import ServiceClient + base_model = "Qwen/Qwen2.5-0.5B-Instruct" # 使用 Twinkle 的 Dataset 组件加载和预处理数据 @@ -150,8 +159,11 @@ dataset.map(SelfCognitionProcessor('twinkle模型', 'twinkle团队'), load_from_ dataset.encode(batched=True, load_from_cache_file=False) dataloader = DataLoader(dataset=dataset, batch_size=8) -# 初始化 Tinker 兼容客户端 -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +# 初始化客户端 +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token +) training_client = 
service_client.create_lora_training_client(base_model=base_model, rank=16) # 训练循环:使用 input_feature_to_datum 转换数据格式 @@ -201,14 +213,22 @@ for i, seq in enumerate(result.sequences): 也可以加载已保存的检查点进行推理: ```python +import os from tinker import types from modelscope import AutoTokenizer -from twinkle_client import init_tinker_compat_client +from twinkle_client import init_tinker_client + +# 在导入 ServiceClient 之前,先初始化 Tinker 客户端 +init_tinker_client() + +from tinker import ServiceClient base_model = "Qwen/Qwen2.5-0.5B-Instruct" -# 初始化客户端 -service_client = init_tinker_compat_client(base_url='http://localhost:8000') +service_client = ServiceClient( + base_url='http://localhost:8000', + api_key=os.environ.get('MODELSCOPE_TOKEN') # 建议设置为 ModelScope Token +) # 从已保存的检查点创建采样客户端 sampling_client = service_client.create_sampling_client( diff --git "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" index a09b81e2..fd1ef94e 100644 --- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" +++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\234\215\345\212\241\347\253\257.md" @@ -153,7 +153,6 @@ CLI 支持的参数: | `-c, --config` | YAML 配置文件路径(必须) | — | | `-t, --server-type` | Server 模式:`twinkle` 或 `tinker` | `twinkle` | | `--namespace` | Ray 命名空间 | tinker 模式默认 `twinkle_cluster` | -| `--no-wait` | 不阻塞等待(守护模式) | `False` | | `--log-level` | 日志级别 | `INFO` | ## YAML 配置详解 diff --git 
"a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" index e4617854..f9a7e1b9 100644 --- "a/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" +++ "b/docs/source_zh/\344\275\277\347\224\250\346\214\207\345\274\225/\346\234\215\345\212\241\347\253\257\345\222\214\345\256\242\346\210\267\347\253\257/\346\246\202\350\277\260.md" @@ -1,4 +1,4 @@ -# 服务端和客户端 +# 概述 Twinkle 提供了完整的 HTTP Server/Client 架构,支持将模型部署为服务,并通过客户端远程调用完成训练、推理等任务。这种架构将**模型承载(Server 端)**和**训练逻辑(Client 端)**解耦,使得多个用户可以共享同一个基座模型进行训练。 @@ -14,7 +14,7 @@ Twinkle Server 支持两种协议模式: | 模式 | server_type | 说明 | |------|------------|------| | **Twinkle Server** | `twinkle` | 原生 Twinkle 协议,搭配 `twinkle_client` 使用,API 更简洁 | -| **Tinker 兼容 Server** | `tinker` | 兼容 Tinker 协议,搭配 `init_tinker_compat_client` 使用,可复用已有 Tinker 训练代码 | +| **Tinker 兼容 Server** | `tinker` | 兼容 Tinker 协议,可复用已有 Tinker 训练代码 | ### 两种模型后端 @@ -30,7 +30,7 @@ Twinkle Server 支持两种协议模式: | Client | 初始化方式 | 说明 | |--------|---------|------| | **Twinkle Client** | `init_twinkle_client` | 原生客户端,将 `from twinkle import` 改为 `from twinkle_client import` 即可将本地训练代码迁移为远端调用 | -| **Tinker 兼容 Client** | `init_tinker_compat_client` | 对 Tinker SDK 进行 patch,使已有 Tinker 训练代码可直接复用 | +| **Tinker Client** | `init_tinker_client` | 对 Tinker SDK 进行 patch,使已有 Tinker 训练代码可直接复用 | ## 如何选择 @@ -47,7 +47,7 @@ Twinkle Server 支持两种协议模式: | 场景 | 推荐 | |------|------| | 已有 Twinkle 本地训练代码,希望改为远端 | Twinkle Client — 仅需改 import 路径 | -| 已有 Tinker 训练代码,希望复用 | Tinker 兼容 Client — 仅需初始化 patch | +| 已有 Tinker 训练代码,希望复用 | Tinker Client — 仅需初始化 patch | | 全新项目 | Twinkle Client — API 
更简洁 | ### 模型后端选择 @@ -65,33 +65,35 @@ Twinkle Server 支持两种协议模式: ``` cookbook/client/ ├── twinkle/ # Twinkle 原生协议示例 -│ ├── transformer/ # Transformers 后端 +│ ├── transformer/ # Transformers 后端服务配置 │ │ ├── server.py # 启动脚本 -│ │ ├── server_config.yaml # 配置文件 -│ │ └── lora.py # LoRA 训练客户端 -│ └── megatron/ # Megatron 后端 -│ ├── server.py -│ ├── server_config.yaml -│ └── lora.py +│ │ └── server_config.yaml # 配置文件 +│ ├── megatron/ # Megatron 后端服务配置 +│ │ ├── server.py +│ │ └── server_config.yaml +│ ├── grpo.py # GRPO 训练客户端 +│ ├── sample.py # 推理采样客户端 +│ └── self_congnition.py # 自我认知训练客户端 └── tinker/ # Tinker 兼容协议示例 - ├── transformer/ # Transformers 后端 + ├── transformer/ # Transformers 后端服务配置 + │ ├── server.py + │ └── server_config.yaml + ├── megatron/ # Megatron 后端服务配置 │ ├── server.py │ ├── server_config.yaml - │ ├── lora.py # LoRA 训练 - │ ├── sample.py # 推理采样 - │ └── self_congnition.py # 自我认知训练+评估 - └── megatron/ # Megatron 后端 - ├── server.py - ├── server_config.yaml - └── lora.py + │ └── server_config_7b.yaml + ├── lora.py # LoRA 训练客户端 + ├── sample.py # 推理采样客户端 + ├── self_congnition.py # 自我认知训练+评估 + └── short_math_grpo.py # GRPO 数学训练客户端 ``` 运行步骤: ```bash # 1. 先启动 Server -python cookbook/client/twinkle/transformer/server.py +python cookbook/client/tinker/transformer/server.py # 2. 
在另一个终端运行 Client -python cookbook/client/twinkle/transformer/lora.py +python cookbook/client/tinker/lora.py ``` diff --git a/src/twinkle/server/__main__.py b/src/twinkle/server/__main__.py index c0c942c5..17fe87fc 100644 --- a/src/twinkle/server/__main__.py +++ b/src/twinkle/server/__main__.py @@ -36,9 +36,6 @@ def create_parser() -> argparse.ArgumentParser: # Start tinker server with specific config python -m twinkle.server -c config.yaml -t tinker - - # Run in background (daemon mode) - python -m twinkle.server -c config.yaml --no-wait """, ) @@ -72,11 +69,6 @@ def create_parser() -> argparse.ArgumentParser: ) # Runtime options - parser.add_argument( - '--no-wait', - action='store_true', - help="Don't block waiting for Enter (daemon mode)", - ) parser.add_argument( '--log-level', type=str, @@ -115,7 +107,6 @@ def main(args: list[str] | None = None) -> int: config_path=config_path, server_type=parsed_args.server_type, ray_namespace=parsed_args.namespace, - wait=not parsed_args.no_wait, ) return 0 diff --git a/src/twinkle/server/launcher.py b/src/twinkle/server/launcher.py index e1af794d..b5b53f6a 100644 --- a/src/twinkle/server/launcher.py +++ b/src/twinkle/server/launcher.py @@ -220,12 +220,9 @@ def _deploy_application(self, app_config: dict[str, Any]) -> None: serve.run(app, name=name, route_prefix=route_prefix) logger.info(f'Deployed {name} at {route_prefix}') - def launch(self, wait: bool = True) -> None: + def launch(self) -> None: """ Launch the server with all configured applications. 
- - Args: - wait: If True, block and wait for Enter to stop the server """ self._init_ray() self._start_serve() @@ -255,9 +252,8 @@ def launch(self, wait: bool = True) -> None: dict) else app_config.route_prefix print(f' - http://{host}:{port}{route_prefix}') - if wait: - while True: - time.sleep(3600) + while True: + time.sleep(3600) @classmethod def from_yaml( @@ -302,7 +298,6 @@ def launch_server( config_path: str | Path | None = None, server_type: str = 'twinkle', ray_namespace: str | None = None, - wait: bool = True, ) -> ServerLauncher: """ Launch a twinkle server with flexible configuration options. @@ -314,7 +309,6 @@ def launch_server( config_path: Path to YAML config file server_type: Server type ('tinker' or 'twinkle'), default is 'twinkle' ray_namespace: Ray namespace - wait: If True, block and wait for Enter to stop the server Returns: The ServerLauncher instance @@ -357,5 +351,5 @@ def launch_server( ray_namespace=ray_namespace, ) - launcher.launch(wait=wait) + launcher.launch() return launcher diff --git a/src/twinkle/server/tinker/common/compat_base.py b/src/twinkle/server/tinker/common/compat_base.py index 1e476bbb..54d665e3 100644 --- a/src/twinkle/server/tinker/common/compat_base.py +++ b/src/twinkle/server/tinker/common/compat_base.py @@ -3,9 +3,8 @@ from tinker import types from typing import List +from twinkle import DeviceMesh from twinkle.template import Template -from twinkle.utils.platform import DeviceMesh -from twinkle.utils.torch_utils import selective_log_softmax def collect_forward_backward_results(results, device_mesh: DeviceMesh): @@ -117,6 +116,8 @@ def get_template(self, adapter_name: str) -> Template: @staticmethod def _get_forward_output(inputs: List[types.Datum], logits: torch.Tensor) -> List[dict]: """Convert raw logits to the expected output format with logprobs and elementwise_loss.""" + from twinkle.utils.torch_utils import selective_log_softmax + results = [] for feature, logit in zip(inputs, logits): # Ensure 1D shape and 
correct device to avoid dimension mismatch and device errors diff --git a/src/twinkle/server/tinker/server.py b/src/twinkle/server/tinker/server.py index 1a706b45..3c9f4493 100644 --- a/src/twinkle/server/tinker/server.py +++ b/src/twinkle/server/tinker/server.py @@ -91,16 +91,17 @@ def __init__(self, def normalize_models(self, supported_models): # Normalize supported_models to objects; passing raw dicts can trigger internal errors # when creating LoRA training clients via the tinker API. - if supported_models: - normalized = [] - for item in supported_models: - if isinstance(item, types.SupportedModel): - normalized.append(item) - elif isinstance(item, dict): - normalized.append(types.SupportedModel(**item)) - else: - normalized.append(types.SupportedModel(name=item)) - return normalized + if not supported_models: + return [] + normalized = [] + for item in supported_models: + if isinstance(item, types.SupportedModel): + normalized.append(item) + elif isinstance(item, dict): + normalized.append(types.SupportedModel(**item)) + elif isinstance(item, str): + normalized.append(types.SupportedModel(model_name=item)) + return normalized def _validate_base_model(self, base_model: str) -> None: """Validate that base_model is in supported_models list. diff --git a/src/twinkle_client/__init__.py b/src/twinkle_client/__init__.py index f236f734..25564306 100644 --- a/src/twinkle_client/__init__.py +++ b/src/twinkle_client/__init__.py @@ -1,41 +1,35 @@ # Copyright (c) ModelScope Contributors. All rights reserved. 
from __future__ import annotations -from typing import TYPE_CHECKING, Optional - from twinkle.utils import requires from .http.utils import get_api_key, get_base_url, set_api_key, set_base_url from .manager import TwinkleClient, TwinkleClientError -if TYPE_CHECKING: - from tinker import ServiceClient - -def init_tinker_compat_client(base_url: str | None = None, api_key: str | None = None, **kwargs) -> ServiceClient: - requires('tinker') - from tinker import ServiceClient - from twinkle_client.http.utils import get_api_key, get_request_id - from twinkle_client.utils.patch_tinker import patch_tinker +def init_tinker_client(**kwargs) -> None: + """Initialize Tinker client with Twinkle-specific headers. - # Apply patch to bypass tinker:// prefix validation - patch_tinker() + After calling this function, users can directly use: + from tinker import ServiceClient + client = ServiceClient(base_url='...', api_key='...') - if not api_key: - api_key = get_api_key() + The ServiceClient will automatically include Twinkle-specific headers. 
- if base_url and not base_url.startswith(('http://', 'https://')): - base_url = f'http://{base_url}' + Args: + **kwargs: Additional keyword arguments (currently unused, reserved for future) - default_headers = { - 'serve_multiplexed_model_id': get_request_id(), - 'Authorization': 'Bearer ' + api_key, - 'Twinkle-Authorization': 'Bearer ' + api_key, # For server compatibility - } | kwargs.pop('default_headers', {}) - - service_client = ServiceClient(base_url=base_url, api_key=api_key, default_headers=default_headers, **kwargs) + Example: + >>> from twinkle_client import init_tinker_client + >>> init_tinker_client() + >>> from tinker import ServiceClient + >>> client = ServiceClient(base_url='http://localhost:8000', api_key='your_token') + """ + requires('tinker') + from twinkle_client.utils.patch_tinker import patch_tinker - return service_client + # Apply patches to tinker library (includes header injection) + patch_tinker() def init_twinkle_client(base_url: str | None = None, api_key: str | None = None, **kwargs) -> TwinkleClient: @@ -55,4 +49,4 @@ def init_twinkle_client(base_url: str | None = None, api_key: str | None = None, return TwinkleClient(base_url=base_url, api_key=api_key, **kwargs) -__all__ = ['TwinkleClient', 'TwinkleClientError', 'init_tinker_compat_client', 'init_twinkle_client'] +__all__ = ['TwinkleClient', 'TwinkleClientError', 'init_tinker_client', 'init_twinkle_client'] diff --git a/src/twinkle_client/utils/patch_tinker.py b/src/twinkle_client/utils/patch_tinker.py index 4f9b2760..73363472 100644 --- a/src/twinkle_client/utils/patch_tinker.py +++ b/src/twinkle_client/utils/patch_tinker.py @@ -123,6 +123,7 @@ def patch_tinker(): 1. InternalClientHolder._create_sampling_session to bypass 'tinker://' prefix validation 2. AsyncTinker.__init__ to bypass 'tml-' prefix validation for api_key 3. ParsedCheckpointTinkerPath.from_tinker_path to support both 'tinker://' and 'twinkle://' prefixes + 4. 
ServiceClient.__init__ to inject Twinkle-specific headers This patch is idempotent - calling it multiple times has no additional effect. """ @@ -143,6 +144,32 @@ def patch_tinker(): from tinker.types.checkpoint import ParsedCheckpointTinkerPath ParsedCheckpointTinkerPath.from_tinker_path = classmethod(_patched_from_tinker_path) + # Patch 4: inject Twinkle-specific headers by patching ServiceClient.__init__. + from tinker.lib.public_interfaces.service_client import ServiceClient + from twinkle_client.http.utils import get_request_id, get_api_key + + _original_service_client_init = ServiceClient.__init__ + + def _patched_service_client_init(self, user_metadata=None, **kwargs): + # Resolve api_key with the same priority order used by AsyncTinker: + # 1. explicit kwarg 2. TINKER_API_KEY env var 3. TWINKLE_SERVER_TOKEN env var + api_key = kwargs.get('api_key') + if api_key is None: + api_key = get_api_key() + + twinkle_headers = { + 'serve_multiplexed_model_id': get_request_id(), + 'Authorization': 'Bearer ' + api_key, + 'Twinkle-Authorization': 'Bearer ' + api_key, + } + # Merge: caller-supplied default_headers take precedence over twinkle_headers + user_default_headers = kwargs.pop('default_headers', {}) + kwargs['default_headers'] = twinkle_headers | user_default_headers + + _original_service_client_init(self, user_metadata=user_metadata, **kwargs) + + ServiceClient.__init__ = _patched_service_client_init + _patched = True except ImportError: # tinker not installed, skip patching diff --git a/tests/DeviceMesh/test_device_mesh.py b/tests/DeviceMesh/test_device_mesh.py index c35ea8fa..5d3f11ac 100644 --- a/tests/DeviceMesh/test_device_mesh.py +++ b/tests/DeviceMesh/test_device_mesh.py @@ -5,7 +5,7 @@ from unittest.mock import patch import twinkle -from twinkle.utils.platform import DeviceMesh, Platform +from twinkle import DeviceMesh, Platform twinkle.initialize(mode='local')