Skip to content

Commit e7b842f

Browse files
committed
update doc
1 parent ade4359 commit e7b842f

File tree

12 files changed

+584
-18
lines changed

12 files changed

+584
-18
lines changed

cookbook/client/tinker/lora.py renamed to cookbook/client/tinker/custom_service/lora.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
# BASE_URL can be a local server endpoint such as http://localhost:8000, or
2323
# points to a previously deployed remote server, or
2424
# modelscope server such as 'http://www.modelscope.cn/twinkle'
25-
base_url='<BASE_URL>',
25+
base_url='http://localhost:8000',
2626
# API_KEY can be empty or a meaningful one according to server configuration
27-
api_key='<API_KEY>'
27+
api_key='EMPTY-TOKEN'
2828
)
2929

3030
# Step 4: List models available on the server to verify the connection
@@ -61,7 +61,7 @@
6161

6262
# Step 6: Create or resume a training client.
6363
# If resume_path is set, it restores both model weights and optimizer state.
64-
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
64+
base_model = 'Qwen/Qwen3-4B'
6565
if not resume_path:
6666
training_client = service_client.create_lora_training_client(base_model=base_model)
6767
else:
File renamed without changes.

cookbook/client/tinker/megatron/server_config_7b.yaml renamed to cookbook/client/tinker/custom_service/megatron/server_config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ applications:
2424
server_config:
2525
per_token_model_limit: 3 # Maximum number of models (adapters) per token (server-globally enforced)
2626
supported_models:
27-
- Qwen/Qwen2.5-7B-Instruct
27+
- Qwen/Qwen3-4B
2828
deployments:
2929
- name: TinkerCompatServer
3030
autoscaling_config:
@@ -37,11 +37,11 @@ applications:
3737
# 2. Model Service (commented out) - Would host the base model for training.
3838
# Uncomment and configure if you need a training model worker.
3939
- name: models-Qwen2.5-7B-Instruct
40-
route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
40+
route_prefix: /api/v1/model/Qwen/Qwen3-4B
4141
import_path: model
4242
args:
4343
use_megatron: true
44-
model_id: "ms://Qwen/Qwen2.5-7B-Instruct" # ModelScope model identifier
44+
model_id: "ms://Qwen/Qwen3-4B" # ModelScope model identifier
4545
max_length: 10240
4646
nproc_per_node: 2 # Number of GPU processes per node
4747
device_group:
@@ -74,10 +74,10 @@ applications:
7474
# 3. Sampler Service - Runs inference / sampling using vLLM engine
7575
# Used for generating text from the model (e.g., evaluating LoRA results).
7676
# - name: sampler-Qwen2.5-7B-Instruct
77-
# route_prefix: /api/v1/sampler/Qwen/Qwen2.5-7B-Instruct
77+
# route_prefix: /api/v1/sampler/Qwen/Qwen3-4B
7878
# import_path: sampler
7979
# args:
80-
# model_id: "ms://Qwen/Qwen2.5-7B-Instruct" # ModelScope model identifier
80+
# model_id: "ms://Qwen/Qwen3-4B" # ModelScope model identifier
8181
# nproc_per_node: 2 # Number of GPU processes per node
8282
# sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
8383
# engine_args: # vLLM engine-specific settings

cookbook/client/tinker/sample.py renamed to cookbook/client/tinker/custom_service/sample.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
from tinker import ServiceClient
1818

1919
# Step 2: Define the base model and connect to the server
20-
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
20+
base_model = 'Qwen/Qwen3-4B'
2121
service_client = ServiceClient(
22-
base_url='http://www.modelscope.cn/twinkle',
23-
api_key=os.environ.get('MODELSCOPE_TOKEN')
22+
base_url='http://localhost:8000',
23+
api_key='EMPTY-TOKEN'
2424
)
2525

2626
# Step 3: Create a sampling client by loading weights from a saved checkpoint.

cookbook/client/tinker/self_congnition.py renamed to cookbook/client/tinker/custom_service/self_congnition.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
from tinker import ServiceClient
2424

2525
# The base model to fine-tune / evaluate
26-
# base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
27-
base_model = 'Qwen/Qwen2.5-7B-Instruct'
26+
base_model = 'Qwen/Qwen3-4B'
2827
base_url = 'http://localhost:8000'
2928

3029

0 commit comments

Comments
 (0)