Skip to content

Commit 08ee10a

Browse files
committed
change config
1 parent ba835f2 commit 08ee10a

File tree

1 file changed

+10
-8
lines changed

1 file changed

+10
-8
lines changed

cookbook/client/tinker/megatron/server_config.yaml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@ applications:
3737

3838
# 3. Sampler Service - Runs inference / sampling using vLLM engine
3939
# Used for generating text from the model (e.g., evaluating LoRA results).
40-
- name: sampler-Qwen2.5-3B-Instruct
41-
route_prefix: /api/v1/sampler/Qwen/Qwen2.5-3B-Instruct
40+
- name: sampler-Qwen3-30B-A3B-Instruct-2507
41+
route_prefix: /api/v1/sampler/Qwen/Qwen3-30B-A3B-Instruct-2507
4242
import_path: sampler
4343
args:
44-
model_id: "ms://Qwen/Qwen2.5-3B-Instruct" # ModelScope model identifier
44+
model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
4545
nproc_per_node: 4 # Number of GPU processes per node
4646
sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
4747
engine_args: # vLLM engine-specific settings
@@ -71,20 +71,22 @@ applications:
7171

7272
# 2. Model Service - Hosts the base model for training.
7373
# Configure the backend and parallelism settings for the training model worker.
74-
- name: models-Qwen2.5-3B-Instruct
75-
route_prefix: /api/v1/model/Qwen/Qwen2.5-3B-Instruct
74+
- name: models-Qwen3-30B-A3B-Instruct-2507
75+
route_prefix: /api/v1/model/Qwen/Qwen3-30B-A3B-Instruct-2507
7676
import_path: model
7777
args:
78-
use_megatron: false # Use HuggingFace Transformers backend
79-
model_id: "ms://Qwen/Qwen2.5-3B-Instruct" # ModelScope model identifier
78+
use_megatron: true # Use Megatron backend (set false for HuggingFace Transformers)
79+
model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
8080
nproc_per_node: 4 # Number of GPU processes per node
8181
device_group:
8282
name: model
8383
ranks: [4,5,6,7] # GPU rank indices
8484
device_type: cuda
8585
device_mesh:
8686
device_type: cuda
87-
dp_size: 4
87+
dp_size: 2
88+
tp_size: 2
89+
ep_size: 2
8890

8991
queue_config:
9092
rps_limit: 100 # Max requests per second

0 commit comments

Comments
 (0)