Skip to content

Commit 4f28480

Browse files
committed
Merge branch 'fix_moe' of https://github.com/modelscope/twinkle into fix_moe
2 parents 23038ff + 37fcf17 commit 4f28480

File tree

1 file changed

+8
-4
lines changed

1 file changed

+8
-4
lines changed

cookbook/client/tinker/megatron/server_config.yaml

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,9 @@ applications:
56 56
device_mesh:
57 57
device_type: cuda
58 58
dp_size: 4
59+
queue_config:
60+
rps_limit: 20 # Max requests per second
61+
tps_limit: 10000 # Max tokens per second
59 62
deployments:
60 63
- name: SamplerManagement
61 64
autoscaling_config:
@@ -90,11 +93,12 @@ applications:
90 93
ep_size: 2
91 94

92 95
queue_config:
93-
rps_limit: 100 # Max requests per second
94-
tps_limit: 100000 # Max tokens per second
96+
rps_limit: 20 # Max requests per second
97+
tps_limit: 10000 # Max tokens per second
95 98
adapter_config:
96-
per_token_adapter_limit: 30 # Max concurrent LoRA adapters
97-
adapter_timeout: 1800 # Seconds before idle adapter unload
99+
per_token_adapter_limit: 3 # Max concurrent LoRA adapters
100+
adapter_timeout: 30 # Seconds before idle adapter unload
101+
adapter_max_lifetime: 36000 # Maximum lifetime of an adapter in seconds (e.g., 10 hours)
98 102
deployments:
99 103
- name: ModelManagement
100 104
autoscaling_config:

0 commit comments

Comments
 (0)