File tree Expand file tree Collapse file tree 1 file changed +8
-4
lines changed
cookbook/client/tinker/megatron Expand file tree Collapse file tree 1 file changed +8
-4
lines changed Original file line number Diff line number Diff line change @@ -56,6 +56,9 @@ applications:
5656 device_mesh :
5757 device_type : cuda
5858 dp_size : 4
59+ queue_config :
60+ rps_limit : 20 # Max requests per second
61+ tps_limit : 10000 # Max tokens per second
5962 deployments :
6063 - name : SamplerManagement
6164 autoscaling_config :
@@ -90,11 +93,12 @@ applications:
9093 ep_size : 2
9194
9295 queue_config :
93- rps_limit : 100 # Max requests per second
94- tps_limit : 100000 # Max tokens per second
96+ rps_limit : 20 # Max requests per second
97+ tps_limit : 10000 # Max tokens per second
9598 adapter_config :
96- per_token_adapter_limit : 30 # Max concurrent LoRA adapters
97- adapter_timeout : 1800 # Seconds before idle adapter unload
99+ per_token_adapter_limit : 3 # Max concurrent LoRA adapters
100+ adapter_timeout : 30 # Seconds before idle adapter unload
101+ adapter_max_lifetime : 36000 # Maximum lifetime of an adapter in seconds (36000 s = 10 hours)
98102 deployments :
99103 - name : ModelManagement
100104 autoscaling_config :
You can’t perform that action at this time.
0 commit comments