Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,4 @@ megatron_output/
ast_index_file.py
test_cookbook/
/test*.py
swanlog/
16 changes: 8 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,23 @@ repos:
hooks:
- id: pyupgrade
args: [--py38-plus]
exclude: ^client_tools/
exclude: ^(examples/|cookbook/|client_tools/|src/twinkle_client/)

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v6.0.0
hooks:
- id: trailing-whitespace
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
- id: check-yaml
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
- id: end-of-file-fixer
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
- id: requirements-txt-fixer
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
- id: double-quote-string-fixer
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
- id: check-merge-conflict
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
- id: mixed-line-ending
args: ["--fix=lf"]
exclude: ^client_tools/
exclude: ^(client_tools/|src/twinkle_client/)
278 changes: 0 additions & 278 deletions cookbook/client/tinker/grpo.py

This file was deleted.

16 changes: 11 additions & 5 deletions cookbook/client/tinker/megatron/server_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ applications:
device_mesh:
device_type: cuda
dp_size: 4
queue_config:
rps_limit: 20 # Max requests per second
tps_limit: 10000 # Max tokens per second
deployments:
- name: SamplerManagement
autoscaling_config:
Expand All @@ -77,7 +80,9 @@ applications:
args:
use_megatron: true # Use the Megatron backend (instead of HuggingFace Transformers)
model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507" # ModelScope model identifier
nproc_per_node: 4 # Number of GPU processes per node
max_length: 10240 # model max length
max_loras: 5 # model max loras
nproc_per_node: 4 # Number of GPU processes per node
device_group:
name: model
ranks: [4,5,6,7] # GPU rank indices
Expand All @@ -88,11 +93,12 @@ applications:
ep_size: 2

queue_config:
rps_limit: 100 # Max requests per second
tps_limit: 100000 # Max tokens per second
rps_limit: 20 # Max requests per second
tps_limit: 10000 # Max tokens per second
adapter_config:
per_token_adapter_limit: 30 # Max concurrent LoRA adapters
adapter_timeout: 1800 # Seconds before idle adapter unload
per_token_adapter_limit: 3 # Max concurrent LoRA adapters
adapter_timeout: 30 # Seconds before idle adapter unload
adapter_max_lifetime: 36000 # Maximum lifetime of an adapter in seconds (e.g., 10 hours)
deployments:
- name: ModelManagement
autoscaling_config:
Expand Down
8 changes: 5 additions & 3 deletions cookbook/client/tinker/megatron/server_config_7b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,12 @@ applications:
dp_size: 2
queue_config:
rps_limit: 100 # Max requests per second
tps_limit: 100000 # Max tokens per second
tps_limit: 10000 # Max tokens per second for a single user
max_input_tokens: 10000 # Maximum input tokens per request
adapter_config:
per_token_adapter_limit: 30 # Max concurrent LoRA adapters
adapter_timeout: 1800 # Seconds before idle adapter unload
adapter_timeout: 30 # Seconds before idle adapter unload
adapter_max_lifetime: 36000 # Maximum lifetime of an adapter in seconds (e.g., 10 hours)
per_token_adapter_limit: 30
deployments:
- name: ModelManagement
autoscaling_config:
Expand Down
Loading
Loading