Skip to content

Commit eec089a

Browse files
committed
update io
1 parent 3bc466f commit eec089a

31 files changed

+153
-150
lines changed

client_tools/client_generator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ class {class_name}({inheritance}):
347347
def __init__({init_params}):
348348
from twinkle_client.http import get_base_url
349349
350-
self.server_url = f'{{get_base_url()}}/processors/twinkle'
350+
self.server_url = f'{{get_base_url()}}/processor/twinkle'
351351
response = http_post(
352352
url=f'{{self.server_url}}/create',
353353
json_data={{
@@ -466,7 +466,7 @@ def __init__(self, model_id: str, **kwargs):
466466
self.model_id = model_id
467467
if '://' in model_id:
468468
model_id = model_id.split('://')[1]
469-
self.server_url = f'{self.server_url}/models/{model_id}/twinkle'
469+
self.server_url = f'{self.server_url}/model/{model_id}/twinkle'
470470
self.adapter_name = None
471471
response = http_post(
472472
url=f'{self.server_url}/create',
@@ -743,7 +743,7 @@ def __init__(self, model_id: str, **kwargs):
743743
self.adapter_name = None
744744
if '://' in model_id:
745745
model_id = model_id.split('://')[1]
746-
self.server_url = f'{self.server_url}/samplers/{model_id}/twinkle'
746+
self.server_url = f'{self.server_url}/sampler/{model_id}/twinkle'
747747
response = http_post(
748748
url=f'{self.server_url}/create',
749749
json_data=kwargs

cookbook/client/server/transformer/server_config.yaml

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,3 +100,26 @@ applications:
100100
# runtime_env:
101101
# env_vars:
102102
# TWINKLE_TRUST_REMOTE_CODE: "0"
103+
104+
# 4. Processor Service - Hosts processor workers on CPU devices
105+
- name: processor
106+
route_prefix: /api/v1/processor
107+
import_path: processor
108+
args:
109+
nproc_per_node: 2 # Number of processor workers per node
110+
ncpu_proc_per_node: 2 # Number of CPU processes per node
111+
device_group:
112+
name: model
113+
ranks: 2
114+
device_type: CPU
115+
device_mesh:
116+
device_type: CPU
117+
dp_size: 2 # Data-parallel size
118+
deployments:
119+
- name: ProcessorManagement
120+
autoscaling_config:
121+
min_replicas: 1
122+
max_replicas: 1
123+
target_ongoing_requests: 128
124+
ray_actor_options:
125+
num_cpus: 0.1

src/twinkle/server/common/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Copyright (c) ModelScope Contributors. All rights reserved.
2+
from .checkpoint_factory import create_checkpoint_manager, create_training_run_manager
23
from .datum import datum_to_input_feature, extract_rl_feature, input_feature_to_datum
3-
from .io_utils import create_checkpoint_manager, create_training_run_manager, validate_ownership, validate_user_path
44
from .router import StickyLoraRequestRouter
55
from .serialize import deserialize_object, serialize_object
66

@@ -10,8 +10,6 @@
1010
'input_feature_to_datum',
1111
'create_checkpoint_manager',
1212
'create_training_run_manager',
13-
'validate_user_path',
14-
'validate_ownership',
1513
'StickyLoraRequestRouter',
1614
'deserialize_object',
1715
'serialize_object',
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Copyright (c) ModelScope Contributors. All rights reserved.
2+
"""
3+
Factory functions for creating checkpoint and training-run manager instances.
4+
5+
Use these functions as the entry point rather than instantiating managers directly:
6+
7+
from twinkle.server.common.checkpoint_factory import (
8+
create_checkpoint_manager,
9+
create_training_run_manager,
10+
)
11+
"""
12+
from twinkle.server.common.tinker_checkpoint import TinkerCheckpointManager, TinkerTrainingRunManager
13+
from twinkle.server.common.twinkle_checkpoint import TwinkleCheckpointManager, TwinkleTrainingRunManager
14+
15+
16+
def create_training_run_manager(token: str, client_type: str = 'twinkle'):
17+
"""Create a TrainingRunManager for the given token.
18+
19+
Args:
20+
token: User authentication token.
21+
client_type: 'tinker' or 'twinkle' (default 'twinkle').
22+
"""
23+
if client_type == 'tinker':
24+
return TinkerTrainingRunManager(token)
25+
return TwinkleTrainingRunManager(token)
26+
27+
28+
def create_checkpoint_manager(token: str, client_type: str = 'twinkle'):
29+
"""Create a CheckpointManager for the given token.
30+
31+
Args:
32+
token: User authentication token.
33+
client_type: 'tinker' or 'twinkle' (default 'twinkle').
34+
"""
35+
if client_type == 'tinker':
36+
run_mgr = TinkerTrainingRunManager(token)
37+
return TinkerCheckpointManager(token, run_mgr)
38+
run_mgr = TwinkleTrainingRunManager(token)
39+
return TwinkleCheckpointManager(token, run_mgr)

src/twinkle/server/common/io_utils.py

Lines changed: 0 additions & 68 deletions
This file was deleted.

src/twinkle/server/common/tinker_io_utils.py renamed to src/twinkle/server/common/tinker_checkpoint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
11
# Copyright (c) ModelScope Contributors. All rights reserved.
22
"""
3-
Tinker-specific IO managers for training runs and checkpoints.
3+
Tinker-specific checkpoint and training-run managers.
44
55
Uses ``tinker.types`` models for all serialization and response construction.
66
"""
77
from datetime import datetime
88
from tinker import types as tinker_types
99
from typing import Any, Dict, List, Optional
1010

11-
from twinkle.server.utils.io_utils import TRAIN_RUN_INFO_FILENAME, BaseCheckpointManager, BaseTrainingRunManager
11+
from twinkle.server.utils.checkpoint_base import TRAIN_RUN_INFO_FILENAME, BaseCheckpointManager, BaseTrainingRunManager
1212

1313

1414
class TinkerTrainingRunManager(BaseTrainingRunManager):

src/twinkle/server/common/twinkle_io_utils.py renamed to src/twinkle/server/common/twinkle_checkpoint.py

Lines changed: 16 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,17 @@
11
# Copyright (c) ModelScope Contributors. All rights reserved.
22
"""
3-
Twinkle-specific IO managers for training runs and checkpoints.
3+
Twinkle-specific checkpoint and training-run managers.
44
55
Uses ``twinkle_client.types.training`` models for all serialization and response construction.
66
"""
77
from datetime import datetime
88
from typing import Any, Dict, List, Optional
99

10-
from twinkle.server.utils.io_utils import (TRAIN_RUN_INFO_FILENAME, BaseCheckpointManager, BaseTrainingRunManager,
11-
validate_ownership)
12-
from twinkle_client.types.training import Checkpoint as TwinkleCheckpoint
13-
from twinkle_client.types.training import (CheckpointsListResponse, CreateModelRequest, Cursor,
14-
ParsedCheckpointTwinklePath)
15-
from twinkle_client.types.training import TrainingRun as TwinkleTrainingRun
16-
from twinkle_client.types.training import TrainingRunsResponse, WeightsInfoResponse
10+
from twinkle.server.utils.checkpoint_base import (TRAIN_RUN_INFO_FILENAME, BaseCheckpointManager,
11+
BaseTrainingRunManager, validate_ownership)
12+
from twinkle_client.types.training import (Checkpoint, CheckpointsListResponse, CreateModelRequest, Cursor,
13+
ParsedCheckpointTwinklePath, TrainingRun, TrainingRunsResponse,
14+
WeightsInfoResponse)
1715

1816

1917
class TwinkleTrainingRunManager(BaseTrainingRunManager):
@@ -25,7 +23,7 @@ def train_run_info_filename(self) -> str:
2523

2624
def _create_training_run(self, model_id: str, run_config: CreateModelRequest) -> Dict[str, Any]:
2725
lora_config = run_config.lora_config
28-
train_run_data = TwinkleTrainingRun(
26+
train_run_data = TrainingRun(
2927
training_run_id=model_id,
3028
base_model=run_config.base_model,
3129
model_owner=self.token,
@@ -44,14 +42,14 @@ def _create_training_run(self, model_id: str, run_config: CreateModelRequest) ->
4442
new_data['train_attn'] = lora_config.train_attn
4543
return new_data
4644

47-
def _parse_training_run(self, data: Dict[str, Any]) -> TwinkleTrainingRun:
48-
return TwinkleTrainingRun(**data)
45+
def _parse_training_run(self, data: Dict[str, Any]) -> TrainingRun:
46+
return TrainingRun(**data)
4947

50-
def _create_training_runs_response(self, runs: List[TwinkleTrainingRun], limit: int, offset: int,
48+
def _create_training_runs_response(self, runs: List[TrainingRun], limit: int, offset: int,
5149
total: int) -> TrainingRunsResponse:
5250
return TrainingRunsResponse(training_runs=runs, cursor=Cursor(limit=limit, offset=offset, total_count=total))
5351

54-
def get_with_permission(self, model_id: str) -> Optional[TwinkleTrainingRun]:
52+
def get_with_permission(self, model_id: str) -> Optional[TrainingRun]:
5553
run = self.get(model_id)
5654
if run and validate_ownership(self.token, run.model_owner):
5755
return run
@@ -82,7 +80,7 @@ def _create_checkpoint(self,
8280
train_mlp=None,
8381
train_attn=None,
8482
user_metadata=None) -> Dict[str, Any]:
85-
checkpoint = TwinkleCheckpoint(
83+
checkpoint = Checkpoint(
8684
checkpoint_id=checkpoint_id,
8785
checkpoint_type=checkpoint_type,
8886
time=datetime.now(),
@@ -98,15 +96,15 @@ def _create_checkpoint(self,
9896
user_metadata=user_metadata)
9997
return checkpoint.model_dump(mode='json')
10098

101-
def _parse_checkpoint(self, data: Dict[str, Any]) -> TwinkleCheckpoint:
99+
def _parse_checkpoint(self, data: Dict[str, Any]) -> Checkpoint:
102100
data = data.copy()
103101
if 'tinker_path' in data and 'twinkle_path' not in data:
104102
data['twinkle_path'] = data.pop('tinker_path')
105103
elif 'twinkle_path' not in data and 'path' in data:
106104
data['twinkle_path'] = data.pop('path')
107-
return TwinkleCheckpoint(**data)
105+
return Checkpoint(**data)
108106

109-
def get(self, model_id: str, checkpoint_id: str) -> Optional[TwinkleCheckpoint]:
107+
def get(self, model_id: str, checkpoint_id: str) -> Optional[Checkpoint]:
110108
data = self._read_ckpt_info(model_id, checkpoint_id)
111109
if not data:
112110
return None
@@ -116,7 +114,7 @@ def get(self, model_id: str, checkpoint_id: str) -> Optional[TwinkleCheckpoint]:
116114
data['twinkle_path'] = f"{self.path_prefix}{model_id}/{data['checkpoint_id']}"
117115
return self._parse_checkpoint(data)
118116

119-
def _create_checkpoints_response(self, checkpoints: List[TwinkleCheckpoint]) -> CheckpointsListResponse:
117+
def _create_checkpoints_response(self, checkpoints: List[Checkpoint]) -> CheckpointsListResponse:
120118
return CheckpointsListResponse(checkpoints=checkpoints, cursor=None)
121119

122120
def _create_parsed_path(self, path, training_run_id, checkpoint_type, checkpoint_id) -> ParsedCheckpointTwinklePath:

src/twinkle/server/gateway/tinker_gateway_handlers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .server import GatewayServer
1818

1919
from twinkle.hub import HubOperation
20-
from twinkle.server.common.io_utils import create_checkpoint_manager, create_training_run_manager
20+
from twinkle.server.common.checkpoint_factory import create_checkpoint_manager, create_training_run_manager
2121
from twinkle.server.utils.task_queue import QueueState
2222
from twinkle.server.utils.validation import get_token_from_request
2323
from twinkle.utils.logger import get_logger

src/twinkle/server/gateway/twinkle_gateway_handlers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
if TYPE_CHECKING:
1313
from .server import GatewayServer
1414

15-
from twinkle.server.common.io_utils import create_checkpoint_manager, create_training_run_manager, validate_user_path
15+
from twinkle.server.common.checkpoint_factory import create_checkpoint_manager, create_training_run_manager
16+
from twinkle.server.utils.checkpoint_base import validate_user_path
1617
from twinkle.server.utils.validation import get_token_from_request
1718
from twinkle.utils.logger import get_logger
1819
from twinkle_client.types.server import DeleteCheckpointResponse, HealthResponse, WeightsInfoRequest

src/twinkle/server/model/backends/megatron_model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ def load(self, checkpoint_dir: str, **kwargs):
118118
token = kwargs.pop('token', None)
119119
if not token:
120120
raise ValueError('Token is required for loading checkpoints')
121-
from twinkle.server.common.io_utils import create_checkpoint_manager
121+
from twinkle.server.common.checkpoint_factory import create_checkpoint_manager
122122
checkpoint_manager = create_checkpoint_manager(token, client_type='tinker')
123123
resolved = checkpoint_manager.resolve_load_path(checkpoint_dir)
124124
if resolved.is_twinkle_path:

0 commit comments

Comments
 (0)