Skip to content

Commit 1dfcf43

Browse files
authored
Merge pull request #38 from modelscope/update_sample
Update sample and server
2 parents 04f9f71 + 5054ef6 commit 1dfcf43

39 files changed

+1204
-752
lines changed
Lines changed: 3 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,9 @@
11
import os
22
os.environ['RAY_DEBUG'] = '1'
3-
import ray
4-
from omegaconf import OmegaConf
5-
from ray import serve
6-
from twinkle.server.tinker import build_model_app, build_server_app
73

8-
ray.init(namespace="twinkle_cluster")
9-
serve.shutdown()
10-
import time
11-
time.sleep(5)
4+
from twinkle.server import launch_server
125

136
file_dir = os.path.abspath(os.path.dirname(__file__))
14-
config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
7+
config_path = os.path.join(file_dir, 'server_config.yaml')
158

16-
# Start Ray Serve with http_options from config
17-
http_options = OmegaConf.to_container(config.http_options, resolve=True)
18-
serve.start(http_options=http_options)
19-
20-
APP_BUILDERS = {
21-
'main:build_server_app': build_server_app,
22-
'main:build_model_app': build_model_app,
23-
# 'main:build_sampler_app': build_sampler_app,
24-
}
25-
26-
for app_config in config.applications:
27-
print(f"Starting {app_config.name} at {app_config.route_prefix}...")
28-
29-
builder = APP_BUILDERS[app_config.import_path]
30-
args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}
31-
32-
deploy_options = {}
33-
deploy_config = app_config.deployments[0]
34-
if 'autoscaling_config' in deploy_config:
35-
deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
36-
if 'ray_actor_options' in deploy_config:
37-
deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)
38-
39-
app = builder(
40-
deploy_options=deploy_options,
41-
**{k: v for k, v in args.items()}
42-
)
43-
44-
serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)
45-
46-
print("\nAll applications started!")
47-
print("Endpoints:")
48-
for app_config in config.applications:
49-
print(f" - http://localhost:8000{app_config.route_prefix}")
50-
51-
input("\nPress Enter to stop the server...")
9+
launch_server(config_path=config_path)
Lines changed: 3 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
server_type: tinker
12
proxy_location: EveryNode
23
http_options:
34
host: 0.0.0.0
@@ -6,7 +7,7 @@ http_options:
67
applications:
78
- name: server
89
route_prefix: /api/v1
9-
import_path: main:build_server_app
10+
import_path: server
1011
args:
1112

1213
deployments:
@@ -22,7 +23,7 @@ applications:
2223

2324
- name: models-Qwen2.5-0.5B-Instruct
2425
route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct
25-
import_path: main:build_model_app
26+
import_path: model
2627
args:
2728
use_megatron: true
2829
model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
@@ -46,29 +47,3 @@ applications:
4647
logging_config:
4748
log_level: DEBUG
4849

49-
# Example: Add more models as needed
50-
# - name: models-Qwen2.5-7B-Instruct
51-
# route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
52-
# import_path: main:build_model_app
53-
# args:
54-
# model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
55-
# nproc_per_node: 4
56-
# device_group:
57-
# name: model7b
58-
# ranks: [2, 3, 4, 5]
59-
# device_type: cuda
60-
# device_mesh:
61-
# device_type: cuda
62-
# mesh: [2, 3, 4, 5]
63-
# mesh_dim_names: ['dp']
64-
# deployments:
65-
# - name: ModelManagement
66-
# autoscaling_config:
67-
# min_replicas: 1
68-
# max_replicas: 1
69-
# target_ongoing_requests: 16
70-
# ray_actor_options:
71-
# num_cpus: 0.1
72-
# logging_config:
73-
# log_level: DEBUG
74-

cookbook/client/tinker/transformer/lora.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
#%%
2+
import dotenv
3+
dotenv.load_dotenv('.env')
4+
5+
import os
26
from twinkle_client import init_tinker_compat_client
3-
service_client = init_tinker_compat_client(base_url='http://localhost:8000')
7+
service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
48

59
print("Available models:")
610
for item in service_client.get_server_capabilities().supported_models:
@@ -12,7 +16,9 @@
1216

1317
future = rest_client.list_training_runs(limit=50)
1418
response = future.result()
19+
# Resuming is supported from either a twinkle:// path or a model id
1520
# resume_path = "twinkle://20260131_170251-Qwen_Qwen2_5-0_5B-Instruct-7275126c/weights/pig-latin-lora-epoch-1"
21+
# resume_path = "AlexEz/20260205_163645-Qwen_Qwen2_5-7B-Instruct-385d5c17_pig-latin-lora-epoch-1"
1622
resume_path = ""
1723
print(f"Found {len(response.training_runs)} training runs")
1824
for tr in response.training_runs:
@@ -24,12 +30,13 @@
2430
# resume_path = chpt.tinker_path # Just get the last one for demo purposes
2531

2632
#%%
27-
base_model = "Qwen/Qwen2.5-7B-Instruct"
33+
base_model = "Qwen/Qwen2.5-0.5B-Instruct"
2834
if not resume_path:
2935
training_client = service_client.create_lora_training_client(
3036
base_model=base_model
3137
)
3238
else:
39+
print("Resuming from " + resume_path)
3340
training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path)
3441

3542
#%%
@@ -106,9 +113,12 @@ def process_example(example: dict, tokenizer) -> types.Datum:
106113
weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in processed_examples])
107114
print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}")
108115

116+
# Save the model and optimizer state
109117
save_future = training_client.save_state(f"pig-latin-lora-epoch-{epoch}")
110118
save_result = save_future.result()
111119
print(f"Saved checkpoint for epoch {epoch} to {save_result.path}")
112120

113-
# sampling_client = training_client.save_weights_and_get_sampling_client(name='pig-latin-model')
114-
121+
# NOTE: Set your ModelScope token as api_key when initializing the service client
122+
# The published model name is {run_id}_{checkpoint_name}
123+
# rest_client.publish_checkpoint_from_tinker_path(save_result.path).result()
124+
# print("Published checkpoint")

cookbook/client/tinker/transformer/self_congnition.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from twinkle.server.tinker.common import input_feature_to_datum
99
from modelscope import AutoTokenizer
1010

11-
base_model = "Qwen/Qwen2.5-7B-Instruct"
11+
base_model = "Qwen/Qwen2.5-0.5B-Instruct"
1212

1313
def train():
1414
# process data
@@ -46,7 +46,7 @@ def train():
4646
print(f"Saved checkpoint to {save_result.path}")
4747

4848
def eval():
49-
weight_path = "twinkle://20260203_194633-Qwen_Qwen2_5-0_5B-Instruct-03aa3f06/weights/twinkle-lora"
49+
weight_path = "twinkle://20260207_110850-Qwen_Qwen2_5-0_5B-Instruct-ce7e819f/weights/twinkle-lora-2"
5050

5151
service_client = init_tinker_compat_client(base_url='http://localhost:8000')
5252
sampling_client = service_client.create_sampling_client(
@@ -56,6 +56,10 @@ def eval():
5656
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
5757

5858
inputs = [
59+
{
60+
'role': 'system',
61+
'content': 'You are a helpful assistant.'
62+
},
5963
{
6064
'role': 'user',
6165
'content': 'what is your name?'
@@ -78,5 +82,5 @@ def eval():
7882
print(f"{i}: {repr(tokenizer.decode(seq.tokens))}")
7983

8084
if __name__ == "__main__":
81-
train()
82-
# eval()
85+
# train()
86+
eval()
Lines changed: 3 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,54 +1,9 @@
11
import os
22
os.environ['RAY_DEBUG'] = '1'
3-
import ray
4-
from omegaconf import OmegaConf
5-
from ray import serve
6-
from twinkle.server.tinker import build_model_app, build_sampler_app, build_server_app
73

8-
ray.init(namespace="twinkle_cluster")
9-
serve.shutdown()
10-
import time
11-
time.sleep(5)
4+
from twinkle.server import launch_server
125

136
file_dir = os.path.abspath(os.path.dirname(__file__))
14-
config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
7+
config_path = os.path.join(file_dir, 'server_config.yaml')
158

16-
# Start Ray Serve with http_options from config
17-
http_options = OmegaConf.to_container(config.http_options, resolve=True)
18-
serve.start(http_options=http_options)
19-
20-
APP_BUILDERS = {
21-
'main:build_server_app': build_server_app,
22-
'main:build_model_app': build_model_app,
23-
# 'main:build_sampler_app': build_sampler_app,
24-
}
25-
26-
for app_config in config.applications:
27-
print(f"Starting {app_config.name} at {app_config.route_prefix}...")
28-
29-
if app_config.import_path not in APP_BUILDERS:
30-
continue
31-
32-
builder = APP_BUILDERS[app_config.import_path]
33-
args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}
34-
35-
deploy_options = {}
36-
deploy_config = app_config.deployments[0]
37-
if 'autoscaling_config' in deploy_config:
38-
deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
39-
if 'ray_actor_options' in deploy_config:
40-
deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)
41-
42-
app = builder(
43-
deploy_options=deploy_options,
44-
**{k: v for k, v in args.items()}
45-
)
46-
47-
serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)
48-
49-
print("\nAll applications started!")
50-
print("Endpoints:")
51-
for app_config in config.applications:
52-
print(f" - http://localhost:8000{app_config.route_prefix}")
53-
54-
input("\nPress Enter to stop the server...")
9+
launch_server(config_path=config_path)

cookbook/client/tinker/transformer/server_config.yaml

Lines changed: 6 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
server_type: tinker
12
proxy_location: EveryNode
23
http_options:
34
host: 0.0.0.0
@@ -6,7 +7,7 @@ http_options:
67
applications:
78
- name: server
89
route_prefix: /api/v1
9-
import_path: main:build_server_app
10+
import_path: server
1011
args:
1112

1213
deployments:
@@ -21,11 +22,11 @@ applications:
2122
log_level: DEBUG
2223

2324
- name: models-Qwen2.5-0.5B-Instruct
24-
route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
25-
import_path: main:build_model_app
25+
route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct
26+
import_path: model
2627
args:
2728
use_megatron: false
28-
model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
29+
model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
2930
nproc_per_node: 2
3031
device_group:
3132
name: model
@@ -54,7 +55,7 @@ applications:
5455

5556
- name: sampler-Qwen2.5-0.5B-Instruct
5657
route_prefix: /api/v1/sampler/Qwen/Qwen2.5-0.5B-Instruct
57-
import_path: main:build_sampler_app
58+
import_path: sampler
5859
args:
5960
model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
6061
nproc_per_node: 1
@@ -83,28 +84,3 @@ applications:
8384
logging_config:
8485
log_level: DEBUG
8586

86-
# Example: Add more models as needed
87-
# - name: models-Qwen2.5-7B-Instruct
88-
# route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
89-
# import_path: main:build_model_app
90-
# args:
91-
# model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
92-
# nproc_per_node: 4
93-
# device_group:
94-
# name: model7b
95-
# ranks: [2, 3, 4, 5]
96-
# device_type: cuda
97-
# device_mesh:
98-
# device_type: cuda
99-
# mesh: [2, 3, 4, 5]
100-
# mesh_dim_names: ['dp']
101-
# deployments:
102-
# - name: ModelManagement
103-
# autoscaling_config:
104-
# min_replicas: 1
105-
# max_replicas: 1
106-
# target_ongoing_requests: 16
107-
# ray_actor_options:
108-
# num_cpus: 0.1
109-
# logging_config:
110-
# log_level: DEBUG
Lines changed: 3 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,9 @@
11
import os
22
os.environ['RAY_DEBUG'] = '1'
3-
import ray
4-
from omegaconf import OmegaConf
5-
from ray import serve
6-
from twinkle.server import build_processor_app, build_sampler_app, build_model_app, build_server_app
73

8-
ray.init()
9-
serve.shutdown()
10-
import time
11-
time.sleep(5)
4+
from twinkle.server import launch_server
125

136
file_dir = os.path.abspath(os.path.dirname(__file__))
14-
config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
7+
config_path = os.path.join(file_dir, 'server_config.yaml')
158

16-
# Start Ray Serve with http_options from config
17-
http_options = OmegaConf.to_container(config.http_options, resolve=True)
18-
serve.start(http_options=http_options)
19-
20-
APP_BUILDERS = {
21-
'main:model_qwen25_7B': build_model_app,
22-
# 'main:build_sampler_app': build_sampler_app,
23-
'main:processor_app': build_processor_app,
24-
'main:build_server_app': build_server_app,
25-
}
26-
27-
for app_config in config.applications:
28-
print(f"Starting {app_config.name} at {app_config.route_prefix}...")
29-
30-
builder = APP_BUILDERS[app_config.import_path]
31-
args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}
32-
33-
deploy_options = {}
34-
deploy_config = app_config.deployments[0]
35-
if 'autoscaling_config' in deploy_config:
36-
deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
37-
if 'ray_actor_options' in deploy_config:
38-
deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)
39-
40-
app = builder(
41-
deploy_options=deploy_options,
42-
**{k: v for k, v in args.items()}
43-
)
44-
45-
serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)
46-
47-
print("\nAll applications started!")
48-
print("Endpoints:")
49-
for app_config in config.applications:
50-
print(f" - http://localhost:8000{app_config.route_prefix}")
51-
52-
input("\nPress Enter to stop the server...")
9+
launch_server(config_path=config_path)

0 commit comments

Comments
 (0)