Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 3 additions & 45 deletions cookbook/client/tinker/megatron/server.py
Original file line number Diff line number Diff line change
@@ -1,51 +1,9 @@
import os
os.environ['RAY_DEBUG'] = '1'
import ray
from omegaconf import OmegaConf
from ray import serve
from twinkle.server.tinker import build_model_app, build_server_app

ray.init(namespace="twinkle_cluster")
serve.shutdown()
import time
time.sleep(5)
from twinkle.server import launch_server

file_dir = os.path.abspath(os.path.dirname(__file__))
config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
config_path = os.path.join(file_dir, 'server_config.yaml')

# Start Ray Serve with http_options from config
http_options = OmegaConf.to_container(config.http_options, resolve=True)
serve.start(http_options=http_options)

APP_BUILDERS = {
'main:build_server_app': build_server_app,
'main:build_model_app': build_model_app,
# 'main:build_sampler_app': build_sampler_app,
}

for app_config in config.applications:
print(f"Starting {app_config.name} at {app_config.route_prefix}...")

builder = APP_BUILDERS[app_config.import_path]
args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}

deploy_options = {}
deploy_config = app_config.deployments[0]
if 'autoscaling_config' in deploy_config:
deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
if 'ray_actor_options' in deploy_config:
deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)

app = builder(
deploy_options=deploy_options,
**{k: v for k, v in args.items()}
)

serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)

print("\nAll applications started!")
print("Endpoints:")
for app_config in config.applications:
print(f" - http://localhost:8000{app_config.route_prefix}")

input("\nPress Enter to stop the server...")
launch_server(config_path=config_path)
31 changes: 3 additions & 28 deletions cookbook/client/tinker/megatron/server_config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
server_type: tinker
proxy_location: EveryNode
http_options:
host: 0.0.0.0
Expand All @@ -6,7 +7,7 @@ http_options:
applications:
- name: server
route_prefix: /api/v1
import_path: main:build_server_app
import_path: server
args:

deployments:
Expand All @@ -22,7 +23,7 @@ applications:

- name: models-Qwen2.5-0.5B-Instruct
route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct
import_path: main:build_model_app
import_path: model
args:
use_megatron: true
model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
Expand All @@ -46,29 +47,3 @@ applications:
logging_config:
log_level: DEBUG

# Example: Add more models as needed
# - name: models-Qwen2.5-7B-Instruct
# route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
# import_path: main:build_model_app
# args:
# model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
# nproc_per_node: 4
# device_group:
# name: model7b
# ranks: [2, 3, 4, 5]
# device_type: cuda
# device_mesh:
# device_type: cuda
# mesh: [2, 3, 4, 5]
# mesh_dim_names: ['dp']
# deployments:
# - name: ModelManagement
# autoscaling_config:
# min_replicas: 1
# max_replicas: 1
# target_ongoing_requests: 16
# ray_actor_options:
# num_cpus: 0.1
# logging_config:
# log_level: DEBUG

18 changes: 14 additions & 4 deletions cookbook/client/tinker/transformer/lora.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#%%
import dotenv
dotenv.load_dotenv('.env')

import os
from twinkle_client import init_tinker_compat_client
service_client = init_tinker_compat_client(base_url='http://localhost:8000')
service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))

print("Available models:")
for item in service_client.get_server_capabilities().supported_models:
Expand All @@ -12,7 +16,9 @@

future = rest_client.list_training_runs(limit=50)
response = future.result()
# resume_path may be a twinkle:// checkpoint path or a model id; leave it empty to start a new LoRA run
# resume_path = "twinkle://20260131_170251-Qwen_Qwen2_5-0_5B-Instruct-7275126c/weights/pig-latin-lora-epoch-1"
# resume_path = "AlexEz/20260205_163645-Qwen_Qwen2_5-7B-Instruct-385d5c17_pig-latin-lora-epoch-1"
resume_path = ""
print(f"Found {len(response.training_runs)} training runs")
for tr in response.training_runs:
Expand All @@ -24,12 +30,13 @@
# resume_path = chpt.tinker_path # Just get the last one for demo purposes

#%%
base_model = "Qwen/Qwen2.5-7B-Instruct"
base_model = "Qwen/Qwen2.5-0.5B-Instruct"
if not resume_path:
training_client = service_client.create_lora_training_client(
base_model=base_model
)
else:
print("Resuming from " + resume_path)
training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path)

#%%
Expand Down Expand Up @@ -106,9 +113,12 @@ def process_example(example: dict, tokenizer) -> types.Datum:
weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in processed_examples])
print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}")

# Save the model and optimizer state
save_future = training_client.save_state(f"pig-latin-lora-epoch-{epoch}")
save_result = save_future.result()
print(f"Saved checkpoint for epoch {epoch} to {save_result.path}")

# sampling_client = training_client.save_weights_and_get_sampling_client(name='pig-latin-model')

# NOTE: Pass your ModelScope token as api_key when initializing the service client.
# The model name is {run_id}_{checkpoint_name}.
# rest_client.publish_checkpoint_from_tinker_path(save_result.path).result()
# print("Published checkpoint")
12 changes: 8 additions & 4 deletions cookbook/client/tinker/transformer/self_congnition.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from twinkle.server.tinker.common import input_feature_to_datum
from modelscope import AutoTokenizer

base_model = "Qwen/Qwen2.5-7B-Instruct"
base_model = "Qwen/Qwen2.5-0.5B-Instruct"

def train():
# process data
Expand Down Expand Up @@ -46,7 +46,7 @@ def train():
print(f"Saved checkpoint to {save_result.path}")

def eval():
weight_path = "twinkle://20260203_194633-Qwen_Qwen2_5-0_5B-Instruct-03aa3f06/weights/twinkle-lora"
weight_path = "twinkle://20260207_110850-Qwen_Qwen2_5-0_5B-Instruct-ce7e819f/weights/twinkle-lora-2"

service_client = init_tinker_compat_client(base_url='http://localhost:8000')
sampling_client = service_client.create_sampling_client(
Expand All @@ -56,6 +56,10 @@ def eval():
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

inputs = [
{
'role': 'system',
'content': 'You are a helpful assistant.'
},
{
'role': 'user',
'content': 'what is your name?'
Expand All @@ -78,5 +82,5 @@ def eval():
print(f"{i}: {repr(tokenizer.decode(seq.tokens))}")

if __name__ == "__main__":
train()
# eval()
# train()
eval()
51 changes: 3 additions & 48 deletions cookbook/client/tinker/transformer/server.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,9 @@
import os
os.environ['RAY_DEBUG'] = '1'
import ray
from omegaconf import OmegaConf
from ray import serve
from twinkle.server.tinker import build_model_app, build_sampler_app, build_server_app

ray.init(namespace="twinkle_cluster")
serve.shutdown()
import time
time.sleep(5)
from twinkle.server import launch_server

file_dir = os.path.abspath(os.path.dirname(__file__))
config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
config_path = os.path.join(file_dir, 'server_config.yaml')

# Start Ray Serve with http_options from config
http_options = OmegaConf.to_container(config.http_options, resolve=True)
serve.start(http_options=http_options)

APP_BUILDERS = {
'main:build_server_app': build_server_app,
'main:build_model_app': build_model_app,
# 'main:build_sampler_app': build_sampler_app,
}

for app_config in config.applications:
print(f"Starting {app_config.name} at {app_config.route_prefix}...")

if app_config.import_path not in APP_BUILDERS:
continue

builder = APP_BUILDERS[app_config.import_path]
args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}

deploy_options = {}
deploy_config = app_config.deployments[0]
if 'autoscaling_config' in deploy_config:
deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
if 'ray_actor_options' in deploy_config:
deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)

app = builder(
deploy_options=deploy_options,
**{k: v for k, v in args.items()}
)

serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)

print("\nAll applications started!")
print("Endpoints:")
for app_config in config.applications:
print(f" - http://localhost:8000{app_config.route_prefix}")

input("\nPress Enter to stop the server...")
launch_server(config_path=config_path)
36 changes: 6 additions & 30 deletions cookbook/client/tinker/transformer/server_config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
server_type: tinker
proxy_location: EveryNode
http_options:
host: 0.0.0.0
Expand All @@ -6,7 +7,7 @@ http_options:
applications:
- name: server
route_prefix: /api/v1
import_path: main:build_server_app
import_path: server
args:

deployments:
Expand All @@ -21,11 +22,11 @@ applications:
log_level: DEBUG

- name: models-Qwen2.5-0.5B-Instruct
route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
import_path: main:build_model_app
route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct
import_path: model
args:
use_megatron: false
model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
nproc_per_node: 2
device_group:
name: model
Expand Down Expand Up @@ -54,7 +55,7 @@ applications:

- name: sampler-Qwen2.5-0.5B-Instruct
route_prefix: /api/v1/sampler/Qwen/Qwen2.5-0.5B-Instruct
import_path: main:build_sampler_app
import_path: sampler
args:
model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
nproc_per_node: 1
Expand Down Expand Up @@ -83,28 +84,3 @@ applications:
logging_config:
log_level: DEBUG

# Example: Add more models as needed
# - name: models-Qwen2.5-7B-Instruct
# route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
# import_path: main:build_model_app
# args:
# model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
# nproc_per_node: 4
# device_group:
# name: model7b
# ranks: [2, 3, 4, 5]
# device_type: cuda
# device_mesh:
# device_type: cuda
# mesh: [2, 3, 4, 5]
# mesh_dim_names: ['dp']
# deployments:
# - name: ModelManagement
# autoscaling_config:
# min_replicas: 1
# max_replicas: 1
# target_ongoing_requests: 16
# ray_actor_options:
# num_cpus: 0.1
# logging_config:
# log_level: DEBUG
49 changes: 3 additions & 46 deletions cookbook/client/twinkle/megatron/server.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,9 @@
import os
os.environ['RAY_DEBUG'] = '1'
import ray
from omegaconf import OmegaConf
from ray import serve
from twinkle.server import build_processor_app, build_sampler_app, build_model_app, build_server_app

ray.init()
serve.shutdown()
import time
time.sleep(5)
from twinkle.server import launch_server

file_dir = os.path.abspath(os.path.dirname(__file__))
config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
config_path = os.path.join(file_dir, 'server_config.yaml')

# Start Ray Serve with http_options from config
http_options = OmegaConf.to_container(config.http_options, resolve=True)
serve.start(http_options=http_options)

APP_BUILDERS = {
'main:model_qwen25_7B': build_model_app,
# 'main:build_sampler_app': build_sampler_app,
'main:processor_app': build_processor_app,
'main:build_server_app': build_server_app,
}

for app_config in config.applications:
print(f"Starting {app_config.name} at {app_config.route_prefix}...")

builder = APP_BUILDERS[app_config.import_path]
args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}

deploy_options = {}
deploy_config = app_config.deployments[0]
if 'autoscaling_config' in deploy_config:
deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
if 'ray_actor_options' in deploy_config:
deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)

app = builder(
deploy_options=deploy_options,
**{k: v for k, v in args.items()}
)

serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)

print("\nAll applications started!")
print("Endpoints:")
for app_config in config.applications:
print(f" - http://localhost:8000{app_config.route_prefix}")

input("\nPress Enter to stop the server...")
launch_server(config_path=config_path)
Loading
Loading