modelscope · Yunnglin · Feb 7, 2026 · Feb 5, 2026 · Feb 5, 2026 · Feb 5, 2026
diff --git a/cookbook/client/tinker/megatron/server.py b/cookbook/client/tinker/megatron/server.py
@@ -1,51 +1,9 @@
 import os
 os.environ['RAY_DEBUG'] = '1'
-import ray
-from omegaconf import OmegaConf
-from ray import serve
-from twinkle.server.tinker import build_model_app, build_server_app
 
-ray.init(namespace="twinkle_cluster")
-serve.shutdown()
-import time
-time.sleep(5)
+from twinkle.server import launch_server
 
 file_dir = os.path.abspath(os.path.dirname(__file__))
-config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
+config_path = os.path.join(file_dir, 'server_config.yaml')
 
-# Start Ray Serve with http_options from config
-http_options = OmegaConf.to_container(config.http_options, resolve=True)
-serve.start(http_options=http_options)
-
-APP_BUILDERS = {
-    'main:build_server_app': build_server_app,
-    'main:build_model_app': build_model_app,
-    # 'main:build_sampler_app': build_sampler_app,
-}
-
-for app_config in config.applications:
-    print(f"Starting {app_config.name} at {app_config.route_prefix}...")
-
-    builder = APP_BUILDERS[app_config.import_path]
-    args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}
-
-    deploy_options = {}
-    deploy_config = app_config.deployments[0]
-    if 'autoscaling_config' in deploy_config:
-        deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
-    if 'ray_actor_options' in deploy_config:
-        deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)
-
-    app = builder(
-        deploy_options=deploy_options,
-        **{k: v for k, v in args.items()}
-    )
-
-    serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)
-
-print("\nAll applications started!")
-print("Endpoints:")
-for app_config in config.applications:
-    print(f"  - http://localhost:8000{app_config.route_prefix}")
-
-input("\nPress Enter to stop the server...")
+launch_server(config_path=config_path)
diff --git a/cookbook/client/tinker/megatron/server_config.yaml b/cookbook/client/tinker/megatron/server_config.yaml
@@ -1,3 +1,4 @@
+server_type: tinker
 proxy_location: EveryNode
 http_options:
   host: 0.0.0.0
@@ -6,7 +7,7 @@ http_options:
 applications:
   - name: server
     route_prefix: /api/v1
-    import_path: main:build_server_app
+    import_path: server
     args:
 
     deployments:
@@ -22,7 +23,7 @@ applications:
 
   - name: models-Qwen2.5-0.5B-Instruct
     route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct
-    import_path: main:build_model_app
+    import_path: model
     args:
       use_megatron: true
       model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
@@ -46,29 +47,3 @@ applications:
         logging_config:
           log_level: DEBUG
 
-  # Example: Add more models as needed
-  # - name: models-Qwen2.5-7B-Instruct
-  #   route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
-  #   import_path: main:build_model_app
-  #   args:
-  #     model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
-  #     nproc_per_node: 4
-  #     device_group:
-  #       name: model7b
-  #       ranks: [2, 3, 4, 5]
-  #       device_type: cuda
-  #     device_mesh:
-  #       device_type: cuda
-  #       mesh: [2, 3, 4, 5]
-  #       mesh_dim_names: ['dp']
-  #   deployments:
-  #     - name: ModelManagement
-  #       autoscaling_config:
-  #         min_replicas: 1
-  #         max_replicas: 1
-  #         target_ongoing_requests: 16
-  #       ray_actor_options:
-  #         num_cpus: 0.1
-  #       logging_config:
-  #         log_level: DEBUG
-
diff --git a/cookbook/client/tinker/transformer/lora.py b/cookbook/client/tinker/transformer/lora.py
@@ -1,6 +1,10 @@
 #%%
+import dotenv
+dotenv.load_dotenv('.env')
+
+import os
 from twinkle_client import init_tinker_compat_client
-service_client = init_tinker_compat_client(base_url='http://localhost:8000')
+service_client = init_tinker_compat_client(base_url='http://localhost:8000', api_key=os.environ.get('MODELSCOPE_SDK_TOKEN'))
 
 print("Available models:")
 for item in service_client.get_server_capabilities().supported_models:
@@ -12,7 +16,9 @@
 
 future = rest_client.list_training_runs(limit=50)
 response = future.result()
+# resume_path may be either a twinkle:// checkpoint path or a model id (see the examples below)
 # resume_path = "twinkle://20260131_170251-Qwen_Qwen2_5-0_5B-Instruct-7275126c/weights/pig-latin-lora-epoch-1"
+# resume_path = "AlexEz/20260205_163645-Qwen_Qwen2_5-7B-Instruct-385d5c17_pig-latin-lora-epoch-1"
 resume_path = ""
 print(f"Found {len(response.training_runs)} training runs")
 for tr in response.training_runs:
@@ -24,12 +30,13 @@
         # resume_path = chpt.tinker_path  # Just get the last one for demo purposes
 
 #%%
-base_model = "Qwen/Qwen2.5-7B-Instruct"
+base_model = "Qwen/Qwen2.5-0.5B-Instruct"
 if not resume_path:
     training_client = service_client.create_lora_training_client(
         base_model=base_model
     )
 else:
+    print("Resuming from " + resume_path)
     training_client = service_client.create_training_client_from_state_with_optimizer(path=resume_path)
 
 #%%
@@ -106,9 +113,12 @@ def process_example(example: dict, tokenizer) -> types.Datum:
         weights = np.concatenate([example.loss_fn_inputs['weights'].tolist() for example in processed_examples])
         print(f"Loss per token: {-np.dot(logprobs, weights) / weights.sum():.4f}")
 
+    # Save the model and optimizer state
     save_future = training_client.save_state(f"pig-latin-lora-epoch-{epoch}")
     save_result = save_future.result()
     print(f"Saved checkpoint for epoch {epoch} to {save_result.path}")
 
-# sampling_client = training_client.save_weights_and_get_sampling_client(name='pig-latin-model')
-
+# NOTE: You need to pass your ModelScope token as api_key when initializing the service client.
+# The model name is {run_id}_{checkpoint_name}.
+# rest_client.publish_checkpoint_from_tinker_path(save_result.path).result()
+# print("Published checkpoint")
diff --git a/cookbook/client/tinker/transformer/self_congnition.py b/cookbook/client/tinker/transformer/self_congnition.py
@@ -8,7 +8,7 @@
 from twinkle.server.tinker.common import input_feature_to_datum
 from modelscope import AutoTokenizer
 
-base_model = "Qwen/Qwen2.5-7B-Instruct"
+base_model = "Qwen/Qwen2.5-0.5B-Instruct"
 
 def train():
     # process data
@@ -46,7 +46,7 @@ def train():
         print(f"Saved checkpoint to {save_result.path}")
 
 def eval():
-    weight_path = "twinkle://20260203_194633-Qwen_Qwen2_5-0_5B-Instruct-03aa3f06/weights/twinkle-lora"
+    weight_path = "twinkle://20260207_110850-Qwen_Qwen2_5-0_5B-Instruct-ce7e819f/weights/twinkle-lora-2"
 
     service_client = init_tinker_compat_client(base_url='http://localhost:8000')
     sampling_client = service_client.create_sampling_client(
@@ -56,6 +56,10 @@ def eval():
     tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
 
     inputs = [
+        {
+            'role': 'system',
+            'content': 'You are a helpful assistant.'
+        },
         {
             'role': 'user',
             'content': 'what is your name?'
@@ -78,5 +82,5 @@ def eval():
         print(f"{i}: {repr(tokenizer.decode(seq.tokens))}")
 
 if __name__ == "__main__":
-    train()
-    # eval()
+    # train()
+    eval()
diff --git a/cookbook/client/tinker/transformer/server.py b/cookbook/client/tinker/transformer/server.py
@@ -1,54 +1,9 @@
 import os
 os.environ['RAY_DEBUG'] = '1'
-import ray
-from omegaconf import OmegaConf
-from ray import serve
-from twinkle.server.tinker import build_model_app, build_sampler_app, build_server_app
 
-ray.init(namespace="twinkle_cluster")
-serve.shutdown()
-import time
-time.sleep(5)
+from twinkle.server import launch_server
 
 file_dir = os.path.abspath(os.path.dirname(__file__))
-config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
+config_path = os.path.join(file_dir, 'server_config.yaml')
 
-# Start Ray Serve with http_options from config
-http_options = OmegaConf.to_container(config.http_options, resolve=True)
-serve.start(http_options=http_options)
-
-APP_BUILDERS = {
-    'main:build_server_app': build_server_app,
-    'main:build_model_app': build_model_app,
-    # 'main:build_sampler_app': build_sampler_app,
-}
-
-for app_config in config.applications:
-    print(f"Starting {app_config.name} at {app_config.route_prefix}...")
-
-    if app_config.import_path not in APP_BUILDERS:
-        continue
-
-    builder = APP_BUILDERS[app_config.import_path]
-    args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}
-
-    deploy_options = {}
-    deploy_config = app_config.deployments[0]
-    if 'autoscaling_config' in deploy_config:
-        deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
-    if 'ray_actor_options' in deploy_config:
-        deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)
-
-    app = builder(
-        deploy_options=deploy_options,
-        **{k: v for k, v in args.items()}
-    )
-
-    serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)
-
-print("\nAll applications started!")
-print("Endpoints:")
-for app_config in config.applications:
-    print(f"  - http://localhost:8000{app_config.route_prefix}")
-
-input("\nPress Enter to stop the server...")
+launch_server(config_path=config_path)
diff --git a/cookbook/client/tinker/transformer/server_config.yaml b/cookbook/client/tinker/transformer/server_config.yaml
@@ -1,3 +1,4 @@
+server_type: tinker
 proxy_location: EveryNode
 http_options:
   host: 0.0.0.0
@@ -6,7 +7,7 @@ http_options:
 applications:
   - name: server
     route_prefix: /api/v1
-    import_path: main:build_server_app
+    import_path: server
     args:
 
     deployments:
@@ -21,11 +22,11 @@ applications:
           log_level: DEBUG
 
   - name: models-Qwen2.5-0.5B-Instruct
-    route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
-    import_path: main:build_model_app
+    route_prefix: /api/v1/model/Qwen/Qwen2.5-0.5B-Instruct
+    import_path: model
     args:
       use_megatron: false
-      model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
+      model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
       nproc_per_node: 2
       device_group:
         name: model
@@ -54,7 +55,7 @@ applications:
 
   - name: sampler-Qwen2.5-0.5B-Instruct
     route_prefix: /api/v1/sampler/Qwen/Qwen2.5-0.5B-Instruct
-    import_path: main:build_sampler_app
+    import_path: sampler
     args:
       model_id: "ms://Qwen/Qwen2.5-0.5B-Instruct"
       nproc_per_node: 1
@@ -83,28 +84,3 @@ applications:
         logging_config:
           log_level: DEBUG
 
-  # Example: Add more models as needed
-  # - name: models-Qwen2.5-7B-Instruct
-  #   route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
-  #   import_path: main:build_model_app
-  #   args:
-  #     model_id: "ms://Qwen/Qwen2.5-7B-Instruct"
-  #     nproc_per_node: 4
-  #     device_group:
-  #       name: model7b
-  #       ranks: [2, 3, 4, 5]
-  #       device_type: cuda
-  #     device_mesh:
-  #       device_type: cuda
-  #       mesh: [2, 3, 4, 5]
-  #       mesh_dim_names: ['dp']
-  #   deployments:
-  #     - name: ModelManagement
-  #       autoscaling_config:
-  #         min_replicas: 1
-  #         max_replicas: 1
-  #         target_ongoing_requests: 16
-  #       ray_actor_options:
-  #         num_cpus: 0.1
-  #       logging_config:
-  #         log_level: DEBUG
diff --git a/cookbook/client/twinkle/megatron/server.py b/cookbook/client/twinkle/megatron/server.py
@@ -1,52 +1,9 @@
 import os
 os.environ['RAY_DEBUG'] = '1'
-import ray
-from omegaconf import OmegaConf
-from ray import serve
-from twinkle.server import build_processor_app, build_sampler_app, build_model_app, build_server_app
 
-ray.init()
-serve.shutdown()
-import time
-time.sleep(5)
+from twinkle.server import launch_server
 
 file_dir = os.path.abspath(os.path.dirname(__file__))
-config = OmegaConf.load(os.path.join(file_dir, 'server_config.yaml'))
+config_path = os.path.join(file_dir, 'server_config.yaml')
 
-# Start Ray Serve with http_options from config
-http_options = OmegaConf.to_container(config.http_options, resolve=True)
-serve.start(http_options=http_options)
-
-APP_BUILDERS = {
-    'main:model_qwen25_7B': build_model_app,
-    # 'main:build_sampler_app': build_sampler_app,
-    'main:processor_app': build_processor_app,
-    'main:build_server_app': build_server_app,
-}
-
-for app_config in config.applications:
-    print(f"Starting {app_config.name} at {app_config.route_prefix}...")
-
-    builder = APP_BUILDERS[app_config.import_path]
-    args = OmegaConf.to_container(app_config.args, resolve=True) if app_config.args else {}
-
-    deploy_options = {}
-    deploy_config = app_config.deployments[0]
-    if 'autoscaling_config' in deploy_config:
-        deploy_options['autoscaling_config'] = OmegaConf.to_container(deploy_config.autoscaling_config)
-    if 'ray_actor_options' in deploy_config:
-        deploy_options['ray_actor_options'] = OmegaConf.to_container(deploy_config.ray_actor_options)
-
-    app = builder(
-        deploy_options=deploy_options,
-        **{k: v for k, v in args.items()}
-    )
-
-    serve.run(app, name=app_config.name, route_prefix=app_config.route_prefix)
-
-print("\nAll applications started!")
-print("Endpoints:")
-for app_config in config.applications:
-    print(f"  - http://localhost:8000{app_config.route_prefix}")
-
-input("\nPress Enter to stop the server...")
+launch_server(config_path=config_path)