From 6f9181bd763213833baad2e5bf67770b2e388ed9 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Mon, 2 Mar 2026 19:54:06 +0800 Subject: [PATCH 1/3] wip --- .../tinker/modelscope_service/sample.py | 69 +++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 cookbook/client/tinker/modelscope_service/sample.py diff --git a/cookbook/client/tinker/modelscope_service/sample.py b/cookbook/client/tinker/modelscope_service/sample.py new file mode 100644 index 00000000..710fc033 --- /dev/null +++ b/cookbook/client/tinker/modelscope_service/sample.py @@ -0,0 +1,69 @@ +# Tinker-Compatible Client - Sampling / Inference Example +# +# This script demonstrates how to use a previously trained LoRA checkpoint +# for text generation (sampling) via the Tinker-compatible client API. +# The server must be running first (see server.py and server_config.yaml). + +import os +from tinker import types + +from twinkle.data_format import Message, Trajectory +from twinkle.template import Template +from twinkle import init_tinker_client + +# Step 1: Initialize Tinker client +init_tinker_client() + +from tinker import ServiceClient + +base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507' +base_url = 'http://www.modelscope.cn/twinkle' + +# Step 2: Define the base model and connect to the server +service_client = ServiceClient( + base_url=base_url, + api_key=os.environ.get('MODELSCOPE_TOKEN') +) + +# Step 3: Create a sampling client by loading weights from a saved checkpoint. +# The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint. +# The server will load the base model and apply the LoRA adapter weights. 
+sampling_client = service_client.create_sampling_client( + model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1', + base_model=base_model +) + +# Step 4: Load the tokenizer locally to encode the prompt and decode the results +print(f'Using model {base_model}') + +template = Template(model_id=f'ms://{base_model}') + +trajectory = Trajectory( + messages=[ + Message(role='system', content='You are a helpful assistant'), + Message(role='user', content='你是谁?'), + ] +) + +input_feature = template.encode(trajectory, add_generation_prompt=True) + +input_ids = input_feature['input_ids'].tolist() + +# Step 5: Prepare the prompt and sampling parameters +prompt = types.ModelInput.from_ints(input_ids) +params = types.SamplingParams( + max_tokens=128, # Maximum number of tokens to generate + temperature=0.7, + stop=['\n'] # Stop generation when a newline character is produced +) + +# Step 6: Send the sampling request to the server. +# num_samples=8 generates 8 independent completions for the same prompt. 
+print('Sampling...') +future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1) +result = future.result() + +# Step 7: Decode and print the generated responses +print('Responses:') +for i, seq in enumerate(result.sequences): + print(f'{i}: {repr(template.decode(seq.tokens))}') From e2f609d0ea708faf45eb802b3b52482fc21991a5 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Mon, 2 Mar 2026 19:55:52 +0800 Subject: [PATCH 2/3] fix --- cookbook/client/tinker/custom_service/sample.py | 2 +- cookbook/client/tinker/modelscope_service/sample.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cookbook/client/tinker/custom_service/sample.py b/cookbook/client/tinker/custom_service/sample.py index dc48833c..f6cb4df0 100644 --- a/cookbook/client/tinker/custom_service/sample.py +++ b/cookbook/client/tinker/custom_service/sample.py @@ -39,7 +39,7 @@ trajectory = Trajectory( messages=[ Message(role='system', content='You are a helpful assistant'), - Message(role='user', content='你是谁?'), + Message(role='user', content='Who are you?'), ] ) diff --git a/cookbook/client/tinker/modelscope_service/sample.py b/cookbook/client/tinker/modelscope_service/sample.py index 710fc033..cd526566 100644 --- a/cookbook/client/tinker/modelscope_service/sample.py +++ b/cookbook/client/tinker/modelscope_service/sample.py @@ -41,7 +41,7 @@ trajectory = Trajectory( messages=[ Message(role='system', content='You are a helpful assistant'), - Message(role='user', content='你是谁?'), + Message(role='user', content='Who are you?'), ] ) From a7a5f81600cc05505d68cd3fabc69e7c62277f07 Mon Sep 17 00:00:00 2001 From: tastelikefeet Date: Mon, 2 Mar 2026 20:09:13 +0800 Subject: [PATCH 3/3] fix --- cookbook/client/tinker/custom_service/sample.py | 2 +- cookbook/client/tinker/modelscope_service/sample.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cookbook/client/tinker/custom_service/sample.py b/cookbook/client/tinker/custom_service/sample.py index 
f6cb4df0..278f24bf 100644 --- a/cookbook/client/tinker/custom_service/sample.py +++ b/cookbook/client/tinker/custom_service/sample.py @@ -56,7 +56,7 @@ ) # Step 6: Send the sampling request to the server. -# num_samples=8 generates 8 independent completions for the same prompt. +# num_samples=1 generates 1 independent completion for the same prompt. print('Sampling...') future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1) result = future.result() diff --git a/cookbook/client/tinker/modelscope_service/sample.py b/cookbook/client/tinker/modelscope_service/sample.py index cd526566..34a0064c 100644 --- a/cookbook/client/tinker/modelscope_service/sample.py +++ b/cookbook/client/tinker/modelscope_service/sample.py @@ -58,7 +58,7 @@ ) # Step 6: Send the sampling request to the server. -# num_samples=8 generates 8 independent completions for the same prompt. +# num_samples=1 generates 1 independent completion for the same prompt. print('Sampling...') future = sampling_client.sample(prompt=prompt, sampling_params=params, num_samples=1) result = future.result()