Commit 993c049

Merge remote-tracking branch 'origin' into fix_request
2 parents f7a3d0e + 8ada7d3

30 files changed: +299 −127 lines

README.md

Lines changed: 2 additions & 2 deletions

```diff
@@ -129,7 +129,7 @@ supported on Twinkle✨ framework.
 > For serverless training service accessed via `base_url=https://www.modelscope.cn/twinkle`, it
 > is currently provided via the Tinker-compatible APIs. We will be rolling out services that support
 > both Tinker APIs, as well as the full-fledged Twinkle✨ native APIs. The serverless endpoint is backed
-> by one training base at a time, and currently it is [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507).
+> by one training base at a time, and currently it is [Qwen3.5-4B](https://modelscope.cn/models/Qwen/Qwen3.5-4B).

 | Model Type | Model ID on [ModelScope](https://modelscope.cn) | Model Size | Requires | Support Megatron | HF Model ID |
 |---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
@@ -234,7 +234,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum

-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='your-base-url'
 api_key='your-api-key'
```
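The README snippet prefixes the ModelScope model ID with an `ms://` scheme, while the Tinker-style clients elsewhere in this commit use the bare `Qwen/Qwen3.5-4B` form. A minimal sketch of the relationship between the two forms; the helper name is mine and is not part of the Twinkle API:

```python
# Hypothetical helper (naming is mine, not part of the Twinkle API):
# split the 'ms://' scheme used in the README off a model reference,
# returning the scheme and the bare ModelScope model ID.
def split_model_ref(ref: str) -> tuple[str, str]:
    scheme, sep, model_id = ref.partition('://')
    if not sep:
        # No scheme present: treat the whole string as a bare model ID.
        return '', ref
    return scheme, model_id

print(split_model_ref('ms://Qwen/Qwen3.5-4B'))  # ('ms', 'Qwen/Qwen3.5-4B')
print(split_model_ref('Qwen/Qwen3.5-4B'))       # ('', 'Qwen/Qwen3.5-4B')
```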

README_ZH.md

Lines changed: 2 additions & 2 deletions

```diff
@@ -112,7 +112,7 @@ Twinkle✨ supports running the same algorithm interfaces on a single GPU, torchrun multi-node, Ray, and Cl
 As new models are released, we will add support for more of them. The table below lists the models currently supported by the Twinkle✨ framework.

 >[!Note]
-> The serverless training service accessed via `base_url=https://www.modelscope.cn/twinkle` is currently provided through Tinker-compatible APIs. We will gradually roll out services that support both the Tinker APIs and the full Twinkle✨ native APIs. The serverless endpoint is backed by one training base at a time, currently [Qwen3-30B-A3B-Instruct-2507](https://modelscope.cn/models/Qwen/Qwen3-30B-A3B-Instruct-2507)
+> The serverless training service accessed via `base_url=https://www.modelscope.cn/twinkle` is currently provided through Tinker-compatible APIs. We will gradually roll out services that support both the Tinker APIs and the full Twinkle✨ native APIs. The serverless endpoint is backed by one training base at a time, currently [Qwen3.5-4B](https://modelscope.cn/models/Qwen/Qwen3.5-4B)

 | Model Type | Model ID (examples) | Model Size | Requires | Support Megatron | HF Model ID |
 |---------------------|-----------------------------------------------------------------------------------------------------------------|:---------------------------------------:|----------------------|:----------------:|:---------------------------------------------------------------------------------------------------------:|
@@ -216,7 +216,7 @@ from twinkle.dataset import Dataset, DatasetMeta
 from twinkle.preprocessor import SelfCognitionProcessor
 from twinkle.server.common import input_feature_to_datum

-base_model = 'ms://Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'ms://Qwen/Qwen3.5-4B'
 base_url='your-base-url'
 api_key='your-api-key'
```

cookbook/client/server/megatron/server_config.yaml

Lines changed: 6 additions & 6 deletions

```diff
@@ -36,11 +36,11 @@ applications:

   # 3. Sampler Service - Runs inference / sampling using vLLM engine
   # Used for generating text from the model (e.g., evaluating LoRA results).
-  - name: sampler-Qwen3-30B-A3B-Instruct-2507
-    route_prefix: /api/v1/sampler/Qwen/Qwen3-30B-A3B-Instruct-2507
+  - name: sampler-Qwen3.5-4B
+    route_prefix: /api/v1/sampler/Qwen/Qwen3.5-4B
     import_path: sampler
     args:
-      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507"  # ModelScope model identifier
+      model_id: "ms://Qwen/Qwen3.5-4B"  # ModelScope model identifier
       nproc_per_node: 4                 # Number of GPU processes per node
       sampler_type: vllm                # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
       engine_args:                      # vLLM engine-specific settings
@@ -73,12 +73,12 @@ applications:

   # 2. Model Service (commented out) - Would host the base model for training.
   # Uncomment and configure if you need a training model worker.
-  - name: models-Qwen3-30B-A3B-Instruct-2507
-    route_prefix: /api/v1/model/Qwen/Qwen3-30B-A3B-Instruct-2507
+  - name: models-Qwen3.5-4B
+    route_prefix: /api/v1/model/Qwen/Qwen3.5-4B
     import_path: model
     args:
       use_megatron: true                # Use the Megatron backend
-      model_id: "ms://Qwen/Qwen3-30B-A3B-Instruct-2507"  # ModelScope model identifier
+      model_id: "ms://Qwen/Qwen3.5-4B"  # ModelScope model identifier
       max_length: 16000                 # Model max length
       max_loras: 5                      # Model max LoRAs
       nproc_per_node: 4                 # Number of GPU processes per node
```
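In the config above, both the service `name` and its `route_prefix` are derived from the ModelScope model ID, which is why all three fields change together in this diff. A sketch of that mapping; the helper is mine, not part of the server:

```python
# Hypothetical helper (naming is mine): derive the sampler service name and
# route prefix used in server_config.yaml from a ModelScope model ID of the
# form '<org>/<model-name>'.
def sampler_routes(model_id: str) -> dict:
    org, _, model_name = model_id.partition('/')
    return {
        'name': f'sampler-{model_name}',
        'route_prefix': f'/api/v1/sampler/{org}/{model_name}',
    }

print(sampler_routes('Qwen/Qwen3.5-4B'))
# {'name': 'sampler-Qwen3.5-4B', 'route_prefix': '/api/v1/sampler/Qwen/Qwen3.5-4B'}
```

Keeping the three fields in sync by hand is exactly the kind of edit this commit makes; a generator like this is one way to avoid drift.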

cookbook/client/server/megatron/server_config_4b.yaml

Lines changed: 1 addition & 0 deletions

```diff
@@ -39,6 +39,7 @@ applications:
     import_path: model
     args:
       use_megatron: true
+      model_cls: Qwen3_5ForConditionalGeneration
       model_id: "ms://Qwen/Qwen3.5-4B"  # ModelScope model identifier
       max_length: 10240
       nproc_per_node: 2                 # Number of GPU processes per node
```

cookbook/client/tinker/modelscope/sample.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -16,7 +16,7 @@

 from tinker import ServiceClient

-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'

 # Step 2: Define the base model and connect to the server
@@ -29,7 +29,7 @@
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
```
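The `model_path` in the diff follows the shape `twinkle://<training-run>/weights/<checkpoint-name>` (the `xxx-...-xxx` run identifier is a placeholder in the repo's example). A small parser sketch, assuming only that three-part shape; the function name and the `my-run` value are mine:

```python
# Hypothetical parser (naming is mine): split a twinkle:// checkpoint URI
# of the shape twinkle://<training-run>/weights/<checkpoint-name> into its
# three components.
def parse_twinkle_path(path: str) -> dict:
    assert path.startswith('twinkle://'), 'expected a twinkle:// URI'
    run_id, kind, name = path[len('twinkle://'):].split('/')
    return {'run': run_id, 'kind': kind, 'checkpoint': name}

print(parse_twinkle_path('twinkle://my-run/weights/twinkle-lora-1'))
# {'run': 'my-run', 'kind': 'weights', 'checkpoint': 'twinkle-lora-1'}
```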

cookbook/client/tinker/modelscope/self_cognition.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -23,7 +23,7 @@
 from tinker import ServiceClient

 # The base model to fine-tune / evaluate
-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'
```

cookbook/client/tinker/modelscope/short_math_grpo.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -38,7 +38,7 @@
 logger = get_logger()

 # ========== Configuration ==========
-BASE_MODEL = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+BASE_MODEL = 'Qwen/Qwen3.5-4B'
 NUM_GENERATIONS = 8
 MAX_NEW_TOKENS = 4096
 LEARNING_RATE = 1e-4
```
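The script's `NUM_GENERATIONS = 8` is the group size GRPO normalizes over: each completion's reward is compared against the other completions sampled from the same prompt. A sketch of that group-relative advantage computation; this illustrates the math only and is not twinkle's implementation:

```python
# Sketch of GRPO's group-relative advantage (not twinkle's implementation):
# each completion's reward is standardized against the mean and std of its
# own group of NUM_GENERATIONS samples drawn from the same prompt.
def group_relative_advantages(rewards, eps=1e-6):
    mean = sum(rewards) / len(rewards)
    var = sum((r - mean) ** 2 for r in rewards) / len(rewards)
    std = var ** 0.5
    # eps guards against a zero std when all rewards in the group are equal.
    return [(r - mean) / (std + eps) for r in rewards]

advs = group_relative_advantages([1.0, 0.0, 1.0, 0.0])
# Advantages are centered: completions above the group mean get positive
# advantage, those below get negative, and they sum to zero.
```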

cookbook/client/tinker/self_host/sample.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -27,7 +27,7 @@
 # The model_path is a twinkle:// URI pointing to a previously saved LoRA checkpoint.
 # The server will load the base model and apply the LoRA adapter weights.
 sampling_client = service_client.create_sampling_client(
-    model_path='twinkle://xxx-Qwen_Qwen3-30B-A3B-Instruct-2507-xxx/weights/twinkle-lora-1',
+    model_path='twinkle://xxx-Qwen_Qwen3.5-4B-xxx/weights/twinkle-lora-1',
     base_model=base_model
 )
```

Lines changed: 168 additions & 0 deletions

```python
# Twinkle Client - Transformers LoRA Training Example
#
# This script demonstrates how to fine-tune a language model using LoRA
# (Low-Rank Adaptation) through the Twinkle client-server architecture.
# The server must be running first (see server.py and server_config.yaml).

# Step 1: Load environment variables from a .env file (e.g., API tokens)
import dotenv
import os
from twinkle.data_format import Trajectory, Message
from twinkle.preprocessor import Preprocessor

dotenv.load_dotenv('.env')
import numpy as np
import torch
from peft import LoraConfig

from twinkle import get_logger
from twinkle.dataset import DatasetMeta
from twinkle_client import init_twinkle_client
from twinkle.dataloader import DataLoader
from twinkle.dataset import LazyDataset
from twinkle_client.model import MultiLoraTransformersModel

logger = get_logger()

base_model = 'Qwen/Qwen3.5-4B'
base_url = 'http://www.modelscope.cn/twinkle'

# Step 2: Initialize the Twinkle client to communicate with the remote server.
# - base_url: the address of the running Twinkle server
# - api_key: authentication token (loaded from environment variable)
client = init_twinkle_client(base_url=base_url, api_key=os.environ.get('MODELSCOPE_TOKEN'))

# Step 3: Query the server for existing training runs and their checkpoints.
# This is useful for resuming a previous training session.
runs = client.list_training_runs()

resume_path = None
for run in runs:
    logger.info(run.model_dump_json(indent=2))
    # List all saved checkpoints for this training run
    checkpoints = client.list_checkpoints(run.training_run_id)

    for checkpoint in checkpoints:
        logger.info(checkpoint.model_dump_json(indent=2))
        # Uncomment the line below to resume from a specific checkpoint:
        # resume_path = checkpoint.twinkle_path


class LatexOCRProcessor(Preprocessor):

    def __call__(self, rows):
        rows = self.map_col_to_row(rows)
        rows = [self.preprocess(row) for row in rows]
        rows = self.map_row_to_col(rows)
        return rows

    def preprocess(self, row) -> Trajectory:
        return Trajectory(
            messages=[
                Message(role='user', content='<image>Using LaTeX to perform OCR on the image.', images=[row['image']]),
                Message(role='assistant', content=row['text']),
            ]
        )


def train():
    # Step 4: Prepare the dataset

    # Load the LaTeX_OCR dataset from ModelScope
    dataset = LazyDataset(dataset_meta=DatasetMeta('ms://AI-ModelScope/LaTeX_OCR', data_slice=range(500)))

    # Apply a chat template so the data matches the model's expected input format
    dataset.set_template('Qwen3_5Template', model_id=f'ms://{base_model}', max_length=512)

    # Convert each raw row into a user/assistant Trajectory
    dataset.map(LatexOCRProcessor)

    # Tokenize and encode the dataset into model-ready input features
    dataset.encode(batched=True)

    # Wrap the dataset into a DataLoader that yields batches of size 4
    dataloader = DataLoader(dataset=dataset, batch_size=4)

    # Step 5: Configure the model

    # Create a multi-LoRA Transformers model pointing to the base model on ModelScope
    model = MultiLoraTransformersModel(model_id=f'ms://{base_model}')

    # Define LoRA configuration: apply low-rank adapters to all linear layers
    lora_config = LoraConfig(target_modules='all-linear')

    # Attach the LoRA adapter named 'default' to the model.
    # gradient_accumulation_steps=2 means gradients are accumulated over 2 micro-batches
    # before an optimizer step, effectively doubling the batch size.
    model.add_adapter_to_model('default', lora_config, gradient_accumulation_steps=2)

    # Set the same chat template used during data preprocessing
    model.set_template('Qwen3_5Template')

    # Set the input processor (pads sequences on the right side)
    model.set_processor('InputProcessor', padding_side='right')

    # Use cross-entropy loss for language modeling
    model.set_loss('CrossEntropyLoss')

    # Use the Adam optimizer with a learning rate of 1e-4
    # (only the Adam optimizer is supported when the server uses Megatron)
    model.set_optimizer('Adam', lr=1e-4)

    # Use a linear learning rate scheduler
    # (LR schedulers are not supported when the server uses Megatron)
    # model.set_lr_scheduler('LinearLR')

    # Step 6: Optionally resume from a previous checkpoint
    if resume_path:
        logger.info(f'Resuming training from {resume_path}')
        model.load(resume_path, load_optimizer=True)

    # Step 7: Run the training loop
    logger.info(model.get_train_configs().model_dump())

    for epoch in range(3):
        logger.info(f'Starting epoch {epoch}')
        for step, batch in enumerate(dataloader):
            for sample in batch:
                for key in sample:
                    if isinstance(sample[key], np.ndarray):
                        sample[key] = sample[key].tolist()
                    elif isinstance(sample[key], torch.Tensor):
                        sample[key] = sample[key].cpu().numpy().tolist()
            # Forward pass + backward pass (computes gradients)
            model.forward_backward(inputs=batch)

            # Clip gradients and take one optimizer step
            model.clip_grad_and_step()
            # Equivalent to the following steps:
            # # Clip gradients to prevent exploding gradients (max norm = 1.0)
            # model.clip_grad_norm(1.0)
            # # Perform one optimizer step (update model weights)
            # model.step()
            # # Reset gradients to zero for the next iteration
            # model.zero_grad()
            # # Advance the learning rate scheduler by one step
            # model.lr_step()

            # Log the loss every 2 steps (aligned with gradient accumulation)
            if step % 2 == 0:
                metric = model.calculate_metric(is_training=True)
                logger.info(f'Current step {step} of {len(dataloader)}, metric: {metric.result}')

        # Step 8: Save the trained checkpoint
        twinkle_path = model.save(name=f'twinkle-epoch-{epoch}', save_optimizer=True)
        logger.info(f'Saved checkpoint: {twinkle_path}')

    # Step 9: Upload the checkpoint to ModelScope Hub
    # YOUR_USER_NAME = "your_username"
    # hub_model_id = f'{YOUR_USER_NAME}/twinkle-multi-modal'
    # model.upload_to_hub(
    #     checkpoint_dir=twinkle_path,
    #     hub_model_id=hub_model_id,
    #     async_upload=False
    # )
    # logger.info(f"Uploaded checkpoint to hub: {hub_model_id}")


if __name__ == '__main__':
    train()
```
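The script above relies on `gradient_accumulation_steps=2`: gradients from two micro-batches are combined before one optimizer step. The equivalence this exploits can be shown in plain Python, independent of twinkle or PyTorch (the toy MSE model and numbers are mine):

```python
# Plain-Python sketch of gradient accumulation: for a loss that is a mean
# over samples, averaging the mean gradients of equal-sized micro-batches
# equals the mean gradient of the full batch. Toy 1-D linear model with
# MSE loss; all values here are illustrative.
def grad_mse(w, xs, ys):
    # d/dw mean((w*x - y)^2) = mean(2 * (w*x - y) * x)
    return sum(2 * (w * x - y) * x for x, y in zip(xs, ys)) / len(xs)

w = 0.5
xs, ys = [1.0, 2.0, 3.0, 4.0], [2.0, 4.0, 6.0, 8.0]

# One full batch of 4 samples:
full = grad_mse(w, xs, ys)

# Two micro-batches of 2, with the per-micro-batch gradients averaged
# (this is what accumulating over 2 steps before one optimizer step does;
# the equality holds because the micro-batches are equal-sized):
accum = (grad_mse(w, xs[:2], ys[:2]) + grad_mse(w, xs[2:], ys[2:])) / 2

assert abs(full - accum) < 1e-12
```

This is also why the script logs metrics every 2 steps: each optimizer update corresponds to two forward/backward passes.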

cookbook/client/twinkle/modelscope/self_congnition.py

Lines changed: 1 addition & 1 deletion

```diff
@@ -21,7 +21,7 @@

 logger = get_logger()

-base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
+base_model = 'Qwen/Qwen3.5-4B'
 base_url = 'http://www.modelscope.cn/twinkle'

 # Step 2: Initialize the Twinkle client to communicate with the remote server.
```
