Skip to content

Commit e7b842f

Browse files
committed
update doc
1 parent ade4359 commit e7b842f

File tree

12 files changed

+584
-18
lines changed

12 files changed

+584
-18
lines changed

cookbook/client/tinker/lora.py renamed to cookbook/client/tinker/custom_service/lora.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@
2222
# BASE_URL can be a local server endpoint such as http://localhost:8000, or
2323
# points to a previously deployed remote server, or
2424
# modelscope server such as 'http://www.modelscope.cn/twinkle'
25-
base_url='<BASE_URL>',
25+
base_url='http://localhost:8000',
2626
# API_KEY can be empty or a meaningful one according to server configuration
27-
api_key='<API_KEY>'
27+
api_key='EMPTY-TOKEN'
2828
)
2929

3030
# Step 4: List models available on the server to verify the connection
@@ -61,7 +61,7 @@
6161

6262
# Step 6: Create or resume a training client.
6363
# If resume_path is set, it restores both model weights and optimizer state.
64-
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
64+
base_model = 'Qwen/Qwen3-4B'
6565
if not resume_path:
6666
training_client = service_client.create_lora_training_client(base_model=base_model)
6767
else:
File renamed without changes.

cookbook/client/tinker/megatron/server_config_7b.yaml renamed to cookbook/client/tinker/custom_service/megatron/server_config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ applications:
2424
server_config:
2525
per_token_model_limit: 3 # Maximum number of models (adapters) per token (server-globally enforced)
2626
supported_models:
27-
- Qwen/Qwen2.5-7B-Instruct
27+
- Qwen/Qwen3-4B
2828
deployments:
2929
- name: TinkerCompatServer
3030
autoscaling_config:
@@ -37,11 +37,11 @@ applications:
3737
# 2. Model Service (commented out) - Would host the base model for training.
3838
# Uncomment and configure if you need a training model worker.
3939
- name: models-Qwen2.5-7B-Instruct
40-
route_prefix: /api/v1/model/Qwen/Qwen2.5-7B-Instruct
40+
route_prefix: /api/v1/model/Qwen/Qwen3-4B
4141
import_path: model
4242
args:
4343
use_megatron: true
44-
model_id: "ms://Qwen/Qwen2.5-7B-Instruct" # ModelScope model identifier
44+
model_id: "ms://Qwen/Qwen3-4B" # ModelScope model identifier
4545
max_length: 10240
4646
nproc_per_node: 2 # Number of GPU processes per node
4747
device_group:
@@ -74,10 +74,10 @@ applications:
7474
# 3. Sampler Service - Runs inference / sampling using vLLM engine
7575
# Used for generating text from the model (e.g., evaluating LoRA results).
7676
# - name: sampler-Qwen2.5-7B-Instruct
77-
# route_prefix: /api/v1/sampler/Qwen/Qwen2.5-7B-Instruct
77+
# route_prefix: /api/v1/sampler/Qwen/Qwen3-4B
7878
# import_path: sampler
7979
# args:
80-
# model_id: "ms://Qwen/Qwen2.5-7B-Instruct" # ModelScope model identifier
80+
# model_id: "ms://Qwen/Qwen3-4B" # ModelScope model identifier
8181
# nproc_per_node: 2 # Number of GPU processes per node
8282
# sampler_type: vllm # Inference engine: 'vllm' (fast) or 'torch' (TorchSampler)
8383
# engine_args: # vLLM engine-specific settings

cookbook/client/tinker/sample.py renamed to cookbook/client/tinker/custom_service/sample.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@
1717
from tinker import ServiceClient
1818

1919
# Step 2: Define the base model and connect to the server
20-
base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
20+
base_model = 'Qwen/Qwen3-4B'
2121
service_client = ServiceClient(
22-
base_url='http://www.modelscope.cn/twinkle',
23-
api_key=os.environ.get('MODELSCOPE_TOKEN')
22+
base_url='http://localhost:8000',
23+
api_key='EMPTY-TOKEN'
2424
)
2525

2626
# Step 3: Create a sampling client by loading weights from a saved checkpoint.

cookbook/client/tinker/self_congnition.py renamed to cookbook/client/tinker/custom_service/self_congnition.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@
2323
from tinker import ServiceClient
2424

2525
# The base model to fine-tune / evaluate
26-
# base_model = 'Qwen/Qwen3-30B-A3B-Instruct-2507'
27-
base_model = 'Qwen/Qwen2.5-7B-Instruct'
26+
base_model = 'Qwen/Qwen3-4B'
2827
base_url = 'http://localhost:8000'
2928

3029

0 commit comments

Comments
 (0)