modelscope
diff --git a/‎docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/source_en/Usage Guide/Server and Client/Tinker-Compatible-Client.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source_zh/使用指引/服务端和客户端/Tinker兼容客户端.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/source_zh/使用指引/服务端和客户端/Tinker兼容客户端.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/twinkle/server/launcher.py‎
Lines changed: 5 additions & 0 deletions b/‎src/twinkle/server/launcher.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/twinkle/server/tinker/proxy.py‎
Lines changed: 180 additions & 0 deletions b/‎src/twinkle/server/tinker/proxy.py‎
Lines changed: 180 additions & 0 deletions
@@ -28,7 +28,7 @@ for item in service_client.get_server_capabilities().supported_models:
 When calling `init_tinker_client`, the following operations are automatically executed:
 
 1. **Patch Tinker SDK**: Bypass Tinker's `tinker://` prefix validation, allowing it to connect to standard HTTP addresses
-2. **Set Request Headers**: Inject necessary authentication headers such as `serve_multiplexed_model_id` and `Authorization`
+2. **Set Request Headers**: Inject the necessary request headers, such as `X-Ray-Serve-Request-Id` (request identifier) and `Authorization` (authentication)
 
 After initialization, simply import `from tinker import ServiceClient` to connect to Twinkle Server, and **all existing Tinker training code can be used directly** without any modifications.
 
 
@@ -28,7 +28,7 @@ for item in service_client.get_server_capabilities().supported_models:
 调用 `init_tinker_client` 时，会自动执行以下操作：
 
 1. **Patch Tinker SDK**：绕过 Tinker 的 `tinker://` 前缀校验，使其可以连接到标准 HTTP 地址
-2. **设置请求头**：注入 `serve_multiplexed_model_id` 和 `Authorization` 等必要的认证头
+2. **设置请求头**：注入 `X-Ray-Serve-Request-Id`（请求标识）和 `Authorization`（认证）等必要的请求头
 
 初始化之后，直接导入 `from tinker import ServiceClient` 即可连接到 Twinkle Server，**所有已有的 Tinker 训练代码都可以直接使用**，无需任何修改。
 
 
@@ -214,6 +214,11 @@ def _deploy_application(self, app_config: dict[str, Any]) -> None:
                 # Copy all deployment options from the config, except 'name'.
                 deploy_options = {k: v for k, v in deploy_config.items() if k != 'name'}
 
+        # Pass http_options to server apps for internal proxy routing
+        http_options = self.config.get('http_options', {})
+        if http_options:
+            args['http_options'] = http_options
+
         # Build and deploy the application
         app = builder(deploy_options=deploy_options, **{k: v for k, v in args.items()})
 
 
@@ -0,0 +1,180 @@
+# Copyright (c) ModelScope Contributors. All rights reserved.
+"""
+Proxy utilities for forwarding requests to internal services.
+
+This module provides HTTP proxy functionality to route requests from the Tinker server
+to appropriate model or sampler services based on base_model routing.
+"""
+
+from __future__ import annotations
+
+import httpx
+import os
+from fastapi import Request, Response
+from typing import Any
+
+from twinkle.utils.logger import get_logger
+
+logger = get_logger()
+
+
+class ServiceProxy:
+    """HTTP proxy for routing requests to internal model and sampler services.
+
+    This proxy handles:
+    1. URL construction using localhost to avoid external routing loops
+    2. Header forwarding with appropriate cleanup
+    3. Debug logging for troubleshooting
+    4. Error handling and response forwarding
+    """
+
+    def __init__(
+        self,
+        http_options: dict[str, Any] | None = None,
+        route_prefix: str = '/api/v1',
+    ):
+        """Initialize the service proxy.
+
+        Args:
+            http_options: HTTP server options (host, port) for internal routing
+            route_prefix: URL prefix for routing (default: '/api/v1')
+        """
+        self.http_options = http_options or {}
+        self.route_prefix = route_prefix
+        # Disable proxy for internal requests to avoid routing through external proxies
+        self.client = httpx.AsyncClient(timeout=None, trust_env=False)
+
+    def _build_target_url(self, service_type: str, base_model: str, endpoint: str) -> str:
+        """Build the target URL for internal service routing.
+
+        Constructs URLs using localhost to avoid extra external hops.
+        When requests come from www.modelscope.com/twinkle, we proxy to
+        localhost:port directly instead of back to modelscope.com.
+
+        Args:
+            service_type: Either 'model' or 'sampler'
+            base_model: The base model name for routing
+            endpoint: The target endpoint name
+
+        Returns:
+            Complete target URL for the internal service
+        """
+        prefix = self.route_prefix.rstrip('/') if self.route_prefix else ''
+        host = self.http_options.get('host', 'localhost')
+        port = self.http_options.get('port', 8000)
+
+        # Use localhost for internal routing
+        if host == '0.0.0.0':
+            host = 'localhost'
+
+        base_url = f'http://{host}:{port}'
+        return f'{base_url}{prefix}/{service_type}/{base_model}/{endpoint}'
+
+    def _prepare_headers(self, request_headers: dict[str, str]) -> dict[str, str]:
+        """Prepare headers for proxying by removing problematic headers.
+
+        Args:
+            request_headers: Original request headers
+
+        Returns:
+            Cleaned headers safe for proxying
+        """
+        headers = dict(request_headers)
+        # Remove headers that should not be forwarded
+        headers.pop('host', None)
+        headers.pop('content-length', None)
+        # Add serve_multiplexed_model_id for sticky sessions
+        headers['serve_multiplexed_model_id'] = request_headers.get('X-Ray-Serve-Request-Id')
+        return headers
+
+    async def proxy_request(
+        self,
+        request: Request,
+        endpoint: str,
+        base_model: str,
+        service_type: str,
+    ) -> Response:
+        """Generic proxy method to forward requests to model or sampler services.
+
+        This method consolidates the common proxy logic for both model and sampler endpoints.
+
+        Args:
+            request: The incoming FastAPI request
+            endpoint: The target endpoint name (e.g., 'create_model', 'asample')
+            base_model: The base model name for routing
+            service_type: Either 'model' or 'sampler' to determine the target service
+
+        Returns:
+            Proxied response from the target service
+        """
+        body_bytes = await request.body()
+        target_url = self._build_target_url(service_type, base_model, endpoint)
+        headers = self._prepare_headers(dict(request.headers))
+
+        try:
+            # Debug logging for troubleshooting proxy issues
+            if os.environ.get('TWINKLE_DEBUG_PROXY', '0') == '1':
+                logger.info(
+                    'proxy_request service=%s endpoint=%s target_url=%s request_id=%s',
+                    service_type,
+                    endpoint,
+                    target_url,
+                    headers.get('x-ray-serve-request-id'),
+                )
+
+            # Forward the request to the target service
+            response = await self.client.request(
+                method=request.method,
+                url=target_url,
+                content=body_bytes,
+                headers=headers,
+                params=request.query_params,
+            )
+
+            # Debug logging for response
+            if os.environ.get('TWINKLE_DEBUG_PROXY', '0') == '1':
+                logger.info(
+                    'proxy_response status=%s body_preview=%s',
+                    response.status_code,
+                    response.text[:200],
+                )
+
+            return Response(
+                content=response.content,
+                status_code=response.status_code,
+                headers=dict(response.headers),
+                media_type=response.headers.get('content-type'),
+            )
+        except Exception as e:
+            logger.error('Proxy error: %s', str(e), exc_info=True)
+            return Response(content=f'Proxy Error: {str(e)}', status_code=502)
+
+    async def proxy_to_model(self, request: Request, endpoint: str, base_model: str) -> Response:
+        """Proxy request to model endpoint.
+
+        Routes the request to the appropriate model deployment based on base_model.
+
+        Args:
+            request: The incoming FastAPI request
+            endpoint: The target endpoint name (e.g., 'create_model', 'forward')
+            base_model: The base model name for routing
+
+        Returns:
+            Proxied response from the model service
+        """
+        return await self.proxy_request(request, endpoint, base_model, 'model')
+
+    async def proxy_to_sampler(self, request: Request, endpoint: str, base_model: str) -> Response:
+        """Proxy request to sampler endpoint.
+
+        Routes the request to the appropriate sampler deployment based on base_model.
+
+        Args:
+            request: The incoming FastAPI request
+            endpoint: The target endpoint name (e.g., 'asample')
+            base_model: The base model name for routing
+
+        Returns:
+            Proxied response from the sampler service
+        """
+        return await self.proxy_request(request, endpoint, base_model, 'sampler')