Skip to content

Commit 8f7e086

Browse files
committed
wip
1 parent a5e2aef commit 8f7e086

File tree

4 files changed

+17
-49
lines changed

4 files changed

+17
-49
lines changed

ROADMAP.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
 - [ ] 支持DPO对齐训练
 - [ ] 支持colocate RL训练
 - [ ] Preprocess支持batched
+- [ ] 对多replica的支持和粘滞路由
 
 ### 网络能力
 
@@ -84,5 +85,6 @@
 - [ ] Support for DPO alignment training
 - [ ] Support for colocate RL training
 - [ ] Support for batched preprocessing
+- [ ] Support for multiple replicas and sticky routing
 
 ### Networking Capabilities

src/twinkle/server/tinker/model.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ def build_model_app(model_id: str,
5555
Returns:
5656
Configured Ray Serve deployment bound with parameters
5757
"""
58-
import ray
5958
app = FastAPI()
6059

6160
@app.middleware('http')

src/twinkle/server/tinker/server.py

Lines changed: 14 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -161,56 +161,23 @@ async def _proxy_request(self, request: Request, endpoint: str, base_model: str,
161161
headers.pop('content-length', None)
162162

163163
try:
164-
if os.environ.get('TWINKLE_DEBUG_PROXY', '1') == '1':
165-
logger.info('proxy_to_model endpoint=%s target_url=%s serve_multiplexed_model_id=%s', endpoint,
166-
target_url, headers.get('serve_multiplexed_model_id'))
167-
handle = serve.get_deployment_handle(
168-
deployment_name='ModelManagement', app_name='models-Qwen3-30B-A3B-Instruct-2507')
169-
170-
def make_fake_request(original_request: Request):
171-
"""用 SimpleNamespace 模拟 Request"""
172-
from types import SimpleNamespace
173-
fake = SimpleNamespace()
174-
fake.headers = dict(original_request.headers)
175-
176-
fake.state = SimpleNamespace()
177-
fake.state.request_id = headers.get('serve_multiplexed_model_id')
178-
fake.state.token = getattr(original_request.state, 'token', None)
179-
return fake
180-
181-
fake_request = make_fake_request(request)
182-
import json
183-
result = await getattr(
184-
handle.options(multiplexed_model_id=headers.get('serve_multiplexed_model_id')), endpoint).remote(
185-
body=json.loads(body_bytes),
186-
request=fake_request,
187-
)
164+
if os.environ.get('TWINKLE_DEBUG_PROXY', '0') == '1':
165+
logger.info('proxy_to_model endpoint=%s target_url=%s x-ray-serve-request-id=%s', endpoint,
166+
target_url, headers.get('x-ray-serve-request-id'))
167+
rp_ = await self.client.request(
168+
method=request.method,
169+
url=target_url,
170+
content=body_bytes,
171+
headers=headers,
172+
params=request.query_params,
173+
)
188174
if os.environ.get('TWINKLE_DEBUG_PROXY', '0') == '1':
189175
logger.info('proxy_to_model response status=%s body=%s', rp_.status_code, rp_.text[:200])
190-
191-
# 处理返回值
192-
if hasattr(result, 'model_dump'):
193-
# Pydantic v2
194-
content = json.dumps(result.model_dump())
195-
elif hasattr(result, 'dict'):
196-
# Pydantic v1
197-
content = json.dumps(result.dict())
198-
elif isinstance(result, dict):
199-
content = json.dumps(result)
200-
elif isinstance(result, (str, bytes)):
201-
content = result
202-
else:
203-
content = json.dumps(result)
204-
205-
# 判断是否是错误响应
206-
if isinstance(result, types.RequestFailedResponse):
207-
status_code = 500
208-
else:
209-
status_code = 200
210176
return Response(
211-
content=content,
212-
status_code=status_code,
213-
media_type='application/json',
177+
content=rp_.content,
178+
status_code=rp_.status_code,
179+
headers=dict(rp_.headers),
180+
media_type=rp_.headers.get('content-type'),
214181
)
215182
except Exception as e:
216183
return Response(content=f'Proxy Error: {str(e)}', status_code=502)

src/twinkle/server/twinkle/model.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ def upload_to_hub(self, request: Request, body: UploadToHubRequest):
491491
return {'result': body.hub_model_id}
492492

493493
@app.post('/add_adapter_to_model')
494-
async def add_adapter_to_model(self, request: Request, body: AddAdapterRequest):
494+
def add_adapter_to_model(self, request: Request, body: AddAdapterRequest):
495495
"""
496496
Add a new adapter to the model.
497497

0 commit comments

Comments
 (0)