@@ -161,56 +161,23 @@ async def _proxy_request(self, request: Request, endpoint: str, base_model: str,
161161 headers .pop ('content-length' , None )
162162
163163 try :
164- if os .environ .get ('TWINKLE_DEBUG_PROXY' , '1' ) == '1' :
165- logger .info ('proxy_to_model endpoint=%s target_url=%s serve_multiplexed_model_id=%s' , endpoint ,
166- target_url , headers .get ('serve_multiplexed_model_id' ))
167- handle = serve .get_deployment_handle (
168- deployment_name = 'ModelManagement' , app_name = 'models-Qwen3-30B-A3B-Instruct-2507' )
169-
170- def make_fake_request (original_request : Request ):
171- """用 SimpleNamespace 模拟 Request"""
172- from types import SimpleNamespace
173- fake = SimpleNamespace ()
174- fake .headers = dict (original_request .headers )
175-
176- fake .state = SimpleNamespace ()
177- fake .state .request_id = headers .get ('serve_multiplexed_model_id' )
178- fake .state .token = getattr (original_request .state , 'token' , None )
179- return fake
180-
181- fake_request = make_fake_request (request )
182- import json
183- result = await getattr (
184- handle .options (multiplexed_model_id = headers .get ('serve_multiplexed_model_id' )), endpoint ).remote (
185- body = json .loads (body_bytes ),
186- request = fake_request ,
187- )
164+ if os .environ .get ('TWINKLE_DEBUG_PROXY' , '0' ) == '1' :
165+ logger .info ('proxy_to_model endpoint=%s target_url=%s x-ray-serve-request-id=%s' , endpoint ,
166+ target_url , headers .get ('x-ray-serve-request-id' ))
167+ rp_ = await self .client .request (
168+ method = request .method ,
169+ url = target_url ,
170+ content = body_bytes ,
171+ headers = headers ,
172+ params = request .query_params ,
173+ )
188174 if os .environ .get ('TWINKLE_DEBUG_PROXY' , '0' ) == '1' :
189175 logger .info ('proxy_to_model response status=%s body=%s' , rp_ .status_code , rp_ .text [:200 ])
190-
191- # 处理返回值
192- if hasattr (result , 'model_dump' ):
193- # Pydantic v2
194- content = json .dumps (result .model_dump ())
195- elif hasattr (result , 'dict' ):
196- # Pydantic v1
197- content = json .dumps (result .dict ())
198- elif isinstance (result , dict ):
199- content = json .dumps (result )
200- elif isinstance (result , (str , bytes )):
201- content = result
202- else :
203- content = json .dumps (result )
204-
205- # 判断是否是错误响应
206- if isinstance (result , types .RequestFailedResponse ):
207- status_code = 500
208- else :
209- status_code = 200
210176 return Response (
211- content = content ,
212- status_code = status_code ,
213- media_type = 'application/json' ,
177+ content = rp_ .content ,
178+ status_code = rp_ .status_code ,
179+ headers = dict (rp_ .headers ),
180+ media_type = rp_ .headers .get ('content-type' ),
214181 )
215182 except Exception as e :
216183 return Response (content = f'Proxy Error: { str (e )} ' , status_code = 502 )
0 commit comments