test_proxy.py
#!/usr/bin/env python3
"""
Test script for the LLM proxy server.

This script sends a request to the proxy server and prints the streaming response.
"""
import json

import requests

# Proxy server URL
PROXY_URL = "http://localhost:8081/v1/chat/completions"


def test_proxy_server():
    """Test the proxy server with a simple chat completion request."""
    headers = {
        "Content-Type": "application/json",
        "Posit-Client-Type": "positron-assistant"  # This header should be stripped by the proxy
    }

    # Sample request based on the provided example
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "developer",
                "content": "You are a helpful assistant that provides concise answers."
            },
            {
                "role": "user",
                "content": "Tell me a haiku."
            }
        ],
        "stream": True,
        "stream_options": {
            "include_usage": True
        }
    }

    print("Sending request to proxy server...")
    print("URL:", PROXY_URL)
    print("Headers:", headers)
    print("Payload:", json.dumps(payload, indent=2))
    print("\nStreaming response:")

    try:
        # Send request to the proxy server with stream=True to get chunked response
        with requests.post(PROXY_URL, headers=headers, json=payload, stream=True) as response:
            if not response.ok:
                print(f"Error: {response.status_code} {response.text}")
                return

            # Process each chunk from the streamed response
            accumulated_text = []
            for line in response.iter_lines():
                if line:
                    decoded_line = line.decode('utf-8')
                    parsed_data = None

                    # Handle the line based on content
                    if decoded_line.startswith("data: "):
                        data_str = decoded_line[6:]  # Remove 'data: ' prefix
                        try:
                            # Parse and pretty-print JSON
                            parsed_data = json.loads(data_str)
                            pretty_json = json.dumps(parsed_data, indent=2)
                            print(f"data: {pretty_json}")

                            # Try to extract text content based on format
                            text = None

                            # OpenAI format
                            if "choices" in parsed_data and len(parsed_data["choices"]) > 0:
                                delta = parsed_data["choices"][0].get("delta", {})
                                if "content" in delta and delta["content"]:
                                    text = delta["content"]
                            # Anthropic format
                            elif parsed_data.get("type") == "content_block_delta":
                                delta = parsed_data.get("delta", {})
                                if delta.get("type") == "text_delta":
                                    text = delta.get("text", "")
                            # OpenAI Responses format
                            elif parsed_data.get("type") == "response.output_text.delta":
                                text = parsed_data.get("delta")

                            # Add to accumulated text if we found content
                            if text:
                                accumulated_text.append(text)
                        except json.JSONDecodeError:
                            # Not valid JSON, print as-is
                            print(decoded_line)
                    else:
                        # Not JSON data, print as-is
                        print(decoded_line)

            # Display the accumulated complete response
            if accumulated_text:
                print("\n" + "=" * 60)
                print("Accumulated Complete Response:")
                print("=" * 60)
                print("".join(accumulated_text))
                print("=" * 60)
    except requests.exceptions.RequestException as e:
        print(f"Request error: {e}")
        return


if __name__ == "__main__":
    test_proxy_server()
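
# Example invocation (a sketch; it assumes the proxy server is already running
# locally on port 8081, matching PROXY_URL above):
#
#   python test_proxy.py
#
# The script prints each SSE "data:" chunk as pretty-printed JSON, then the
# accumulated text of the completion.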