"""27_embedding_qdrant_test.py

Latency check: generate an embedding via a Hugging Face inference endpoint,
run a filtered Qdrant vector search with it, and report per-request timings
for a batch of concurrent requests.
"""
import concurrent.futures
import json
import os
import time

import httpx
import pandas as pd
from tabulate import tabulate
# Credentials are read from the environment so that no secret lives in source
# control. An unset variable yields an empty string, in which case the remote
# services are expected to reject the request and the failure shows up in the
# recorded HTTP status codes.
# NOTE(review): the token and API key that were previously committed in this
# file must be treated as leaked and rotated.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
QDRANT_API_KEY = os.environ.get("QDRANT_API_KEY", "")

# Endpoints under test.
HF_EMBEDDING_URL = "https://py9zqfdpz8du3h4r.us-east-1.aws.endpoints.huggingface.cloud"
QDRANT_SEARCH_URL = "https://586ef24e-9132-423e-82f7-43b0078e7a60.us-east4-0.gcp.cloud.qdrant.io/collections/replicas/points/search"

# Fixed query text and the replica whose points the search is filtered to.
TEXT_INPUT = "Tell me about Prompting Happiness"
REPLICA_UUID = "d79a29c0-24f3-4b70-9046-8d116f9ce1bb"
def _timed_post(url, headers, payload):
    """POST *payload* as JSON to *url* and return ``(response, elapsed_ms)``.

    ``elapsed_ms`` is the wall-clock duration of the ``httpx.post`` call in
    milliseconds, rounded to two decimals. ``time.perf_counter`` is used
    because it is monotonic and intended for measuring short intervals.
    """
    started = time.perf_counter()
    response = httpx.post(url, headers=headers, json=payload, timeout=30)
    elapsed_ms = round((time.perf_counter() - started) * 1000, 2)
    return response, elapsed_ms


def run_single_test(index):
    """Run one embedding + Qdrant search round trip and time both steps.

    Args:
        index: Zero-based task number; reported one-based in the result row.

    Returns:
        A five-element list:
        ``[index + 1, embed_ms, qdrant_ms, embed_status, qdrant_status]``.
        When the embedding request does not return a 2xx status, the search
        is skipped and ``qdrant_ms`` / ``qdrant_status`` are ``None``.

    Raises:
        httpx.HTTPError: on transport-level failures (timeouts, connection
            errors); these are not caught here.
    """
    # Step 1: generate the embedding for the fixed query text.
    embed_resp, embed_time = _timed_post(
        HF_EMBEDDING_URL,
        {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {HF_TOKEN}",
        },
        {"inputs": TEXT_INPUT, "options": {"normalize": True}},
    )

    # Without a successful embedding there is no vector to search with; record
    # the failing status instead of raising an opaque error while indexing
    # into an error payload.
    if not 200 <= embed_resp.status_code < 300:
        return [index + 1, embed_time, None, embed_resp.status_code, None]

    # The first element of the JSON response is used as the query vector.
    embedding = embed_resp.json()[0]

    # Step 2: Qdrant search restricted to this replica's "file"-tagged points.
    qdrant_resp, qdrant_time = _timed_post(
        QDRANT_SEARCH_URL,
        {
            "Content-Type": "application/json",
            "api-key": QDRANT_API_KEY,
        },
        {
            "vector": embedding,
            "limit": 5,
            "filter": {
                "must": [
                    {"key": "replica_uuid", "match": {"value": REPLICA_UUID}},
                    {"key": "tag", "match": {"value": "file"}},
                ]
            },
            "with_payload": True,
            "with_vectors": False,
        },
    )

    return [
        index + 1,
        embed_time,
        qdrant_time,
        embed_resp.status_code,
        qdrant_resp.status_code,
    ]
# Number of requests to issue; also used as the thread-pool size so that all
# of them are in flight at the same time.
NUM_REQUESTS = 30

# Column headers shared by the per-request tables, the summary and the CSV.
RESULT_COLUMNS = [
    "#",
    "Embedding (ms)",
    "Qdrant (ms)",
    "Embed Status",
    "Qdrant Status",
]

# Run the tasks concurrently, printing each row as soon as it completes.
results = []
with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_REQUESTS) as executor:
    futures = {
        executor.submit(run_single_test, i): i for i in range(NUM_REQUESTS)
    }
    for future in concurrent.futures.as_completed(futures):
        try:
            result = future.result()
        except Exception as exc:
            # Top-level boundary: one failed request must not discard the
            # measurements already collected from the others.
            print(f"Request #{futures[future] + 1} failed: {exc!r}")
            continue
        results.append(result)
        print(tabulate([result], headers=RESULT_COLUMNS, tablefmt="grid"))

# Sort by request number (completion order is arbitrary) and tabulate.
results.sort(key=lambda row: row[0])
df = pd.DataFrame(results, columns=RESULT_COLUMNS)

# Append a summary row; only the two timing columns are averaged.
df.loc["Average"] = [
    "",
    round(df["Embedding (ms)"].astype(float).mean(), 2),
    round(df["Qdrant (ms)"].astype(float).mean(), 2),
    "",
    "",
]

print("\nFinal Performance Summary:")
print(tabulate(df, headers="keys", tablefmt="grid"))

# index=False omits the index, so the "Average" label is not written; the
# summary row is the last CSV row and has an empty "#" field.
df.to_csv("embedding_qdrant_latency.csv", index=False)