-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdebug_vectors.py
More file actions
56 lines (47 loc) · 1.78 KB
/
debug_vectors.py
File metadata and controls
56 lines (47 loc) · 1.78 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""
Check all chunks in the vector store
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from src.config.settings import settings
from pinecone import Pinecone
# Open a Pinecone client with the configured API key and bind the target index.
pc = Pinecone(api_key=settings.PINECONE_API_KEY)
index = pc.Index(settings.PINECONE_INDEX_NAME)

# Fetch the index summary once, then report each field in turn. Every lookup
# sits right before its print so output appears progressively.
stats = index.describe_index_stats()

total_vectors = stats['total_vector_count']
print(f"Total vectors in index: {total_vectors}")

dimension = stats['dimension']
print(f"Dimension: {dimension}")

namespaces = stats.get('namespaces', {})
print(f"\nNamespaces: {namespaces}")
# Banner announcing the sampling phase: an 80-column rule above and below the
# heading, with a blank line before the first rule.
separator = "=" * 80
print("\n" + separator)
print("Attempting to list some vectors...")
print(separator)
# Vector IDs are not enumerated here; the index is sampled with semantic
# queries instead, which requires an embedder. The import is kept at this
# point in the script, after the stats have already been printed.
from src.utils.embeddings import EmbeddingGenerator

embedder_options = {
    "model_name": settings.EMBEDDING_MODEL,
    "device": settings.EMBEDDING_DEVICE,
}
embedding_gen = EmbeddingGenerator(**embedder_options)
# Probe the index with a few queries about interfaces. For each of the top
# matches, report its score, its chunk id, and whether the stored chunk text
# mentions "interface"; when it does, show the matching lines.
test_queries = [
    "interface class methods",
    "abstract class interface difference",
    "what is interface definition",
]

for query in test_queries:
    print(f"\n--- Searching for: '{query}' ---")

    # NOTE(review): assumes generate_embeddings() returns a single flat vector
    # for a single string, in a form index.query() accepts -- confirm against
    # EmbeddingGenerator.
    query_emb = embedding_gen.generate_embeddings(query)

    # No namespace argument is passed, so only the default namespace is
    # searched; compare with the "Namespaces" line printed by the stats step.
    results = index.query(vector=query_emb, top_k=5, include_metadata=True)

    if not results.matches:
        # Say so explicitly: silence here is indistinguishable from a skipped
        # query when reading the output.
        print("(no matches returned)")
        continue

    for i, match in enumerate(results.matches, 1):
        # A vector stored without metadata has nothing to return, so the
        # attribute may be missing or None; fall back to an empty mapping
        # instead of crashing the whole debug run.
        metadata = getattr(match, 'metadata', None) or {}
        # Likewise tolerate a missing or null 'text' field.
        text = metadata.get('text') or ''
        has_interface = 'interface' in text.lower()
        print(f"{i}. Score: {match.score:.4f}, Chunk: {metadata.get('chunk_id', 'N/A')}, "
              f"Has 'interface': {'YES' if has_interface else 'NO'}")
        if has_interface:
            # Show each line that mentions the term, truncated to 150
            # characters to keep the output readable.
            for line in text.split('\n'):
                if 'interface' in line.lower():
                    print(f" -> {line[:150]}")