test_pipeline.py
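"""Smoke test for the Knexa RAG pipeline.

Runs the end-to-end flow on a single dummy document:
chunking -> embedding -> FAISS indexing -> similarity search.

Run from the repository root (e.g. `python test_pipeline.py`) so the
`knexa` package is importable via the current working directory.
"""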
import sys
import os

# Ensure we can import knexa
sys.path.append(os.getcwd())

from knexa.core.models import Document
from knexa.ingestion.chunking import RecursiveCharacterChunker
from knexa.embeddings.model import EmbeddingModel
from knexa.index.faiss_index import FAISSStore


def main():
    print("Initializing Pipeline...")

    # 1. Create Dummy Document
    doc = Document(content="Knexa is an enterprise RAG system. It uses FAISS and FastAPI. It is designed for high scalability.")
    print(f"Document created: {doc.id}")

    # 2. Chunking
    chunker = RecursiveCharacterChunker(chunk_size=50)
    chunks = chunker.chunk(doc)
    print(f"Chunks created: {len(chunks)}")
    for c in chunks:
        print(f" - [{len(c.content)} chars] {c.content}")

    # 3. Embedding
    print("Loading Embedding Model...")
    embed_model = EmbeddingModel(model_name="all-MiniLM-L6-v2")  # Small model for test
    embeddings = embed_model.encode([c.content for c in chunks])
    print(f"Embeddings generated: {len(embeddings)} vectors of dim {len(embeddings[0])}")

    # 4. Indexing
    dimension = len(embeddings[0])
    store = FAISSStore(dimension=dimension)
    store.add_chunks(chunks, embeddings)
    print("Chunks indexed in FAISS.")

    # 5. Retrieval
    query = "What database does Knexa use?"
    print(f"Searching for: '{query}'")
    query_vec = embed_model.encode(query)[0]
    results = store.search(query_vec, k=2)
    print("Results:")
    for chunk, score in results:
        print(f" - (Score: {score:.4f}) {chunk.content}")


if __name__ == "__main__":
    main()