-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathvdb.py
More file actions
96 lines (77 loc) · 2.76 KB
/
vdb.py
File metadata and controls
96 lines (77 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()
# load_dotenv() already exports .env values into os.environ, so re-assigning
# the key to itself is unnecessary; the old self-assignment only had an effect
# when the key was missing, where it raised "TypeError: str expected, not
# NoneType". Fail with an explicit message instead.
if os.getenv("OPENAI_API_KEY") is None:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file"
    )
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import JSONLoader
import json
from pathlib import Path
from pprint import pprint
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_iris import IRISVector
# loader =PyPDFLoader("syallabi\MATH246.pdf")
# # documents= loader.load()
# # text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=20)
# docs = text_splitter.split_documents(documents)
# Module-level OpenAI embedding client; load_docs passes it to IRISVector as
# its embedding_function (search_q builds its own instance instead).
embeddings = OpenAIEmbeddings()
# embeddings = FastEmbedEmbeddings()
# db.add_documents(docs)
def load_docs(folder_path):
    """Split every PDF in ``folder_path`` into chunks and store them in IRIS.

    Each PDF is loaded with ``PyPDFLoader``, split with a
    ``CharacterTextSplitter`` (``chunk_size=250``, ``chunk_overlap=50``) and
    added to the ``"notes"`` collection of an ``IRISVector`` store. After
    ingestion a sample similarity search for ``"hello"`` is run and printed.

    Args:
        folder_path: Directory to scan. Only direct entries whose name ends
            in ``.pdf`` (case-insensitive) are ingested; everything else is
            skipped.

    Returns:
        None. Progress and the sample search result are printed.
    """
    # NOTE(review): credentials, port and namespace are hard-coded demo
    # values; only the hostname is configurable (IRIS_HOSTNAME).
    username = 'demo'
    password = 'demo'
    hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
    port = '1972'
    namespace = 'USER'
    CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"
    COLLECTION_NAME = "notes"

    # The splitter and the vector store do not depend on the file being
    # processed, so build them once. Creating the store up front also keeps
    # `db` defined for the final search when the folder has no PDFs.
    text_splitter = CharacterTextSplitter(chunk_size=250, chunk_overlap=50)
    db = IRISVector(
        embedding_function=embeddings,
        # presumably the vector size of the default OpenAI embedding model --
        # TODO confirm if the embedding model is ever changed
        dimension=1536,
        collection_name=COLLECTION_NAME,
        connection_string=CONNECTION_STRING,
    )

    for filename in os.listdir(folder_path):
        # os.listdir also yields sub-directories and non-PDF files, which
        # PyPDFLoader cannot parse.
        if not filename.lower().endswith(".pdf"):
            continue
        file_path = os.path.join(folder_path, filename)
        documents = PyPDFLoader(file_path).load()
        data = text_splitter.split_documents(documents)
        # A PDF without extractable text yields no chunks; skip the
        # embedding/insert round-trip in that case.
        if data:
            db.add_documents(data)
        print("done")

    # Sample query to show that the collection is searchable.
    ret = db.similarity_search("hello")
    print(ret)
def search_q(query, coll="canjson"):
    """Run a similarity search for ``query`` against an IRIS vector collection.

    Args:
        query: Text to search for.
        coll: Name of the collection to search.
            NOTE(review): the default is "canjson", while load_docs writes
            to "notes" -- confirm which collection callers expect.

    Returns:
        The result of ``IRISVector.similarity_search(query)``; it is also
        printed.
    """
    embeddings = OpenAIEmbeddings()
    # NOTE(review): credentials, port and namespace are hard-coded demo
    # values; only the hostname is configurable (IRIS_HOSTNAME).
    username = 'demo'
    password = 'demo'
    hostname = os.getenv('IRIS_HOSTNAME', 'localhost')
    port = '1972'
    namespace = 'USER'
    CONNECTION_STRING = f"iris://{username}:{password}@{hostname}:{port}/{namespace}"
    db = IRISVector(
        embedding_function=embeddings,
        # presumably the vector size of the default OpenAI embedding model --
        # TODO confirm if the embedding model is ever changed
        dimension=1536,
        collection_name=coll,
        connection_string=CONNECTION_STRING,
    )
    ret = db.similarity_search(query)
    print(ret)
    return ret
# print(f"Number of docs in vector store: {len(db.get()['ids'])}")
# Sample query against search_q's default collection.
# NOTE(review): this looks like a top-level statement, so it would run every
# time the module is loaded, including on import -- consider moving it under
# an `if __name__ == "__main__":` guard.
search_q("hi")
# load_docs("files")