-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcortex_logic.py
More file actions
89 lines (68 loc) · 2.94 KB
/
cortex_logic.py
File metadata and controls
89 lines (68 loc) · 2.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import textcortex
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
# Load variables from a .env file into the process environment at import time,
# so later os.getenv() lookups (e.g. TEXTCORTEX_API_KEY) can see them.
load_dotenv()
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF file to read. It is opened in binary mode.

    Returns:
        One string holding the extracted text of all pages in page order,
        with nothing inserted between pages. Empty when the PDF has no pages
        or no page yields any text.
    """
    with open(pdf_path, 'rb') as file:
        pdf_reader = PdfReader(file)
        # Walk the pages directly and join once instead of indexing by
        # position and growing a string with ``+=``. ``or ''`` keeps a page
        # whose extraction yields nothing (None) from raising TypeError.
        # The join runs inside the ``with`` so the file is still open while
        # pages are being read.
        return ''.join(page.extract_text() or '' for page in pdf_reader.pages)
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks with a recursive character splitter.

    Args:
        text: The string to split.
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 10000, the value previously
            hard-coded here.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 1000, the value previously
            hard-coded here.

    Returns:
        The result of ``split_text(text)`` on the configured splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
def get_textcortex_client():
    """Return the TextCortex credential configured in the environment.

    Returns:
        The value of the ``TEXTCORTEX_API_KEY`` environment variable, or
        ``None`` when that variable is not set.
    """
    # NOTE(review): this hands back the raw key string, not a client object,
    # yet user_input() calls ``.measure_similarity`` on the result. Confirm
    # how the TextCortex client is meant to be constructed.
    return os.environ.get('TEXTCORTEX_API_KEY')
def get_conversational_chain():
    """Create the chat model used to answer questions.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance constructed with
        ``model="gemini-pro"`` and ``temperature=1.0``.
    """
    return ChatGoogleGenerativeAI(temperature=1.0, model="gemini-pro")
def user_input(user_question, textcortex_client, pdf_texts):
    """Answer a question using the PDF text most similar to it.

    Each entry of ``pdf_texts`` is scored against the question with
    ``textcortex_client.measure_similarity``. The highest-scoring text is
    then sent to the chat model together with the question.

    Args:
        user_question: The question to answer.
        textcortex_client: Object exposing
            ``measure_similarity(question, text)`` whose results can be
            compared with each other.
        pdf_texts: Sequence of extracted PDF texts to choose the context from.

    Returns:
        Whatever the chat model's ``invoke`` call returns for the combined
        context-and-question prompt.

    Raises:
        ValueError: If ``pdf_texts`` is empty, so no context can be selected.
    """
    if not pdf_texts:
        # Previously surfaced as an opaque "max() arg is an empty sequence".
        raise ValueError("pdf_texts must contain at least one document")

    similarity_scores = [
        textcortex_client.measure_similarity(user_question, pdf_text)
        for pdf_text in pdf_texts
    ]
    # Arg-max over positions; on ties max() keeps the earliest index.
    best_index = max(range(len(similarity_scores)),
                     key=similarity_scores.__getitem__)

    chain = get_conversational_chain()
    # Send context and question as one prompt. Passing them as two positional
    # arguments would bind the question to the model call's second parameter
    # rather than making it part of the input.
    prompt = f"Context:\n{pdf_texts[best_index]}\n\nQuestion:\n{user_question}"
    return chain.invoke(prompt)
def main():
    """Run the interactive command-line flow.

    Repeatedly asks whether to add another PDF. Answering ``y`` prompts for a
    path and records it. Answering ``n`` ends the collection once at least
    one path has been recorded; with none recorded, it reports that and asks
    again. Any other answer prints a hint and asks again. The collected PDFs
    are then read, one question is requested, and the answer from
    ``user_input`` is printed.
    """
    pdf_paths = []
    while True:
        # Normalise once so stray whitespace or capitals still match.
        answer = input(
            "Upload your contracts and Click on Submit and Process (y/n): "
        ).strip().lower()

        if answer == "y":
            pdf_paths.append(input("Enter the path to your PDF file: "))
        elif answer == "n":
            if pdf_paths:
                print("Processing PDF files...")
                break  # Leave the loop only when there is something to process
            print("No PDF files were uploaded.")
        else:
            print("Invalid input. Please enter 'y' or 'n'.")

    # The loop above only ends with a non-empty list, so no emptiness check
    # is needed here.
    pdf_texts = [extract_text_from_pdf(pdf_path) for pdf_path in pdf_paths]
    textcortex_client = get_textcortex_client()

    user_question = input("Ask questions related to your contracts: ")
    # Skip empty or whitespace-only questions rather than querying the model.
    if user_question.strip():
        response = user_input(user_question, textcortex_client, pdf_texts)
        print("Response:", response)
# Run the interactive flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()