-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpdf-bot.py
More file actions
120 lines (88 loc) · 3.35 KB
/
pdf-bot.py
File metadata and controls
120 lines (88 loc) · 3.35 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import os
import sys
import openai
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
load_dotenv()
def get_pdf_text(pdf_paths):
    """Extract and concatenate the text of every page of every PDF.

    Args:
        pdf_paths: iterable of filesystem paths to PDF files.

    Returns:
        str: all extracted page text joined together ("" if nothing
        could be extracted).
    """
    parts = []
    for pdf_path in pdf_paths:
        pdf_reader = PdfReader(pdf_path)
        for page in pdf_reader.pages:
            # BUG FIX: extract_text() returns None for image-only /
            # non-text pages; the original `text += ...` raised TypeError.
            parts.append(page.extract_text() or "")
    # join() instead of repeated += avoids quadratic string building.
    return "".join(parts)
def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Chunks are ~1000 characters, split on newlines, with a 200-character
    overlap so context isn't lost at chunk boundaries.
    """
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in an in-memory FAISS store.

    Args:
        text_chunks: list of strings to embed.

    Returns:
        FAISS vector store over the embedded chunks.
    """
    # SECURITY FIX: the original hard-coded an OpenAI API key in source.
    # Read it from the environment instead (load_dotenv() at module
    # import time populates OPENAI_API_KEY from a .env file).
    embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over the vector store.

    The chain pairs a ChatOpenAI LLM with a buffer memory keyed as
    'chat_history' so follow-up questions keep their context.
    """
    chat_memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        retriever=vectorstore.as_retriever(),
        memory=chat_memory,
    )
def compare_contract_pdfs(pdf_path1, pdf_path2):
    """Send both contracts' text to the OpenAI completion API.

    Args:
        pdf_path1: path to the first contract PDF.
        pdf_path2: path to the second contract PDF.

    Returns:
        str: the text generated by the model.
    """
    # BUG FIX: get_pdf_text expects an iterable of paths; the original
    # passed a bare string, which iterated over the path's characters.
    text1 = get_pdf_text([pdf_path1])
    text2 = get_pdf_text([pdf_path2])
    # BUG FIX: the original used a plain (non-f) string, so the literal
    # placeholders "{text1}"/"{text2}" were sent instead of the contracts.
    comparison_prompt = f"Contract 1: {text1}\nContract 2: {text2}"
    response = openai.Completion.create(
        engine="text-davinci-003",  # legacy completions model
        prompt=comparison_prompt,
        max_tokens=100,  # cap on the generated comparison length
        temperature=0,   # deterministic output
        top_p=1,
        n=1,
        stream=False
    )
    # The completions API returns the generated text under choices[0].
    comparison_result = response['choices'][0]['text']
    return comparison_result
def handle_userinput(user_question, conversation_chain):
    """Run one question through the chain and print the chat transcript.

    Messages alternate user/bot: even indices in the returned
    'chat_history' are user turns, odd indices are bot turns.
    """
    result = conversation_chain({'question': user_question})
    for idx, message in enumerate(result['chat_history']):
        if idx % 2 == 0:
            print(f"User: {message.content}")
        else:
            print(f"Bot: {message.content}")
def main(pdf_paths):
    """Drive the PDF Q&A and contract-comparison workflow.

    Args:
        pdf_paths: list of PDF file paths from the command line
            (at least one; two are needed for the comparison step).
    """
    # Load environment variables (OPENAI_API_KEY etc.) from .env.
    load_dotenv()
    # Ask for the question up front, before the (slow) indexing work.
    user_question = input("Ask a question about your documents:")
    # Extract, chunk, embed, and index the documents.
    raw_text = get_pdf_text(pdf_paths)
    text_chunks = get_text_chunks(raw_text)
    vectorstore = get_vectorstore(text_chunks)
    conversation_chain = get_conversation_chain(vectorstore)
    # Answer the question (skip if the user entered nothing).
    if user_question:
        handle_userinput(user_question, conversation_chain)
    # BUG FIX: the original indexed pdf_paths[1] unconditionally and
    # raised IndexError when only one PDF was supplied (the CLI guard
    # only requires one). Only compare when two PDFs are available.
    if len(pdf_paths) >= 2:
        comparison_result = compare_contract_pdfs(pdf_paths[0], pdf_paths[1])
        print("Comparison", comparison_result)
if __name__ == '__main__':
    # Require at least one PDF path on the command line.
    pdf_args = sys.argv[1:]
    if not pdf_args:
        print("Usage: python script.py <path_to_pdf1> <path_to_pdf2> ...")
        sys.exit(1)
    # Hand every provided path to the main workflow.
    main(pdf_args)