-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathintegrate.py
More file actions
183 lines (131 loc) · 5.56 KB
/
integrate.py
File metadata and controls
183 lines (131 loc) · 5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext
# Load settings from a .env file (if any) into the process environment.
load_dotenv()

# Telegram Bot Token; None when "token" is not defined in the environment.
TELEGRAM_TOKEN = os.environ.get("token")
def read_pdf(file_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined in page order; an empty string when no
        page yields any text.
    """
    # PyPDF2's PdfReader is not a context manager, so ``with PdfReader(...)``
    # fails at runtime. Open the file ourselves so the handle is closed
    # deterministically, and finish extracting before leaving the ``with``.
    with open(file_path, "rb") as pdf_file:
        reader = PdfReader(pdf_file)
        # A page without extractable text must not break the concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)
def extract_date_and_summary(text):
    """Split document text into its date lines and its remaining content.

    Every line containing the marker "Date:" goes to the date part; every
    other line that is not blank (after stripping whitespace) goes to the
    summary part. Each kept line is terminated with a newline.

    Args:
        text: The full document text.

    Returns:
        A ``(date, summary)`` tuple of strings.
    """
    date_rows = []
    summary_rows = []
    for row in text.splitlines():
        if "Date:" in row:
            date_rows.append(row)
            continue
        if row.strip():
            summary_rows.append(row)

    date = "".join(f"{row}\n" for row in date_rows)
    summary = "".join(f"{row}\n" for row in summary_rows)
    return date, summary
def compare_documents(doc1_path, doc2_path):
    """Print the date lines and summary of two PDF documents.

    NOTE: a later ``def compare_documents`` in this file rebinds the name, so
    this printing variant is not the one callers end up with.

    Args:
        doc1_path: Path of the first PDF.
        doc2_path: Path of the second PDF.
    """
    # Both files are read before anything is printed.
    first_text = read_pdf(doc1_path)
    second_text = read_pdf(doc2_path)

    date1, summary1 = extract_date_and_summary(first_text)
    date2, summary2 = extract_date_and_summary(second_text)

    report_sections = (
        f"\nDate of Document 1:\n{date1}",
        f"\nSummary of Document 1:\n{summary1}",
        f"\nDate of Document 2:\n{date2}",
        f"\nSummary of Document 2:\n{summary2}",
    )
    for section in report_sections:
        print(section)
def get_pdf_text(pdf_paths):
    """Return the text of all pages of all given PDFs as one string.

    Args:
        pdf_paths: Iterable of PDF file paths, processed in order.

    Returns:
        The page texts concatenated in path order, then page order.
    """
    page_texts = []
    for path in pdf_paths:
        reader = PdfReader(path)
        page_texts.extend(page.extract_text() for page in reader.pages)
    return "".join(page_texts)
def get_text_chunks(text):
    """Split text into chunks with a newline-based CharacterTextSplitter.

    The splitter is configured with ``chunk_size=1000``, ``chunk_overlap=200``
    and ``len`` as the length function.

    Args:
        text: The text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
def get_vectorstore(text_chunks):
    """Embed the text chunks with OpenAI and index them in a FAISS store.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable (which ``load_dotenv()`` can populate from a ``.env`` file).
    Secrets must never be committed to source control; the key that used to
    be hard-coded here should be treated as leaked and revoked.

    Args:
        text_chunks: List of strings to embed.

    Returns:
        The FAISS vector store built from ``text_chunks``.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; define it in the environment or in .env"
        )
    embeddings = OpenAIEmbeddings(openai_api_key=api_key)
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
def get_conversation_chain(vectorstore):
    """Build a ConversationalRetrievalChain backed by the given vector store.

    The chain uses a default ``ChatOpenAI`` model, the store's retriever, and
    a ``ConversationBufferMemory`` keyed on 'chat_history' that returns
    message objects.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The constructed conversation chain.
    """
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        memory=ConversationBufferMemory(memory_key='chat_history', return_messages=True),
        retriever=vectorstore.as_retriever(),
    )
def compare_documents(doc1_path, doc2_path):
    """Return a text report with the date lines and summary of two PDFs.

    Args:
        doc1_path: Path of the first PDF.
        doc2_path: Path of the second PDF.

    Returns:
        One string containing, for each document in turn, a "Date of
        Document N" section followed by a "Summary of Document N" section.
    """
    # Both files are read before either one is analysed.
    first_text = read_pdf(doc1_path)
    second_text = read_pdf(doc2_path)

    date1, summary1 = extract_date_and_summary(first_text)
    date2, summary2 = extract_date_and_summary(second_text)

    report_sections = [
        f"\nDate of Document 1:\n{date1}\n",
        f"\nSummary of Document 1:\n{summary1}\n",
        f"\nDate of Document 2:\n{date2}\n",
        f"\nSummary of Document 2:\n{summary2}\n",
    ]
    return "".join(report_sections)
def handle_userinput(update, context, conversation_chain):
    """Answer the user's text message using the conversation chain.

    Args:
        update: Incoming update; its message text is used as the question.
        context: Handler context (unused here).
        conversation_chain: Callable taking ``{'question': ...}`` and
            returning a mapping with a 'chat_history' sequence.
    """
    result = conversation_chain({'question': update.message.text})
    # The last entry of the chat history carries the reply to send back.
    latest_entry = result['chat_history'][-1]
    update.message.reply_text(latest_entry.content)
def _download_document(bot, document, prefix):
    """Download a Telegram document into ``downloads/`` and return its path."""
    os.makedirs("downloads", exist_ok=True)
    # file_name is supplied by the sender: keep only its last path component
    # so it cannot point outside downloads/, and fall back when it is absent.
    safe_name = os.path.basename(document.file_name or "") or "document.pdf"
    # The prefix keeps two uploads with the same name from overwriting each
    # other on disk.
    file_path = os.path.join("downloads", f"{prefix}_{safe_name}")
    bot.getFile(document.file_id).download(file_path)
    return file_path


def handle_document(update, context):
    """Handle an uploaded document: compare pairs and set up contract chat.

    The first upload is remembered in ``context.user_data['document1']`` and
    the user is asked for a second file. On the next upload both files are
    downloaded, compared with ``compare_documents`` and the report is sent
    back, after which ``user_data`` is cleared. The just-uploaded document is
    then indexed and a conversation chain for it is stored in
    ``context.user_data['conversation_chain']``.

    NOTE(review): indentation was lost in the reviewed copy of this file; the
    chat setup is assumed to run for every upload (not only after a
    comparison) -- confirm against the original.

    Args:
        update: Incoming update carrying ``message.document``.
        context: Handler context providing ``bot`` and ``user_data``.
    """
    message = update.message
    document = message.document

    if 'document1' not in context.user_data:
        context.user_data['document1'] = document
        message.reply_text("Please upload the second contract file")
    else:
        first_document = context.user_data['document1']
        first_path = _download_document(context.bot, first_document, "doc1")
        second_path = _download_document(context.bot, document, "doc2")

        comparison_output = compare_documents(first_path, second_path)
        # Telegram rejects messages longer than 4096 characters, and the
        # report embeds the text of both documents, so send it in slices.
        message_limit = 4096
        for offset in range(0, len(comparison_output), message_limit):
            message.reply_text(comparison_output[offset:offset + message_limit])
        context.user_data.clear()

    file_path = _download_document(context.bot, document, "chat")

    # Extract text from PDF
    pdf_text = get_pdf_text([file_path])

    # Get text chunks
    text_chunks = get_text_chunks(pdf_text)
    if not text_chunks:
        # E.g. a scanned PDF: there is nothing to embed, so no chain is built.
        message.reply_text("I couldn't extract any text from this document.")
        return

    # Create vector store
    vectorstore = get_vectorstore(text_chunks)

    # Create conversation chain
    conversation_chain = get_conversation_chain(vectorstore)
    message.reply_text("I'm here to assist you with the contract")

    # Pass the conversation chain to the message handler
    context.user_data['conversation_chain'] = conversation_chain
def start(update, context):
    """Reply to the user with a greeting that asks for a contract upload."""
    greeting = "Hi! Please upload your contract."
    update.message.reply_text(greeting)
def compare_documents_command(update, context):
    """Reply to the user with a prompt to upload the contracts to compare."""
    prompt = "Please upload contracts you'd like to compare"
    update.message.reply_text(prompt)
def main():
    """Register the bot's handlers, start polling and block in ``idle()``."""

    def answer_text(update, context):
        """Route a text message to the user's conversation chain, if any."""
        # A user may write before uploading anything; indexing user_data
        # directly would raise KeyError and leave the message unanswered.
        conversation_chain = context.user_data.get('conversation_chain')
        if conversation_chain is None:
            update.message.reply_text("Please upload a contract first.")
            return
        handle_userinput(update, context, conversation_chain)

    updater = Updater(token=TELEGRAM_TOKEN, use_context=True)
    dispatcher = updater.dispatcher

    # Handler for start command
    dispatcher.add_handler(CommandHandler("start", start))

    # Handler for comparing documents
    dispatcher.add_handler(CommandHandler("compare", compare_documents_command))

    # Handler for document uploads
    dispatcher.add_handler(MessageHandler(Filters.document, handle_document))

    # Handler for text messages (commands excluded)
    dispatcher.add_handler(MessageHandler(Filters.text & ~Filters.command, answer_text))

    updater.start_polling()
    updater.idle()
# Run the bot only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()