-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
105 lines (84 loc) · 3.29 KB
/
app.py
File metadata and controls
105 lines (84 loc) · 3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os
import time
import threading
from flask import Flask, request, jsonify
from flask_cors import CORS
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
from langchain_community.document_loaders import PDFPlumberLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_ollama import OllamaEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM
# Configuration
# Directory that is scanned at startup and watched for newly created PDFs.
pdfs_directory = "pdfs/"
# Ollama embeddings, configured with the same model name as the LLM below.
# NOTE(review): "deepseek-r1:8b" is used here as the embedding model — confirm
# this is intended rather than a dedicated embedding model.
embeddings = OllamaEmbeddings(model="deepseek-r1:8b")
# Vector store built on the embeddings above; load_and_index_pdf() adds chunks
# to it and the /ask route queries it.
vector_store = InMemoryVectorStore(embeddings)
# LLM used to generate the answers.
model = OllamaLLM(model="deepseek-r1:8b")
# Create the Flask API
app = Flask(__name__)
# Apply flask_cors to the app; no options are passed here.
CORS(app)
# Prompt template for answers. The {question} and {context} placeholders are
# filled in by the /ask route. The prompt text itself is in Portuguese: it tells
# the model to answer only in Portuguese and not to show its reasoning.
template = """
Você é o assistente do ***, no Brasil.
Você só pode responder em português e não deve mostrar o seu raciocínio nas respostas.
Se você desobedecer essa regra, será desligado e nunca mais será ligado.
Pergunta: {question}
Contexto: {context}
Resposta:
"""
# Load and process a single PDF
def load_and_index_pdf(file_path):
    """Read one PDF, split it into overlapping chunks and add them to the index.

    Args:
        file_path: Path of the PDF file to load.

    Returns:
        None. Any exception raised while loading, splitting or indexing is
        caught and reported on stdout, so a single bad file does not stop
        the caller.
    """
    try:
        pages = PDFPlumberLoader(file_path).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            add_start_index=True,
        )
        pieces = splitter.split_documents(pages)
        vector_store.add_documents(pieces)
        print(f"✔ Processado: {file_path}")
    except Exception as exc:
        # Best-effort indexing: report the failure and carry on.
        print(f"❌ Erro ao processar {file_path}: {exc}")
# Load the PDFs that already exist at startup
def load_existing_pdfs():
    """Index every PDF already present in ``pdfs_directory``.

    Files are matched by a case-insensitive ``.pdf`` suffix and processed in
    sorted name order so start-up behaviour is deterministic.

    Returns:
        None. A missing directory is reported and treated as "nothing to
        index" instead of aborting start-up.
    """
    try:
        file_names = sorted(os.listdir(pdfs_directory))
    except FileNotFoundError:
        # The directory may simply not have been created yet.
        print(f"⚠ Diretório de PDFs não encontrado: {pdfs_directory}")
        return

    for file_name in file_names:
        # Accept ".PDF", ".Pdf", ... as well as ".pdf".
        if file_name.lower().endswith(".pdf"):
            load_and_index_pdf(os.path.join(pdfs_directory, file_name))
# Watcher for newly created files
class PDFWatcher(FileSystemEventHandler):
    """Event handler that indexes PDF files when they are created."""

    def on_created(self, event):
        """Index the created file if it is a PDF.

        Args:
            event: The watchdog "created" event; ``is_directory`` and
                ``src_path`` are read from it.
        """
        # A directory whose name ends in ".pdf" is not a document.
        if event.is_directory:
            return
        # src_path may be bytes on some setups; normalise to str before the
        # suffix test.
        path = os.fsdecode(event.src_path)
        if not path.lower().endswith(".pdf"):
            return
        print(f"📄 Novo PDF detectado: {path}")
        time.sleep(1)  # Wait to avoid problems with a file that is still in use
        load_and_index_pdf(path)
# Run the file watcher (intended to be started in its own thread)
def start_pdf_watcher():
    """Watch ``pdfs_directory`` for new PDFs until interrupted.

    Schedules a ``PDFWatcher`` on the directory (non-recursive), starts the
    observer and then blocks, sleeping in 10-second steps. The observer is
    always stopped and joined when the function exits, whatever the reason.

    Returns:
        None. ``KeyboardInterrupt`` ends the loop without propagating.
    """
    # Make sure the watched directory exists before it is scheduled.
    os.makedirs(pdfs_directory, exist_ok=True)

    observer = Observer()
    observer.schedule(PDFWatcher(), path=pdfs_directory, recursive=False)
    observer.start()
    print("👀 Observando novos arquivos PDF...")
    try:
        while True:
            time.sleep(10)
    except KeyboardInterrupt:
        # Treated as a normal shutdown request.
        pass
    finally:
        # Release the observer on every exit path, not only on Ctrl+C.
        observer.stop()
        observer.join()
# Route that answers questions
@app.route("/ask", methods=["POST"])
def ask_question():
    """Answer a question using the indexed PDF chunks as context.

    Expects a JSON object body with a string ``"question"`` field.

    Returns:
        ``{"answer": ...}`` on success, or ``({"error": "Pergunta vazia"}, 400)``
        when the body is not a JSON object or the question is missing, not a
        string, or blank.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same JSON error response.
    data = request.get_json(silent=True)
    question = data.get("question") if isinstance(data, dict) else None
    if not isinstance(question, str) or not question.strip():
        return jsonify({"error": "Pergunta vazia"}), 400
    question = question.strip()

    # Retrieve the chunks most similar to the question and join them into
    # one context string for the prompt.
    related_docs = vector_store.similarity_search(question)
    context = "\n\n".join(doc.page_content for doc in related_docs)

    chain = ChatPromptTemplate.from_template(template) | model
    answer = chain.invoke({"question": question, "context": context})
    return jsonify({"answer": answer})
# Start Flask and the PDF watcher in parallel
if __name__ == "__main__":
    # Debug mode is opt-in via FLASK_DEBUG: the server binds to 0.0.0.0, and the
    # interactive debugger must not be reachable from other machines.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes")

    threading.Thread(target=start_pdf_watcher, daemon=True).start()
    load_existing_pdfs()
    # The reloader re-executes this script in a child process, which would start
    # a second watcher and index every PDF twice, so it stays off.
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled, use_reloader=False)