# 10_llamaindex_sql_memory.py
# 103 lines (78 loc) · 3.26 KB
# Standard library
import asyncio
import os

# Third-party
import faiss
from sqlalchemy.dialects.postgresql.base import ischema_names
from sqlalchemy.types import UserDefinedType

# LlamaIndex
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.chat_engine.context import ContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.settings import Settings
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.utilities.sql_wrapper import SQLDatabase
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.readers.database import DatabaseReader
from llama_index.vector_stores.faiss.base import FaissVectorStore
# Custom SQLAlchemy type so reflection of pgvector "vector" columns succeeds.
class PGVector(UserDefinedType):
    """Pass-through SQLAlchemy type for PostgreSQL's pgvector ``vector`` type.

    Values are handed to and received from the driver unchanged; this type
    exists only so SQLAlchemy can reflect tables containing vector columns.
    """

    def get_col_spec(self):
        # DDL column specification as it appears in PostgreSQL.
        return "vector"

    def bind_processor(self, dialect):
        # No conversion on the way into the database.
        def passthrough(value):
            return value

        return passthrough

    def result_processor(self, dialect, coltype):
        # No conversion on the way out of the database.
        def passthrough(value):
            return value

        return passthrough


# Register the type so SQLAlchemy reflection recognizes "vector" columns.
ischema_names["vector"] = PGVector
# Set your OpenAI API key.
# NOTE(review): never commit a real key — prefer exporting OPENAI_API_KEY in
# the shell. setdefault keeps any value already present in the environment
# instead of clobbering it with the placeholder (the original always overwrote).
os.environ.setdefault("OPENAI_API_KEY", "xxx")

# Connect to your Supabase PostgreSQL database.
supabase_uri = "postgresql://postgres:postgres@127.0.0.1:54322/postgres"
db = SQLDatabase.from_uri(supabase_uri)

# Load data from your "chat_history" table.
query = "SELECT * FROM chat_history"
sql_reader = DatabaseReader(sql_database=db)
docs = sql_reader.load_data(query=query)

# Set up the OpenAI embedding model.
embed_model = OpenAIEmbedding(model="text-embedding-ada-002")

# Create a FAISS vector store (dimension must match the embedding model's
# output size: text-embedding-ada-002 produces 1536-d vectors).
dimension = 1536
faiss_index = faiss.IndexFlatL2(dimension)
vector_store = FaissVectorStore(faiss_index=faiss_index)
doc_store = SimpleDocumentStore()

# BUG FIX: VectorStoreIndex.from_documents() has no `vector_store` keyword —
# passing it directly was silently ignored, so the index was built on the
# default in-memory vector store and the FAISS index was never used. The
# vector store (and the document store, previously created but unused) must
# be supplied via a StorageContext.
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, docstore=doc_store
)

# Build and persist the index.
index = VectorStoreIndex.from_documents(
    docs, embed_model=embed_model, storage_context=storage_context
)
index.storage_context.persist(persist_dir="./storage")

# Set up the OpenAI LLM and make it the global default.
llm = OpenAI(model="gpt-4o", temperature=0.7)
Settings.llm = llm

# Conversation memory with a token limit (oldest turns are dropped first).
memory = ChatMemoryBuffer.from_defaults(token_limit=1500)

# Create a retriever over the indexed chat history.
retriever = index.as_retriever()

# Low-level chat engine: retrieved context + conversation memory + system prompt.
chat_engine = ContextChatEngine.from_defaults(
    retriever=retriever,
    memory=memory,
    system_prompt="You are a chatbot that responds based solely on the chat_history data.",
    llm=llm,
    verbose=True,
)
# Interactive chat loop: stream model tokens until the user types 'exit'.
async def main():
    """Run an interactive streaming chat session against the chat engine."""
    print("Enter your messages (type 'exit' to quit):")
    # input() is synchronous and blocks the event loop; acceptable for this
    # single-user CLI. The walrus condition reads and tests in one step.
    while (user_query := input("You: ")).lower() != "exit":
        # Start an asynchronous streaming chat turn.
        response = await chat_engine.astream_chat(message=user_query)
        # Emit tokens as they arrive, without buffering.
        async for token in response.async_response_gen():
            print(token, end="", flush=True)
        # Blank line after the completed response.
        print("\n")


if __name__ == "__main__":
    asyncio.run(main())