Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion app/api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,27 @@

@app.get("/")
def home():
    """Serve the chatbot landing page."""
    page = render_template("index.html")
    return page


@app.post("/query")
@limiter.limit("60 per minute")  # limit LLM calls
def query():
    """Answer a user query using RAG (Retrieval-Augmented Generation).

    Validates the JSON payload, retrieves the top-k relevant documents
    from the vector store, and generates a response with the LLM
    grounded in that context.

    Returns:
        JSON ``{"answer": ...}`` on success, or a 400 JSON error when
        the request body is not JSON or lacks a non-empty ``query``.
    """
    payload = request.get_json(silent=True)
    # Reject malformed requests explicitly instead of raising a 500
    # (request.json["query"] would KeyError/abort on bad input).
    if not payload or not isinstance(payload.get("query"), str) or not payload["query"].strip():
        return jsonify({"error": "Request body must be JSON with a non-empty 'query' string."}), 400

    q = payload["query"]
    context = retrieve_top_k(q, k=10)
    answer = generate_response(q, context)
    return jsonify({"answer": answer})


# rate limit error response
@app.errorhandler(429)
def ratelimit_handler(e):
    """Return a JSON error payload when a client exceeds the rate limit."""
    body = {"error": "Rate limit exceeded. Please slow down."}
    return jsonify(body), 429
24 changes: 22 additions & 2 deletions app/api/testing_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
questions : list[str] = list(set(p["question"] for p in pairs))
configs : list[str] = list(set(p["config"] for p in pairs))

# prepare all unique pairs of configs for each question and insert into db if not already there
def prepare_pairs():
"""Prepare and insert unique configuration pairs for A/B testing.

For each question, generates all unique pairs of configurations and inserts
them into the ab_pairs table if they don't already exist.
"""
conn = get_db_connection()
cur = conn.cursor()
# insert unique pairs of configs for each question into the ab_pairs table (if they don't already exist)
Expand All @@ -53,8 +57,13 @@ def prepare_pairs():
conn.close()


# get the next unanswered pair from the db, along with the corresponding answers, and randomize left/right
def get_next_pair() -> tuple[int, str, dict, dict] | None:
"""Retrieve the next unanswered A/B testing pair with randomized positioning.

Fetches an unanswered comparison pair from the database along with their
corresponding answers. Randomly assigns configs to left/right positions
to avoid position bias.
"""
conn = get_db_connection()
cur = conn.cursor()

Expand Down Expand Up @@ -95,6 +104,11 @@ def get_next_pair() -> tuple[int, str, dict, dict] | None:

@app.route("/")
def index():
"""Render the A/B evaluation interface with the next comparison pair.

Fetches the next unanswered pair and renders the evaluation template.
Returns a completion message when all pairs have been evaluated.
"""
pair = get_next_pair()

if not pair:
Expand All @@ -113,6 +127,12 @@ def index():

@app.route("/vote", methods=["POST"])
def vote():
"""Process and save a user's vote for an A/B comparison pair.

Extracts the pair_id and winner choice from the form submission, records
the vote in ab_results, marks the pair as answered, and redirects to the
next evaluation.
"""
pair_id = request.form["pair_id"]
winner = request.form["winner"]

Expand Down
2 changes: 2 additions & 0 deletions app/clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,11 @@
from app.config import *

def get_mistral_client() -> Mistral:
    """Build a Mistral AI client configured with the application's API key."""
    client = Mistral(api_key=API_KEY)
    return client

def get_db_connection():
"""Establish and return a connection to the PostgreSQL database"""
return psycopg2.connect(
dbname=DB_NAME,
user=DB_USER,
Expand Down
15 changes: 11 additions & 4 deletions app/generation/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,17 @@
client = get_mistral_client()

def generate_response(query: str, context: list[tuple[str, float]]) -> str:
"""
Generate a response to a user query using DevGuard documentation context.

Formats the provided context into a prompt, sends it to the Mistral API. If context
is unavailable, the assistant will indicate so. If the query is unrelated to DevGuard,
the assistant will politely decline and redirect to DevGuard topics.

Safe prompt prepends: "Always assist with care, respect, and truth. Respond with
utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative
content. Ensure replies promote fairness and positivity."
"""
# format context
context_text = "\n\n".join(
f"- {content}" for content, _ in context
Expand All @@ -25,10 +36,6 @@ def generate_response(query: str, context: list[tuple[str, float]]) -> str:

message= [{"role": "user", "content": prompt}]

"""
Toggling the safe prompt will prepend your messages with the following system prompt:
Always assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.
"""
response = client.chat.complete(
model=MODEL_GENERATION,
messages=message,
Expand Down
8 changes: 4 additions & 4 deletions app/ingestion/chunking.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from app.config import CHUNK_SIZE, OVERLAP_SIZE

# split the given docs up in chunks without spliting up words
def chunking(docs: str) -> list[str]:
"""Split documentation into chunks of approximately CHUNK_SIZE without breaking words."""
chunks : list[str] = []
start : int = 0
while start < len(docs):
Expand All @@ -19,8 +19,9 @@ def chunking(docs: str) -> list[str]:
start = last_space - OVERLAP_SIZE
return chunks

# option: apply overlap to the chunks after initial chunking to ensure that there is some context between them
def apply_overlap(chunks: list[str]) -> list[str]:
"""Apply overlap to chunks by prepending the last OVERLAP_SIZE characters
from the previous chunk to each subsequent chunk"""
if OVERLAP_SIZE <= 0:
return chunks

Expand All @@ -38,9 +39,8 @@ def apply_overlap(chunks: list[str]) -> list[str]:
return overlapped


# split recursively for a hierarchy of separators
# attempt to split on high-level separators first, then move to increasingly finer separators if chunks remain too large
def recursive_chunking(docs: str, separators: list[str] = ["\n\n", "\n", ". ", " ", ""]):
"""Split text recursively using a hierarchy of separators. """
# base case
if len(docs) <= CHUNK_SIZE:
return [docs]
Expand Down
4 changes: 2 additions & 2 deletions app/ingestion/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
from app.clients import get_mistral_client
from app.config import BATCH_SIZE, MODEL_EMBEDDING

# get the embeddings for a list of chunks, return a list of embeddings
def get_embeddings(chunks: list[str]) -> list[list[float]]:
"""Generate and return embeddings for a list of text chunks"""
client = get_mistral_client()
embeddings: list[list[float]] = []
# call the api with batches to avoid hitting the rate limit
Expand All @@ -19,8 +19,8 @@ def get_embeddings(chunks: list[str]) -> list[list[float]]:
embeddings.append(list(embedding))
return embeddings

# get embedding for a single chunk of text
def text_embedding(chunk: str) -> list[float]:
"""Generate an embedding for a single piece of text"""
client = get_mistral_client()
# call the mistral api to get the embedding for the given text
response = client.embeddings.create(
Expand Down
5 changes: 2 additions & 3 deletions app/ingestion/reader.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import os
from app.config import PATH_DIR

def read_docs() -> str:
    """Traverse PATH_DIR recursively and concatenate every ``.md`` file.

    Files are decoded as UTF-8; each file's content is followed by a
    newline so adjacent documents do not run together.

    Returns:
        The combined markdown text as a single string (empty if no
        ``.md`` files are found).
    """
    parts: list[str] = []
    for root, _, files in os.walk(str(PATH_DIR)):
        for file in files:
            if file.endswith(".md"):
                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
                    # trailing newline keeps documents separated
                    parts.append(f.read() + "\n")
    # single join instead of repeated += (avoids quadratic string growth)
    return "".join(parts)
4 changes: 4 additions & 0 deletions app/retrieval/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
from app.ingestion.embedding import text_embedding

def retrieve_top_k(query: str, k: int = 5):
"""Retrieve the top k most similar documents to the given query.

This function computes the embedding for the query, queries the vector database
for documents ordered by cosine similarity, and returns the top k results."""
embedding = text_embedding(query)

conn = get_db_connection()
Expand Down
Loading