Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ai/factory/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,18 @@ Dependencies (PyYAML, numpy, openai, anthropic, etc.) are installed inside the D
You'll need:
- **Docker** — for local development (`docker compose up -d`)
- **AWS CLI** — configured with credentials for DynamoDB and S3

Claude Integration (skip this section if you intend to use Bedrock)
- **OpenAI API key** — set as `OPENAI_API_KEY` environment variable (for embeddings)
- **Anthropic API key** — set as `ANTHROPIC_API_KEY` environment variable (for Claude responses)

Bedrock
After you make your first call to Bedrock, you'll need to complete the following steps before subsequent calls will succeed.
- Go to the AWS Console:
- Bedrock → Model catalog (or Model access)
- There should be a prompt to submit use case details. Mine was at the top of the page.
- Fill it out — keep it simple ("AI chatbot for personal portfolio website")

## Creating a New Bot

### Step 1: Create the bot folder
Expand Down
4 changes: 2 additions & 2 deletions ai/factory/bots/guitar/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ bot:

rag:
embedding_model: "openai"
top_k: 10
similarity_threshold: 0.3
top_k: 3
similarity_threshold: 0.5

boundaries:
discuss_guitar: true
Expand Down
146 changes: 146 additions & 0 deletions ai/factory/core/anthropic_chatbot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""
If you do not want to use Bedrock, rename this file to chatbot.py and implement
the same interface using the Anthropic API.
"""

"""
Chatbot Module (Parameterized)

Generates responses using Claude API with RAG context.
Loads the system prompt from each bot's prompt.md file and
caches it per bot_id for warm Lambda reuse.

Same pattern as ai/chatbot.py — retrieve context, build messages,
call Claude. Only difference: bot_id drives which prompt and
embeddings are used.
"""
import os
from datetime import datetime
from pathlib import Path
import yaml
import anthropic
from .retrieval import retrieve_relevant_chunks, format_context_for_llm

# ---------------------------------------------------------------------------
# Cached resources — persist across warm Lambda invocations
# ---------------------------------------------------------------------------
_anthropic_client = None
_system_prompts = {}


def get_anthropic_client() -> anthropic.Anthropic:
    """Return the process-wide Anthropic client, building it on first use.

    The client is memoized in a module global so warm Lambda invocations
    reuse the same connection instead of re-reading the API key.
    """
    global _anthropic_client
    if _anthropic_client is not None:
        return _anthropic_client
    _anthropic_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
    return _anthropic_client


def load_system_prompt(bot_id: str) -> str:
    """
    Load, render, and cache the system prompt for a bot.

    Reads bots/{bot_id}/prompt.yml (a YAML file with a top-level 'prompt'
    key), substitutes the current date into the template, and caches the
    rendered prompt in _system_prompts for warm Lambda reuse.

    Args:
        bot_id: Folder name of the bot under bots/.

    Returns:
        The rendered system prompt string.

    Raises:
        FileNotFoundError: If the bot has no prompt.yml file.
    """
    if bot_id in _system_prompts:
        return _system_prompts[bot_id]

    prompt_path = Path(__file__).parent.parent / 'bots' / bot_id / 'prompt.yml'

    if not prompt_path.exists():
        raise FileNotFoundError(f"No prompt.yml found for bot '{bot_id}' at {prompt_path}")

    with open(prompt_path, 'r') as f:
        # safe_load returns None for an empty file; fall back to an empty
        # mapping so the .get below doesn't raise AttributeError.
        data = yaml.safe_load(f) or {}

    template = data.get('prompt', '')

    # Inject current date. NOTE(review): str.format means any literal braces
    # in prompt.yml must be escaped as {{ }} — confirm templates comply.
    current_date = datetime.now().strftime('%B %d, %Y')
    prompt = template.format(current_date=current_date)

    _system_prompts[bot_id] = prompt
    return prompt


def generate_response(
    bot_id: str,
    user_message: str,
    conversation_history: list[dict] = None,
    top_k: int = 5,
    similarity_threshold: float = 0.3
) -> dict:
    """
    Answer a user message with RAG-grounded output from Claude.

    Retrieves the bot's most relevant knowledge chunks, folds them into
    the final user turn, and sends the conversation to the Anthropic API
    under the bot's system prompt.

    Args:
        bot_id: Which bot is responding
        user_message: The user's question
        conversation_history: Previous messages (optional)
        top_k: Number of chunks to retrieve
        similarity_threshold: Minimum similarity for retrieval

    Returns:
        dict with 'response' text and 'sources' list
    """
    history = conversation_history if conversation_history is not None else []

    # Pull the bot-scoped context for this query.
    relevant_chunks = retrieve_relevant_chunks(
        bot_id=bot_id,
        query=user_message,
        top_k=top_k,
        similarity_threshold=similarity_threshold
    )
    context = format_context_for_llm(relevant_chunks)

    # The retrieved context rides along inside the final user turn.
    user_content = f"""## Relevant Context:
{context}

## User Question:
{user_message}

Remember: Keep your response short and conversational. Write in PLAIN TEXT ONLY - do not use ** or any markdown. If you can't answer from the context, say so politely."""

    # Prior turns first, then the context-augmented user message.
    messages = [{"role": turn["role"], "content": turn["content"]} for turn in history]
    messages.append({"role": "user", "content": user_content})

    response = get_anthropic_client().messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        system=load_system_prompt(bot_id),
        messages=messages
    )

    return {
        "response": response.content[0].text,
        "sources": [
            {"category": chunk["category"], "similarity": chunk["similarity"]}
            for chunk in relevant_chunks
        ]
    }
65 changes: 44 additions & 21 deletions ai/factory/core/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
import os
from datetime import datetime
from pathlib import Path
# NOTE(review): accidental IDE auto-import — 'messages' is rebuilt as a local
# list inside generate_response, so this name is never used; safe to remove.
from pyexpat.errors import messages
import yaml
# NOTE(review): anthropic may be unused now that responses go through Bedrock;
# confirm against the rest of the file before removing.
import anthropic
import boto3
# NOTE(review): botocore.session appears unused — the code calls boto3.client
# directly. Confirm and remove (boto3 already depends on botocore).
import botocore.session
import configparser
from .retrieval import retrieve_relevant_chunks, format_context_for_llm

# ---------------------------------------------------------------------------
Expand All @@ -23,13 +26,33 @@
_system_prompts = {}


def get_anthropic_client() -> anthropic.Anthropic:
"""Lazy-init Anthropic client."""
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
return _anthropic_client

_bedrock_client = None

def get_bedrock_client():
    """
    Lazily build and cache the Bedrock runtime client.

    Tries the mounted /root/.aws/credentials file first (local dev inside
    Docker); on any failure it falls back to boto3's default credential
    chain, which in Lambda resolves to the function's IAM role.

    The region defaults to us-east-1 but can be overridden with the
    BEDROCK_REGION environment variable. Returns the cached client on
    warm invocations.
    """
    global _bedrock_client
    if _bedrock_client is None:
        print(">>> Initializing Bedrock client")
        # Keep us-east-1 as the default so existing deployments are unchanged.
        region = os.getenv('BEDROCK_REGION', 'us-east-1')

        try:
            # Local dev: read real AWS creds from mounted file
            config = configparser.ConfigParser()
            config.read('/root/.aws/credentials')
            _bedrock_client = boto3.client(
                'bedrock-runtime',
                region_name=region,
                endpoint_url=os.getenv('BEDROCK_ENDPOINT_URL'),
                aws_access_key_id=config.get('default', 'aws_access_key_id'),
                aws_secret_access_key=config.get('default', 'aws_secret_access_key')
            )
            print(">>> Using credentials file")
        except Exception:
            # Deliberately broad: any failure reading the creds file means we
            # are not in local dev. Lambda's IAM role provides credentials
            # automatically via the default chain.
            _bedrock_client = boto3.client(
                'bedrock-runtime',
                region_name=region
            )
            print(">>> Using IAM role credentials")
    return _bedrock_client

def load_system_prompt(bot_id: str) -> str:
"""
Expand Down Expand Up @@ -62,9 +85,10 @@ def load_system_prompt(bot_id: str) -> str:
def generate_response(
bot_id: str,
user_message: str,
top_k: int,
similarity_threshold: float,
conversation_history: list[dict] = None,
top_k: int = 5,
similarity_threshold: float = 0.3

) -> dict:
"""
Generate a response using RAG for a specific bot.
Expand Down Expand Up @@ -100,8 +124,8 @@ def generate_response(
for msg in conversation_history:
messages.append({
"role": msg["role"],
"content": msg["content"]
})
"content": [{"text": msg["content"]}]
})

# Add current user message with context
user_content = f"""## Relevant Context:
Expand All @@ -114,23 +138,22 @@ def generate_response(

messages.append({
"role": "user",
"content": user_content
"content": [{"text": user_content}]
})

# Load this bot's system prompt
system_prompt = load_system_prompt(bot_id)

# Call Claude
client = get_anthropic_client()
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1000,
system=system_prompt,
messages=messages
)
client = get_bedrock_client()
response = client.converse(
modelId="us.anthropic.claude-sonnet-4-20250514-v1:0",
inferenceConfig={"maxTokens": 1000},
system=[{"text": system_prompt}],
messages=messages)

return {
"response": response.content[0].text,
"response": response["output"]["message"]["content"][0]["text"],
"sources": [
{
"category": chunk["category"],
Expand Down
20 changes: 18 additions & 2 deletions ai/factory/core/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
def retrieve_relevant_chunks(
bot_id: str,
query: str,
top_k: int = 5,
similarity_threshold: float = 0.3
top_k: int,
similarity_threshold: float
) -> list[dict]:
"""
Retrieve the most relevant chunks for a user's query, scoped to a bot.
Expand All @@ -129,6 +129,17 @@ def retrieve_relevant_chunks(
items = get_cached_embeddings(bot_id)
print(f"Searching {len(items)} embeddings...")


all_scores = []
for item in items:
stored_embedding = [float(x) for x in item['embedding']]
similarity = cosine_similarity(query_embedding, stored_embedding)
all_scores.append((similarity, item.get('category', ''), item.get('heading', '')))
all_scores.sort(reverse=True)
for score, cat, heading in all_scores[:5]:
print(f" Score: {score:.4f} | {cat}: {heading}")


# Calculate similarity for each chunk
results = []
for item in items:
Expand All @@ -146,8 +157,13 @@ def retrieve_relevant_chunks(

print(f"Found {len(results)} results above threshold ({similarity_threshold})")

print(f" Above 0.6: {len([r for r in results if r['similarity'] >= 0.6])}")
print(f" Above 0.55: {len([r for r in results if r['similarity'] >= 0.55])}")
print(f" Above 0.5: {len([r for r in results if r['similarity'] >= 0.5])}")

# Sort by similarity (highest first) and return top K
results.sort(key=lambda x: x['similarity'], reverse=True)
print(f"Found {len(results)} above threshold, returning top {top_k}")
return results[:top_k]


Expand Down
2 changes: 1 addition & 1 deletion ai/factory/core/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ async def chat(request: ChatRequest):
user_message=request.message,
conversation_history=[msg.model_dump() for msg in request.conversation_history],
top_k=rag_config.get('top_k', 5),
similarity_threshold=rag_config.get('similarity_threshold', 0.3)
similarity_threshold=rag_config.get('similarity_threshold')
)

# Log the interaction
Expand Down
1 change: 1 addition & 0 deletions ai/factory/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ anthropic>=0.25.0
numpy>=1.24.0
pyyaml>=6.0
uvicorn>=0.23.0
boto3>=1.28.0
Loading
Loading