Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ai/factory/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,18 @@ Dependencies (PyYAML, numpy, openai, anthropic, etc.) are installed inside the D
You'll need:
- **Docker** — for local development (`docker compose up -d`)
- **AWS CLI** — configured with credentials for DynamoDB and S3

Claude Integration (skip this section if you intend to use Bedrock)
- **OpenAI API key** — set as `OPENAI_API_KEY` environment variable (for embeddings)
- **Anthropic API key** — set as `ANTHROPIC_API_KEY` environment variable (for Claude responses)

Bedrock
After you make your first call to Bedrock, you'll need to complete the following steps before subsequent calls will succeed.
- Go to the AWS Console:
- Bedrock → Model catalog (or Model access)
- There should be a prompt to submit use case details. Mine was at the top of the page.
- Fill it out — keep it simple ("AI chatbot for personal portfolio website")

## Creating a New Bot

### Step 1: Create the bot folder
Expand Down
4 changes: 2 additions & 2 deletions ai/factory/bots/guitar/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ bot:

rag:
embedding_model: "openai"
top_k: 10
similarity_threshold: 0.3
top_k: 3
similarity_threshold: 0.5

boundaries:
discuss_guitar: true
Expand Down
146 changes: 146 additions & 0 deletions ai/factory/core/anthropic_chatbot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""
If you do not want to use Bedrock, rename this file to chatbot.py and implement
the same interface using the Anthropic API.
"""

"""
Chatbot Module (Parameterized)

Generates responses using Claude API with RAG context.
Loads the system prompt from each bot's prompt.md file and
caches it per bot_id for warm Lambda reuse.

Same pattern as ai/chatbot.py — retrieve context, build messages,
call Claude. Only difference: bot_id drives which prompt and
embeddings are used.
"""
import os
from datetime import datetime
from pathlib import Path
import yaml
import anthropic
from .retrieval import retrieve_relevant_chunks, format_context_for_llm

# ---------------------------------------------------------------------------
# Cached resources — persist across warm Lambda invocations
# ---------------------------------------------------------------------------
_anthropic_client = None
_system_prompts = {}


def get_anthropic_client() -> anthropic.Anthropic:
    """Return the process-wide Anthropic client, building it on first use.

    The client is memoized in a module global so warm Lambda invocations
    reuse the same connection instead of re-reading the API key.
    """
    global _anthropic_client
    if _anthropic_client is not None:
        return _anthropic_client
    _anthropic_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
    return _anthropic_client


def load_system_prompt(bot_id: str) -> str:
    """
    Load, render, and cache the system prompt for a bot.

    Reads bots/{bot_id}/prompt.yml (a YAML file with a top-level 'prompt'
    key), substitutes the current date into the template, and caches the
    rendered prompt in _system_prompts for warm Lambda reuse.

    Args:
        bot_id: Folder name of the bot under bots/.

    Returns:
        The rendered system prompt string.

    Raises:
        FileNotFoundError: If the bot has no prompt.yml file.
    """
    if bot_id in _system_prompts:
        return _system_prompts[bot_id]

    prompt_path = Path(__file__).parent.parent / 'bots' / bot_id / 'prompt.yml'

    if not prompt_path.exists():
        raise FileNotFoundError(f"No prompt.yml found for bot '{bot_id}' at {prompt_path}")

    with open(prompt_path, 'r') as f:
        # safe_load returns None for an empty file; fall back to an empty
        # mapping so the .get below doesn't raise AttributeError.
        data = yaml.safe_load(f) or {}

    template = data.get('prompt', '')

    # Inject current date. NOTE(review): str.format means any literal braces
    # in prompt.yml must be escaped as {{ }} — confirm templates comply.
    current_date = datetime.now().strftime('%B %d, %Y')
    prompt = template.format(current_date=current_date)

    _system_prompts[bot_id] = prompt
    return prompt


def generate_response(
    bot_id: str,
    user_message: str,
    conversation_history: list[dict] = None,
    top_k: int = 5,
    similarity_threshold: float = 0.3
) -> dict:
    """
    Answer a user message with RAG-grounded output from Claude.

    Retrieves the bot's most relevant knowledge chunks, folds them into
    the final user turn, and sends the conversation to the Anthropic API
    under the bot's system prompt.

    Args:
        bot_id: Which bot is responding
        user_message: The user's question
        conversation_history: Previous messages (optional)
        top_k: Number of chunks to retrieve
        similarity_threshold: Minimum similarity for retrieval

    Returns:
        dict with 'response' text and 'sources' list
    """
    history = conversation_history if conversation_history is not None else []

    # Pull the bot-scoped context for this query.
    relevant_chunks = retrieve_relevant_chunks(
        bot_id=bot_id,
        query=user_message,
        top_k=top_k,
        similarity_threshold=similarity_threshold
    )
    context = format_context_for_llm(relevant_chunks)

    # The retrieved context rides along inside the final user turn.
    user_content = f"""## Relevant Context:
{context}

## User Question:
{user_message}

Remember: Keep your response short and conversational. Write in PLAIN TEXT ONLY - do not use ** or any markdown. If you can't answer from the context, say so politely."""

    # Prior turns first, then the context-augmented user message.
    messages = [{"role": turn["role"], "content": turn["content"]} for turn in history]
    messages.append({"role": "user", "content": user_content})

    response = get_anthropic_client().messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1000,
        system=load_system_prompt(bot_id),
        messages=messages
    )

    return {
        "response": response.content[0].text,
        "sources": [
            {"category": chunk["category"], "similarity": chunk["similarity"]}
            for chunk in relevant_chunks
        ]
    }
65 changes: 44 additions & 21 deletions ai/factory/core/chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
import os
from datetime import datetime
from pathlib import Path
# NOTE(review): accidental IDE auto-import — 'messages' is rebuilt as a local
# list inside generate_response, so this name is never used; safe to remove.
from pyexpat.errors import messages
import yaml
# NOTE(review): anthropic may be unused now that responses go through Bedrock;
# confirm against the rest of the file before removing.
import anthropic
import boto3
# NOTE(review): botocore.session appears unused — the code calls boto3.client
# directly. Confirm and remove (boto3 already depends on botocore).
import botocore.session
import configparser
from .retrieval import retrieve_relevant_chunks, format_context_for_llm

# ---------------------------------------------------------------------------
Expand All @@ -23,13 +26,33 @@
_system_prompts = {}


def get_anthropic_client() -> anthropic.Anthropic:
"""Lazy-init Anthropic client."""
global _anthropic_client
if _anthropic_client is None:
_anthropic_client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
return _anthropic_client

_bedrock_client = None

def get_bedrock_client():
    """
    Lazily build and cache the Bedrock runtime client.

    Tries the mounted /root/.aws/credentials file first (local dev inside
    Docker); on any failure it falls back to boto3's default credential
    chain, which in Lambda resolves to the function's IAM role.

    The region defaults to us-east-1 but can be overridden with the
    BEDROCK_REGION environment variable. Returns the cached client on
    warm invocations.
    """
    global _bedrock_client
    if _bedrock_client is None:
        print(">>> Initializing Bedrock client")
        # Keep us-east-1 as the default so existing deployments are unchanged.
        region = os.getenv('BEDROCK_REGION', 'us-east-1')

        try:
            # Local dev: read real AWS creds from mounted file
            config = configparser.ConfigParser()
            config.read('/root/.aws/credentials')
            _bedrock_client = boto3.client(
                'bedrock-runtime',
                region_name=region,
                endpoint_url=os.getenv('BEDROCK_ENDPOINT_URL'),
                aws_access_key_id=config.get('default', 'aws_access_key_id'),
                aws_secret_access_key=config.get('default', 'aws_secret_access_key')
            )
            print(">>> Using credentials file")
        except Exception:
            # Deliberately broad: any failure reading the creds file means we
            # are not in local dev. Lambda's IAM role provides credentials
            # automatically via the default chain.
            _bedrock_client = boto3.client(
                'bedrock-runtime',
                region_name=region
            )
            print(">>> Using IAM role credentials")
    return _bedrock_client

def load_system_prompt(bot_id: str) -> str:
"""
Expand Down Expand Up @@ -62,9 +85,10 @@ def load_system_prompt(bot_id: str) -> str:
def generate_response(
bot_id: str,
user_message: str,
top_k: int,
similarity_threshold: float,
conversation_history: list[dict] = None,
top_k: int = 5,
similarity_threshold: float = 0.3

) -> dict:
"""
Generate a response using RAG for a specific bot.
Expand Down Expand Up @@ -100,8 +124,8 @@ def generate_response(
for msg in conversation_history:
messages.append({
"role": msg["role"],
"content": msg["content"]
})
"content": [{"text": msg["content"]}]
})

# Add current user message with context
user_content = f"""## Relevant Context:
Expand All @@ -114,23 +138,22 @@ def generate_response(

messages.append({
"role": "user",
"content": user_content
"content": [{"text": user_content}]
})

# Load this bot's system prompt
system_prompt = load_system_prompt(bot_id)

# Call Claude
client = get_anthropic_client()
response = client.messages.create(
model="claude-sonnet-4-20250514",
max_tokens=1000,
system=system_prompt,
messages=messages
)
client = get_bedrock_client()
response = client.converse(
modelId="us.anthropic.claude-sonnet-4-20250514-v1:0",
inferenceConfig={"maxTokens": 1000},
system=[{"text": system_prompt}],
messages=messages)

return {
"response": response.content[0].text,
"response": response["output"]["message"]["content"][0]["text"],
"sources": [
{
"category": chunk["category"],
Expand Down
20 changes: 18 additions & 2 deletions ai/factory/core/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
def retrieve_relevant_chunks(
bot_id: str,
query: str,
top_k: int = 5,
similarity_threshold: float = 0.3
top_k: int,
similarity_threshold: float
) -> list[dict]:
"""
Retrieve the most relevant chunks for a user's query, scoped to a bot.
Expand All @@ -129,6 +129,17 @@ def retrieve_relevant_chunks(
items = get_cached_embeddings(bot_id)
print(f"Searching {len(items)} embeddings...")


all_scores = []
for item in items:
stored_embedding = [float(x) for x in item['embedding']]
similarity = cosine_similarity(query_embedding, stored_embedding)
all_scores.append((similarity, item.get('category', ''), item.get('heading', '')))
all_scores.sort(reverse=True)
for score, cat, heading in all_scores[:5]:
print(f" Score: {score:.4f} | {cat}: {heading}")


# Calculate similarity for each chunk
results = []
for item in items:
Expand All @@ -146,8 +157,13 @@ def retrieve_relevant_chunks(

print(f"Found {len(results)} results above threshold ({similarity_threshold})")

print(f" Above 0.6: {len([r for r in results if r['similarity'] >= 0.6])}")
print(f" Above 0.55: {len([r for r in results if r['similarity'] >= 0.55])}")
print(f" Above 0.5: {len([r for r in results if r['similarity'] >= 0.5])}")

# Sort by similarity (highest first) and return top K
results.sort(key=lambda x: x['similarity'], reverse=True)
print(f"Found {len(results)} above threshold, returning top {top_k}")
return results[:top_k]


Expand Down
2 changes: 1 addition & 1 deletion ai/factory/core/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ async def chat(request: ChatRequest):
user_message=request.message,
conversation_history=[msg.model_dump() for msg in request.conversation_history],
top_k=rag_config.get('top_k', 5),
similarity_threshold=rag_config.get('similarity_threshold', 0.3)
similarity_threshold=rag_config.get('similarity_threshold')
)

# Log the interaction
Expand Down
1 change: 1 addition & 0 deletions ai/factory/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ anthropic>=0.25.0
numpy>=1.24.0
pyyaml>=6.0
uvicorn>=0.23.0
boto3>=1.28.0
Loading
Loading