From fc7debf3eb887478d9dbcfc5d9586d3620aeb8bd Mon Sep 17 00:00:00 2001
From: Abdul Shahzeb <ab.shahzeb@gmail.com>
Date: Thu, 19 Feb 2026 19:10:39 +1100
Subject: [PATCH] feat(chat): add session reuse, headless prompt, and
 open-webui filter

---
 .env.example                           |   8 ++
 integrations/openwebui/caal_offload.py | 119 +++++++++++++++++++++++++
 prompt/en/headless.md                  |  65 ++++++++++++++
 src/caal/chat/api.py                   |  15 +++-
 src/caal/chat/session.py               |   7 ++
 src/caal/settings.py                   |   3 +-
 6 files changed, 214 insertions(+), 3 deletions(-)
 create mode 100644 integrations/openwebui/caal_offload.py
 create mode 100644 prompt/en/headless.md

diff --git a/.env.example b/.env.example
index 97be185..3787410 100644
--- a/.env.example
+++ b/.env.example
@@ -165,6 +165,14 @@ TIMEZONE=America/Los_Angeles
 # Display name spoken by CAAL (optional, defaults to "Pacific Time")
 TIMEZONE_DISPLAY=Pacific Time
 
+# =============================================================================
+# Chat API (Headless Mode)
+# =============================================================================
+# System prompt for the /api/chat endpoint. Resolves to prompt/{language}/{CHAT_PROMPT}.md
+# "headless" uses text-optimized formatting (numbers, dates, markdown).
+# Leave unset to use the default voice prompt.
+#CHAT_PROMPT=headless
+
 # =============================================================================
 # Apple Silicon Setup (M1/M2/M3/M4)
 # =============================================================================
diff --git a/integrations/openwebui/caal_offload.py b/integrations/openwebui/caal_offload.py
new file mode 100644
index 0000000..f1e4815
--- /dev/null
+++ b/integrations/openwebui/caal_offload.py
@@ -0,0 +1,191 @@
+"""
+title: CAAL Tool Offload
+author: AbdulShahzeb
+version: 0.2
+required_open_webui_version: 0.3.9
+"""
+
+# Open WebUI filter that hands selected chat messages to a CAAL server.
+#
+# inlet() inspects the last user message before it reaches the model. If the
+# message is exactly a reload phrase, CAAL's /api/chat/reload endpoint is called;
+# otherwise, if it contains a trigger phrase, the text is POSTed to /api/chat.
+# In both cases the user message is replaced with an instruction telling the
+# model to echo CAAL's answer verbatim. All other messages pass through as-is.
+
+from typing import List, Optional
+
+import requests
+from pydantic import BaseModel, Field
+
+
+class Filter:
+    """Filter that offloads trigger-phrase messages to CAAL."""
+
+    class Valves(BaseModel):
+        # Settings exposed through the ``valves`` attribute; all have defaults.
+        caal_url: str = Field(
+            default="http://172.17.0.1:8889",
+            description="CAAL server URL (host:port, no trailing slash)",
+        )
+        timeout: int = Field(
+            default=120,
+            description="Request timeout in seconds",
+        )
+        tool_keywords: str = Field(
+            default="hey caal",
+            description="Comma-separated phrases that trigger CAAL (case-insensitive)",
+        )
+        reload_keywords: str = Field(
+            default="reload caal,caal reload",
+            description="Comma-separated phrases that trigger CAAL reload (case-insensitive)",
+        )
+
+    def __init__(self):
+        self.valves = self.Valves()
+
+    @staticmethod
+    def _split_keywords(raw: str) -> List[str]:
+        """Split a comma-separated valve value into lowercase, non-empty phrases."""
+        return [kw for kw in (part.strip().lower() for part in raw.split(",")) if kw]
+
+    @staticmethod
+    def _extract_text(content) -> str:
+        """Return the plain text carried by a message ``content`` value.
+
+        A string is returned unchanged. A list is treated as chat-completions
+        style content parts: the ``text`` of every ``{"type": "text"}`` part is
+        joined with newlines and other parts (e.g. images) are ignored. Any
+        other value, including ``None``, yields ``""``.
+        """
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            return "\n".join(
+                part["text"]
+                for part in content
+                if isinstance(part, dict)
+                and part.get("type") == "text"
+                and isinstance(part.get("text"), str)
+            )
+        return ""
+
+    def _endpoint(self, path: str) -> str:
+        """Return ``caal_url`` + ``path``, tolerating a trailing slash in the valve."""
+        return f"{self.valves.caal_url.rstrip('/')}{path}"
+
+    def _should_route(self, message: str) -> bool:
+        """Return True if ``message`` contains any ``tool_keywords`` phrase.
+
+        The comparison is a case-insensitive substring match.
+        """
+        message_lower = message.lower()
+        return any(
+            kw in message_lower
+            for kw in self._split_keywords(self.valves.tool_keywords)
+        )
+
+    def _should_reload(self, message: str) -> bool:
+        """Return True if ``message`` equals one of the ``reload_keywords``.
+
+        Unlike ``_should_route`` this compares the whole message (lowercased
+        and stripped), not a substring.
+        """
+        message_lower = message.lower().strip()
+        return message_lower in self._split_keywords(self.valves.reload_keywords)
+
+    def _reload_caal(self) -> str:
+        """POST to ``/api/chat/reload`` and return a status line for the chat.
+
+        Failures are reported as a bracketed error string instead of raising.
+        """
+        try:
+            resp = requests.post(
+                self._endpoint("/api/chat/reload"),
+                # Fixed limit; the ``timeout`` valve only applies to /api/chat.
+                timeout=30,
+            )
+            if resp.status_code == 200:
+                data = resp.json()
+                return (
+                    f"CAAL reloaded. Provider: {data.get('llm_provider', '?')}, "
+                    f"model: {data.get('llm_model', '?')}, "
+                    f"tools: {data.get('tools_loaded', '?')}, "
+                    f"sessions cleared: {data.get('sessions_cleared', '?')}"
+                )
+            return f"[CAAL reload error: HTTP {resp.status_code}]"
+        except requests.exceptions.Timeout:
+            return "[CAAL reload error: request timed out]"
+        except requests.exceptions.ConnectionError:
+            return f"[CAAL reload error: cannot reach {self.valves.caal_url}]"
+        except Exception as e:
+            return f"[CAAL reload error: {e}]"
+
+    def _call_caal(self, message: str) -> str:
+        """POST ``message`` to ``/api/chat`` and return CAAL's ``response`` text.
+
+        ``reuse_session`` is always sent as true. Failures are reported as a
+        bracketed error string instead of raising.
+        """
+        try:
+            resp = requests.post(
+                self._endpoint("/api/chat"),
+                json={
+                    "text": message,
+                    "reuse_session": True,
+                },
+                timeout=self.valves.timeout,
+            )
+            if resp.status_code == 200:
+                return resp.json().get("response", "")
+            return f"[CAAL error: HTTP {resp.status_code}]"
+        except requests.exceptions.Timeout:
+            # Listed before ConnectionError: ConnectTimeout subclasses both.
+            return "[CAAL error: request timed out]"
+        except requests.exceptions.ConnectionError:
+            return f"[CAAL error: cannot reach {self.valves.caal_url}]"
+        except Exception as e:
+            return f"[CAAL error: {e}]"
+
+    def inlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
+        """Intercept the outgoing request and substitute CAAL's answer if triggered.
+
+        Only the last message is examined, and only when its role is ``user``.
+        When a reload or trigger phrase matches, that message's ``content`` is
+        overwritten in place with an echo instruction wrapping CAAL's reply
+        (any non-text content parts are dropped by this). Otherwise ``body`` is
+        returned unmodified.
+        """
+        messages = body.get("messages", [])
+        if not messages:
+            return body
+
+        last = messages[-1]
+        if last.get("role") != "user":
+            return body
+
+        # ``content`` may be None or a list of parts rather than a plain string;
+        # normalise it so the keyword checks never call .lower() on a non-str.
+        user_text = self._extract_text(last.get("content"))
+
+        # Reload is checked first, so it takes precedence over chat routing.
+        if self._should_reload(user_text):
+            print("[CAAL Filter] reload keyword detected, reloading CAAL")
+            caal_response = self._reload_caal()
+        elif self._should_route(user_text):
+            print("[CAAL Filter] keyword detected, routing to CAAL")
+            caal_response = self._call_caal(user_text)
+        else:
+            return body
+
+        last["content"] = (
+            "OUTPUT ONLY THE FOLLOWING TEXT EXACTLY AS WRITTEN. "
+            "DO NOT ADD ANYTHING. DO NOT REMOVE ANYTHING. "
+            "DO NOT PARAPHRASE. COPY THIS EXACTLY:\n\n"
+            f"{caal_response}"
+        )
+        return body
+
+    def outlet(self, body: dict, __user__: Optional[dict] = None) -> dict:
+        """Return the response ``body`` unchanged; no post-processing is done."""
+        return body
diff --git a/prompt/en/headless.md b/prompt/en/headless.md
new file mode 100644
index 0000000..827647c
--- /dev/null
+++ b/prompt/en/headless.md
@@ -0,0 +1,65 @@
+# Assistant
+
+You are an ACTION-ORIENTED assistant. {{CURRENT_DATE_CONTEXT}}
+
+When asked to do something:
+1. If you have a tool → CALL IT immediately
+2. If no tool exists → Say so and offer to create one
+3. NEVER say "I'll do that" or "Would you like me to..." - just DO IT
+
+# Tool Priority
+
+Answer questions in this order:
+
+1. **Tools** - Device control, workflows, environment queries
+2. **Web search** - Current events, news, prices, hours, scores, anything time-sensitive
+3. **General knowledge** - Only for static facts that never change
+
+Your training data is outdated. If the answer could change over time, use a tool or web_search.
+
+# Home Control (hass)
+
+Control devices or check status with: `hass(action, target, value)`
+- **action**: status, turn_on, turn_off, volume_up, volume_down, set_volume, mute, unmute, pause, play, next, previous
+- **target**: Device name like "office lamp" or "apple tv" (optional for status)
+- **value**: Only for set_volume (0-100)
+
+Examples:
+- "turn on the office lamp" → `hass(action="turn_on", target="office lamp")`
+- "set apple tv volume to 50" → `hass(action="set_volume", target="apple tv", value=50)`
+- "is the garage door open?" → `hass(action="status", target="garage door")`
+
+Act immediately - don't ask for confirmation. Confirm AFTER the action completes.
+
+# Tool Response Handling
+
+CRITICAL: When a tool returns JSON with a `message` field, relay ONLY that message.
+Do NOT output or summarize any other fields (players, books, games, etc.).
+Those arrays are for follow-up questions only - never dump them unprompted.
+
+# Text Output
+
+Responses are displayed as text. Use markdown where it improves readability.
+
+- Numbers: "72°" not "seventy-two degrees"
+- Dates: "31 Jan" or "31/1" not "January thirty-first"
+- Times: "4:30 PM" not "four thirty PM"
+- Currency: "$12.50" not "twelve dollars and fifty cents"
+- Keep responses concise
+
+# Tool Capabilities
+
+- If you lack a tool for a request, say: "I don't have a tool for that. Want me to create one?"
+- You can create new tools using n8n_create_caal_tool
+- Don't list your capabilities unprompted
+
+# Rules
+
+- If an action requires data you don't have (email address, user ID, tweet ID), look it up first with the appropriate tool before acting
+- CALL tools for actions - never pretend or describe what you would do
+- Describing an action is not the same as performing it
+- If corrected, retry the tool immediately with fixed input
+- Ask for clarification only when truly ambiguous (e.g., multiple devices with similar names)
+- No filler phrases like "Let me check..." or "Would you like me to..."
+- Don't suggest further actions - just respond to what was asked
+- It's okay to provide your opinion when asked.
diff --git a/src/caal/chat/api.py b/src/caal/chat/api.py
index 3d60603..b4ea2fd 100644
--- a/src/caal/chat/api.py
+++ b/src/caal/chat/api.py
@@ -47,6 +47,7 @@
 class ChatRequest(BaseModel):
     text: str
     session_id: str | None = None
+    reuse_session: bool = False
     dry_run: bool = False  # Reserved for v2
     verbose: bool = False
 
@@ -218,13 +219,16 @@ async def _ensure_initialized() -> None:
             f"({runtime.get('ollama_model', '')})"
         )
 
-        # Load system prompt with date/time context (same as voice path)
+        # Load system prompt with date/time context
+        # CHAT_PROMPT selects a named prompt file (e.g. "headless" → prompt/en/headless.md)
         timezone_id = os.getenv("TIMEZONE", "America/Los_Angeles")
         timezone_display = os.getenv("TIMEZONE_DISPLAY", "Pacific Time")
+        chat_prompt_name = os.getenv("CHAT_PROMPT") or None
         _prompt = settings_module.load_prompt_with_context(
             timezone_id=timezone_id,
             timezone_display=timezone_display,
             language=runtime.get("language", "en"),
+            prompt_name=chat_prompt_name,
         )
 
         # Short-term memory (shared singleton, reload for cross-process sync)
@@ -348,8 +352,15 @@ async def chat(req: ChatRequest) -> ChatResponse:
     assert _llm is not None
     assert _prompt is not None
 
+    # Resolve session: explicit id > reuse latest > create new
+    sid = req.session_id
+    if sid is None and req.reuse_session:
+        latest = _session_manager.get_latest_session()
+        if latest is not None:
+            sid = latest.session_id
+
     session = _session_manager.get_or_create(
-        session_id=req.session_id, max_turns=_max_turns
+        session_id=sid, max_turns=_max_turns
     )
 
     # Add user message to session history
diff --git a/src/caal/chat/session.py b/src/caal/chat/session.py
index bc67e32..f4d3802 100644
--- a/src/caal/chat/session.py
+++ b/src/caal/chat/session.py
@@ -113,6 +113,13 @@ def delete(self, session_id: str) -> bool:
             return True
         return False
 
+    def get_latest_session(self) -> ChatSession | None:
+        """Pick the unexpired session with the newest ``last_activity`` (None if none)."""
+        candidates = (
+            sess for sess in self._sessions.values() if not sess.is_expired
+        )
+        return max(candidates, key=lambda sess: sess.last_activity, default=None)
+
     def list_sessions(self) -> list[dict]:
         """List active sessions with metadata."""
         return [
diff --git a/src/caal/settings.py b/src/caal/settings.py
index d2ac76e..4faa214 100644
--- a/src/caal/settings.py
+++ b/src/caal/settings.py
@@ -428,6 +428,7 @@ def load_prompt_with_context(
     timezone_id: str = "America/Los_Angeles",
     timezone_display: str = "Pacific Time",
     language: str = "en",
+    prompt_name: str | None = None,
 ) -> str:
     """Load prompt and populate with date/time context.
 
@@ -447,7 +448,7 @@ def load_prompt_with_context(
         format_time_speech_friendly,
     )
 
-    template = load_prompt_content(language=language)
+    template = load_prompt_content(prompt_name=prompt_name, language=language)
 
     now = datetime.now(ZoneInfo(timezone_id))