Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,12 @@ security:
pii_protection: false
output_blocklist: true
# Input: block injections/PII. Output: mask PII, block secrets.

context_management:
strategy: "heuristic" # allowed values: "heuristic" or "karl"

  karl:
    model: "same_as_chat" # or explicit Ollama model name
    summary_max_tokens: 512
    keep_last_messages: 2
    log_dir: "logs"
    # Optional: fallback_strategy: "heuristic" — fall back to heuristic context
    # trimming when Karl summarization fails (otherwise the failure is raised).
131 changes: 131 additions & 0 deletions src/core/context_summarizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

import logging
from datetime import datetime
from pathlib import Path
from typing import Any


class KarlSummarizationError(RuntimeError):
    """Raised when Karl cannot summarize the current conversation.

    Raised with the underlying exception chained (``raise ... from exc``)
    when the LLM call itself fails, and without a cause when the model
    returns an empty summary.
    """


class KarlSummarizer:
    """Pure service for context compression via an LLM summary.

    Replaces all but the newest ``keep_last_messages`` entries of a chat
    history with one system message containing an LLM-written summary.
    Holds no mutable conversation state of its own.
    """

    def __init__(
        self,
        llm_core,
        config: dict[str, Any],
        chat_model_name: str,
    ) -> None:
        """Validate the Karl config section and bind the LLM backend.

        Args:
            llm_core: Backend object exposing ``stream_chat`` (Ollama-style
                streaming chat API); only that method is used here.
            config: The ``context_management.karl`` configuration section.
            chat_model_name: Active chat model name; substituted when the
                configured ``model`` is the sentinel ``"same_as_chat"``.

        Raises:
            ValueError: If any required config key is missing or malformed.
        """
        self._llm_core = llm_core
        self._chat_model_name = chat_model_name
        self._model_name = self._resolve_model_name(config)
        self._summary_max_tokens = self._require_positive_int(
            config, "summary_max_tokens"
        )
        self._keep_last_messages = self._require_non_negative_int(
            config, "keep_last_messages"
        )
        self._log_dir = self._require_non_empty_str(config, "log_dir")

    def summarize(
        self,
        messages: list[dict],
        persona_options: dict[str, Any] | None = None,
    ) -> list[dict]:
        """Return a compressed copy of *messages*.

        Args:
            messages: Full chat history as ``{"role": ..., "content": ...}``
                dicts; neither the list nor its items are mutated.
            persona_options: Optional persona LLM options (notably
                ``num_ctx``) forwarded to the summary request so that
                summarization runs with the same context window as the chat
                that triggered it. ``num_predict`` is always overridden by
                ``summary_max_tokens``.

        Returns:
            ``[summary system message, *kept tail]`` — or a shallow copy of
            the input when it is already short enough to keep whole.

        Raises:
            KarlSummarizationError: If the LLM call fails or the model
                produces an empty summary.
        """
        items = [dict(message) for message in messages]
        if len(items) <= self._keep_last_messages:
            return items

        split_index = len(items) - self._keep_last_messages
        history = items[:split_index]
        tail = items[split_index:]

        prompt_messages = [
            {
                "role": "system",
                "content": (
                    "Compress the conversation history precisely. "
                    "Preserve facts, open tasks, constraints, decisions, and unresolved questions. "
                    "Do not invent content. Keep it concise and actionable."
                ),
            },
            {
                "role": "user",
                "content": self._format_history(history),
            },
        ]

        # Forward persona options (e.g. num_ctx) so the summary request is not
        # silently issued with a smaller context window than the chat that
        # triggered compression; the summary's own token budget always wins.
        request_options = dict(persona_options or {})
        request_options["num_predict"] = self._summary_max_tokens

        try:
            stream = self._llm_core.stream_chat(
                model_name=self._model_name,
                messages=prompt_messages,
                options=request_options,
                keep_alive=600,
            )
            summary = "".join(
                chunk.get("message", {}).get("content", "") for chunk in stream
            ).strip()
        except Exception as exc:
            raise KarlSummarizationError(
                f"Karl summarization failed with model '{self._model_name}'."
            ) from exc

        if not summary:
            raise KarlSummarizationError(
                f"Karl summarization returned an empty summary with model '{self._model_name}'."
            )

        self._append_log_entry(len(history), len(summary), self._model_name)

        return [{"role": "system", "content": summary}, *tail]

    def _append_log_entry(
        self, summarized_count: int, summary_length: int, model_name: str
    ) -> None:
        """Append one telemetry line to the daily Karl log (best effort).

        Logging is non-critical telemetry: any filesystem problem (read-only
        directory, permissions, disk full) is downgraded to a warning so a
        successfully produced summary is never discarded because of log I/O.
        """
        try:
            Path(self._log_dir).mkdir(parents=True, exist_ok=True)
            log_file = (
                Path(self._log_dir)
                / f"karl_{datetime.now().strftime('%Y-%m-%d')}.log"
            )
            timestamp = datetime.now().astimezone().isoformat(timespec="seconds")
            with log_file.open("a", encoding="utf-8") as handle:
                handle.write(
                    f"{timestamp} summarized={summarized_count} summary_chars={summary_length} model={model_name}\n"
                )
        except OSError:
            logging.warning(
                "Karl: failed to write summary log entry to '%s'.",
                self._log_dir,
                exc_info=True,
            )

    def _resolve_model_name(self, config: dict[str, Any]) -> str:
        """Return the summarization model, honoring the 'same_as_chat' alias."""
        model_name = self._require_non_empty_str(config, "model")
        if model_name == "same_as_chat":
            return self._chat_model_name
        return model_name

    @staticmethod
    def _format_history(messages: list[dict]) -> str:
        """Render messages as 'ROLE: content' lines for the summary prompt."""
        lines: list[str] = []
        for message in messages:
            role = str(message.get("role", "unknown")).upper()
            content = str(message.get("content", "")).strip()
            lines.append(f"{role}: {content}")
        return "\n".join(lines)

    @staticmethod
    def _require_non_empty_str(config: dict[str, Any], key: str) -> str:
        """Return config[key] stripped, or raise ValueError if absent/blank."""
        value = config.get(key)
        if not isinstance(value, str) or not value.strip():
            raise ValueError(
                f"context_management.karl.{key} must be a non-empty string."
            )
        return value.strip()

    @staticmethod
    def _require_positive_int(config: dict[str, Any], key: str) -> int:
        """Return config[key], or raise ValueError unless it is an int > 0."""
        value = config.get(key)
        if not isinstance(value, int) or value <= 0:
            raise ValueError(
                f"context_management.karl.{key} must be a positive integer."
            )
        return value

    @staticmethod
    def _require_non_negative_int(config: dict[str, Any], key: str) -> int:
        """Return config[key], or raise ValueError unless it is an int >= 0."""
        value = config.get(key)
        if not isinstance(value, int) or value < 0:
            raise ValueError(
                f"context_management.karl.{key} must be a non-negative integer."
            )
        return value
70 changes: 70 additions & 0 deletions src/ui/terminal_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from colorama import Fore, Style, init
from config.personas import get_all_persona_names, get_drink, _load_system_prompts
from core.context_summarizer import KarlSummarizationError, KarlSummarizer
from core.context_utils import context_near_limit, karl_prepare_quick_and_dirty
from core.orchestrator import broadcast_to_ensemble

Expand Down Expand Up @@ -389,6 +390,23 @@ def _ensure_context_headroom(self) -> None:
wait_msg = self._t("context_wait_message", persona_name=self.bot, drink=drink)
print(wait_msg)

context_management = self._require_context_management_config()
strategy = context_management["strategy"]

if strategy == "heuristic":
self._apply_heuristic_context_trim(persona_options)
return

if strategy == "karl":
self._apply_karl_context_summary(context_management["karl"], persona_options)
return

raise ValueError(
"context_management.strategy must be either 'heuristic' or 'karl'."
)

def _apply_heuristic_context_trim(self, persona_options) -> None:

num_ctx = persona_options.get("num_ctx")
if not num_ctx:
logging.info("TerminalUI: Context limit reached, but 'num_ctx' is not set.")
Expand Down Expand Up @@ -416,6 +434,58 @@ def _ensure_context_headroom(self) -> None:
notice = self.texts["terminal_context_trim_notice"]
print(f"{Fore.YELLOW}{notice}{Style.RESET_ALL}")

def _apply_karl_context_summary(self, karl_cfg, persona_options) -> None:
    """Compress ``self.history`` via Karl LLM summarization.

    On ``KarlSummarizationError``: falls back to heuristic trimming when
    ``karl_cfg["fallback_strategy"] == "heuristic"``; otherwise logs the
    failure and re-raises it to the caller.
    """
    # NOTE(review): persona_options (e.g. num_ctx) is only consumed by the
    # heuristic fallback below; it is not forwarded to the summarization
    # request itself — confirm whether it should be passed through.
    summarizer = KarlSummarizer(
        llm_core=self.streamer._llm_core,  # reuse the chat backend client
        config=karl_cfg,
        chat_model_name=self.streamer.model_name,
    )
    original_length = len(self.history)
    try:
        self.history = summarizer.summarize(self.history)
    except KarlSummarizationError:
        fallback = karl_cfg.get("fallback_strategy")
        if fallback == "heuristic":
            self._apply_heuristic_context_trim(persona_options)
            return
        logging.exception("Karl summarization failed and no fallback is configured.")
        raise

    removed = original_length - len(self.history)
    # Notify the user only when compression actually shortened the history.
    if removed > 0:
        notice = self.texts["terminal_context_trim_notice"]
        print(f"{Fore.YELLOW}{notice}{Style.RESET_ALL}")

def _require_context_management_config(self):
    """Validate and return the 'context_management' section of the config.

    Returns:
        The ``context_management`` dict read from ``self.config``.

    Raises:
        ValueError: If the section is absent, the strategy is unknown, or
            the ``karl`` sub-section lacks required keys.
    """
    section = getattr(self.config, "context_management", None)
    if not isinstance(section, dict):
        raise ValueError("Missing required 'context_management' configuration section.")

    strategy = section.get("strategy")
    if strategy not in ("heuristic", "karl"):
        raise ValueError(
            "context_management.strategy must be either 'heuristic' or 'karl'."
        )

    if strategy == "karl":
        karl_cfg = section.get("karl")
        if not isinstance(karl_cfg, dict):
            raise ValueError("Missing required 'context_management.karl' section.")
        required = ("model", "summary_max_tokens", "keep_last_messages", "log_dir")
        missing = sorted(key for key in required if key not in karl_cfg)
        if missing:
            raise ValueError(
                "Missing required context_management.karl keys: "
                + ", ".join(missing)
            )

    return section

def _print_loaded_history(self) -> None:
if not self.history:
return
Expand Down
67 changes: 66 additions & 1 deletion src/ui/web_ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import gradio as gr
from config.personas import get_all_persona_names, get_drink, _load_system_prompts
from core.context_summarizer import KarlSummarizationError, KarlSummarizer
from core.context_utils import context_near_limit, karl_prepare_quick_and_dirty
from core.streaming_provider import inject_wiki_context, lookup_wiki_snippet
from ui.conversation_io_terminal import load_conversation
Expand Down Expand Up @@ -127,6 +128,26 @@ def _handle_context_warning(self, llm_history, chat_history):
chat_history.append((None, warn))

persona_options = getattr(self.streamer, "persona_options", {}) or {}
context_management = self._require_context_management_config()
strategy = context_management["strategy"]

if strategy == "heuristic":
self._apply_heuristic_context_trim(llm_history, persona_options)
return True

if strategy == "karl":
self._apply_karl_context_summary(
llm_history,
context_management["karl"],
persona_options,
)
return True

raise ValueError(
"context_management.strategy must be either 'heuristic' or 'karl'."
)

def _apply_heuristic_context_trim(self, llm_history, persona_options):

num_ctx_value = persona_options.get("num_ctx")

Expand All @@ -150,7 +171,51 @@ def _handle_context_warning(self, llm_history, chat_history):
num_ctx_value,
)

return True
def _apply_karl_context_summary(self, llm_history, karl_cfg, persona_options):
    """Compress *llm_history* in place via Karl LLM summarization.

    On ``KarlSummarizationError``: falls back to heuristic trimming when
    ``karl_cfg["fallback_strategy"] == "heuristic"``; otherwise logs the
    failure and re-raises it to the caller.
    """
    # NOTE(review): persona_options (e.g. num_ctx) is only consumed by the
    # heuristic fallback below; it is not forwarded to the summarization
    # request itself — confirm whether it should be passed through.
    summarizer = KarlSummarizer(
        llm_core=self.streamer._llm_core,  # reuse the chat backend client
        config=karl_cfg,
        chat_model_name=self.streamer.model_name,
    )
    try:
        # Slice-assign so the caller's list object is mutated in place.
        llm_history[:] = summarizer.summarize(llm_history)
    except KarlSummarizationError:
        fallback = karl_cfg.get("fallback_strategy")
        if fallback == "heuristic":
            self._apply_heuristic_context_trim(llm_history, persona_options)
            return
        logging.exception("Karl summarization failed and no fallback is configured.")
        raise

def _require_context_management_config(self):
    """Validate and return the 'context_management' section of the config.

    Returns:
        The ``context_management`` dict read from ``self.cfg``.

    Raises:
        ValueError: If the section is absent, the strategy is unknown, or
            the ``karl`` sub-section lacks required keys.
    """
    section = getattr(self.cfg, "context_management", None)
    if not isinstance(section, dict):
        raise ValueError("Missing required 'context_management' configuration section.")

    strategy = section.get("strategy")
    if strategy not in ("heuristic", "karl"):
        raise ValueError(
            "context_management.strategy must be either 'heuristic' or 'karl'."
        )

    if strategy == "karl":
        karl_cfg = section.get("karl")
        if not isinstance(karl_cfg, dict):
            raise ValueError("Missing required 'context_management.karl' section.")
        required = ("model", "summary_max_tokens", "keep_last_messages", "log_dir")
        missing = sorted(key for key in required if key not in karl_cfg)
        if missing:
            raise ValueError(
                "Missing required context_management.karl keys: "
                + ", ".join(missing)
            )

    return section

# Stream the response (UI updates continuously)
def _stream_reply(self, message_history, chat_history):
Expand Down
Loading