diff --git a/community/coding-agent-runner/README.md b/community/coding-agent-runner/README.md new file mode 100644 index 00000000..6bc9e663 --- /dev/null +++ b/community/coding-agent-runner/README.md @@ -0,0 +1,91 @@ +# Coding Agent Runner + +![Community](https://img.shields.io/badge/OpenHome-Community-orange?style=flat-square) +![Author](https://img.shields.io/badge/Author-@juyounglee-lightgrey?style=flat-square) + +## What It Does +Runs a coding task through a remote webhook that invokes Claude Code or Codex headlessly, then reads back a short spoken result. + +## Trigger Words +- "run coding task" +- "run a coding agent" +- "execute coding task" + +## Setup +1. Run any webhook server that accepts `POST /run` with bearer auth (see example below). +2. In `main.py`, replace the `WEBHOOK_URL` and `WEBHOOK_TOKEN` placeholders. `WEBHOOK_URL` must be the full endpoint URL including `/run`, because the ability POSTs to it as-is. Use the same token on both sides. +3. Upload this ability to OpenHome and set trigger words in the dashboard. + +If OpenHome can't reach your server directly, use a tunnel (e.g. `ngrok http 8080`). + +## Webhook Contract + +The ability sends: +``` +POST /run +Authorization: Bearer <WEBHOOK_TOKEN> +{"prompt": "Add tests for the validator script"} +``` + +And expects back: +```json +{"ok": true, "summary": "Added tests and they pass."} +``` + +Optional response fields: `artifact_path`, `request_id`. + +## Minimal Webhook Server + +The webhook just needs to run Claude Code or Codex and return the output. Swap the command to match your agent. + +> **Safety note:** Both examples use autonomous execution flags. Only run in a +> sandboxed environment or a directory you're comfortable modifying. + +```python +# Runs on a separate server, not inside OpenHome. 
import hmac
import subprocess

from flask import Flask, jsonify, request

app = Flask(__name__)
TOKEN = "your-secret-token"
AGENT = "claude"  # "claude" or "codex"
WORKDIR = "/path/to/your/project"  # sandbox / working directory
# Keep this below the ability's REQUEST_TIMEOUT_SECONDS (180) so the caller
# receives a JSON error from us instead of giving up on the connection first.
AGENT_TIMEOUT_SECONDS = 170


def agent_cmd(prompt):
    """Return the argv list that runs ``prompt`` with the agent named by AGENT."""
    if AGENT == "codex":
        return ["codex", "exec", "--full-auto", prompt]
    return ["claude", "-p", prompt, "--allowedTools", "Bash,Read,Write,Edit"]


@app.post("/run")
def run():
    """Handle ``POST /run``: authenticate, run the agent, return a JSON result.

    Responses always follow the webhook contract:
    ``{"ok": true, "summary": ...}`` on success, or
    ``{"ok": false, "error": ...}`` with a 4xx/5xx status on failure.
    """
    # Constant-time comparison so response timing does not leak the token.
    supplied = request.headers.get("Authorization", "")
    expected = f"Bearer {TOKEN}"
    if not hmac.compare_digest(supplied.encode(), expected.encode()):
        return jsonify(ok=False, error="unauthorized"), 401

    prompt = (request.get_json(silent=True) or {}).get("prompt", "")
    prompt = prompt.strip() if isinstance(prompt, str) else ""
    if not prompt:
        return jsonify(ok=False, error="prompt required"), 400

    try:
        # The prompt is passed as a single argv element (no shell involved).
        result = subprocess.run(
            agent_cmd(prompt),
            capture_output=True, text=True, check=False,
            timeout=AGENT_TIMEOUT_SECONDS,
            cwd=WORKDIR,
        )
    except subprocess.TimeoutExpired:
        app.logger.error("agent timed out after %ss", AGENT_TIMEOUT_SECONDS)
        return jsonify(ok=False, error="agent timed out"), 504
    except OSError as err:
        # Raised when the agent CLI is not installed or WORKDIR does not exist.
        app.logger.error("agent could not be started: %s", err)
        return jsonify(ok=False, error="agent could not be started"), 500

    if result.returncode != 0:
        # Keep stderr in the server log; the ability tells users to look here.
        app.logger.error(
            "agent exited with %s: %s", result.returncode, result.stderr.strip()
        )
        return jsonify(ok=False, error=f"exit code {result.returncode}"), 500

    return jsonify(ok=True, summary=result.stdout.strip() or "Done.")
class CodingAgentRunnerCapability(MatchingCapability):
    """Voice ability that sends coding tasks to an external webhook.

    Flow: ask for the task -> confirm -> refine the transcription with the
    LLM -> POST it to the webhook -> speak a voice-friendly summary.
    """

    worker: AgentWorker = None
    capability_worker: CapabilityWorker = None

    # {{register capability}}

    def call(self, worker: AgentWorker):
        """Bind the worker and schedule ``run`` as a session task."""
        self.worker = worker
        self.capability_worker = CapabilityWorker(self.worker)
        self.worker.session_tasks.create(self.run())

    async def run(self):
        """Drive one interaction from task capture to the spoken result.

        ``resume_normal_flow()`` is always called at the end, whether the
        task ran, was cancelled, or failed.
        """
        try:
            # 1) Guard: ensure webhook is configured.
            if WEBHOOK_URL in ("", "YOUR_WEBHOOK_URL_HERE") \
                    or WEBHOOK_TOKEN in ("", "YOUR_WEBHOOK_TOKEN_HERE"):
                await self.capability_worker.speak(
                    "This coding agent runner is not configured yet. "
                    "Please set the webhook URL and token in the ability code."
                )
                return

            # 2) Ask for the coding task.
            await self.capability_worker.speak(
                "Tell me the coding task you'd like to run."
            )
            # Strip first so a whitespace-only transcript counts as "nothing heard".
            task = (await self.capability_worker.user_response() or "").strip()

            if not task:
                await self.capability_worker.speak(
                    "I didn't catch that. Please try again."
                )
                return

            if self._is_exit_request(task):
                await self.capability_worker.speak("Okay, canceled.")
                return

            # 3) Confirm before running.
            if not await self.capability_worker.run_confirmation_loop(
                "Got it. Want me to run that now?"
            ):
                await self.capability_worker.speak("Okay, I won't run it.")
                return

            # 4) Refine transcription → call the webhook.
            prompt = self._refine_prompt(task)
            await self.capability_worker.speak(
                "Running your coding task now. This may take up to a few minutes."
            )
            result = await self._call_webhook(prompt)

            if not result or not result.get("ok"):
                if result:
                    # The webhook answered but reported a failure; keep its reason.
                    self.worker.editor_logging_handler.error(
                        f"{TAG} webhook reported failure: {result.get('error')}"
                    )
                await self.capability_worker.speak(
                    "I couldn't complete that coding task. "
                    "Check your webhook server logs."
                )
                return

            # 5) Speak the result.
            spoken = self._rewrite_for_voice(
                result.get("summary") or "Task finished but returned no summary."
            )
            await self.capability_worker.speak(spoken)

            if result.get("artifact_path"):
                await self.capability_worker.speak(
                    "I also saved the full output in the run artifacts."
                )

        except Exception as err:
            # Top-level boundary: log, tell the user, and still resume below.
            self.worker.editor_logging_handler.error(
                f"{TAG} unexpected error: {err}"
            )
            await self.capability_worker.speak(
                "Something went wrong while running the coding task."
            )
        finally:
            self.capability_worker.resume_normal_flow()

    @staticmethod
    def _is_exit_request(utterance: str) -> bool:
        """Return True when the utterance is, or starts with, an exit word.

        Punctuation is treated as whitespace before matching, so transcripts
        such as "Cancel." or "Stop, please" are recognised as well.
        """
        words_only = "".join(
            ch if ch.isalnum() or ch.isspace() else " " for ch in utterance.lower()
        )
        normalized = " ".join(words_only.split())
        return any(
            normalized == word or normalized.startswith(f"{word} ")
            for word in EXIT_WORDS
        )

    async def _call_webhook(self, prompt: str) -> dict | None:
        """POST the task to the webhook; return parsed JSON or None.

        None is returned (and the error logged) on any network failure,
        non-2xx status, invalid JSON, or a JSON body that is not an object.
        """
        try:
            # requests is blocking, so run it off the event loop.
            resp = await asyncio.to_thread(
                requests.post,
                WEBHOOK_URL,
                headers={
                    "Content-Type": "application/json",
                    "Authorization": f"Bearer {WEBHOOK_TOKEN}",
                },
                json={"prompt": prompt},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
            resp.raise_for_status()
            payload = resp.json()
            if not isinstance(payload, dict):
                raise ValueError("response is not a JSON object")
        except Exception as err:
            self.worker.editor_logging_handler.error(
                f"{TAG} webhook failed: {err}"
            )
            return None
        return payload

    def _refine_prompt(self, raw: str) -> str:
        """Use the LLM to clean up a voice transcription into a clear coding task.

        Best effort: falls back to ``raw`` when the LLM call fails or returns
        nothing, and logs the failure instead of hiding it.
        """
        try:
            text = self.capability_worker.text_to_text_response(
                "The following is a voice transcription of a coding task. "
                "Clean it up into a clear, actionable prompt for a coding agent. "
                "Fix transcription errors, remove filler words, and keep the intent. "
                "Return only the refined prompt, nothing else.\n\n"
                f"Transcription:\n{raw}",
                self.worker.agent_memory.full_message_history,
            )
            return (text or "").strip() or raw
        except Exception as err:
            self.worker.editor_logging_handler.error(
                f"{TAG} prompt refinement failed, using raw transcription: {err}"
            )
            return raw

    def _rewrite_for_voice(self, raw: str) -> str:
        """Use the LLM to rewrite a raw summary into spoken-friendly text.

        Best effort: falls back to ``raw`` when the LLM call fails or returns
        nothing, and logs the failure instead of hiding it.
        """
        try:
            text = self.capability_worker.text_to_text_response(
                "Rewrite this coding result for spoken voice. "
                "Use 1-2 short conversational sentences. "
                "No list numbers, markdown, file paths, or code snippets. "
                "Keep only the key outcome and one optional follow-up.\n\n"
                f"Result:\n{raw}",
                self.worker.agent_memory.full_message_history,
            )
            # Drop code fences the model may still emit; they cannot be spoken.
            cleaned = (text or "").replace("```", "").strip()
            return cleaned or raw
        except Exception as err:
            self.worker.editor_logging_handler.error(
                f"{TAG} voice rewrite failed, speaking raw summary: {err}"
            )
            return raw
+ +## Example Phrases + +- `take a note` +- `note this down: call Sarah after lunch` +- `read my notes` +- `read my last note` +- `update my grocery note` +- `delete my last note` +- `delete my notes` + +## Storage + +- File: `private_notes.json` +- Persistence: `temp=False` +- JSON saves safely overwrite by deleting any existing file before writing because `write_file()` appends by default +- No `.md` files are written, so the Memory Watcher does not inject note contents into the Personality prompt + +## Voice UX + +- if no request is captured, the ability asks what the user wants to do +- reads are capped to the 3 most recent matches to avoid long voice dumps +- overwrite and delete actions always require confirmation +- final responses stay short, warm, and conversational + +## Suggested Trigger Words + +Configure these in the OpenHome dashboard: + +- `private note` +- `private notes` +- `take a note` +- `note this down` +- `write this down` +- `read my notes` +- `delete my notes` diff --git a/community/private-notes/SPEC.md b/community/private-notes/SPEC.md new file mode 100644 index 00000000..70100457 --- /dev/null +++ b/community/private-notes/SPEC.md @@ -0,0 +1,145 @@ +# Private Notes Spec + +## Goal + +`private-notes` is a voice-first personal note-taking agent for OpenHome. + +- save a note +- read one or more notes +- overwrite a specific note +- delete one or more notes + +--- + +## Core Principles + +1. Notes are private user data stored in JSON. +2. The LLM picks which tool to call. Python executes it. +3. Tool execution is id-based, not title-based. +4. Voice responses are short and natural. +5. No open-ended agent loop. Capped at 4 turns. 
+ +--- + +## Architecture + +A uniform tool loop with one system prompt and conversation history: + +```text +history = [user: initial context] + +while turns remain: + tool_call = LLM(history, SYSTEM_PROMPT) + history += assistant: tool_call + + finish -> speak response, stop + ask_followup -> speak question, history += user: answer, continue + write/read/delete -> execute in Python, history += user: result, continue +``` + +One system prompt. One conversation via `history`. `finish` is a tool like any other. The LLM writes confirmation messages for destructive actions. + +--- + +## Data Model + +### Note + +```json +{ + "id": "uuid", + "title": "string", + "content": "string", + "created_at": "ISO timestamp", + "updated_at": "ISO timestamp" +} +``` + +### Store + +```json +{ + "schema_version": 2, + "notes": [Note] +} +``` + +Persistent storage lives in `private_notes.json`. + +--- + +## Tools + +### `write_note` + +```json +{"name": "write_note", "arguments": {"note_id": null, "title": "string", "content": "string", "confirmation": "string or null"}} +``` + +- `note_id = null` creates a new note (no confirmation needed). +- `note_id = <uuid>` overwrites an existing note. LLM provides the `confirmation` prompt. +- If `note_id` does not exist, Python returns an error result instead of crashing the ability. + +### `read_notes` + +```json +{"name": "read_notes", "arguments": {"note_ids": ["uuid"]}} +``` + +- Readback capped to 3 notes. +- Returns raw note data (title, content, updated_at). LLM formats for speech via `finish`. + +### `delete_notes` + +```json +{"name": "delete_notes", "arguments": {"note_ids": ["uuid"], "confirmation": "string"}} +``` + +- LLM provides the `confirmation` prompt. Always confirmed before deleting. + +### `ask_followup` + +```json +{"name": "ask_followup", "arguments": {"question": "string"}} +``` + +- Used when the request is ambiguous. 
NOTES_FILE = "private_notes.json"
MAX_READBACK = 3
MAX_TURNS = 4
# Spoken when the LLM leaves out the confirmation question for a destructive
# action, so the user is never asked to confirm with silence.
DEFAULT_OVERWRITE_CONFIRMATION = "Do you want me to replace that note?"
DEFAULT_DELETE_CONFIRMATION = "Do you want me to delete that?"

# LLM picks one tool per turn; loop ends when it calls "finish".
SYSTEM_PROMPT = """
You are an OpenHome private notes ability.

Return ONLY valid JSON with exactly this shape:
{
  "name": "write_note|read_notes|delete_notes|ask_followup|finish",
  "arguments": {}
}

Available tools:

1) write_note — create or overwrite a note
{"name": "write_note", "arguments": {"note_id": null, "title": "string", "content": "string", "confirmation": "string or null"}}
note_id=null creates a new note (no confirmation needed).
note_id=<existing id> overwrites that note — provide a short spoken confirmation question.

2) read_notes — read notes by id
{"name": "read_notes", "arguments": {"note_ids": ["uuid"]}}

3) delete_notes — delete notes by id
{"name": "delete_notes", "arguments": {"note_ids": ["uuid"], "confirmation": "string"}}
Always provide a short spoken confirmation question (e.g. "Delete your grocery list?").

4) ask_followup — ask the user for clarification
{"name": "ask_followup", "arguments": {"question": "string"}}

5) finish — speak a final response to the user
{"name": "finish", "arguments": {"response": "string"}}

Rules:
- The note index is sorted by updated_at descending. Latest note = first id.
- Never invent note ids. Resolve titles to ids from the note index.
- If ambiguous, use ask_followup.
- Create short, useful titles. Keep content faithful to the user's meaning.
- After a tool result is shown, call finish with a concise voice-friendly response.
- When reading notes aloud, say the title, a natural relative timestamp (e.g. "from today", "yesterday afternoon"), and the content.
- Keep responses short, warm, and conversational. Like talking to a friend.
""".strip()


class PrivateNotesCapability(MatchingCapability):
    """Voice note-taking ability driven by a capped LLM tool loop.

    The LLM chooses one tool per turn; every read and mutation of the
    notebook is executed here in Python and persisted to NOTES_FILE.
    """

    worker: AgentWorker = None
    capability_worker: CapabilityWorker = None

    # {{register capability}}

    def call(self, worker: AgentWorker):
        """Entry point. Framework calls this when the ability is triggered."""
        self.worker = worker
        self.capability_worker = CapabilityWorker(self.worker)
        self.worker.session_tasks.create(self.run())

    async def run(self):
        """
        Main flow:
        1. Get user request (from transcription or by asking)
        2. Load notes from file
        3. Run the tool loop until the LLM calls finish (at most MAX_TURNS turns)

        ``resume_normal_flow()`` is always called at the end.
        """
        self.worker.editor_logging_handler.info("[PrivateNotes] Ability started")
        try:
            request_text = (await self.capability_worker.wait_for_complete_transcription() or "").strip()
            if not request_text:
                await self.capability_worker.speak(
                    "Private notes is open. What would you like to do?"
                )
                request_text = await self._get_user_input(
                    "I didn't catch anything for private notes."
                )
                if not request_text:
                    return

            notebook = await self._load_notebook()

            # Capture time once so the context prefix stays identical across turns (LLM caching).
            now = self._current_time()

            tool_handlers = {
                "write_note": self._handle_write_note,
                "read_notes": self._handle_read_notes,
                "delete_notes": self._handle_delete_notes,
            }

            history = [{"role": "user", "content": self._build_context(request_text, notebook, now)}]
            notes_saved = False  # True once any mutation has been persisted

            for _ in range(MAX_TURNS):
                # text_to_text_response is sync — no await
                llm_response = self.capability_worker.text_to_text_response(
                    history[-1]["content"],
                    history=history[:-1],
                    system_prompt=SYSTEM_PROMPT,
                )
                parsed = self._parse_tool_call(llm_response)
                if parsed is None:
                    self.worker.editor_logging_handler.error(
                        "[PrivateNotes] LLM response was not a valid tool call"
                    )
                    break  # fall through to the closing message below
                tool_name, tool_args = parsed
                self.worker.editor_logging_handler.info(f"[PrivateNotes] Tool={tool_name}")

                history.append({"role": "assistant", "content": llm_response})

                if tool_name == "finish":
                    await self.capability_worker.speak(tool_args.get("response") or "")
                    return

                if tool_name == "ask_followup":
                    await self.capability_worker.speak(tool_args.get("question") or "")
                    followup = await self._get_user_input(
                        "I didn't catch anything, so I didn't change your notes."
                    )
                    if not followup:
                        return
                    history.append({"role": "user", "content": followup})
                    continue

                handler = tool_handlers.get(tool_name)
                if not handler:
                    break  # unknown tool — fall through to the closing message below

                result = await handler(notebook, tool_args, now)

                if result.get("notes_changed"):
                    await self._save_notebook(notebook)
                    notes_saved = True

                # Feed result back so the LLM can call finish with a spoken summary
                history.append({"role": "user", "content": json.dumps(result, ensure_ascii=True)})

            # Reached when the turn budget runs out or the LLM output was unusable.
            # Do not claim failure if a change was already written to disk.
            await self.capability_worker.speak(
                "Your notes were updated."
                if notes_saved
                else "I couldn't complete that note request."
            )
        except Exception as exc:
            self.worker.editor_logging_handler.error(f"[PrivateNotes] Unexpected error: {exc}")
            await self.capability_worker.speak(
                "Something went wrong with your private notes."
            )
        finally:
            self.worker.editor_logging_handler.info("[PrivateNotes] Ability ended")
            self.capability_worker.resume_normal_flow()

    # --- Context ---

    def _current_time(self) -> datetime:
        """Return the current time in the user's timezone.

        Falls back to the system's local timezone when the configured
        timezone name is missing or cannot be resolved.
        """
        try:
            return datetime.now(ZoneInfo(self.capability_worker.get_timezone()))
        except (KeyError, ValueError, TypeError, OSError) as exc:
            # ZoneInfoNotFoundError is a KeyError subclass.
            self.worker.editor_logging_handler.error(
                f"[PrivateNotes] Invalid timezone, using system local time: {exc}"
            )
            return datetime.now().astimezone()

    @staticmethod
    def _recency_key(note: dict) -> str:
        """Sort key for "most recently updated"; tolerates a missing/None value."""
        return str(note.get("updated_at") or "")

    def _build_context(self, request_text: str, notebook: dict, now: datetime) -> str:
        """Build the initial user message with time, request, and note index."""
        notes = sorted(notebook["notes"], key=self._recency_key, reverse=True)
        note_index = {
            "note_count": len(notes),
            "notes": [
                {"id": n.get("id"), "title": n.get("title"), "updated_at": n.get("updated_at")}
                for n in notes
            ],
        }
        return (
            f"Current local time: {now.isoformat()}\n"
            f"User request: {request_text}\n"
            f"Note index:\n{json.dumps(note_index, ensure_ascii=True)}"
        )

    # --- LLM output parsing ---

    @staticmethod
    def _parse_tool_call(llm_response) -> tuple[str, dict] | None:
        """Extract ``(tool_name, arguments)`` from the raw LLM reply.

        Only the text between the first ``{`` and the last ``}`` is parsed, so
        markdown fences or stray prose around the JSON are ignored. Returns
        None when no JSON object can be decoded. A missing or non-dict
        ``arguments`` becomes ``{}`` and a non-string ``name`` becomes ``""``.
        """
        text = llm_response if isinstance(llm_response, str) else ""
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            return None
        try:
            payload = json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return None
        if not isinstance(payload, dict):
            return None
        name = payload.get("name")
        arguments = payload.get("arguments")
        return (
            name if isinstance(name, str) else "",
            arguments if isinstance(arguments, dict) else {},
        )

    @staticmethod
    def _clean_ids(raw_ids) -> set[str]:
        """Coerce the LLM-supplied ``note_ids`` value into a set of id strings."""
        if isinstance(raw_ids, str):
            raw_ids = [raw_ids]
        if not isinstance(raw_ids, (list, tuple, set)):
            return set()
        return {item for item in raw_ids if isinstance(item, str) and item}

    async def _confirm(self, question, default: str) -> bool:
        """Run the confirmation loop, using ``default`` if the LLM gave no question."""
        prompt = question.strip() if isinstance(question, str) else ""
        return await self.capability_worker.run_confirmation_loop(prompt or default)

    # --- Tool handlers ---

    async def _handle_write_note(self, notebook: dict, args: dict, now: datetime) -> dict:
        """Create a new note (note_id=null) or overwrite an existing one (with confirmation).

        On overwrite, a field the LLM omitted (or sent as null) keeps its
        stored value instead of being blanked.
        """
        note_id = args.get("note_id")
        title = args.get("title") if isinstance(args.get("title"), str) else None
        content = args.get("content") if isinstance(args.get("content"), str) else None
        timestamp = now.isoformat()

        if not note_id:
            new_title, new_content = title or "", content or ""
            if not new_title.strip() and not new_content.strip():
                # Nothing to store; let the LLM ask the user what to write.
                return {"ok": False, "notes_changed": False, "error": "title or content required"}
            notebook["notes"].append({
                "id": str(uuid4()),
                "title": new_title,
                "content": new_content,
                "created_at": timestamp,
                "updated_at": timestamp,
            })
            return {"ok": True, "notes_changed": True, "status": "created", "title": new_title}

        existing = next((n for n in notebook["notes"] if n.get("id") == note_id), None)
        if not existing:
            return {"ok": False, "notes_changed": False, "error": "note not found"}

        if not await self._confirm(args.get("confirmation"), DEFAULT_OVERWRITE_CONFIRMATION):
            return {"ok": True, "notes_changed": False, "status": "cancelled"}

        if title is not None:
            existing["title"] = title
        if content is not None:
            existing["content"] = content
        existing["updated_at"] = timestamp
        return {"ok": True, "notes_changed": True, "status": "updated", "title": existing.get("title")}

    async def _handle_read_notes(self, notebook: dict, args: dict, _now: datetime) -> dict:
        """Return matched notes (capped at MAX_READBACK). LLM formats them for speech."""
        note_ids = self._clean_ids(args.get("note_ids"))
        matched = sorted(
            (n for n in notebook["notes"] if n.get("id") in note_ids),
            key=self._recency_key,
            reverse=True,
        )
        capped = matched[:MAX_READBACK]
        return {
            "ok": True,
            "notes_changed": False,
            "total_matched": len(matched),
            "total_returned": len(capped),
            "total_remaining": len(matched) - len(capped),
            "notes": [
                {"title": n.get("title"), "content": n.get("content"), "updated_at": n.get("updated_at")}
                for n in capped
            ],
        }

    async def _handle_delete_notes(self, notebook: dict, args: dict, _now: datetime) -> dict:
        """Delete notes by id after user confirms.

        If none of the requested ids exist, an error result is returned
        without asking the user to confirm a deletion that would do nothing.
        """
        ids_to_delete = self._clean_ids(args.get("note_ids"))
        targets = [n for n in notebook["notes"] if n.get("id") in ids_to_delete]
        if not targets:
            return {"ok": False, "notes_changed": False, "deleted_count": 0, "error": "no matching notes"}

        if not await self._confirm(args.get("confirmation"), DEFAULT_DELETE_CONFIRMATION):
            return {"ok": True, "notes_changed": False, "deleted_count": 0, "status": "cancelled"}

        notebook["notes"] = [n for n in notebook["notes"] if n.get("id") not in ids_to_delete]
        return {"ok": True, "notes_changed": True, "deleted_count": len(targets), "status": "deleted"}

    # --- Storage ---

    async def _load_notebook(self) -> dict:
        """Load notes from JSON file, or return empty notebook if missing or empty.

        Raises:
            ValueError: if the file holds invalid JSON (``json.JSONDecodeError``)
                or JSON without a ``notes`` list. The file is left untouched so
                the stored data is not overwritten by a later save.
        """
        if not await self.capability_worker.check_if_file_exists(NOTES_FILE, False):
            return {"schema_version": 2, "notes": []}
        raw = await self.capability_worker.read_file(NOTES_FILE, False)
        if not raw or not raw.strip():
            return {"schema_version": 2, "notes": []}
        notebook = json.loads(raw)
        if not isinstance(notebook, dict) or not isinstance(notebook.get("notes"), list):
            raise ValueError(f"{NOTES_FILE} does not contain a notes list")
        # Handlers call .get() on every entry, so keep only well-formed ones.
        notebook["notes"] = [n for n in notebook["notes"] if isinstance(n, dict)]
        return notebook

    async def _save_notebook(self, notebook: dict):
        """Write notes to JSON file, sorted by most recently updated."""
        notebook["notes"] = sorted(notebook["notes"], key=self._recency_key, reverse=True)
        # write_file appends, so delete first to avoid corrupted JSON
        # NOTE(review): delete-then-write is not atomic; a failed write after the
        # delete would lose the stored notes.
        if await self.capability_worker.check_if_file_exists(NOTES_FILE, False):
            await self.capability_worker.delete_file(NOTES_FILE, False)
        await self.capability_worker.write_file(
            NOTES_FILE, json.dumps(notebook, ensure_ascii=True), False,
        )

    # --- Helpers ---

    async def _get_user_input(self, fallback_msg: str) -> str | None:
        """Wait for transcription; speak fallback and return None if empty."""
        text = (await self.capability_worker.wait_for_complete_transcription() or "").strip()
        if not text:
            await self.capability_worker.speak(fallback_msg)
        return text or None