diff --git a/docs/architecture.md b/docs/architecture.md index 257af63c..db95cb9b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -66,7 +66,7 @@ lifecycle → guard → cache → toolkit → registry - **guard:** pre-execution safety/redundancy checks and post-execution call recording - **cache:** per-task reuse layer for read-only and search tool results -- **toolkit:** domain tool definitions with guarded execution (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`) +- **toolkit:** domain tool definitions with guarded execution (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`, `checklist-toolkit`) - **registry:** toolkit registration, permission filtering, and agent-facing tool surface - **details:** see [Tooling](./tooling.md) diff --git a/docs/features.md b/docs/features.md index 8a6b4903..8b0da0c1 100644 --- a/docs/features.md +++ b/docs/features.md @@ -36,6 +36,7 @@ Shipped, user-visible capabilities. - automatic formatting of edited files via detected formatter - automatic linting of edited files via detected linter - deterministic verify command execution from detected project configuration +- inline task checklist for multi-step tasks, pinned between transcript and input ## Tools diff --git a/docs/glossary.md b/docs/glossary.md index 415a0d89..6fb7dfd7 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -21,6 +21,7 @@ Naming conventions and core terms used across Acolyte code and docs. | Term | Definition | |---|---| | Base Agent Input | Immutable prompt input created during `prepare` and used as the base for each generation attempt | +| Checklist | Inline progress display for multi-step tasks, rendered between transcript and input. 
The agent defines steps via `create-checklist` and marks progress via `update-checklist` | | Context Budgeting | Proactive token allocation via tiktoken — system prompt reserved first, remaining space filled by priority (memory → attachments → history → tool payloads) | | Continuation State | Persisted "Current task" and "Next step" cues carried into later turns | | Distill | Automatic memory source family that extracts and consolidates knowledge into records (project/user/session scope variants) | diff --git a/docs/tooling.md b/docs/tooling.md index 8adbfde5..1300509e 100644 --- a/docs/tooling.md +++ b/docs/tooling.md @@ -9,7 +9,7 @@ lifecycle → guard → cache → toolkit → registry ## Layers - **guard**: pre-execution checks and post-execution call recording -- **toolkit**: domain tool definitions (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`) +- **toolkit**: domain tool definitions (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`, `checklist-toolkit`) - **registry**: permission filtering and agent-facing tool surface ## Guarded execution diff --git a/src/agent-modes.ts b/src/agent-modes.ts index 2e044532..a73f3ea4 100644 --- a/src/agent-modes.ts +++ b/src/agent-modes.ts @@ -46,6 +46,7 @@ export const agentModes: Record = { "Do not run verify, test, or build commands — the lifecycle handles format, lint, and verify automatically after your edits.", "Do not signal done until the requested behavior is actually implemented. Updating help text, comments, or tests alone is not completing the task — the functional change must be in place.", "After the last tool call, use the lifecycle signal format from the base instructions and keep the user-facing outcome to one sentence.", + "For multi-step tasks (3+ distinct steps), use `create-checklist` at the start to define a progress checklist. 
Use `update-checklist` to mark items as you complete each step.", ], }, verify: { diff --git a/src/chat-app.tsx b/src/chat-app.tsx index 5a453b92..0a64a6f1 100644 --- a/src/chat-app.tsx +++ b/src/chat-app.tsx @@ -1,3 +1,6 @@ +import { ChatChecklist } from "./chat-checklist"; +import type { ChatRow } from "./chat-contract"; +import { isChecklistOutput } from "./chat-contract"; import { ChatHeader } from "./chat-header"; import { ChatInputPanel } from "./chat-input-panel"; import { isHeaderItem } from "./chat-promotion"; @@ -11,6 +14,12 @@ function ChatApp(props: ChatAppProps) { const { exit } = useApp(); const state = useChatState(props, exit); + const transcriptRows: ChatRow[] = []; + const checklistRows: ChatRow[] = []; + for (const row of state.rows) { + (isChecklistOutput(row.content) ? checklistRows : transcriptRows).push(row); + } + return ( @@ -39,13 +48,14 @@ function ChatApp(props: ChatAppProps) { }} + + {header} + {items.map((item) => ( + + {"\n"} + {` ${item.marker} ${item.label}`} + + ))} + + ); +} + +type ChatChecklistProps = { + rows: ChatRow[]; +}; + +export function ChatChecklist({ rows }: ChatChecklistProps): React.ReactNode { + if (rows.length === 0) return null; + const columns = process.stdout.columns ?? DEFAULT_COLUMNS; + const contentWidth = Math.max(24, columns - 2); + return ( + <> + {rows.map((row) => ( + + + + {isChecklistOutput(row.content) ? 
{renderChecklist(row.content)} : null} + + + ))} + + ); +} diff --git a/src/chat-contract.ts b/src/chat-contract.ts index 15d79d8f..8cdd9d53 100644 --- a/src/chat-contract.ts +++ b/src/chat-contract.ts @@ -1,4 +1,5 @@ import { z } from "zod"; +import { checklistOutputSchema } from "./checklist-contract"; import { isoDateTimeSchema } from "./datetime"; import { domainIdSchema } from "./id-contract"; import { createId } from "./short-id"; @@ -48,7 +49,7 @@ export const commandOutputSchema = z.object({ export type CommandOutput = z.infer; -const chatRowContentSchema = z.union([z.string(), toolOutputSchema, commandOutputSchema]); +const chatRowContentSchema = z.union([z.string(), toolOutputSchema, commandOutputSchema, checklistOutputSchema]); export type ChatRowContent = z.infer; @@ -72,3 +73,9 @@ export function isToolOutput(content: ChatRowContent | undefined): content is To export function isCommandOutput(content: ChatRowContent | undefined): content is CommandOutput { return typeof content === "object" && "header" in content; } + +export function isChecklistOutput( + content: ChatRowContent | undefined, +): content is z.infer { + return typeof content === "object" && "groupId" in content; +} diff --git a/src/chat-message-handler-stream.ts b/src/chat-message-handler-stream.ts index b29a927a..d4f5be48 100644 --- a/src/chat-message-handler-stream.ts +++ b/src/chat-message-handler-stream.ts @@ -1,4 +1,5 @@ import { type ChatRow, createRow } from "./chat-contract"; +import type { ChecklistItem } from "./checklist-contract"; import { LIFECYCLE_ERROR_CODES } from "./error-contract"; import { palette } from "./palette"; import { createId } from "./short-id"; @@ -14,6 +15,7 @@ export type MessageStreamState = { errorCode?: string; error?: { category?: string; [key: string]: unknown }; }) => void; + onChecklist: (entry: { groupId: string; groupTitle: string; items: ChecklistItem[] }) => void; onProgressError: (error: string) => void; streamedAssistantText: () => string; /** 
Flush remaining content and return IDs of all streaming assistant rows (for replacement by final turn rows). */ @@ -37,6 +39,9 @@ export function createMessageStreamState(input: { const toolRowIdByCallId = new Map(); const toolOutput = createToolOutputState(); + // --- checklist state --- + const checklistRowIdByGroupId = new Map(); + function cancelFlushTimer(): void { if (flushTimer) { clearTimeout(flushTimer); @@ -120,6 +125,19 @@ export function createMessageStreamState(input: { ); }, + onChecklist: (entry) => { + const content = { groupId: entry.groupId, groupTitle: entry.groupTitle, items: entry.items }; + const existingRowId = checklistRowIdByGroupId.get(entry.groupId); + if (!existingRowId) { + sealAssistantRow(); + const rowId = `row_${createId()}`; + checklistRowIdByGroupId.set(entry.groupId, rowId); + input.setRows((current) => [...current, { id: rowId, kind: "task" as const, content }]); + return; + } + input.setRows((current) => current.map((row) => (row.id === existingRowId ? 
{ ...row, content } : row))); + }, + onProgressError: (error) => { input.setRows((current) => { const last = current[current.length - 1]; @@ -132,6 +150,11 @@ export function createMessageStreamState(input: { finalize: () => { sealAssistantRow(); + const checklistIds = new Set(checklistRowIdByGroupId.values()); + checklistRowIdByGroupId.clear(); + if (checklistIds.size > 0) { + input.setRows((current) => current.filter((row) => !checklistIds.has(row.id))); + } const ids = [...assistantRowIds]; assistantRowIds.length = 0; return ids; @@ -139,13 +162,15 @@ export function createMessageStreamState(input: { dispose: () => { cancelFlushTimer(); + const checklistIds = new Set(checklistRowIdByGroupId.values()); + checklistRowIdByGroupId.clear(); const idsToRemove = [...assistantRowIds]; if (activeRowId && !idsToRemove.includes(activeRowId)) idsToRemove.push(activeRowId); activeRowId = null; activeContent = ""; assistantRowIds.length = 0; - if (idsToRemove.length > 0) { - const removeSet = new Set(idsToRemove); + const removeSet = new Set([...idsToRemove, ...checklistIds]); + if (removeSet.size > 0) { input.setRows((current) => current.filter((row) => !removeSet.has(row.id))); } }, diff --git a/src/chat-message-handler.ts b/src/chat-message-handler.ts index d471a68b..fc13474a 100644 --- a/src/chat-message-handler.ts +++ b/src/chat-message-handler.ts @@ -127,6 +127,9 @@ export function createMessageHandler(input: CreateMessageHandlerInput): { case "tool-result": streamState.onToolResult(event); break; + case "checklist": + streamState.onChecklist(event); + break; case "error": streamState.onProgressError(event.errorMessage); break; diff --git a/src/chat-transcript.tsx b/src/chat-transcript.tsx index 5a22bbc5..5c66f240 100644 --- a/src/chat-transcript.tsx +++ b/src/chat-transcript.tsx @@ -219,15 +219,15 @@ export function ChatTranscriptRow({ row, contentWidth, toolContentWidth }: ChatT {renderToolOutput(row.content.parts, toolContentWidth)} ) : isCommandOutput(row.content) ? 
( {renderCommandOutput(row.content)} - ) : row.kind === "assistant" ? ( + ) : row.kind === "assistant" && typeof row.content === "string" ? ( {renderAssistantContent(row.content, contentWidth)} - ) : ( + ) : typeof row.content === "string" ? ( {row.content} - )} + ) : null} ); diff --git a/src/checklist-contract.test.ts b/src/checklist-contract.test.ts new file mode 100644 index 00000000..95aab3fb --- /dev/null +++ b/src/checklist-contract.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, test } from "bun:test"; +import { checklistMarker, checklistProgress } from "./checklist-contract"; + +describe("checklistMarker", () => { + test("returns correct markers", () => { + expect(checklistMarker("pending")).toBe("○"); + expect(checklistMarker("in_progress")).toBe("◐"); + expect(checklistMarker("done")).toBe("●"); + expect(checklistMarker("failed")).toBe("◉"); + }); +}); + +describe("checklistProgress", () => { + test("counts done items", () => { + expect( + checklistProgress([ + { id: "1", label: "a", status: "done", order: 0 }, + { id: "2", label: "b", status: "in_progress", order: 1 }, + { id: "3", label: "c", status: "pending", order: 2 }, + ]), + ).toEqual({ done: 1, total: 3 }); + }); + + test("handles all done", () => { + expect( + checklistProgress([ + { id: "1", label: "a", status: "done", order: 0 }, + { id: "2", label: "b", status: "done", order: 1 }, + ]), + ).toEqual({ done: 2, total: 2 }); + }); + + test("failed items do not count as done", () => { + expect( + checklistProgress([ + { id: "1", label: "a", status: "done", order: 0 }, + { id: "2", label: "b", status: "failed", order: 1 }, + ]), + ).toEqual({ done: 1, total: 2 }); + }); + + test("handles empty list", () => { + expect(checklistProgress([])).toEqual({ done: 0, total: 0 }); + }); +}); diff --git a/src/checklist-contract.ts b/src/checklist-contract.ts new file mode 100644 index 00000000..52c3ae95 --- /dev/null +++ b/src/checklist-contract.ts @@ -0,0 +1,39 @@ +import { z } from "zod"; + +export 
const checklistItemStatusSchema = z.enum(["pending", "in_progress", "done", "failed"]); +export type ChecklistItemStatus = z.infer; + +export const checklistItemSchema = z.object({ + id: z.string().min(1), + label: z.string().min(1), + status: checklistItemStatusSchema, + order: z.number().int().nonnegative(), +}); + +export type ChecklistItem = z.infer; + +export const checklistOutputSchema = z.object({ + groupId: z.string().min(1), + groupTitle: z.string().min(1), + items: z.array(checklistItemSchema), +}); + +export type ChecklistOutput = z.infer; + +const STATUS_MARKERS: Record = { + pending: "○", + in_progress: "◐", + done: "●", + failed: "◉", +}; + +export function checklistMarker(status: ChecklistItemStatus): string { + return STATUS_MARKERS[status]; +} + +export function checklistProgress(items: ChecklistItem[]): { done: number; total: number } { + return { + done: items.filter((item) => item.status === "done").length, + total: items.length, + }; +} diff --git a/src/checklist-format.test.ts b/src/checklist-format.test.ts new file mode 100644 index 00000000..e8d972d4 --- /dev/null +++ b/src/checklist-format.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, test } from "bun:test"; +import { formatChecklist } from "./checklist-format"; + +describe("formatChecklist", () => { + test("returns header with progress and sorted items", () => { + const result = formatChecklist({ + groupId: "g1", + groupTitle: "Build", + items: [ + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }); + expect(result.header).toBe("Build (1/3)"); + expect(result.items).toEqual([ + { id: "s1", marker: "●", label: "lint" }, + { id: "s2", marker: "◐", label: "test" }, + { id: "s3", marker: "○", label: "deploy" }, + ]); + }); + + test("handles single item", () => { + const result = formatChecklist({ + groupId: "g1", + groupTitle: "Quick", + items: [{ 
id: "s1", label: "do it", status: "pending", order: 0 }], + }); + expect(result.header).toBe("Quick (0/1)"); + expect(result.items).toEqual([{ id: "s1", marker: "○", label: "do it" }]); + }); +}); diff --git a/src/checklist-format.ts b/src/checklist-format.ts new file mode 100644 index 00000000..946094f9 --- /dev/null +++ b/src/checklist-format.ts @@ -0,0 +1,12 @@ +import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; + +export type FormattedChecklistItem = { id: string; marker: string; label: string }; + +export function formatChecklist(output: ChecklistOutput): { header: string; items: FormattedChecklistItem[] } { + const sorted = [...output.items].sort((a, b) => a.order - b.order); + const { done, total } = checklistProgress(sorted); + return { + header: `${output.groupTitle} (${done}/${total})`, + items: sorted.map((item) => ({ id: item.id, marker: checklistMarker(item.status), label: item.label })), + }; +} diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts new file mode 100644 index 00000000..ee4546d6 --- /dev/null +++ b/src/checklist-toolkit.ts @@ -0,0 +1,111 @@ +import { z } from "zod"; +import { type ChecklistItem, checklistItemStatusSchema } from "./checklist-contract"; +import type { ToolkitDeps, ToolkitInput } from "./tool-contract"; +import { createTool } from "./tool-contract"; +import { runTool } from "./tool-execution"; + +const createChecklistInputSchema = z.object({ + groupId: z.string().min(1), + groupTitle: z.string().min(1), + items: z + .array( + z.object({ + id: z.string().min(1), + label: z.string().min(1), + order: z.number().int().nonnegative(), + }), + ) + .min(1), +}); + +const createChecklistOutputSchema = z.object({ + kind: z.literal("create-checklist"), + groupId: z.string(), + itemCount: z.number(), +}); + +const updateChecklistInputSchema = z.object({ + groupId: z.string().min(1), + itemId: z.string().min(1), + status: checklistItemStatusSchema, +}); + +const 
updateChecklistOutputSchema = z.object({ + kind: z.literal("update-checklist"), + groupId: z.string(), + itemId: z.string(), + status: checklistItemStatusSchema, +}); + +function createCreateChecklistTool( + _deps: ToolkitDeps, + input: ToolkitInput, + state: Map, +) { + return createTool({ + id: "create-checklist", + category: "meta", + permissions: [], + description: "Create an inline task checklist visible to the user. All items start as pending.", + instruction: + "Use `create-checklist` once at the start of multi-step tasks to show the user a progress checklist. Define all steps upfront. Use `update-checklist` to change item statuses as you work.", + inputSchema: createChecklistInputSchema, + outputSchema: createChecklistOutputSchema, + execute: async (toolInput, toolCallId) => { + return runTool(input.session, "create-checklist", toolCallId, toolInput, async () => { + const items: ChecklistItem[] = toolInput.items.map((item) => ({ + id: item.id, + label: item.label, + status: "pending", + order: item.order, + })); + state.set(toolInput.groupId, { title: toolInput.groupTitle, items }); + input.onChecklist({ groupId: toolInput.groupId, groupTitle: toolInput.groupTitle, items }); + return { kind: "create-checklist" as const, groupId: toolInput.groupId, itemCount: items.length }; + }); + }, + }); +} + +function createUpdateChecklistTool( + _deps: ToolkitDeps, + input: ToolkitInput, + state: Map, +) { + return createTool({ + id: "update-checklist", + category: "meta", + permissions: [], + description: "Update the status of a single checklist item.", + instruction: + "Use `update-checklist` to mark a checklist item as `in_progress`, `done`, or `failed`. 
Requires a prior `create-checklist` call for the same groupId.", + inputSchema: updateChecklistInputSchema, + outputSchema: updateChecklistOutputSchema, + execute: async (toolInput, toolCallId) => { + return runTool(input.session, "update-checklist", toolCallId, toolInput, async () => { + const group = state.get(toolInput.groupId); + if (!group) throw new Error(`No checklist found for groupId "${toolInput.groupId}"`); + if (!group.items.some((i) => i.id === toolInput.itemId)) { + throw new Error(`No item "${toolInput.itemId}" in checklist "${toolInput.groupId}"`); + } + const items = group.items.map((i) => (i.id === toolInput.itemId ? { ...i, status: toolInput.status } : i)); + state.set(toolInput.groupId, { ...group, items }); + input.onChecklist({ groupId: toolInput.groupId, groupTitle: group.title, items }); + return { + kind: "update-checklist" as const, + groupId: toolInput.groupId, + itemId: toolInput.itemId, + status: toolInput.status, + }; + }); + }, + }); +} + +export function createChecklistToolkit(deps: ToolkitDeps, input: ToolkitInput) { + const state = new Map(); + return { + createChecklist: createCreateChecklistTool(deps, input, state), + updateChecklist: createUpdateChecklistTool(deps, input, state), + }; +} diff --git a/src/checklist.int.test.ts b/src/checklist.int.test.ts new file mode 100644 index 00000000..41879bcf --- /dev/null +++ b/src/checklist.int.test.ts @@ -0,0 +1,248 @@ +import { describe, expect, test } from "bun:test"; +import type { ChatRow } from "./chat-contract"; +import { isChecklistOutput } from "./chat-contract"; +import type { ChecklistOutput } from "./checklist-contract"; +import { createClient, createMessageHandlerHarness } from "./test-utils"; + +describe("checklist integration", () => { + test("checklist event creates a task row with correct content", async () => { + let snapshot: ChatRow[] = []; + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + 
replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Refactoring auth module", + items: [ + { id: "item_1", label: "read existing auth implementation", status: "pending", order: 0 }, + { id: "item_2", label: "extract token validation", status: "pending", order: 1 }, + { id: "item_3", label: "add unit tests", status: "pending", order: 2 }, + ], + }); + snapshot = [...rows]; + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("refactor auth"); + + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(taskRows).toHaveLength(1); + + const content = taskRows[0]?.content as ChecklistOutput; + expect(content.groupId).toBe("grp_1"); + expect(content.groupTitle).toBe("Refactoring auth module"); + expect(content.items).toHaveLength(3); + expect(content.items.every((item) => item.status === "pending")).toBe(true); + }); + + test("subsequent checklist events update the same row in place", async () => { + let snapshot: ChatRow[] = []; + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + // create-checklist creates with all pending + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "pending", order: 0 }, + { id: "s2", label: "test", status: "pending", order: 1 }, + ], + }); + // update-checklist updates individual items + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s2", label: "test", status: "in_progress", order: 1 }, + ], + 
}); + snapshot = [...rows]; + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("run pipeline"); + + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(taskRows).toHaveLength(1); + + const content = taskRows[0]?.content as ChecklistOutput; + expect(content.items[0]?.status).toBe("done"); + expect(content.items[1]?.status).toBe("in_progress"); + }); + + test("different group IDs produce separate checklist rows", async () => { + let snapshot: ChatRow[] = []; + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_a", + groupTitle: "Phase A", + items: [{ id: "a1", label: "step A1", status: "pending", order: 0 }], + }); + options.onEvent({ + type: "checklist", + groupId: "grp_b", + groupTitle: "Phase B", + items: [{ id: "b1", label: "step B1", status: "pending", order: 0 }], + }); + snapshot = [...rows]; + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("multi-phase"); + + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(taskRows).toHaveLength(2); + expect((taskRows[0]?.content as ChecklistOutput).groupId).toBe("grp_a"); + expect((taskRows[1]?.content as ChecklistOutput).groupId).toBe("grp_b"); + }); + + test("checklist row appears before subsequent tool rows", async () => { + let snapshot: ChatRow[] = []; + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + 
groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "do thing", status: "pending", order: 0 }], + }); + options.onEvent({ + type: "tool-call", + toolCallId: "call_1", + toolName: "read-file", + args: { path: "a.ts" }, + }); + options.onEvent({ + type: "tool-output", + toolCallId: "call_1", + toolName: "read-file", + content: { kind: "tool-header", labelKey: "tool.label.read", detail: "a.ts" }, + }); + snapshot = [...rows]; + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("go"); + + const taskIndex = snapshot.findIndex((row) => row.kind === "task" && isChecklistOutput(row.content)); + const toolIndex = snapshot.findIndex((row) => row.kind === "tool"); + expect(taskIndex).toBeGreaterThanOrEqual(0); + expect(toolIndex).toBeGreaterThanOrEqual(0); + expect(taskIndex).toBeLessThan(toolIndex); + }); + + test("checklist events do not break tool output rows", async () => { + let snapshot: ChatRow[] = []; + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "edit file", status: "in_progress", order: 0 }], + }); + options.onEvent({ + type: "tool-call", + toolCallId: "call_1", + toolName: "edit-file", + args: { path: "a.ts" }, + }); + options.onEvent({ + type: "tool-output", + toolCallId: "call_1", + toolName: "edit-file", + content: { kind: "tool-header", labelKey: "tool.label.edit", detail: "a.ts" }, + }); + options.onEvent({ + type: "tool-result", + toolCallId: "call_1", + toolName: "edit-file", + }); + snapshot = [...rows]; + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("edit something"); + + const taskRows = 
snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + const toolRows = snapshot.filter((row) => row.kind === "tool"); + expect(taskRows).toHaveLength(1); + expect(toolRows).toHaveLength(1); + }); + + test("checklist rows are removed after turn completes", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "do thing", status: "done", order: 0 }], + }); + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("go"); + + const checklistRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(checklistRows).toHaveLength(0); + }); + + test("checklist rows are removed on abort", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "do thing", status: "in_progress", order: 0 }], + }); + throw Object.assign(new Error("aborted"), { name: "AbortError" }); + }, + }), + }); + + await handleMessage("go"); + + const checklistRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(checklistRows).toHaveLength(0); + }); +}); diff --git a/src/checklist.tui.test.tsx b/src/checklist.tui.test.tsx new file mode 100644 index 00000000..98b5305a --- /dev/null +++ b/src/checklist.tui.test.tsx @@ -0,0 +1,138 @@ +import { describe, expect, test } from "bun:test"; +import { ChatChecklist } from "./chat-checklist"; 
+import type { ChatRow } from "./chat-contract"; +import type { ChecklistOutput } from "./checklist-contract"; +import { dedent } from "./test-utils"; +import { renderPlain } from "./tui-test-utils"; + +function renderChecklist(checklists: ChecklistOutput[]): string { + const rows: ChatRow[] = checklists.map((content, i) => ({ + id: `row_${i}`, + kind: "task", + content, + })); + return renderPlain(, 96); +} + +describe("checklist TUI rendering", () => { + test("renders header with progress and status markers", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }, + ]), + ).toBe( + dedent` + Build pipeline (1/3) + ● lint + ◐ test + ○ deploy + `, + ); + }); + + test("renders all status marker variants", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Steps", + items: [ + { id: "s1", label: "done step", status: "done", order: 0 }, + { id: "s2", label: "active step", status: "in_progress", order: 1 }, + { id: "s3", label: "waiting step", status: "pending", order: 2 }, + { id: "s4", label: "broken step", status: "failed", order: 3 }, + ], + }, + ]), + ).toBe( + dedent` + Steps (1/4) + ● done step + ◐ active step + ○ waiting step + ◉ broken step + `, + ); + }); + + test("sorts items by order regardless of input order", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Steps", + items: [ + { id: "s3", label: "third", status: "pending", order: 2 }, + { id: "s1", label: "first", status: "done", order: 0 }, + { id: "s2", label: "second", status: "in_progress", order: 1 }, + ], + }, + ]), + ).toBe( + dedent` + Steps (1/3) + ● first + ◐ second + ○ third + `, + ); + }); + + test("renders nothing when rows are empty", () => { + expect(renderPlain(, 96)).toBe(""); + }); + + test("renders 
multiple checklists", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Phase A", + items: [{ id: "a1", label: "step A", status: "done", order: 0 }], + }, + { + groupId: "g2", + groupTitle: "Phase B", + items: [{ id: "b1", label: "step B", status: "pending", order: 0 }], + }, + ]), + ).toBe( + dedent` + Phase A (1/1) + ● step A + + Phase B (0/1) + ○ step B + `, + ); + }); + + test("all done shows full progress", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Done", + items: [ + { id: "s1", label: "a", status: "done", order: 0 }, + { id: "s2", label: "b", status: "done", order: 1 }, + ], + }, + ]), + ).toBe( + dedent` + Done (2/2) + ● a + ● b + `, + ); + }); +}); diff --git a/src/cli-prompt.test.ts b/src/cli-prompt.test.ts index 1052a0bb..d83917a0 100644 --- a/src/cli-prompt.test.ts +++ b/src/cli-prompt.test.ts @@ -55,6 +55,42 @@ describe("cli-prompt", () => { expect(session.messages[session.messages.length - 1]?.kind).toBe("tool_payload"); }); + test("checklist events print header and items", async () => { + const printed: string[] = []; + const originalWrite = process.stdout.write; + process.stdout.write = ((chunk: string) => { + printed.push(chunk); + return true; + }) as typeof process.stdout.write; + + try { + const events: StreamEvent[] = [ + { + type: "checklist", + groupId: "grp_1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }, + ]; + + const session = createTestSession(); + const client = createStreamingClient(events); + await handlePrompt("run pipeline", session, client); + + const output = printed.join(""); + expect(output).toContain("Build pipeline (1/3)"); + expect(output).toContain("● lint"); + expect(output).toContain("◐ test"); + expect(output).toContain("○ deploy"); + } finally { + process.stdout.write = 
originalWrite; + } + }); + test("tool-output events with growing numWidth do not reprint earlier diffs", async () => { const printed: string[] = []; const originalWrite = process.stdout.write; diff --git a/src/cli-prompt.ts b/src/cli-prompt.ts index 1750b2a4..7744f896 100644 --- a/src/cli-prompt.ts +++ b/src/cli-prompt.ts @@ -1,6 +1,7 @@ import { stdout as output } from "node:process"; import { createWorkspaceSpecifier, type VerifyScope } from "./api"; import { createMessage } from "./chat-session"; +import { formatChecklist } from "./checklist-format"; import { formatAssistantReplyOutput, printIndentedDim } from "./cli-format"; import type { Client } from "./client-contract"; import { nowIso } from "./datetime"; @@ -152,6 +153,13 @@ export async function handlePrompt( hasPrintedToolProgress = true; break; } + case "checklist": { + const { header, items } = formatChecklist(event); + printDim(`• ${header}`); + for (const item of items) printIndentedDim(`${item.marker} ${item.label}`); + hasPrintedToolProgress = true; + break; + } case "tool-result": { const guardBlocked = event.isError === true && diff --git a/src/client-contract.ts b/src/client-contract.ts index 851e4f2a..95e16c42 100644 --- a/src/client-contract.ts +++ b/src/client-contract.ts @@ -2,6 +2,7 @@ import { z } from "zod"; import { agentModeSchema } from "./agent-contract"; import { type ChatRequest, type ChatResponse, chatResponseStateSchema } from "./api"; import { invariant } from "./assert"; +import { checklistItemSchema } from "./checklist-contract"; import { rpcServerMessageSchema } from "./rpc-protocol"; import type { StatusFields } from "./status-contract"; import { streamErrorSchema } from "./stream-error"; @@ -88,6 +89,12 @@ export const streamEventSchema = z.discriminatedUnion("type", [ }), streamUsageEventSchema, z.object({ type: z.literal("status"), state: pendingStateSchema }), + z.object({ + type: z.literal("checklist"), + groupId: z.string().min(1), + groupTitle: z.string().min(1), + 
items: z.array(checklistItemSchema), + }), z.object({ type: z.literal("error"), errorMessage: z.string(), @@ -116,6 +123,12 @@ type ToolResultEvent = { }; type UsageEvent = { type: "usage"; inputTokens: number; outputTokens: number }; type StatusEvent = { type: "status"; state: PendingState }; +type ChecklistEvent = { + type: "checklist"; + groupId: string; + groupTitle: string; + items: z.infer<typeof checklistItemSchema>[]; +}; type ErrorEvent = { type: "error"; errorMessage: string; @@ -132,6 +145,7 @@ export type StreamEvent = | ToolResultEvent | UsageEvent | StatusEvent + | ChecklistEvent | ErrorEvent; export interface Client { diff --git a/src/lifecycle-contract.ts b/src/lifecycle-contract.ts index 49a3dbd5..e02e83ed 100644 --- a/src/lifecycle-contract.ts +++ b/src/lifecycle-contract.ts @@ -5,6 +5,7 @@ import type { ErrorCode } from "./error-contract"; import type { ErrorCategory, ErrorSource } from "./error-handling"; import type { LifecyclePolicy } from "./lifecycle-policy"; import type { PromptBreakdownTotals } from "./lifecycle-usage"; +import type { ChecklistListener } from "./tool-contract"; import type { SessionContext } from "./tool-guards"; import type { ToolOutputPart } from "./tool-output-content"; import type { ToolRecovery } from "./tool-recovery"; @@ -101,6 +102,7 @@ export type PhasePrepareInput = { policy: LifecyclePolicy; debug: RunContext["debug"]; onOutput: (event: ToolOutputEvent) => void; + onChecklist: ChecklistListener; }; export type PhasePrepareResult = { session: SessionContext; diff --git a/src/lifecycle-prepare.test.ts b/src/lifecycle-prepare.test.ts index 5f21aaca..ba4646b2 100644 --- a/src/lifecycle-prepare.test.ts +++ b/src/lifecycle-prepare.test.ts @@ -19,6 +19,7 @@ describe("phasePrepare", () => { policy, debug: () => {}, onOutput: () => {}, + onChecklist: () => {}, }); expect(prepared.session.toolTimeoutMs).toBe(1_234); expect(prepared.session.flags.consecutiveGuardBlockLimit).toBe(7); diff --git a/src/lifecycle-prepare.ts b/src/lifecycle-prepare.ts 
index a1d422f7..1dcf8aef 100644 --- a/src/lifecycle-prepare.ts +++ b/src/lifecycle-prepare.ts @@ -20,6 +20,7 @@ export function phasePrepare(input: PhasePrepareInput): PhasePrepareResult { const { tools, session } = toolsForAgent({ workspace: input.workspace, onOutput: input.onOutput, + onChecklist: input.onChecklist, taskId: input.taskId, sessionId: input.request.sessionId, }); diff --git a/src/lifecycle.ts b/src/lifecycle.ts index ba1604f7..e9616081 100644 --- a/src/lifecycle.ts +++ b/src/lifecycle.ts @@ -218,6 +218,9 @@ export async function runLifecycle(input: LifecycleInput, deps: LifecycleDeps = onOutput: (event: ToolOutputEvent) => { ctxRef?.toolOutputHandler?.(event); }, + onChecklist: (event) => { + emit({ type: "checklist", groupId: event.groupId, groupTitle: event.groupTitle, items: event.items }); + }, }); const ctx = createRunContext(input, { diff --git a/src/tool-contract.ts b/src/tool-contract.ts index d5f7d718..61aaf8c7 100644 --- a/src/tool-contract.ts +++ b/src/tool-contract.ts @@ -1,13 +1,13 @@ import type { z } from "zod"; +import type { ChecklistItem } from "./checklist-contract"; import type { SessionContext } from "./tool-guards"; import type { ToolOutputListener } from "./tool-output-format"; export type ToolPermission = "read" | "write" | "execute" | "network"; -export type ToolCategory = "read" | "search" | "write" | "execute" | "network"; +export type ToolCategory = "read" | "search" | "write" | "execute" | "network" | "meta"; export type ToolDefinition = { readonly id: string; - readonly labelKey: string; readonly category: ToolCategory; readonly permissions: readonly ToolPermission[]; readonly description: string; @@ -15,6 +15,7 @@ export type ToolDefinition = { readonly inputSchema: z.ZodType; readonly outputSchema: z.ZodType; readonly execute: (input: TInput, toolCallId: string) => Promise; + readonly labelKey?: string; }; export type ToolOutputBudgetEntry = { maxChars: number; maxLines: number }; @@ -38,10 +39,13 @@ export type 
ToolkitDeps = { outputBudget: ToolOutputBudget; }; +export type ChecklistListener = (event: { groupId: string; groupTitle: string; items: ChecklistItem[] }) => void; + export type ToolkitInput = { workspace: string; session: SessionContext; onOutput: ToolOutputListener; + onChecklist: ChecklistListener; }; export type ToolCacheEntry = { diff --git a/src/tool-output.tui.test.tsx b/src/tool-output.tui.test.tsx index 1b110b86..7ad96e6d 100644 --- a/src/tool-output.tui.test.tsx +++ b/src/tool-output.tui.test.tsx @@ -1,29 +1,10 @@ import { describe, expect, test } from "bun:test"; import type { ChatRow } from "./chat-contract"; import { ChatTranscript } from "./chat-transcript"; +import { dedent } from "./test-utils"; import { formatToolOutput, type ToolOutputPart } from "./tool-output-content"; import { renderPlain } from "./tui-test-utils"; -function dedent(value: string): string { - const lines = value.split("\n"); - let start = 0; - while (start < lines.length && lines[start]?.trim().length === 0) start += 1; - let end = lines.length - 1; - while (end >= start && lines[end]?.trim().length === 0) end -= 1; - if (start > end) return ""; - let prefix: string | null = null; - for (const line of lines.slice(start, end + 1)) { - if (line.trim().length === 0) continue; - const current = line.match(/^[ \t]*/)?.[0] ?? ""; - if (prefix === null || current.length < prefix.length) prefix = current; - } - const p = prefix ?? ""; - return lines - .slice(start, end + 1) - .map((line) => (line.startsWith(p) ? 
line.slice(p.length) : line)) - .join("\n"); -} - function renderChat(toolOutput: ToolOutputPart[]): string { const row: ChatRow = { id: "r1", kind: "tool", content: { parts: toolOutput } }; return renderPlain(, 96); diff --git a/src/tool-registry.test.ts b/src/tool-registry.test.ts index d9e67146..2ff77de5 100644 --- a/src/tool-registry.test.ts +++ b/src/tool-registry.test.ts @@ -12,6 +12,7 @@ describe("toolsets", () => { test("returns all tools", () => { const { tools, session } = toolsForAgent(); expect(Object.keys(tools).sort()).toEqual([ + "createChecklist", "createFile", "deleteFile", "editCode", @@ -27,6 +28,7 @@ describe("toolsets", () => { "runCommand", "scanCode", "searchFiles", + "updateChecklist", "webFetch", "webSearch", ]); @@ -87,6 +89,7 @@ describe("toolIdsByCategory", () => { expect(ids).toContain("delete-file"); expect(ids).toContain("git-add"); expect(ids).toContain("git-commit"); + expect(ids).not.toContain("update-checklist"); expect(ids).not.toContain("read-file"); expect(ids).not.toContain("run-command"); expect(ids).not.toContain("web-search"); diff --git a/src/tool-registry.ts b/src/tool-registry.ts index f55bc56b..7b50391e 100644 --- a/src/tool-registry.ts +++ b/src/tool-registry.ts @@ -1,6 +1,7 @@ import { resolve } from "node:path"; import { appConfig } from "./app-config"; import { invariant } from "./assert"; +import { createChecklistToolkit } from "./checklist-toolkit"; import { createCodeToolkit } from "./code-toolkit"; import { createFileToolkit } from "./file-toolkit"; import { createGitToolkit } from "./git-toolkit"; @@ -8,7 +9,14 @@ import { EN_MESSAGES } from "./i18n/en"; import { createShellToolkit } from "./shell-toolkit"; import { createToolCache } from "./tool-cache"; import { getDefaultToolCacheStore } from "./tool-cache-store"; -import type { ToolCategory, ToolDefinition, ToolkitDeps, ToolkitInput, ToolPermission } from "./tool-contract"; +import type { + ChecklistListener, + ToolCategory, + ToolDefinition, + ToolkitDeps, 
+ ToolkitInput, + ToolPermission, +} from "./tool-contract"; import { createSessionContext, type SessionContext } from "./tool-guards"; import type { ToolOutputListener } from "./tool-output-format"; import { createWebToolkit } from "./web-toolkit"; @@ -20,7 +28,8 @@ type RegisteredToolkit = ReturnType & ReturnType & ReturnType & ReturnType & - ReturnType; + ReturnType & + ReturnType; export type Toolset = { [Key in keyof RegisteredToolkit]: RegisteredToolkit[Key]; @@ -52,9 +61,14 @@ export const TOOLKIT_REGISTRY: { id: "git", createToolkit: (deps, input) => createGitToolkit(deps, input), }, + { + id: "checklist", + createToolkit: (deps, input) => createChecklistToolkit(deps, input), + }, ]; const noopOutput: ToolOutputListener = () => {}; +const noopChecklist: ChecklistListener = () => {}; const defaultToolkitDeps = (): ToolkitDeps => ({ outputBudget: appConfig.agent.toolOutputBudget, @@ -64,11 +78,12 @@ function collectTools( workspace: string, session: SessionContext, onOutput: ToolOutputListener = noopOutput, + onChecklist: ChecklistListener = noopChecklist, deps: ToolkitDeps = defaultToolkitDeps(), ): ToolMap { const combined: ToolMap = {}; for (const toolkit of TOOLKIT_REGISTRY) { - Object.assign(combined, toolkit.createToolkit(deps, { workspace, session, onOutput })); + Object.assign(combined, toolkit.createToolkit(deps, { workspace, session, onOutput, onChecklist })); } return combined; } @@ -77,14 +92,15 @@ function asToolDefinitionsById(entries: ToolMap): Record = {}; for (const tool of Object.values(entries)) { invariant(typeof tool.id === "string" && tool.id.trim().length > 0, "tool id is required"); - invariant(typeof tool.labelKey === "string" && tool.labelKey.trim().length > 0, `tool ${tool.id} missing labelKey`); - invariant(tool.labelKey in EN_MESSAGES, `tool ${tool.id} has unknown labelKey "${tool.labelKey}"`); + if (tool.labelKey) { + invariant(tool.labelKey in EN_MESSAGES, `tool ${tool.id} has unknown labelKey "${tool.labelKey}"`); + } 
invariant(typeof tool.category === "string" && tool.category.trim().length > 0, `tool ${tool.id} missing category`); invariant( typeof tool.instruction === "string" && tool.instruction.trim().length > 0, `tool ${tool.id} missing instruction`, ); - invariant(Array.isArray(tool.permissions) && tool.permissions.length > 0, `tool ${tool.id} missing permissions`); + invariant(Array.isArray(tool.permissions), `tool ${tool.id} missing permissions`); byId[tool.id] = tool as AnyToolDefinition; } return byId; @@ -123,6 +139,7 @@ export const DISCOVERY_TOOL_SET = new Set(DISCOVERY_TOOLS); export function toolsForAgent(options?: { workspace?: string; onOutput?: ToolOutputListener; + onChecklist?: ChecklistListener; taskId?: string; sessionId?: string; }): { @@ -133,7 +150,7 @@ export function toolsForAgent(options?: { const session = createSessionContext(options?.taskId, WRITE_TOOL_SET); session.cache = createToolCache(DISCOVERY_TOOL_SET, undefined, getDefaultToolCacheStore(options?.sessionId)); return { - tools: collectTools(workspace, session, options?.onOutput) as unknown as Toolset, + tools: collectTools(workspace, session, options?.onOutput, options?.onChecklist) as unknown as Toolset, session, }; }