From 720e5eeaab5e88f4ca981fcdba837c452c9fc54f Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 11:53:01 +0200 Subject: [PATCH 01/21] feat(checklist): add inline task checklist with update-checklist tool --- src/agent-modes.ts | 1 + src/chat-contract.ts | 9 +- src/chat-message-handler-stream.ts | 18 +++ src/chat-message-handler.ts | 3 + src/chat-transcript.tsx | 23 +++- src/checklist-contract.ts | 39 ++++++ src/checklist-toolkit.ts | 74 ++++++++++++ src/checklist.int.test.ts | 186 +++++++++++++++++++++++++++++ src/client-contract.ts | 14 +++ src/i18n/en.ts | 1 + src/lifecycle-contract.ts | 2 + src/lifecycle-prepare.test.ts | 1 + src/lifecycle-prepare.ts | 1 + src/lifecycle.ts | 3 + src/tool-contract.ts | 7 ++ src/tool-registry.test.ts | 2 + src/tool-registry.ts | 24 +++- 17 files changed, 401 insertions(+), 7 deletions(-) create mode 100644 src/checklist-contract.ts create mode 100644 src/checklist-toolkit.ts create mode 100644 src/checklist.int.test.ts diff --git a/src/agent-modes.ts b/src/agent-modes.ts index 2e044532..3d613897 100644 --- a/src/agent-modes.ts +++ b/src/agent-modes.ts @@ -46,6 +46,7 @@ export const agentModes: Record = { "Do not run verify, test, or build commands — the lifecycle handles format, lint, and verify automatically after your edits.", "Do not signal done until the requested behavior is actually implemented. Updating help text, comments, or tests alone is not completing the task — the functional change must be in place.", "After the last tool call, use the lifecycle signal format from the base instructions and keep the user-facing outcome to one sentence.", + "For multi-step tasks (3+ distinct steps), use `update-checklist` at the start to show the user a progress checklist. Update item statuses as you complete each step.", ], }, verify: { diff --git a/src/chat-contract.ts b/src/chat-contract.ts index 15d79d8f..8cdd9d53 100644 --- a/src/chat-contract.ts +++ b/src/chat-contract.ts @@ -1,4 +1,5 @@ import { z } from "zod"; +import { checklistOutputSchema } from "./checklist-contract"; import { isoDateTimeSchema } from "./datetime"; import { domainIdSchema } from "./id-contract"; import { createId } from "./short-id"; @@ -48,7 +49,7 @@ export const commandOutputSchema = z.object({ export type CommandOutput = z.infer; -const chatRowContentSchema = z.union([z.string(), toolOutputSchema, commandOutputSchema]); +const chatRowContentSchema = z.union([z.string(), toolOutputSchema, commandOutputSchema, checklistOutputSchema]); export type ChatRowContent = z.infer; @@ -72,3 +73,9 @@ export function isToolOutput(content: ChatRowContent | undefined): content is To export function isCommandOutput(content: ChatRowContent | undefined): content is CommandOutput { return typeof content === "object" && "header" in content; } + +export function isChecklistOutput( + content: ChatRowContent | undefined, +): content is z.infer { + return typeof content === "object" && "groupId" in content; +} diff --git a/src/chat-message-handler-stream.ts b/src/chat-message-handler-stream.ts index b29a927a..a4144f52 100644 --- a/src/chat-message-handler-stream.ts +++ b/src/chat-message-handler-stream.ts @@ -1,4 +1,5 @@ import { type ChatRow, createRow } from "./chat-contract"; +import type { ChecklistItem } from "./checklist-contract"; import { LIFECYCLE_ERROR_CODES } from "./error-contract"; import { palette } from "./palette"; import { createId } from "./short-id"; @@ -14,6 +15,7 @@ export type MessageStreamState = { errorCode?: string; error?: { category?: string; [key: string]: unknown }; }) => void; + onChecklist: (entry: { groupId: string; groupTitle: string; items: ChecklistItem[] }) => void; onProgressError: (error: string) => void; streamedAssistantText: () => string; /** Flush remaining content and return IDs of all streaming assistant rows (for replacement by final turn rows). */ @@ -37,6 +39,9 @@ export function createMessageStreamState(input: { const toolRowIdByCallId = new Map(); const toolOutput = createToolOutputState(); + // --- checklist state --- + const checklistRowIdByGroupId = new Map(); + function cancelFlushTimer(): void { if (flushTimer) { clearTimeout(flushTimer); @@ -120,6 +125,19 @@ export function createMessageStreamState(input: { ); }, + onChecklist: (entry) => { + const content = { groupId: entry.groupId, groupTitle: entry.groupTitle, items: entry.items }; + const existingRowId = checklistRowIdByGroupId.get(entry.groupId); + if (!existingRowId) { + sealAssistantRow(); + const rowId = `row_${createId()}`; + checklistRowIdByGroupId.set(entry.groupId, rowId); + input.setRows((current) => [...current, { id: rowId, kind: "task" as const, content }]); + return; + } + input.setRows((current) => current.map((row) => (row.id === existingRowId ? { ...row, content } : row))); + }, + onProgressError: (error) => { input.setRows((current) => { const last = current[current.length - 1]; diff --git a/src/chat-message-handler.ts b/src/chat-message-handler.ts index d471a68b..fc13474a 100644 --- a/src/chat-message-handler.ts +++ b/src/chat-message-handler.ts @@ -127,6 +127,9 @@ export function createMessageHandler(input: CreateMessageHandlerInput): { case "tool-result": streamState.onToolResult(event); break; + case "checklist": + streamState.onChecklist(event); + break; case "error": streamState.onProgressError(event.errorMessage); break; diff --git a/src/chat-transcript.tsx b/src/chat-transcript.tsx index 5a22bbc5..246313a6 100644 --- a/src/chat-transcript.tsx +++ b/src/chat-transcript.tsx @@ -2,9 +2,10 @@ import React from "react"; import type { AgentMode } from "./agent-contract"; import { renderAssistantContent } from "./chat-content-render"; import type { ChatRow, CommandOutput } from "./chat-contract"; -import { isCommandOutput, isToolOutput } from "./chat-contract"; +import { isChecklistOutput, isCommandOutput, isToolOutput } from "./chat-contract"; import { commandOutputColWidth, formatTokenCount } from "./chat-format"; import { ShimmerText } from "./chat-shimmer"; +import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; import type { PendingState } from "./client-contract"; import { t, tDynamic } from "./i18n"; import { palette } from "./palette"; @@ -198,6 +199,22 @@ function renderToolOutput(parts: ToolOutputPart[], toolContentWidth: number): Re ); } +function renderChecklist(output: ChecklistOutput): React.ReactNode { + const sorted = [...output.items].sort((a, b) => a.order - b.order); + const { done, total } = checklistProgress(sorted); + return ( + <> + {`${output.groupTitle} (${done}/${total})`} + {sorted.map((item) => ( + + {"\n"} + {` ${checklistMarker(item.status)} ${item.label}`} + + ))} + + ); +} + type ChatTranscriptRowProps = { row: ChatRow; contentWidth: number; @@ -215,7 +232,9 @@ export function ChatTranscriptRow({ row, contentWidth, toolContentWidth }: ChatT {marker} - {isToolOutput(row.content) ? ( + {isChecklistOutput(row.content) ? ( + {renderChecklist(row.content)} + ) : isToolOutput(row.content) ? ( {renderToolOutput(row.content.parts, toolContentWidth)} ) : isCommandOutput(row.content) ? ( {renderCommandOutput(row.content)} diff --git a/src/checklist-contract.ts b/src/checklist-contract.ts new file mode 100644 index 00000000..bdd6a089 --- /dev/null +++ b/src/checklist-contract.ts @@ -0,0 +1,39 @@ +import { z } from "zod"; + +export const checklistItemStatusSchema = z.enum(["pending", "in_progress", "done", "failed"]); +export type ChecklistItemStatus = z.infer; + +export const checklistItemSchema = z.object({ + id: z.string().min(1), + label: z.string().min(1), + status: checklistItemStatusSchema, + order: z.number().int().nonnegative(), +}); + +export type ChecklistItem = z.infer; + +export const checklistOutputSchema = z.object({ + groupId: z.string().min(1), + groupTitle: z.string().min(1), + items: z.array(checklistItemSchema), +}); + +export type ChecklistOutput = z.infer; + +const STATUS_MARKERS: Record = { + pending: "\u25CB", + in_progress: "\u25D0", + done: "\u25CF", + failed: "\u25C9", +}; + +export function checklistMarker(status: ChecklistItemStatus): string { + return STATUS_MARKERS[status]; +} + +export function checklistProgress(items: ChecklistItem[]): { done: number; total: number } { + return { + done: items.filter((item) => item.status === "done").length, + total: items.length, + }; +} diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts new file mode 100644 index 00000000..d50f07d6 --- /dev/null +++ b/src/checklist-toolkit.ts @@ -0,0 +1,74 @@ +import { z } from "zod"; +import { checklistItemStatusSchema } from "./checklist-contract"; +import type { ToolkitDeps, ToolkitInput } from "./tool-contract"; +import { createTool } from "./tool-contract"; +import { runTool } from "./tool-execution"; + +const updateChecklistInputSchema = z.object({ + groupId: z.string().min(1).describe("Unique identifier for the checklist group."), + groupTitle: z.string().min(1).describe("Title displayed as the checklist header."), + items: z + .array( + z.object({ + id: z.string().min(1).describe("Unique item identifier within this group."), + label: z.string().min(1).describe("Short description of the step."), + status: checklistItemStatusSchema.describe("Current status of this item."), + order: z.number().int().nonnegative().describe("Display position (0-based)."), + }), + ) + .min(1) + .describe("Full list of checklist items. Always send the complete list, not a partial update."), +}); + +const updateChecklistOutputSchema = z.object({ + kind: z.literal("update-checklist"), + groupId: z.string(), + itemCount: z.number(), +}); + +function createUpdateChecklistTool(_deps: ToolkitDeps, input: ToolkitInput) { + return createTool({ + id: "update-checklist", + labelKey: "tool.label.update_checklist", + category: "write", + permissions: ["write"], + description: + "Create or update an inline task checklist visible to the user. Send the full item list each time — items not included are removed.", + instruction: + "Use `update-checklist` at the start of multi-step tasks to show the user a progress checklist. Create the checklist with all items as pending, then update individual item statuses as you work. Always send the complete item list.", + inputSchema: updateChecklistInputSchema, + outputSchema: updateChecklistOutputSchema, + execute: async (toolInput, toolCallId) => { + return runTool(input.session, "update-checklist", toolCallId, toolInput, async (callId) => { + input.onOutput({ + toolName: "update-checklist", + content: { kind: "tool-header", labelKey: "tool.label.update_checklist", detail: toolInput.groupTitle }, + toolCallId: callId, + }); + + input.onChecklist({ + groupId: toolInput.groupId, + groupTitle: toolInput.groupTitle, + items: toolInput.items.map((item) => ({ + id: item.id, + label: item.label, + status: item.status, + order: item.order, + })), + }); + + return { + kind: "update-checklist" as const, + groupId: toolInput.groupId, + itemCount: toolInput.items.length, + }; + }); + }, + }); +} + +export function createChecklistToolkit(deps: ToolkitDeps, input: ToolkitInput) { + return { + updateChecklist: createUpdateChecklistTool(deps, input), + }; +} diff --git a/src/checklist.int.test.ts b/src/checklist.int.test.ts new file mode 100644 index 00000000..a3fabaf1 --- /dev/null +++ b/src/checklist.int.test.ts @@ -0,0 +1,186 @@ +import { describe, expect, test } from "bun:test"; +import { isChecklistOutput } from "./chat-contract"; +import type { ChecklistOutput } from "./checklist-contract"; +import { createClient, createMessageHandlerHarness } from "./test-utils"; + +describe("checklist integration", () => { + test("checklist event creates a task row with correct content", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Refactoring auth module", + items: [ + { id: "item_1", label: "read existing auth implementation", status: "done", order: 0 }, + { id: "item_2", label: "extract token validation", status: "in_progress", order: 1 }, + { id: "item_3", label: "add unit tests", status: "pending", order: 2 }, + ], + }); + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("refactor auth"); + + const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(taskRows).toHaveLength(1); + + const content = taskRows[0]?.content as ChecklistOutput; + expect(content.groupId).toBe("grp_1"); + expect(content.groupTitle).toBe("Refactoring auth module"); + expect(content.items).toHaveLength(3); + expect(content.items[0]?.status).toBe("done"); + expect(content.items[1]?.status).toBe("in_progress"); + expect(content.items[2]?.status).toBe("pending"); + }); + + test("subsequent checklist events update the same row in place", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + + // Initial checklist + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "pending", order: 0 }, + { id: "s2", label: "test", status: "pending", order: 1 }, + ], + }); + + // Update: first item done, second in progress + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s2", label: "test", status: "in_progress", order: 1 }, + ], + }); + + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("run pipeline"); + + // Should have exactly one task row (updated in place, not two) + const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(taskRows).toHaveLength(1); + + const content = taskRows[0]?.content as ChecklistOutput; + expect(content.items[0]?.status).toBe("done"); + expect(content.items[1]?.status).toBe("in_progress"); + }); + + test("different group IDs produce separate checklist rows", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_a", + groupTitle: "Phase A", + items: [{ id: "a1", label: "step A1", status: "pending", order: 0 }], + }); + options.onEvent({ + type: "checklist", + groupId: "grp_b", + groupTitle: "Phase B", + items: [{ id: "b1", label: "step B1", status: "pending", order: 0 }], + }); + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("multi-phase"); + + const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(taskRows).toHaveLength(2); + expect((taskRows[0]?.content as ChecklistOutput).groupId).toBe("grp_a"); + expect((taskRows[1]?.content as ChecklistOutput).groupId).toBe("grp_b"); + }); + + test("checklist row appears before subsequent assistant text", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "do thing", status: "pending", order: 0 }], + }); + options.onEvent({ type: "text-delta", text: "Working on it." }); + return { state: "done" as const, model: "gpt-5-mini", output: "Working on it." }; + }, + }), + }); + + await handleMessage("go"); + + const taskIndex = rows.findIndex((row) => row.kind === "task" && isChecklistOutput(row.content)); + const assistantIndex = rows.findIndex((row) => row.kind === "assistant"); + expect(taskIndex).toBeGreaterThanOrEqual(0); + expect(assistantIndex).toBeGreaterThanOrEqual(0); + expect(taskIndex).toBeLessThan(assistantIndex); + }); + + test("checklist events do not break tool output rows", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "edit file", status: "in_progress", order: 0 }], + }); + options.onEvent({ + type: "tool-call", + toolCallId: "call_1", + toolName: "edit-file", + args: { path: "a.ts" }, + }); + options.onEvent({ + type: "tool-output", + toolCallId: "call_1", + toolName: "edit-file", + content: { kind: "tool-header", labelKey: "tool.label.edit", detail: "a.ts" }, + }); + options.onEvent({ + type: "tool-result", + toolCallId: "call_1", + toolName: "edit-file", + }); + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("edit something"); + + const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + const toolRows = rows.filter((row) => row.kind === "tool"); + expect(taskRows).toHaveLength(1); + expect(toolRows).toHaveLength(1); + }); +}); diff --git a/src/client-contract.ts b/src/client-contract.ts index 851e4f2a..95e16c42 100644 --- a/src/client-contract.ts +++ b/src/client-contract.ts @@ -2,6 +2,7 @@ import { z } from "zod"; import { agentModeSchema } from "./agent-contract"; import { type ChatRequest, type ChatResponse, chatResponseStateSchema } from "./api"; import { invariant } from "./assert"; +import { checklistItemSchema } from "./checklist-contract"; import { rpcServerMessageSchema } from "./rpc-protocol"; import type { StatusFields } from "./status-contract"; import { streamErrorSchema } from "./stream-error"; @@ -88,6 +89,12 @@ export const streamEventSchema = z.discriminatedUnion("type", [ }), streamUsageEventSchema, z.object({ type: z.literal("status"), state: pendingStateSchema }), + z.object({ + type: z.literal("checklist"), + groupId: z.string().min(1), + groupTitle: z.string().min(1), + items: z.array(checklistItemSchema), + }), z.object({ type: z.literal("error"), errorMessage: z.string(), @@ -116,6 +123,12 @@ type ToolResultEvent = { }; type UsageEvent = { type: "usage"; inputTokens: number; outputTokens: number }; type StatusEvent = { type: "status"; state: PendingState }; +type ChecklistEvent = { + type: "checklist"; + groupId: string; + groupTitle: string; + items: z.infer[]; +}; type ErrorEvent = { type: "error"; errorMessage: string; @@ -132,6 +145,7 @@ export type StreamEvent = | ToolResultEvent | UsageEvent | StatusEvent + | ChecklistEvent | ErrorEvent; export interface Client { diff --git a/src/i18n/en.ts b/src/i18n/en.ts index 62795fa6..232f4d2a 100644 --- a/src/i18n/en.ts +++ b/src/i18n/en.ts @@ -226,6 +226,7 @@ export const EN_MESSAGES = { "tool.label.scan": "Scan", "tool.label.run": "Run", "tool.label.search": "Search", + "tool.label.update_checklist": "Checklist", "tool.label.web_fetch": "Web Fetch", "tool.label.web_search": "Web Search", "unit.call": "{count} calls", diff --git a/src/lifecycle-contract.ts b/src/lifecycle-contract.ts index 49a3dbd5..e02e83ed 100644 --- a/src/lifecycle-contract.ts +++ b/src/lifecycle-contract.ts @@ -5,6 +5,7 @@ import type { ErrorCode } from "./error-contract"; import type { ErrorCategory, ErrorSource } from "./error-handling"; import type { LifecyclePolicy } from "./lifecycle-policy"; import type { PromptBreakdownTotals } from "./lifecycle-usage"; +import type { ChecklistListener } from "./tool-contract"; import type { SessionContext } from "./tool-guards"; import type { ToolOutputPart } from "./tool-output-content"; import type { ToolRecovery } from "./tool-recovery"; @@ -101,6 +102,7 @@ export type PhasePrepareInput = { policy: LifecyclePolicy; debug: RunContext["debug"]; onOutput: (event: ToolOutputEvent) => void; + onChecklist: ChecklistListener; }; export type PhasePrepareResult = { session: SessionContext; diff --git a/src/lifecycle-prepare.test.ts b/src/lifecycle-prepare.test.ts index 5f21aaca..ba4646b2 100644 --- a/src/lifecycle-prepare.test.ts +++ b/src/lifecycle-prepare.test.ts @@ -19,6 +19,7 @@ describe("phasePrepare", () => { policy, debug: () => {}, onOutput: () => {}, + onChecklist: () => {}, }); expect(prepared.session.toolTimeoutMs).toBe(1_234); expect(prepared.session.flags.consecutiveGuardBlockLimit).toBe(7); diff --git a/src/lifecycle-prepare.ts b/src/lifecycle-prepare.ts index a1d422f7..1dcf8aef 100644 --- a/src/lifecycle-prepare.ts +++ b/src/lifecycle-prepare.ts @@ -20,6 +20,7 @@ export function phasePrepare(input: PhasePrepareInput): PhasePrepareResult { const { tools, session } = toolsForAgent({ workspace: input.workspace, onOutput: input.onOutput, + onChecklist: input.onChecklist, taskId: input.taskId, sessionId: input.request.sessionId, }); diff --git a/src/lifecycle.ts b/src/lifecycle.ts index ba1604f7..e9616081 100644 --- a/src/lifecycle.ts +++ b/src/lifecycle.ts @@ -218,6 +218,9 @@ export async function runLifecycle(input: LifecycleInput, deps: LifecycleDeps = onOutput: (event: ToolOutputEvent) => { ctxRef?.toolOutputHandler?.(event); }, + onChecklist: (event) => { + emit({ type: "checklist", groupId: event.groupId, groupTitle: event.groupTitle, items: event.items }); + }, }); const ctx = createRunContext(input, { diff --git a/src/tool-contract.ts b/src/tool-contract.ts index d5f7d718..b95859ca 100644 --- a/src/tool-contract.ts +++ b/src/tool-contract.ts @@ -38,10 +38,17 @@ export type ToolkitDeps = { outputBudget: ToolOutputBudget; }; +export type ChecklistListener = (event: { + groupId: string; + groupTitle: string; + items: { id: string; label: string; status: "pending" | "in_progress" | "done" | "failed"; order: number }[]; +}) => void; + export type ToolkitInput = { workspace: string; session: SessionContext; onOutput: ToolOutputListener; + onChecklist: ChecklistListener; }; export type ToolCacheEntry = { diff --git a/src/tool-registry.test.ts b/src/tool-registry.test.ts index d9e67146..4afacf06 100644 --- a/src/tool-registry.test.ts +++ b/src/tool-registry.test.ts @@ -27,6 +27,7 @@ describe("toolsets", () => { "runCommand", "scanCode", "searchFiles", + "updateChecklist", "webFetch", "webSearch", ]); @@ -87,6 +88,7 @@ describe("toolIdsByCategory", () => { expect(ids).toContain("delete-file"); expect(ids).toContain("git-add"); expect(ids).toContain("git-commit"); + expect(ids).toContain("update-checklist"); expect(ids).not.toContain("read-file"); expect(ids).not.toContain("run-command"); expect(ids).not.toContain("web-search"); diff --git a/src/tool-registry.ts b/src/tool-registry.ts index f55bc56b..d4a3968f 100644 --- a/src/tool-registry.ts +++ b/src/tool-registry.ts @@ -1,6 +1,7 @@ import { resolve } from "node:path"; import { appConfig } from "./app-config"; import { invariant } from "./assert"; +import { createChecklistToolkit } from "./checklist-toolkit"; import { createCodeToolkit } from "./code-toolkit"; import { createFileToolkit } from "./file-toolkit"; import { createGitToolkit } from "./git-toolkit"; @@ -8,7 +9,14 @@ import { EN_MESSAGES } from "./i18n/en"; import { createShellToolkit } from "./shell-toolkit"; import { createToolCache } from "./tool-cache"; import { getDefaultToolCacheStore } from "./tool-cache-store"; -import type { ToolCategory, ToolDefinition, ToolkitDeps, ToolkitInput, ToolPermission } from "./tool-contract"; +import type { + ChecklistListener, + ToolCategory, + ToolDefinition, + ToolkitDeps, + ToolkitInput, + ToolPermission, +} from "./tool-contract"; import { createSessionContext, type SessionContext } from "./tool-guards"; import type { ToolOutputListener } from "./tool-output-format"; import { createWebToolkit } from "./web-toolkit"; @@ -20,7 +28,8 @@ type RegisteredToolkit = ReturnType & ReturnType & ReturnType & ReturnType & - ReturnType; + ReturnType & + ReturnType; export type Toolset = { [Key in keyof RegisteredToolkit]: RegisteredToolkit[Key]; @@ -52,9 +61,14 @@ export const TOOLKIT_REGISTRY: { id: "git", createToolkit: (deps, input) => createGitToolkit(deps, input), }, + { + id: "checklist", + createToolkit: (deps, input) => createChecklistToolkit(deps, input), + }, ]; const noopOutput: ToolOutputListener = () => {}; +const noopChecklist: ChecklistListener = () => {}; const defaultToolkitDeps = (): ToolkitDeps => ({ outputBudget: appConfig.agent.toolOutputBudget, @@ -65,10 +79,11 @@ function collectTools( session: SessionContext, onOutput: ToolOutputListener = noopOutput, deps: ToolkitDeps = defaultToolkitDeps(), + onChecklist: ChecklistListener = noopChecklist, ): ToolMap { const combined: ToolMap = {}; for (const toolkit of TOOLKIT_REGISTRY) { - Object.assign(combined, toolkit.createToolkit(deps, { workspace, session, onOutput })); + Object.assign(combined, toolkit.createToolkit(deps, { workspace, session, onOutput, onChecklist })); } return combined; } @@ -123,6 +138,7 @@ export const DISCOVERY_TOOL_SET = new Set(DISCOVERY_TOOLS); export function toolsForAgent(options?: { workspace?: string; onOutput?: ToolOutputListener; + onChecklist?: ChecklistListener; taskId?: string; sessionId?: string; }): { @@ -133,7 +149,7 @@ export function toolsForAgent(options?: { const session = createSessionContext(options?.taskId, WRITE_TOOL_SET); session.cache = createToolCache(DISCOVERY_TOOL_SET, undefined, getDefaultToolCacheStore(options?.sessionId)); return { - tools: collectTools(workspace, session, options?.onOutput) as unknown as Toolset, + tools: collectTools(workspace, session, options?.onOutput, undefined, options?.onChecklist) as unknown as Toolset, session, }; } From 0c406f65ac59e02ad5717d8e6c6b42517a16ba37 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 11:55:06 +0200 Subject: [PATCH 02/21] fix(checklist): render checklist between transcript and input --- src/chat-app.tsx | 9 +++++++-- src/chat-transcript.tsx | 27 +++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/chat-app.tsx b/src/chat-app.tsx index 5a453b92..137dda15 100644 --- a/src/chat-app.tsx +++ b/src/chat-app.tsx @@ -1,8 +1,9 @@ +import { isChecklistOutput } from "./chat-contract"; import { ChatHeader } from "./chat-header"; import { ChatInputPanel } from "./chat-input-panel"; import { isHeaderItem } from "./chat-promotion"; import { type ChatAppProps, useChatState } from "./chat-state"; -import { ChatTranscript, ChatTranscriptRow } from "./chat-transcript"; +import { ChatChecklist, ChatTranscript, ChatTranscriptRow } from "./chat-transcript"; import { palette } from "./palette"; import { Box, render, Static, Text, useApp } from "./tui"; import { DEFAULT_COLUMNS } from "./tui/styles"; @@ -11,6 +12,9 @@ function ChatApp(props: ChatAppProps) { const { exit } = useApp(); const state = useChatState(props, exit); + const transcriptRows = state.rows.filter((row) => !isChecklistOutput(row.content)); + const checklistRows = state.rows.filter((row) => isChecklistOutput(row.content)); + return ( @@ -39,13 +43,14 @@ function ChatApp(props: ChatAppProps) { }} + ); } + +type ChatChecklistProps = { + rows: ChatRow[]; +}; + +export function ChatChecklist({ rows }: ChatChecklistProps): React.ReactNode { + if (rows.length === 0) return null; + const columns = process.stdout.columns ?? DEFAULT_COLUMNS; + const contentWidth = Math.max(24, columns - 2); + return ( + <> + {rows.map((row) => ( + + + + + {"• "} + + + {isChecklistOutput(row.content) ? {renderChecklist(row.content)} : null} + + + + ))} + + ); +} From 25f43f81a5078fee2f5e3814b69b14ef9391e329 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 11:57:12 +0200 Subject: [PATCH 03/21] refactor(checklist): extract ChatChecklist to own file --- src/chat-app.tsx | 3 ++- src/chat-checklist.tsx | 50 ++++++++++++++++++++++++++++++++++++ src/chat-transcript.tsx | 56 ++++------------------------------------- 3 files changed, 57 insertions(+), 52 deletions(-) create mode 100644 src/chat-checklist.tsx diff --git a/src/chat-app.tsx b/src/chat-app.tsx index 137dda15..6b18c92c 100644 --- a/src/chat-app.tsx +++ b/src/chat-app.tsx @@ -1,9 +1,10 @@ +import { ChatChecklist } from "./chat-checklist"; import { isChecklistOutput } from "./chat-contract"; import { ChatHeader } from "./chat-header"; import { ChatInputPanel } from "./chat-input-panel"; import { isHeaderItem } from "./chat-promotion"; import { type ChatAppProps, useChatState } from "./chat-state"; -import { ChatChecklist, ChatTranscript, ChatTranscriptRow } from "./chat-transcript"; +import { ChatTranscript, ChatTranscriptRow } from "./chat-transcript"; import { palette } from "./palette"; import { Box, render, Static, Text, useApp } from "./tui"; import { DEFAULT_COLUMNS } from "./tui/styles"; diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx new file mode 100644 index 00000000..f3b75602 --- /dev/null +++ b/src/chat-checklist.tsx @@ -0,0 +1,50 @@ +import React from "react"; +import type { ChatRow } from "./chat-contract"; +import { isChecklistOutput } from "./chat-contract"; +import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; +import { palette } from "./palette"; +import { Box, Text } from "./tui"; +import { DEFAULT_COLUMNS } from "./tui/styles"; + +function renderChecklist(output: ChecklistOutput): React.ReactNode { + const sorted = [...output.items].sort((a, b) => a.order - b.order); + const { done, total } = checklistProgress(sorted); + return ( + <> + {`${output.groupTitle} (${done}/${total})`} + {sorted.map((item) => ( + + {"\n"} + {` ${checklistMarker(item.status)} ${item.label}`} + + ))} + + ); +} + +type ChatChecklistProps = { + rows: ChatRow[]; +}; + +export function ChatChecklist({ rows }: ChatChecklistProps): React.ReactNode { + if (rows.length === 0) return null; + const columns = process.stdout.columns ?? DEFAULT_COLUMNS; + const contentWidth = Math.max(24, columns - 2); + return ( + <> + {rows.map((row) => ( + + + + + {"• "} + + + {isChecklistOutput(row.content) ? {renderChecklist(row.content)} : null} + + + + ))} + + ); +} diff --git a/src/chat-transcript.tsx b/src/chat-transcript.tsx index 77a6082f..5c66f240 100644 --- a/src/chat-transcript.tsx +++ b/src/chat-transcript.tsx @@ -2,10 +2,9 @@ import React from "react"; import type { AgentMode } from "./agent-contract"; import { renderAssistantContent } from "./chat-content-render"; import type { ChatRow, CommandOutput } from "./chat-contract"; -import { isChecklistOutput, isCommandOutput, isToolOutput } from "./chat-contract"; +import { isCommandOutput, isToolOutput } from "./chat-contract"; import { commandOutputColWidth, formatTokenCount } from "./chat-format"; import { ShimmerText } from "./chat-shimmer"; -import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; import type { PendingState } from "./client-contract"; import { t, tDynamic } from "./i18n"; import { palette } from "./palette"; @@ -199,22 +198,6 @@ function renderToolOutput(parts: ToolOutputPart[], toolContentWidth: number): Re ); } -function renderChecklist(output: ChecklistOutput): React.ReactNode { - const sorted = [...output.items].sort((a, b) => a.order - b.order); - const { done, total } = checklistProgress(sorted); - return ( - <> - {`${output.groupTitle} (${done}/${total})`} - {sorted.map((item) => ( - - {"\n"} - {` ${checklistMarker(item.status)} ${item.label}`} - - ))} - - ); -} - type ChatTranscriptRowProps = { row: ChatRow; contentWidth: number; @@ -232,21 +215,19 @@ export function ChatTranscriptRow({ row, contentWidth, toolContentWidth }: ChatT {marker} - {isChecklistOutput(row.content) ? ( - {renderChecklist(row.content)} - ) : isToolOutput(row.content) ? ( + {isToolOutput(row.content) ? ( {renderToolOutput(row.content.parts, toolContentWidth)} ) : isCommandOutput(row.content) ? ( {renderCommandOutput(row.content)} - ) : row.kind === "assistant" ? ( + ) : row.kind === "assistant" && typeof row.content === "string" ? ( {renderAssistantContent(row.content, contentWidth)} - ) : ( + ) : typeof row.content === "string" ? ( {row.content} - )} + ) : null} ); @@ -340,30 +321,3 @@ export function ChatTranscript(props: ChatTranscriptProps): React.ReactNode { ); } - -type ChatChecklistProps = { - rows: ChatRow[]; -}; - -export function ChatChecklist({ rows }: ChatChecklistProps): React.ReactNode { - if (rows.length === 0) return null; - const columns = process.stdout.columns ?? DEFAULT_COLUMNS; - const contentWidth = Math.max(24, columns - 2); - return ( - <> - {rows.map((row) => ( - - - - - {"• "} - - - {isChecklistOutput(row.content) ? {renderChecklist(row.content)} : null} - - - - ))} - - ); -} From fbd19f01df196c0e6e3acce54a1550a6c0a3f7c5 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:06:32 +0200 Subject: [PATCH 04/21] test(checklist): add TUI rendering tests --- src/checklist.tui.test.tsx | 138 +++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 src/checklist.tui.test.tsx diff --git a/src/checklist.tui.test.tsx b/src/checklist.tui.test.tsx new file mode 100644 index 00000000..a0b36feb --- /dev/null +++ b/src/checklist.tui.test.tsx @@ -0,0 +1,138 @@ +import { describe, expect, test } from "bun:test"; +import { ChatChecklist } from "./chat-checklist"; +import type { ChatRow } from "./chat-contract"; +import type { ChecklistOutput } from "./checklist-contract"; +import { dedent } from "./test-utils"; +import { renderPlain } from "./tui-test-utils"; + +function renderChecklist(checklists: ChecklistOutput[]): string { + const rows: ChatRow[] = checklists.map((content, i) => ({ + id: `row_${i}`, + kind: "task", + content, + })); + return renderPlain(, 96); +} + +describe("checklist TUI rendering", () => { + test("renders header with progress and status markers", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }, + ]), + ).toBe( + dedent` + • Build pipeline (1/3) + ● lint + ◐ test + ○ deploy + `, + ); + }); + + test("renders all status marker variants", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Steps", + items: [ + { id: "s1", label: "done step", status: "done", order: 0 }, + { id: "s2", label: "active step", status: "in_progress", order: 1 }, + { id: "s3", label: "waiting step", status: "pending", order: 2 }, + { id: "s4", label: "broken step", status: "failed", order: 3 }, + ], + }, + ]), + ).toBe( + dedent` + • Steps (1/4) + ● done step + ◐ active step + ○ waiting step + ◉ broken step + `, + ); + }); + + test("sorts items by order regardless of input order", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Steps", + items: [ + { id: "s3", label: "third", status: "pending", order: 2 }, + { id: "s1", label: "first", status: "done", order: 0 }, + { id: "s2", label: "second", status: "in_progress", order: 1 }, + ], + }, + ]), + ).toBe( + dedent` + • Steps (1/3) + ● first + ◐ second + ○ third + `, + ); + }); + + test("renders nothing when rows are empty", () => { + expect(renderPlain(, 96)).toBe(""); + }); + + test("renders multiple checklists", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Phase A", + items: [{ id: "a1", label: "step A", status: "done", order: 0 }], + }, + { + groupId: "g2", + groupTitle: "Phase B", + items: [{ id: "b1", label: "step B", status: "pending", order: 0 }], + }, + ]), + ).toBe( + dedent` + • Phase A (1/1) + ● step A + + • Phase B (0/1) + ○ step B + `, + ); + }); + + test("all done shows full progress", () => { + expect( + renderChecklist([ + { + groupId: "g1", + groupTitle: "Done", + items: [ + { id: "s1", label: "a", status: "done", order: 0 }, + { id: "s2", label: "b", status: "done", order: 1 }, + ], + }, + ]), + ).toBe( + dedent` + • Done (2/2) + ● a + ● b + `, + ); + }); +}); From f88c6f083d44d35e031c2d6b72fe927b45bc0dba Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:10:35 +0200 Subject: [PATCH 05/21] test(checklist): add TUI rendering tests --- src/chat-checklist.tsx | 3 +-- src/checklist.tui.test.tsx | 47 +++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx index f3b75602..b9f8a8d0 100644 --- a/src/chat-checklist.tsx +++ b/src/chat-checklist.tsx @@ -2,7 +2,6 @@ import React from "react"; import type { ChatRow } from "./chat-contract"; import { isChecklistOutput } from "./chat-contract"; import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; -import { palette } from "./palette"; import { Box, Text } from "./tui"; import { DEFAULT_COLUMNS } from "./tui/styles"; @@ -37,7 +36,7 @@ export function ChatChecklist({ rows }: ChatChecklistProps): React.ReactNode { - {"• "} + {" "} {isChecklistOutput(row.content) ? {renderChecklist(row.content)} : null} diff --git a/src/checklist.tui.test.tsx b/src/checklist.tui.test.tsx index a0b36feb..3f4e03a5 100644 --- a/src/checklist.tui.test.tsx +++ b/src/checklist.tui.test.tsx @@ -29,12 +29,15 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent` - • Build pipeline (1/3) + dedent( + ` + Build pipeline (1/3) ● lint ◐ test ○ deploy - `, + `, + 2, + ), ); }); @@ -53,13 +56,16 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent` - • Steps (1/4) + dedent( + ` + Steps (1/4) ● done step ◐ active step ○ waiting step ◉ broken step - `, + `, + 2, + ), ); }); @@ -77,12 +83,15 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent` - • Steps (1/3) + dedent( + ` + Steps (1/3) ● first ◐ second ○ third - `, + `, + 2, + ), ); }); @@ -105,13 +114,16 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent` - • Phase A (1/1) + dedent( + ` + Phase A (1/1) ● step A - • Phase B (0/1) + Phase B (0/1) ○ step B - `, + `, + 2, + ), ); }); @@ -128,11 +140,14 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent` - • Done (2/2) + dedent( + ` + Done (2/2) ● a ● b - `, + `, + 2, + ), ); }); }); From 6220378cc6a52bd5d38be514b324dc6abf9f7697 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:13:40 +0200 Subject: [PATCH 06/21] refactor(checklist): remove empty marker box from checklist --- src/chat-checklist.tsx | 9 +---- src/checklist.tui.test.tsx | 75 +++++++++++++++----------------------- 2 files changed, 32 insertions(+), 52 deletions(-) diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx index b9f8a8d0..e1b9f699 100644 --- a/src/chat-checklist.tsx +++ b/src/chat-checklist.tsx @@ -34,13 +34,8 @@ export function ChatChecklist({ rows }: ChatChecklistProps): React.ReactNode { {rows.map((row) => ( - - - {" "} - - - {isChecklistOutput(row.content) ? {renderChecklist(row.content)} : null} - + + {isChecklistOutput(row.content) ? {renderChecklist(row.content)} : null} ))} diff --git a/src/checklist.tui.test.tsx b/src/checklist.tui.test.tsx index 3f4e03a5..98b5305a 100644 --- a/src/checklist.tui.test.tsx +++ b/src/checklist.tui.test.tsx @@ -29,15 +29,12 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent( - ` - Build pipeline (1/3) - ● lint - ◐ test - ○ deploy - `, - 2, - ), + dedent` + Build pipeline (1/3) + ● lint + ◐ test + ○ deploy + `, ); }); @@ -56,16 +53,13 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent( - ` - Steps (1/4) - ● done step - ◐ active step - ○ waiting step - ◉ broken step - `, - 2, - ), + dedent` + Steps (1/4) + ● done step + ◐ active step + ○ waiting step + ◉ broken step + `, ); }); @@ -83,15 +77,12 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent( - ` - Steps (1/3) - ● first - ◐ second - ○ third - `, - 2, - ), + dedent` + Steps (1/3) + ● first + ◐ second + ○ third + `, ); }); @@ -114,16 +105,13 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent( - ` - Phase A (1/1) - ● step A + dedent` + Phase A (1/1) + ● step A - Phase B (0/1) - ○ step B - `, - 2, - ), + Phase B (0/1) + ○ step B + `, ); }); @@ -140,14 +128,11 @@ describe("checklist TUI rendering", () => { }, ]), ).toBe( - dedent( - ` - Done (2/2) - ● a - ● b - `, - 2, - ), + dedent` + Done (2/2) + ● a + ● b + `, ); }); }); From 98cb1cfb4dd682622817b7dace22f1e07b11496a Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:15:24 +0200 Subject: [PATCH 07/21] refactor(test): use shared dedent in tool-output TUI tests --- src/tool-output.tui.test.tsx | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/src/tool-output.tui.test.tsx b/src/tool-output.tui.test.tsx index 1b110b86..7ad96e6d 100644 --- a/src/tool-output.tui.test.tsx +++ b/src/tool-output.tui.test.tsx @@ -1,29 +1,10 @@ import { describe, expect, test } from "bun:test"; import type { ChatRow } from "./chat-contract"; import { ChatTranscript } from "./chat-transcript"; +import { dedent } from "./test-utils"; import { formatToolOutput, type ToolOutputPart } from "./tool-output-content"; import { renderPlain } from "./tui-test-utils"; -function dedent(value: string): string { - const lines = value.split("\n"); - let start = 0; - while (start < lines.length && lines[start]?.trim().length === 0) start += 1; - let end = lines.length - 1; - while (end >= start && lines[end]?.trim().length === 0) end -= 1; - if (start > end) return ""; - let prefix: string | null = null; - for (const line of lines.slice(start, end + 1)) { - if (line.trim().length === 0) continue; - const current = line.match(/^[ \t]*/)?.[0] ?? ""; - if (prefix === null || current.length < prefix.length) prefix = current; - } - const p = prefix ?? ""; - return lines - .slice(start, end + 1) - .map((line) => (line.startsWith(p) ? line.slice(p.length) : line)) - .join("\n"); -} - function renderChat(toolOutput: ToolOutputPart[]): string { const row: ChatRow = { id: "r1", kind: "tool", content: { parts: toolOutput } }; return renderPlain(, 96); From 4c7445cf0feb09bf07f8e1def1cfe6439d9c0701 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:20:27 +0200 Subject: [PATCH 08/21] fix(checklist): address review issues --- src/chat-message-handler-stream.ts | 2 ++ src/checklist-toolkit.ts | 8 +------- src/tool-contract.ts | 7 ++----- src/tool-registry.ts | 4 ++-- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/src/chat-message-handler-stream.ts b/src/chat-message-handler-stream.ts index a4144f52..053b03bc 100644 --- a/src/chat-message-handler-stream.ts +++ b/src/chat-message-handler-stream.ts @@ -150,6 +150,7 @@ export function createMessageStreamState(input: { finalize: () => { sealAssistantRow(); + checklistRowIdByGroupId.clear(); const ids = [...assistantRowIds]; assistantRowIds.length = 0; return ids; @@ -157,6 +158,7 @@ export function createMessageStreamState(input: { dispose: () => { cancelFlushTimer(); + checklistRowIdByGroupId.clear(); const idsToRemove = [...assistantRowIds]; if (activeRowId && !idsToRemove.includes(activeRowId)) idsToRemove.push(activeRowId); activeRowId = null; diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index d50f07d6..0ed0baa6 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -39,13 +39,7 @@ function createUpdateChecklistTool(_deps: ToolkitDeps, input: ToolkitInput) { inputSchema: updateChecklistInputSchema, outputSchema: updateChecklistOutputSchema, execute: async (toolInput, toolCallId) => { - return runTool(input.session, "update-checklist", toolCallId, toolInput, async (callId) => { - input.onOutput({ - toolName: "update-checklist", - content: { kind: "tool-header", labelKey: "tool.label.update_checklist", detail: toolInput.groupTitle }, - toolCallId: callId, - }); - + return runTool(input.session, "update-checklist", toolCallId, toolInput, async () => { input.onChecklist({ groupId: toolInput.groupId, groupTitle: toolInput.groupTitle, diff --git a/src/tool-contract.ts b/src/tool-contract.ts index b95859ca..0bee1cb3 100644 --- a/src/tool-contract.ts +++ b/src/tool-contract.ts @@ -1,4 +1,5 @@ import type { z } from "zod"; +import type { ChecklistItem } from "./checklist-contract"; import type { SessionContext } from "./tool-guards"; import type { ToolOutputListener } from "./tool-output-format"; @@ -38,11 +39,7 @@ export type ToolkitDeps = { outputBudget: ToolOutputBudget; }; -export type ChecklistListener = (event: { - groupId: string; - groupTitle: string; - items: { id: string; label: string; status: "pending" | "in_progress" | "done" | "failed"; order: number }[]; -}) => void; +export type ChecklistListener = (event: { groupId: string; groupTitle: string; items: ChecklistItem[] }) => void; export type ToolkitInput = { workspace: string; diff --git a/src/tool-registry.ts b/src/tool-registry.ts index d4a3968f..4475a25d 100644 --- a/src/tool-registry.ts +++ b/src/tool-registry.ts @@ -78,8 +78,8 @@ function collectTools( workspace: string, session: SessionContext, onOutput: ToolOutputListener = noopOutput, - deps: ToolkitDeps = defaultToolkitDeps(), onChecklist: ChecklistListener = noopChecklist, + deps: ToolkitDeps = defaultToolkitDeps(), ): ToolMap { const combined: ToolMap = {}; for (const toolkit of TOOLKIT_REGISTRY) { @@ -149,7 +149,7 @@ export function toolsForAgent(options?: { const session = createSessionContext(options?.taskId, WRITE_TOOL_SET); session.cache = createToolCache(DISCOVERY_TOOL_SET, undefined, getDefaultToolCacheStore(options?.sessionId)); return { - tools: collectTools(workspace, session, options?.onOutput, undefined, options?.onChecklist) as unknown as Toolset, + tools: collectTools(workspace, session, options?.onOutput, options?.onChecklist) as unknown as Toolset, session, }; } From a36ebfe28aad5313dd42fbc25667368f113b31a7 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:52:43 +0200 Subject: [PATCH 09/21] fix(checklist): address PR review findings --- docs/architecture.md | 2 +- docs/features.md | 1 + docs/glossary.md | 1 + docs/tooling.md | 2 +- src/chat-app.tsx | 8 ++- src/chat-message-handler-stream.ts | 9 ++- src/checklist-toolkit.ts | 4 +- src/checklist.int.test.ts | 98 ++++++++++++++++++++++++------ src/tool-registry.test.ts | 2 +- 9 files changed, 100 insertions(+), 27 deletions(-) diff --git a/docs/architecture.md b/docs/architecture.md index 257af63c..db95cb9b 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -66,7 +66,7 @@ lifecycle → guard → cache → toolkit → registry - **guard:** pre-execution safety/redundancy checks and post-execution call recording - **cache:** per-task reuse layer for read-only and search tool results -- **toolkit:** domain tool definitions with guarded execution (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`) +- **toolkit:** domain tool definitions with guarded execution (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`, `checklist-toolkit`) - **registry:** toolkit registration, permission filtering, and agent-facing tool surface - **details:** see [Tooling](./tooling.md) diff --git a/docs/features.md b/docs/features.md index 8a6b4903..8b0da0c1 100644 --- a/docs/features.md +++ b/docs/features.md @@ -36,6 +36,7 @@ Shipped, user-visible capabilities. - automatic formatting of edited files via detected formatter - automatic linting of edited files via detected linter - deterministic verify command execution from detected project configuration +- inline task checklist for multi-step tasks, pinned between transcript and input ## Tools diff --git a/docs/glossary.md b/docs/glossary.md index 415a0d89..cbbad987 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -21,6 +21,7 @@ Naming conventions and core terms used across Acolyte code and docs. | Term | Definition | |---|---| | Base Agent Input | Immutable prompt input created during `prepare` and used as the base for each generation attempt | +| Checklist | Inline progress display for multi-step tasks, rendered between transcript and input. The agent creates and updates items via the `update-checklist` tool | | Context Budgeting | Proactive token allocation via tiktoken — system prompt reserved first, remaining space filled by priority (memory → attachments → history → tool payloads) | | Continuation State | Persisted "Current task" and "Next step" cues carried into later turns | | Distill | Automatic memory source family that extracts and consolidates knowledge into records (project/user/session scope variants) | diff --git a/docs/tooling.md b/docs/tooling.md index 8adbfde5..1300509e 100644 --- a/docs/tooling.md +++ b/docs/tooling.md @@ -9,7 +9,7 @@ lifecycle → guard → cache → toolkit → registry ## Layers - **guard**: pre-execution checks and post-execution call recording -- **toolkit**: domain tool definitions (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`) +- **toolkit**: domain tool definitions (`file-toolkit`, `code-toolkit`, `git-toolkit`, `shell-toolkit`, `web-toolkit`, `checklist-toolkit`) - **registry**: permission filtering and agent-facing tool surface ## Guarded execution diff --git a/src/chat-app.tsx b/src/chat-app.tsx index 6b18c92c..0a64a6f1 100644 --- a/src/chat-app.tsx +++ b/src/chat-app.tsx @@ -1,4 +1,5 @@ import { ChatChecklist } from "./chat-checklist"; +import type { ChatRow } from "./chat-contract"; import { isChecklistOutput } from "./chat-contract"; import { ChatHeader } from "./chat-header"; import { ChatInputPanel } from "./chat-input-panel"; @@ -13,8 +14,11 @@ function ChatApp(props: ChatAppProps) { const { exit } = useApp(); const state = useChatState(props, exit); - const transcriptRows = state.rows.filter((row) => !isChecklistOutput(row.content)); - const checklistRows = state.rows.filter((row) => isChecklistOutput(row.content)); + const transcriptRows: ChatRow[] = []; + const checklistRows: ChatRow[] = []; + for (const row of state.rows) { + (isChecklistOutput(row.content) ? checklistRows : transcriptRows).push(row); + } return ( diff --git a/src/chat-message-handler-stream.ts b/src/chat-message-handler-stream.ts index 053b03bc..d4f5be48 100644 --- a/src/chat-message-handler-stream.ts +++ b/src/chat-message-handler-stream.ts @@ -150,7 +150,11 @@ export function createMessageStreamState(input: { finalize: () => { sealAssistantRow(); + const checklistIds = new Set(checklistRowIdByGroupId.values()); checklistRowIdByGroupId.clear(); + if (checklistIds.size > 0) { + input.setRows((current) => current.filter((row) => !checklistIds.has(row.id))); + } const ids = [...assistantRowIds]; assistantRowIds.length = 0; return ids; @@ -158,14 +162,15 @@ export function createMessageStreamState(input: { dispose: () => { cancelFlushTimer(); + const checklistIds = new Set(checklistRowIdByGroupId.values()); checklistRowIdByGroupId.clear(); const idsToRemove = [...assistantRowIds]; if (activeRowId && !idsToRemove.includes(activeRowId)) idsToRemove.push(activeRowId); activeRowId = null; activeContent = ""; assistantRowIds.length = 0; - if (idsToRemove.length > 0) { - const removeSet = new Set(idsToRemove); + const removeSet = new Set([...idsToRemove, ...checklistIds]); + if (removeSet.size > 0) { input.setRows((current) => current.filter((row) => !removeSet.has(row.id))); } }, diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index 0ed0baa6..b0103ee7 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -30,8 +30,8 @@ function createUpdateChecklistTool(_deps: ToolkitDeps, input: ToolkitInput) { return createTool({ id: "update-checklist", labelKey: "tool.label.update_checklist", - category: "write", - permissions: ["write"], + category: "read", + permissions: ["read"], description: "Create or update an inline task checklist visible to the user. Send the full item list each time — items not included are removed.", instruction: diff --git a/src/checklist.int.test.ts b/src/checklist.int.test.ts index a3fabaf1..90cd03cf 100644 --- a/src/checklist.int.test.ts +++ b/src/checklist.int.test.ts @@ -1,10 +1,12 @@ import { describe, expect, test } from "bun:test"; +import type { ChatRow } from "./chat-contract"; import { isChecklistOutput } from "./chat-contract"; import type { ChecklistOutput } from "./checklist-contract"; import { createClient, createMessageHandlerHarness } from "./test-utils"; describe("checklist integration", () => { test("checklist event creates a task row with correct content", async () => { + let snapshot: ChatRow[] = []; const { handleMessage, rows } = createMessageHandlerHarness({ client: createClient({ status: async () => ({}), @@ -20,6 +22,7 @@ describe("checklist integration", () => { { id: "item_3", label: "add unit tests", status: "pending", order: 2 }, ], }); + snapshot = [...rows]; return { state: "done" as const, model: "gpt-5-mini", output: "done" }; }, }), @@ -27,7 +30,7 @@ describe("checklist integration", () => { await handleMessage("refactor auth"); - const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); expect(taskRows).toHaveLength(1); const content = taskRows[0]?.content as ChecklistOutput; @@ -40,13 +43,12 @@ describe("checklist integration", () => { }); test("subsequent checklist events update the same row in place", async () => { + let snapshot: ChatRow[] = []; const { handleMessage, rows } = createMessageHandlerHarness({ client: createClient({ status: async () => ({}), replyStream: async (_input, options) => { options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); - - // Initial checklist options.onEvent({ type: "checklist", groupId: "grp_1", @@ -56,8 +58,6 @@ describe("checklist integration", () => { { id: "s2", label: "test", status: "pending", order: 1 }, ], }); - - // Update: first item done, second in progress options.onEvent({ type: "checklist", groupId: "grp_1", @@ -67,7 +67,7 @@ describe("checklist integration", () => { { id: "s2", label: "test", status: "in_progress", order: 1 }, ], }); - + snapshot = [...rows]; return { state: "done" as const, model: "gpt-5-mini", output: "done" }; }, }), @@ -75,8 +75,7 @@ describe("checklist integration", () => { await handleMessage("run pipeline"); - // Should have exactly one task row (updated in place, not two) - const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); expect(taskRows).toHaveLength(1); const content = taskRows[0]?.content as ChecklistOutput; @@ -85,6 +84,7 @@ describe("checklist integration", () => { }); test("different group IDs produce separate checklist rows", async () => { + let snapshot: ChatRow[] = []; const { handleMessage, rows } = createMessageHandlerHarness({ client: createClient({ status: async () => ({}), @@ -102,6 +102,7 @@ describe("checklist integration", () => { groupTitle: "Phase B", items: [{ id: "b1", label: "step B1", status: "pending", order: 0 }], }); + snapshot = [...rows]; return { state: "done" as const, model: "gpt-5-mini", output: "done" }; }, }), @@ -109,13 +110,14 @@ describe("checklist integration", () => { await handleMessage("multi-phase"); - const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); expect(taskRows).toHaveLength(2); expect((taskRows[0]?.content as ChecklistOutput).groupId).toBe("grp_a"); expect((taskRows[1]?.content as ChecklistOutput).groupId).toBe("grp_b"); }); - test("checklist row appears before subsequent assistant text", async () => { + test("checklist row appears before subsequent tool rows", async () => { + let snapshot: ChatRow[] = []; const { handleMessage, rows } = createMessageHandlerHarness({ client: createClient({ status: async () => ({}), @@ -127,22 +129,35 @@ describe("checklist integration", () => { groupTitle: "Steps", items: [{ id: "s1", label: "do thing", status: "pending", order: 0 }], }); - options.onEvent({ type: "text-delta", text: "Working on it." }); - return { state: "done" as const, model: "gpt-5-mini", output: "Working on it." }; + options.onEvent({ + type: "tool-call", + toolCallId: "call_1", + toolName: "read-file", + args: { path: "a.ts" }, + }); + options.onEvent({ + type: "tool-output", + toolCallId: "call_1", + toolName: "read-file", + content: { kind: "tool-header", labelKey: "tool.label.read", detail: "a.ts" }, + }); + snapshot = [...rows]; + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; }, }), }); await handleMessage("go"); - const taskIndex = rows.findIndex((row) => row.kind === "task" && isChecklistOutput(row.content)); - const assistantIndex = rows.findIndex((row) => row.kind === "assistant"); + const taskIndex = snapshot.findIndex((row) => row.kind === "task" && isChecklistOutput(row.content)); + const toolIndex = snapshot.findIndex((row) => row.kind === "tool"); expect(taskIndex).toBeGreaterThanOrEqual(0); - expect(assistantIndex).toBeGreaterThanOrEqual(0); - expect(taskIndex).toBeLessThan(assistantIndex); + expect(toolIndex).toBeGreaterThanOrEqual(0); + expect(taskIndex).toBeLessThan(toolIndex); }); test("checklist events do not break tool output rows", async () => { + let snapshot: ChatRow[] = []; const { handleMessage, rows } = createMessageHandlerHarness({ client: createClient({ status: async () => ({}), @@ -171,6 +186,7 @@ describe("checklist integration", () => { toolCallId: "call_1", toolName: "edit-file", }); + snapshot = [...rows]; return { state: "done" as const, model: "gpt-5-mini", output: "done" }; }, }), @@ -178,9 +194,55 @@ describe("checklist integration", () => { await handleMessage("edit something"); - const taskRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); - const toolRows = rows.filter((row) => row.kind === "tool"); + const taskRows = snapshot.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + const toolRows = snapshot.filter((row) => row.kind === "tool"); expect(taskRows).toHaveLength(1); expect(toolRows).toHaveLength(1); }); + + test("checklist rows are removed after turn completes", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "do thing", status: "done", order: 0 }], + }); + return { state: "done" as const, model: "gpt-5-mini", output: "done" }; + }, + }), + }); + + await handleMessage("go"); + + const checklistRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(checklistRows).toHaveLength(0); + }); + + test("checklist rows are removed on abort", async () => { + const { handleMessage, rows } = createMessageHandlerHarness({ + client: createClient({ + status: async () => ({}), + replyStream: async (_input, options) => { + options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + options.onEvent({ + type: "checklist", + groupId: "grp_1", + groupTitle: "Steps", + items: [{ id: "s1", label: "do thing", status: "in_progress", order: 0 }], + }); + throw Object.assign(new Error("aborted"), { name: "AbortError" }); + }, + }), + }); + + await handleMessage("go"); + + const checklistRows = rows.filter((row) => row.kind === "task" && isChecklistOutput(row.content)); + expect(checklistRows).toHaveLength(0); + }); }); diff --git a/src/tool-registry.test.ts b/src/tool-registry.test.ts index 4afacf06..f6e18a7e 100644 --- a/src/tool-registry.test.ts +++ b/src/tool-registry.test.ts @@ -88,7 +88,7 @@ describe("toolIdsByCategory", () => { expect(ids).toContain("delete-file"); expect(ids).toContain("git-add"); expect(ids).toContain("git-commit"); - expect(ids).toContain("update-checklist"); + expect(ids).not.toContain("update-checklist"); expect(ids).not.toContain("read-file"); expect(ids).not.toContain("run-command"); expect(ids).not.toContain("web-search"); From e968e041fc987f5aaa975ce5d6faad5a5fa11f41 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 12:59:40 +0200 Subject: [PATCH 10/21] chore(checklist): remove field descriptions from input schema --- src/checklist-toolkit.ts | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index b0103ee7..6823cbd5 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -5,19 +5,18 @@ import { createTool } from "./tool-contract"; import { runTool } from "./tool-execution"; const updateChecklistInputSchema = z.object({ - groupId: z.string().min(1).describe("Unique identifier for the checklist group."), - groupTitle: z.string().min(1).describe("Title displayed as the checklist header."), + groupId: z.string().min(1), + groupTitle: z.string().min(1), items: z .array( z.object({ - id: z.string().min(1).describe("Unique item identifier within this group."), - label: z.string().min(1).describe("Short description of the step."), - status: checklistItemStatusSchema.describe("Current status of this item."), - order: z.number().int().nonnegative().describe("Display position (0-based)."), + id: z.string().min(1), + label: z.string().min(1), + status: checklistItemStatusSchema, + order: z.number().int().nonnegative(), }), ) - .min(1) - .describe("Full list of checklist items. Always send the complete list, not a partial update."), + .min(1), }); const updateChecklistOutputSchema = z.object({ From cbadfed258ca611eda9b7c15f8065c4e5ece786a Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 13:09:09 +0200 Subject: [PATCH 11/21] refactor(checklist): split into set-checklist and update-checklist tools --- src/agent-modes.ts | 2 +- src/checklist-toolkit.ts | 86 +++++++++++++++++++++++++++++---------- src/checklist.int.test.ts | 10 ++--- src/i18n/en.ts | 1 + src/tool-registry.test.ts | 1 + 5 files changed, 73 insertions(+), 27 deletions(-) diff --git a/src/agent-modes.ts b/src/agent-modes.ts index 3d613897..d8bd2ce0 100644 --- a/src/agent-modes.ts +++ b/src/agent-modes.ts @@ -46,7 +46,7 @@ export const agentModes: Record = { "Do not run verify, test, or build commands — the lifecycle handles format, lint, and verify automatically after your edits.", "Do not signal done until the requested behavior is actually implemented. Updating help text, comments, or tests alone is not completing the task — the functional change must be in place.", "After the last tool call, use the lifecycle signal format from the base instructions and keep the user-facing outcome to one sentence.", - "For multi-step tasks (3+ distinct steps), use `update-checklist` at the start to show the user a progress checklist. Update item statuses as you complete each step.", + "For multi-step tasks (3+ distinct steps), use `set-checklist` at the start to define a progress checklist. Use `update-checklist` to mark items as you complete each step.", ], }, verify: { diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index 6823cbd5..a3075996 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -1,10 +1,10 @@ import { z } from "zod"; -import { checklistItemStatusSchema } from "./checklist-contract"; +import { type ChecklistItem, checklistItemStatusSchema } from "./checklist-contract"; import type { ToolkitDeps, ToolkitInput } from "./tool-contract"; import { createTool } from "./tool-contract"; import { runTool } from "./tool-execution"; -const updateChecklistInputSchema = z.object({ +const setChecklistInputSchema = z.object({ groupId: z.string().min(1), groupTitle: z.string().min(1), items: z @@ -12,48 +12,90 @@ const updateChecklistInputSchema = z.object({ z.object({ id: z.string().min(1), label: z.string().min(1), - status: checklistItemStatusSchema, order: z.number().int().nonnegative(), }), ) .min(1), }); +const setChecklistOutputSchema = z.object({ + kind: z.literal("set-checklist"), + groupId: z.string(), + itemCount: z.number(), +}); + +const updateChecklistInputSchema = z.object({ + groupId: z.string().min(1), + itemId: z.string().min(1), + status: checklistItemStatusSchema, +}); + const updateChecklistOutputSchema = z.object({ kind: z.literal("update-checklist"), groupId: z.string(), - itemCount: z.number(), + itemId: z.string(), + status: checklistItemStatusSchema, }); -function createUpdateChecklistTool(_deps: ToolkitDeps, input: ToolkitInput) { +function createSetChecklistTool( + _deps: ToolkitDeps, + input: ToolkitInput, + state: Map, +) { + return createTool({ + id: "set-checklist", + labelKey: "tool.label.set_checklist", + category: "read", + permissions: ["read"], + description: "Create an inline task checklist visible to the user. All items start as pending.", + instruction: + "Use `set-checklist` once at the start of multi-step tasks to show the user a progress checklist. Define all steps upfront. Use `update-checklist` to change item statuses as you work.", + inputSchema: setChecklistInputSchema, + outputSchema: setChecklistOutputSchema, + execute: async (toolInput, toolCallId) => { + return runTool(input.session, "set-checklist", toolCallId, toolInput, async () => { + const items: ChecklistItem[] = toolInput.items.map((item) => ({ + id: item.id, + label: item.label, + status: "pending", + order: item.order, + })); + state.set(toolInput.groupId, { title: toolInput.groupTitle, items }); + input.onChecklist({ groupId: toolInput.groupId, groupTitle: toolInput.groupTitle, items }); + return { kind: "set-checklist" as const, groupId: toolInput.groupId, itemCount: items.length }; + }); + }, + }); +} + +function createUpdateChecklistTool( + _deps: ToolkitDeps, + input: ToolkitInput, + state: Map, +) { return createTool({ id: "update-checklist", labelKey: "tool.label.update_checklist", category: "read", permissions: ["read"], - description: - "Create or update an inline task checklist visible to the user. Send the full item list each time — items not included are removed.", + description: "Update the status of a single checklist item.", instruction: - "Use `update-checklist` at the start of multi-step tasks to show the user a progress checklist. Create the checklist with all items as pending, then update individual item statuses as you work. Always send the complete item list.", + "Use `update-checklist` to mark a checklist item as `in_progress`, `done`, or `failed`. Requires a prior `set-checklist` call for the same groupId.", inputSchema: updateChecklistInputSchema, outputSchema: updateChecklistOutputSchema, execute: async (toolInput, toolCallId) => { return runTool(input.session, "update-checklist", toolCallId, toolInput, async () => { - input.onChecklist({ - groupId: toolInput.groupId, - groupTitle: toolInput.groupTitle, - items: toolInput.items.map((item) => ({ - id: item.id, - label: item.label, - status: item.status, - order: item.order, - })), - }); - + const group = state.get(toolInput.groupId); + if (!group) throw new Error(`No checklist found for groupId "${toolInput.groupId}"`); + const item = group.items.find((i) => i.id === toolInput.itemId); + if (!item) throw new Error(`No item "${toolInput.itemId}" in checklist "${toolInput.groupId}"`); + item.status = toolInput.status; + input.onChecklist({ groupId: toolInput.groupId, groupTitle: group.title, items: group.items }); return { kind: "update-checklist" as const, groupId: toolInput.groupId, - itemCount: toolInput.items.length, + itemId: toolInput.itemId, + status: toolInput.status, }; }); }, @@ -61,7 +103,9 @@ function createUpdateChecklistTool(_deps: ToolkitDeps, input: ToolkitInput) { } export function createChecklistToolkit(deps: ToolkitDeps, input: ToolkitInput) { + const state = new Map(); return { - updateChecklist: createUpdateChecklistTool(deps, input), + setChecklist: createSetChecklistTool(deps, input, state), + updateChecklist: createUpdateChecklistTool(deps, input, state), }; } diff --git a/src/checklist.int.test.ts b/src/checklist.int.test.ts index 90cd03cf..41879bcf 100644 --- a/src/checklist.int.test.ts +++ b/src/checklist.int.test.ts @@ -17,8 +17,8 @@ describe("checklist integration", () => { groupId: "grp_1", groupTitle: "Refactoring auth module", items: [ - { id: "item_1", label: "read existing auth implementation", status: "done", order: 0 }, - { id: "item_2", label: "extract token validation", status: "in_progress", order: 1 }, + { id: "item_1", label: "read existing auth implementation", status: "pending", order: 0 }, + { id: "item_2", label: "extract token validation", status: "pending", order: 1 }, { id: "item_3", label: "add unit tests", status: "pending", order: 2 }, ], }); @@ -37,9 +37,7 @@ describe("checklist integration", () => { expect(content.groupId).toBe("grp_1"); expect(content.groupTitle).toBe("Refactoring auth module"); expect(content.items).toHaveLength(3); - expect(content.items[0]?.status).toBe("done"); - expect(content.items[1]?.status).toBe("in_progress"); - expect(content.items[2]?.status).toBe("pending"); + expect(content.items.every((item) => item.status === "pending")).toBe(true); }); test("subsequent checklist events update the same row in place", async () => { @@ -49,6 +47,7 @@ describe("checklist integration", () => { status: async () => ({}), replyStream: async (_input, options) => { options.onEvent({ type: "status", state: { kind: "running", mode: "work" } }); + // set-checklist creates with all pending options.onEvent({ type: "checklist", groupId: "grp_1", @@ -58,6 +57,7 @@ describe("checklist integration", () => { { id: "s2", label: "test", status: "pending", order: 1 }, ], }); + // update-checklist updates individual items options.onEvent({ type: "checklist", groupId: "grp_1", diff --git a/src/i18n/en.ts b/src/i18n/en.ts index 232f4d2a..3cd92c66 100644 --- a/src/i18n/en.ts +++ b/src/i18n/en.ts @@ -226,6 +226,7 @@ export const EN_MESSAGES = { "tool.label.scan": "Scan", "tool.label.run": "Run", "tool.label.search": "Search", + "tool.label.set_checklist": "Checklist", "tool.label.update_checklist": "Checklist", "tool.label.web_fetch": "Web Fetch", "tool.label.web_search": "Web Search", diff --git a/src/tool-registry.test.ts b/src/tool-registry.test.ts index f6e18a7e..f12fdae4 100644 --- a/src/tool-registry.test.ts +++ b/src/tool-registry.test.ts @@ -27,6 +27,7 @@ describe("toolsets", () => { "runCommand", "scanCode", "searchFiles", + "setChecklist", "updateChecklist", "webFetch", "webSearch", From 645cc7a8e37b3567a459e230e9dcc59e8f3d99ef Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 13:22:24 +0200 Subject: [PATCH 12/21] refactor(tools): add meta category, make labelKey optional --- src/checklist-toolkit.ts | 10 ++++------ src/i18n/en.ts | 2 -- src/tool-contract.ts | 4 ++-- src/tool-registry.ts | 7 ++++--- 4 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index a3075996..bfe98e5c 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -44,9 +44,8 @@ function createSetChecklistTool( ) { return createTool({ id: "set-checklist", - labelKey: "tool.label.set_checklist", - category: "read", - permissions: ["read"], + category: "meta", + permissions: [], description: "Create an inline task checklist visible to the user. All items start as pending.", instruction: "Use `set-checklist` once at the start of multi-step tasks to show the user a progress checklist. Define all steps upfront. Use `update-checklist` to change item statuses as you work.", @@ -75,9 +74,8 @@ function createUpdateChecklistTool( ) { return createTool({ id: "update-checklist", - labelKey: "tool.label.update_checklist", - category: "read", - permissions: ["read"], + category: "meta", + permissions: [], description: "Update the status of a single checklist item.", instruction: "Use `update-checklist` to mark a checklist item as `in_progress`, `done`, or `failed`. Requires a prior `set-checklist` call for the same groupId.", diff --git a/src/i18n/en.ts b/src/i18n/en.ts index 3cd92c66..62795fa6 100644 --- a/src/i18n/en.ts +++ b/src/i18n/en.ts @@ -226,8 +226,6 @@ export const EN_MESSAGES = { "tool.label.scan": "Scan", "tool.label.run": "Run", "tool.label.search": "Search", - "tool.label.set_checklist": "Checklist", - "tool.label.update_checklist": "Checklist", "tool.label.web_fetch": "Web Fetch", "tool.label.web_search": "Web Search", "unit.call": "{count} calls", diff --git a/src/tool-contract.ts b/src/tool-contract.ts index 0bee1cb3..61aaf8c7 100644 --- a/src/tool-contract.ts +++ b/src/tool-contract.ts @@ -4,11 +4,10 @@ import type { SessionContext } from "./tool-guards"; import type { ToolOutputListener } from "./tool-output-format"; export type ToolPermission = "read" | "write" | "execute" | "network"; -export type ToolCategory = "read" | "search" | "write" | "execute" | "network"; +export type ToolCategory = "read" | "search" | "write" | "execute" | "network" | "meta"; export type ToolDefinition = { readonly id: string; - readonly labelKey: string; readonly category: ToolCategory; readonly permissions: readonly ToolPermission[]; readonly description: string; @@ -16,6 +15,7 @@ export type ToolDefinition = { readonly inputSchema: z.ZodType; readonly outputSchema: z.ZodType; readonly execute: (input: TInput, toolCallId: string) => Promise; + readonly labelKey?: string; }; export type ToolOutputBudgetEntry = { maxChars: number; maxLines: number }; diff --git a/src/tool-registry.ts b/src/tool-registry.ts index 4475a25d..7b50391e 100644 --- a/src/tool-registry.ts +++ b/src/tool-registry.ts @@ -92,14 +92,15 @@ function asToolDefinitionsById(entries: ToolMap): Record = {}; for (const tool of Object.values(entries)) { invariant(typeof tool.id === "string" && tool.id.trim().length > 0, "tool id is required"); - invariant(typeof tool.labelKey === "string" && tool.labelKey.trim().length > 0, `tool ${tool.id} missing labelKey`); - invariant(tool.labelKey in EN_MESSAGES, `tool ${tool.id} has unknown labelKey "${tool.labelKey}"`); + if (tool.labelKey) { + invariant(tool.labelKey in EN_MESSAGES, `tool ${tool.id} has unknown labelKey "${tool.labelKey}"`); + } invariant(typeof tool.category === "string" && tool.category.trim().length > 0, `tool ${tool.id} missing category`); invariant( typeof tool.instruction === "string" && tool.instruction.trim().length > 0, `tool ${tool.id} missing instruction`, ); - invariant(Array.isArray(tool.permissions) && tool.permissions.length > 0, `tool ${tool.id} missing permissions`); + invariant(Array.isArray(tool.permissions), `tool ${tool.id} missing permissions`); byId[tool.id] = tool as AnyToolDefinition; } return byId; From af9e3284cf6d8197ec6113b913b56e6549ebc8a2 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 13:25:02 +0200 Subject: [PATCH 13/21] refactor(checklist): rename set-checklist to create-checklist --- src/agent-modes.ts | 2 +- src/checklist-toolkit.ts | 24 ++++++++++++------------ src/tool-registry.test.ts | 2 +- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/agent-modes.ts b/src/agent-modes.ts index d8bd2ce0..a73f3ea4 100644 --- a/src/agent-modes.ts +++ b/src/agent-modes.ts @@ -46,7 +46,7 @@ export const agentModes: Record = { "Do not run verify, test, or build commands — the lifecycle handles format, lint, and verify automatically after your edits.", "Do not signal done until the requested behavior is actually implemented. Updating help text, comments, or tests alone is not completing the task — the functional change must be in place.", "After the last tool call, use the lifecycle signal format from the base instructions and keep the user-facing outcome to one sentence.", - "For multi-step tasks (3+ distinct steps), use `set-checklist` at the start to define a progress checklist. Use `update-checklist` to mark items as you complete each step.", + "For multi-step tasks (3+ distinct steps), use `create-checklist` at the start to define a progress checklist. Use `update-checklist` to mark items as you complete each step.", ], }, verify: { diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index bfe98e5c..8d678586 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -4,7 +4,7 @@ import type { ToolkitDeps, ToolkitInput } from "./tool-contract"; import { createTool } from "./tool-contract"; import { runTool } from "./tool-execution"; -const setChecklistInputSchema = z.object({ +const createChecklistInputSchema = z.object({ groupId: z.string().min(1), groupTitle: z.string().min(1), items: z @@ -18,8 +18,8 @@ const setChecklistInputSchema = z.object({ .min(1), }); -const setChecklistOutputSchema = z.object({ - kind: z.literal("set-checklist"), +const createChecklistOutputSchema = z.object({ + kind: z.literal("create-checklist"), groupId: z.string(), itemCount: z.number(), }); @@ -37,22 +37,22 @@ const updateChecklistOutputSchema = z.object({ status: checklistItemStatusSchema, }); -function createSetChecklistTool( +function createCreateChecklistTool( _deps: ToolkitDeps, input: ToolkitInput, state: Map, ) { return createTool({ - id: "set-checklist", + id: "create-checklist", category: "meta", permissions: [], description: "Create an inline task checklist visible to the user. All items start as pending.", instruction: - "Use `set-checklist` once at the start of multi-step tasks to show the user a progress checklist. Define all steps upfront. Use `update-checklist` to change item statuses as you work.", - inputSchema: setChecklistInputSchema, - outputSchema: setChecklistOutputSchema, + "Use `create-checklist` once at the start of multi-step tasks to show the user a progress checklist. Define all steps upfront. Use `update-checklist` to change item statuses as you work.", + inputSchema: createChecklistInputSchema, + outputSchema: createChecklistOutputSchema, execute: async (toolInput, toolCallId) => { - return runTool(input.session, "set-checklist", toolCallId, toolInput, async () => { + return runTool(input.session, "create-checklist", toolCallId, toolInput, async () => { const items: ChecklistItem[] = toolInput.items.map((item) => ({ id: item.id, label: item.label, @@ -61,7 +61,7 @@ function createSetChecklistTool( })); state.set(toolInput.groupId, { title: toolInput.groupTitle, items }); input.onChecklist({ groupId: toolInput.groupId, groupTitle: toolInput.groupTitle, items }); - return { kind: "set-checklist" as const, groupId: toolInput.groupId, itemCount: items.length }; + return { kind: "create-checklist" as const, groupId: toolInput.groupId, itemCount: items.length }; }); }, }); @@ -78,7 +78,7 @@ function createUpdateChecklistTool( permissions: [], description: "Update the status of a single checklist item.", instruction: - "Use `update-checklist` to mark a checklist item as `in_progress`, `done`, or `failed`. Requires a prior `set-checklist` call for the same groupId.", + "Use `update-checklist` to mark a checklist item as `in_progress`, `done`, or `failed`. Requires a prior `create-checklist` call for the same groupId.", inputSchema: updateChecklistInputSchema, outputSchema: updateChecklistOutputSchema, execute: async (toolInput, toolCallId) => { @@ -103,7 +103,7 @@ function createUpdateChecklistTool( export function createChecklistToolkit(deps: ToolkitDeps, input: ToolkitInput) { const state = new Map(); return { - setChecklist: createSetChecklistTool(deps, input, state), + createChecklist: createCreateChecklistTool(deps, input, state), updateChecklist: createUpdateChecklistTool(deps, input, state), }; } diff --git a/src/tool-registry.test.ts b/src/tool-registry.test.ts index f12fdae4..2ff77de5 100644 --- a/src/tool-registry.test.ts +++ b/src/tool-registry.test.ts @@ -12,6 +12,7 @@ describe("toolsets", () => { test("returns all tools", () => { const { tools, session } = toolsForAgent(); expect(Object.keys(tools).sort()).toEqual([ + "createChecklist", "createFile", "deleteFile", "editCode", @@ -27,7 +28,6 @@ describe("toolsets", () => { "runCommand", "scanCode", "searchFiles", - "setChecklist", "updateChecklist", "webFetch", "webSearch", From ab4e133c69a9420185770e2adc282423749179cd Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 16:58:13 +0200 Subject: [PATCH 14/21] feat(checklist): handle checklist events in run mode --- src/cli-prompt.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/cli-prompt.ts b/src/cli-prompt.ts index 1750b2a4..54414395 100644 --- a/src/cli-prompt.ts +++ b/src/cli-prompt.ts @@ -1,6 +1,7 @@ import { stdout as output } from "node:process"; import { createWorkspaceSpecifier, type VerifyScope } from "./api"; import { createMessage } from "./chat-session"; +import { checklistMarker, checklistProgress } from "./checklist-contract"; import { formatAssistantReplyOutput, printIndentedDim } from "./cli-format"; import type { Client } from "./client-contract"; import { nowIso } from "./datetime"; @@ -152,6 +153,16 @@ export async function handlePrompt( hasPrintedToolProgress = true; break; } + case "checklist": { + const sorted = [...event.items].sort((a, b) => a.order - b.order); + const { done, total } = checklistProgress(sorted); + printDim(`• ${event.groupTitle} (${done}/${total})`); + for (const item of sorted) { + printIndentedDim(`${checklistMarker(item.status)} ${item.label}`); + } + hasPrintedToolProgress = true; + break; + } case "tool-result": { const guardBlocked = event.isError === true && From 5e0a814f2ac8bbda4b71314330bbbb9648c9abdd Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 17:00:00 +0200 Subject: [PATCH 15/21] refactor(checklist): share formatting between TUI and CLI --- src/chat-checklist.tsx | 13 ++++++------- src/checklist-contract.ts | 9 +++++++++ src/cli-prompt.ts | 11 ++++------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx index e1b9f699..fccea7e5 100644 --- a/src/chat-checklist.tsx +++ b/src/chat-checklist.tsx @@ -1,20 +1,19 @@ import React from "react"; import type { ChatRow } from "./chat-contract"; import { isChecklistOutput } from "./chat-contract"; -import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; +import { type ChecklistOutput, formatChecklist } from "./checklist-contract"; import { Box, Text } from "./tui"; import { DEFAULT_COLUMNS } from "./tui/styles"; function renderChecklist(output: ChecklistOutput): React.ReactNode { - const sorted = [...output.items].sort((a, b) => a.order - b.order); - const { done, total } = checklistProgress(sorted); + const { header, lines } = formatChecklist(output); return ( <> - {`${output.groupTitle} (${done}/${total})`} - {sorted.map((item) => ( - + {header} + {lines.map((line) => ( + {"\n"} - {` ${checklistMarker(item.status)} ${item.label}`} + {` ${line}`} ))} diff --git a/src/checklist-contract.ts b/src/checklist-contract.ts index bdd6a089..0bea5635 100644 --- a/src/checklist-contract.ts +++ b/src/checklist-contract.ts @@ -37,3 +37,12 @@ export function checklistProgress(items: ChecklistItem[]): { done: number; total total: items.length, }; } + +export function formatChecklist(output: ChecklistOutput): { header: string; lines: string[] } { + const sorted = [...output.items].sort((a, b) => a.order - b.order); + const { done, total } = checklistProgress(sorted); + return { + header: `${output.groupTitle} (${done}/${total})`, + lines: sorted.map((item) => `${checklistMarker(item.status)} ${item.label}`), + }; +} diff --git a/src/cli-prompt.ts b/src/cli-prompt.ts index 54414395..6c17e282 100644 --- a/src/cli-prompt.ts +++ b/src/cli-prompt.ts @@ -1,7 +1,7 @@ import { stdout as output } from "node:process"; import { createWorkspaceSpecifier, type VerifyScope } from "./api"; import { createMessage } from "./chat-session"; -import { checklistMarker, checklistProgress } from "./checklist-contract"; +import { formatChecklist } from "./checklist-contract"; import { formatAssistantReplyOutput, printIndentedDim } from "./cli-format"; import type { Client } from "./client-contract"; import { nowIso } from "./datetime"; @@ -154,12 +154,9 @@ export async function handlePrompt( break; } case "checklist": { - const sorted = [...event.items].sort((a, b) => a.order - b.order); - const { done, total } = checklistProgress(sorted); - printDim(`• ${event.groupTitle} (${done}/${total})`); - for (const item of sorted) { - printIndentedDim(`${checklistMarker(item.status)} ${item.label}`); - } + const { header, lines } = formatChecklist(event); + printDim(`• ${header}`); + for (const line of lines) printIndentedDim(line); hasPrintedToolProgress = true; break; } From f10fabd105d8160aa986ad18887f009dbbd24a29 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 17:03:09 +0200 Subject: [PATCH 16/21] test(checklist): add CLI output test for checklist events --- src/cli-prompt.test.ts | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/cli-prompt.test.ts b/src/cli-prompt.test.ts index 1052a0bb..d83917a0 100644 --- a/src/cli-prompt.test.ts +++ b/src/cli-prompt.test.ts @@ -55,6 +55,42 @@ describe("cli-prompt", () => { expect(session.messages[session.messages.length - 1]?.kind).toBe("tool_payload"); }); + test("checklist events print header and items", async () => { + const printed: string[] = []; + const originalWrite = process.stdout.write; + process.stdout.write = ((chunk: string) => { + printed.push(chunk); + return true; + }) as typeof process.stdout.write; + + try { + const events: StreamEvent[] = [ + { + type: "checklist", + groupId: "grp_1", + groupTitle: "Build pipeline", + items: [ + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }, + ]; + + const session = createTestSession(); + const client = createStreamingClient(events); + await handlePrompt("run pipeline", session, client); + + const output = printed.join(""); + expect(output).toContain("Build pipeline (1/3)"); + expect(output).toContain("● lint"); + expect(output).toContain("◐ test"); + expect(output).toContain("○ deploy"); + } finally { + process.stdout.write = originalWrite; + } + }); + test("tool-output events with growing numWidth do not reprint earlier diffs", async () => { const printed: string[] = []; const originalWrite = process.stdout.write; From 66fd1e2d7edc7f22adb5c43198653a652546812b Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 17:04:44 +0200 Subject: [PATCH 17/21] test(checklist): add contract unit tests --- src/checklist-contract.test.ts | 71 ++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 src/checklist-contract.test.ts diff --git a/src/checklist-contract.test.ts b/src/checklist-contract.test.ts new file mode 100644 index 00000000..08e1259d --- /dev/null +++ b/src/checklist-contract.test.ts @@ -0,0 +1,71 @@ +import { describe, expect, test } from "bun:test"; +import { checklistMarker, checklistProgress, formatChecklist } from "./checklist-contract"; + +describe("checklistMarker", () => { + test("returns correct markers", () => { + expect(checklistMarker("pending")).toBe("○"); + expect(checklistMarker("in_progress")).toBe("◐"); + expect(checklistMarker("done")).toBe("●"); + expect(checklistMarker("failed")).toBe("◉"); + }); +}); + +describe("checklistProgress", () => { + test("counts done items", () => { + expect( + checklistProgress([ + { id: "1", label: "a", status: "done", order: 0 }, + { id: "2", label: "b", status: "in_progress", order: 1 }, + { id: "3", label: "c", status: "pending", order: 2 }, + ]), + ).toEqual({ done: 1, total: 3 }); + }); + + test("handles all done", () => { + expect( + checklistProgress([ + { id: "1", label: "a", status: "done", order: 0 }, + { id: "2", label: "b", status: "done", order: 1 }, + ]), + ).toEqual({ done: 2, total: 2 }); + }); + + test("failed items do not count as done", () => { + expect( + checklistProgress([ + { id: "1", label: "a", status: "done", order: 0 }, + { id: "2", label: "b", status: "failed", order: 1 }, + ]), + ).toEqual({ done: 1, total: 2 }); + }); + + test("handles empty list", () => { + expect(checklistProgress([])).toEqual({ done: 0, total: 0 }); + }); +}); + +describe("formatChecklist", () => { + test("returns header with progress and sorted lines", () => { + const result = formatChecklist({ + groupId: "g1", + groupTitle: "Build", + items: [ + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }); + expect(result.header).toBe("Build (1/3)"); + expect(result.lines).toEqual(["● lint", "◐ test", "○ deploy"]); + }); + + test("handles single item", () => { + const result = formatChecklist({ + groupId: "g1", + groupTitle: "Quick", + items: [{ id: "s1", label: "do it", status: "pending", order: 0 }], + }); + expect(result.header).toBe("Quick (0/1)"); + expect(result.lines).toEqual(["○ do it"]); + }); +}); From 337652ecda8b94e37674159b7fc5466fbd2aedf1 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 17:06:49 +0200 Subject: [PATCH 18/21] refactor(checklist): move formatChecklist to checklist-format --- src/chat-checklist.tsx | 3 ++- src/checklist-contract.test.ts | 28 +--------------------------- src/checklist-contract.ts | 9 --------- src/checklist-format.test.ts | 28 ++++++++++++++++++++++++++++ src/checklist-format.ts | 10 ++++++++++ src/cli-prompt.ts | 2 +- 6 files changed, 42 insertions(+), 38 deletions(-) create mode 100644 src/checklist-format.test.ts create mode 100644 src/checklist-format.ts diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx index fccea7e5..22eb1826 100644 --- a/src/chat-checklist.tsx +++ b/src/chat-checklist.tsx @@ -1,7 +1,8 @@ import React from "react"; import type { ChatRow } from "./chat-contract"; import { isChecklistOutput } from "./chat-contract"; -import { type ChecklistOutput, formatChecklist } from "./checklist-contract"; +import type { ChecklistOutput } from "./checklist-contract"; +import { formatChecklist } from "./checklist-format"; import { Box, Text } from "./tui"; import { DEFAULT_COLUMNS } from "./tui/styles"; diff --git a/src/checklist-contract.test.ts b/src/checklist-contract.test.ts index 08e1259d..95aab3fb 100644 --- a/src/checklist-contract.test.ts +++ b/src/checklist-contract.test.ts @@ -1,5 +1,5 @@ import { describe, expect, test } from "bun:test"; -import { checklistMarker, checklistProgress, formatChecklist } from "./checklist-contract"; +import { checklistMarker, checklistProgress } from "./checklist-contract"; describe("checklistMarker", () => { test("returns correct markers", () => { @@ -43,29 +43,3 @@ describe("checklistProgress", () => { expect(checklistProgress([])).toEqual({ done: 0, total: 0 }); }); }); - -describe("formatChecklist", () => { - test("returns header with progress and sorted lines", () => { - const result = formatChecklist({ - groupId: "g1", - groupTitle: "Build", - items: [ - { id: "s2", label: "test", status: "in_progress", order: 1 }, - { id: "s1", label: "lint", status: "done", order: 0 }, - { id: "s3", label: "deploy", status: "pending", order: 2 }, - ], - }); - expect(result.header).toBe("Build (1/3)"); - expect(result.lines).toEqual(["● lint", "◐ test", "○ deploy"]); - }); - - test("handles single item", () => { - const result = formatChecklist({ - groupId: "g1", - groupTitle: "Quick", - items: [{ id: "s1", label: "do it", status: "pending", order: 0 }], - }); - expect(result.header).toBe("Quick (0/1)"); - expect(result.lines).toEqual(["○ do it"]); - }); -}); diff --git a/src/checklist-contract.ts b/src/checklist-contract.ts index 0bea5635..bdd6a089 100644 --- a/src/checklist-contract.ts +++ b/src/checklist-contract.ts @@ -37,12 +37,3 @@ export function checklistProgress(items: ChecklistItem[]): { done: number; total total: items.length, }; } - -export function formatChecklist(output: ChecklistOutput): { header: string; lines: string[] } { - const sorted = [...output.items].sort((a, b) => a.order - b.order); - const { done, total } = checklistProgress(sorted); - return { - header: `${output.groupTitle} (${done}/${total})`, - lines: sorted.map((item) => `${checklistMarker(item.status)} ${item.label}`), - }; -} diff --git a/src/checklist-format.test.ts b/src/checklist-format.test.ts new file mode 100644 index 00000000..33c566c2 --- /dev/null +++ b/src/checklist-format.test.ts @@ -0,0 +1,28 @@ +import { describe, expect, test } from "bun:test"; +import { formatChecklist } from "./checklist-format"; + +describe("formatChecklist", () => { + test("returns header with progress and sorted lines", () => { + const result = formatChecklist({ + groupId: "g1", + groupTitle: "Build", + items: [ + { id: "s2", label: "test", status: "in_progress", order: 1 }, + { id: "s1", label: "lint", status: "done", order: 0 }, + { id: "s3", label: "deploy", status: "pending", order: 2 }, + ], + }); + expect(result.header).toBe("Build (1/3)"); + expect(result.lines).toEqual(["● lint", "◐ test", "○ deploy"]); + }); + + test("handles single item", () => { + const result = formatChecklist({ + groupId: "g1", + groupTitle: "Quick", + items: [{ id: "s1", label: "do it", status: "pending", order: 0 }], + }); + expect(result.header).toBe("Quick (0/1)"); + expect(result.lines).toEqual(["○ do it"]); + }); +}); diff --git a/src/checklist-format.ts b/src/checklist-format.ts new file mode 100644 index 00000000..859d5669 --- /dev/null +++ b/src/checklist-format.ts @@ -0,0 +1,10 @@ +import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; + +export function formatChecklist(output: ChecklistOutput): { header: string; lines: string[] } { + const sorted = [...output.items].sort((a, b) => a.order - b.order); + const { done, total } = checklistProgress(sorted); + return { + header: `${output.groupTitle} (${done}/${total})`, + lines: sorted.map((item) => `${checklistMarker(item.status)} ${item.label}`), + }; +} diff --git a/src/cli-prompt.ts b/src/cli-prompt.ts index 6c17e282..5bcc257b 100644 --- a/src/cli-prompt.ts +++ b/src/cli-prompt.ts @@ -1,7 +1,7 @@ import { stdout as output } from "node:process"; import { createWorkspaceSpecifier, type VerifyScope } from "./api"; import { createMessage } from "./chat-session"; -import { formatChecklist } from "./checklist-contract"; +import { formatChecklist } from "./checklist-format"; import { formatAssistantReplyOutput, printIndentedDim } from "./cli-format"; import type { Client } from "./client-contract"; import { nowIso } from "./datetime"; From 4620baa8620470b75f61e231d0847dc354104b0c Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 17:12:59 +0200 Subject: [PATCH 19/21] fix(checklist): address final review findings --- docs/glossary.md | 2 +- src/chat-checklist.tsx | 8 ++++---- src/checklist-format.test.ts | 10 +++++++--- src/checklist-format.ts | 6 ++++-- src/checklist-toolkit.ts | 10 ++++++---- src/cli-prompt.ts | 4 ++-- 6 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docs/glossary.md b/docs/glossary.md index cbbad987..6fb7dfd7 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -21,7 +21,7 @@ Naming conventions and core terms used across Acolyte code and docs. | Term | Definition | |---|---| | Base Agent Input | Immutable prompt input created during `prepare` and used as the base for each generation attempt | -| Checklist | Inline progress display for multi-step tasks, rendered between transcript and input. The agent creates and updates items via the `update-checklist` tool | +| Checklist | Inline progress display for multi-step tasks, rendered between transcript and input. The agent defines steps via `create-checklist` and marks progress via `update-checklist` | | Context Budgeting | Proactive token allocation via tiktoken — system prompt reserved first, remaining space filled by priority (memory → attachments → history → tool payloads) | | Continuation State | Persisted "Current task" and "Next step" cues carried into later turns | | Distill | Automatic memory source family that extracts and consolidates knowledge into records (project/user/session scope variants) | diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx index 22eb1826..fe655b0f 100644 --- a/src/chat-checklist.tsx +++ b/src/chat-checklist.tsx @@ -7,14 +7,14 @@ import { Box, Text } from "./tui"; import { DEFAULT_COLUMNS } from "./tui/styles"; function renderChecklist(output: ChecklistOutput): React.ReactNode { - const { header, lines } = formatChecklist(output); + const { header, items } = formatChecklist(output); return ( <> {header} - {lines.map((line) => ( - + {items.map((item) => ( + {"\n"} - {` ${line}`} + {` ${item.text}`} ))} diff --git a/src/checklist-format.test.ts b/src/checklist-format.test.ts index 33c566c2..5f4d501a 100644 --- a/src/checklist-format.test.ts +++ b/src/checklist-format.test.ts @@ -2,7 +2,7 @@ import { describe, expect, test } from "bun:test"; import { formatChecklist } from "./checklist-format"; describe("formatChecklist", () => { - test("returns header with progress and sorted lines", () => { + test("returns header with progress and sorted items", () => { const result = formatChecklist({ groupId: "g1", groupTitle: "Build", @@ -13,7 +13,11 @@ describe("formatChecklist", () => { ], }); expect(result.header).toBe("Build (1/3)"); - expect(result.lines).toEqual(["● lint", "◐ test", "○ deploy"]); + expect(result.items).toEqual([ + { id: "s1", text: "● lint" }, + { id: "s2", text: "◐ test" }, + { id: "s3", text: "○ deploy" }, + ]); }); test("handles single item", () => { @@ -23,6 +27,6 @@ describe("formatChecklist", () => { items: [{ id: "s1", label: "do it", status: "pending", order: 0 }], }); expect(result.header).toBe("Quick (0/1)"); - expect(result.lines).toEqual(["○ do it"]); + expect(result.items).toEqual([{ id: "s1", text: "○ do it" }]); }); }); diff --git a/src/checklist-format.ts b/src/checklist-format.ts index 859d5669..21dc058d 100644 --- a/src/checklist-format.ts +++ b/src/checklist-format.ts @@ -1,10 +1,12 @@ import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; -export function formatChecklist(output: ChecklistOutput): { header: string; lines: string[] } { +export type FormattedChecklistItem = { id: string; text: string }; + +export function formatChecklist(output: ChecklistOutput): { header: string; items: FormattedChecklistItem[] } { const sorted = [...output.items].sort((a, b) => a.order - b.order); const { done, total } = checklistProgress(sorted); return { header: `${output.groupTitle} (${done}/${total})`, - lines: sorted.map((item) => `${checklistMarker(item.status)} ${item.label}`), + items: sorted.map((item) => ({ id: item.id, text: `${checklistMarker(item.status)} ${item.label}` })), }; } diff --git a/src/checklist-toolkit.ts b/src/checklist-toolkit.ts index 8d678586..ee4546d6 100644 --- a/src/checklist-toolkit.ts +++ b/src/checklist-toolkit.ts @@ -85,10 +85,12 @@ function createUpdateChecklistTool( return runTool(input.session, "update-checklist", toolCallId, toolInput, async () => { const group = state.get(toolInput.groupId); if (!group) throw new Error(`No checklist found for groupId "${toolInput.groupId}"`); - const item = group.items.find((i) => i.id === toolInput.itemId); - if (!item) throw new Error(`No item "${toolInput.itemId}" in checklist "${toolInput.groupId}"`); - item.status = toolInput.status; - input.onChecklist({ groupId: toolInput.groupId, groupTitle: group.title, items: group.items }); + if (!group.items.some((i) => i.id === toolInput.itemId)) { + throw new Error(`No item "${toolInput.itemId}" in checklist "${toolInput.groupId}"`); + } + const items = group.items.map((i) => (i.id === toolInput.itemId ? { ...i, status: toolInput.status } : i)); + state.set(toolInput.groupId, { ...group, items }); + input.onChecklist({ groupId: toolInput.groupId, groupTitle: group.title, items }); return { kind: "update-checklist" as const, groupId: toolInput.groupId, diff --git a/src/cli-prompt.ts b/src/cli-prompt.ts index 5bcc257b..21b72adb 100644 --- a/src/cli-prompt.ts +++ b/src/cli-prompt.ts @@ -154,9 +154,9 @@ export async function handlePrompt( break; } case "checklist": { - const { header, lines } = formatChecklist(event); + const { header, items } = formatChecklist(event); printDim(`• ${header}`); - for (const line of lines) printIndentedDim(line); + for (const item of items) printIndentedDim(item.text); hasPrintedToolProgress = true; break; } From 3ba6f8d0c35a797e3b92f57bfe69ab497dae1ad9 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 17:15:29 +0200 Subject: [PATCH 20/21] refactor(checklist): split marker and label in formatted items --- src/chat-checklist.tsx | 2 +- src/checklist-format.test.ts | 8 ++++---- src/checklist-format.ts | 4 ++-- src/cli-prompt.ts | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/chat-checklist.tsx b/src/chat-checklist.tsx index fe655b0f..248d4e4f 100644 --- a/src/chat-checklist.tsx +++ b/src/chat-checklist.tsx @@ -14,7 +14,7 @@ function renderChecklist(output: ChecklistOutput): React.ReactNode { {items.map((item) => ( {"\n"} - {` ${item.text}`} + {` ${item.marker} ${item.label}`} ))} diff --git a/src/checklist-format.test.ts b/src/checklist-format.test.ts index 5f4d501a..e8d972d4 100644 --- a/src/checklist-format.test.ts +++ b/src/checklist-format.test.ts @@ -14,9 +14,9 @@ describe("formatChecklist", () => { }); expect(result.header).toBe("Build (1/3)"); expect(result.items).toEqual([ - { id: "s1", text: "● lint" }, - { id: "s2", text: "◐ test" }, - { id: "s3", text: "○ deploy" }, + { id: "s1", marker: "●", label: "lint" }, + { id: "s2", marker: "◐", label: "test" }, + { id: "s3", marker: "○", label: "deploy" }, ]); }); @@ -27,6 +27,6 @@ describe("formatChecklist", () => { items: [{ id: "s1", label: "do it", status: "pending", order: 0 }], }); expect(result.header).toBe("Quick (0/1)"); - expect(result.items).toEqual([{ id: "s1", text: "○ do it" }]); + expect(result.items).toEqual([{ id: "s1", marker: "○", label: "do it" }]); }); }); diff --git a/src/checklist-format.ts b/src/checklist-format.ts index 21dc058d..946094f9 100644 --- a/src/checklist-format.ts +++ b/src/checklist-format.ts @@ -1,12 +1,12 @@ import { type ChecklistOutput, checklistMarker, checklistProgress } from "./checklist-contract"; -export type FormattedChecklistItem = { id: string; text: string }; +export type FormattedChecklistItem = { id: string; marker: string; label: string }; export function formatChecklist(output: ChecklistOutput): { header: string; items: FormattedChecklistItem[] } { const sorted = [...output.items].sort((a, b) => a.order - b.order); const { done, total } = checklistProgress(sorted); return { header: `${output.groupTitle} (${done}/${total})`, - items: sorted.map((item) => ({ id: item.id, text: `${checklistMarker(item.status)} ${item.label}` })), + items: sorted.map((item) => ({ id: item.id, marker: checklistMarker(item.status), label: item.label })), }; } diff --git a/src/cli-prompt.ts b/src/cli-prompt.ts index 21b72adb..7744f896 100644 --- a/src/cli-prompt.ts +++ b/src/cli-prompt.ts @@ -156,7 +156,7 @@ export async function handlePrompt( case "checklist": { const { header, items } = formatChecklist(event); printDim(`• ${header}`); - for (const item of items) printIndentedDim(item.text); + for (const item of items) printIndentedDim(`${item.marker} ${item.label}`); hasPrintedToolProgress = true; break; } From 6ef9f6ef34b287471220ebe789fb8a649d1cf1b7 Mon Sep 17 00:00:00 2001 From: Christoffer Niska Date: Tue, 24 Mar 2026 18:13:40 +0200 Subject: [PATCH 21/21] chore(checklist): use literal UTF-8 markers --- src/checklist-contract.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/checklist-contract.ts b/src/checklist-contract.ts index bdd6a089..52c3ae95 100644 --- a/src/checklist-contract.ts +++ b/src/checklist-contract.ts @@ -21,10 +21,10 @@ export const checklistOutputSchema = z.object({ export type ChecklistOutput = z.infer; const STATUS_MARKERS: Record = { - pending: "\u25CB", - in_progress: "\u25D0", - done: "\u25CF", - failed: "\u25C9", + pending: "○", + in_progress: "◐", + done: "●", + failed: "◉", }; export function checklistMarker(status: ChecklistItemStatus): string {