diff --git a/.ralph-wiggum/PROMPT_plan.md b/.ralph-wiggum/PROMPT_plan.md index 2a96812..83fa139 100644 --- a/.ralph-wiggum/PROMPT_plan.md +++ b/.ralph-wiggum/PROMPT_plan.md @@ -13,17 +13,37 @@ You are an autonomous planning agent. Analyze specs and create a structured impl - Plan only — do NOT implement anything - Do NOT assume functionality is missing — confirm with code search first - Each spec should have clear tasks and acceptance criteria -- Treat `src/lib` as shared utilities — prefer consolidation over duplication +- Prefer consolidation over duplication (shared code usually lives in `src/utils`, `src/services`, `src/domain`) + +## Task Granularity (Critical) +A "task" is the unit of work for one build-loop iteration (read context → implement → verify). Optimize for fewer, larger tasks. +- Target 3–10 tasks per spec (rarely > 12) +- Each task should represent a meaningful deliverable slice and be finishable in one iteration +- Do NOT split into separate tasks for tiny edits (e.g., adding a few constants, renaming exports, wiring imports, updating a couple call sites) +- Split tasks only at real dependency/risk boundaries (e.g., new public API, schema change, multi-step rollout, complex UI flow) +- Do NOT create tasks that are only "run tests/typecheck/lint" — baseline verification already happens every loop; only add testing tasks when new/changed tests need to be written + +## Estimation (Story Points) +Estimate each task using story points: 1, 2, 3, 5, 8. +- Target task size: 2–5 points +- 1-point tasks should be merged into a nearby task unless there is a hard dependency boundary +- 8-point tasks must be split (too much context/verification for one loop) +- Specs can be large; the key is that each task stays loop-sized. Use `pointsBudget` as an optional milestone target (e.g., 20 points per phase), not a hard cap. ## Workflow ### 1. 
Audit Specs - Read all specs in `.ralph-wiggum/specs/*` - For each spec, verify tasks and acceptance criteria are clear and complete -- If a spec is missing details, update it with specific tasks and acceptance criteria +- If a spec is missing details, update it with clear milestone-level tasks and acceptance criteria (avoid micro-tasks) ### 2. Audit Codebase -- Use up to 500 parallel subagents to search the source code +- Subagents are for code exploration and context gathering only (facts, file locations, existing patterns). +- Subagent budget: + - 1 “map” pass: identify likely entrypoints + relevant files + - Up to 8 “area” passes: one subagent per code area (CLI, commands, services, domain, agents, utils) + - Up to 4 “verification” passes: locate relevant scripts, test locations, linters, and guardrails + - Hard cap: 13 subagents total - Compare implementation against specs - Look for: TODOs, placeholders, skipped tests, incomplete features, inconsistent patterns @@ -39,22 +59,28 @@ Create or update @.ralph-wiggum/implementation.json with this structure: "specs": [ { "id": "spec-id-kebab-case", - "file": "specs/spec-file.md", + "file": ".ralph-wiggum/specs/spec-file.md", "name": "Human Readable Name", "priority": 1, "status": "pending", "context": "Brief context for this spec. 
Reference existing code locations.", + "dependsOn": ["other-spec-id"], + "pointsBudget": 20, "tasks": [ { "id": "spec-id-1", "description": "First task description", "status": "pending", + "dependsOn": ["other-spec-id-1"], + "points": 3, "acceptanceCriteria": ["Criteria 1", "Criteria 2"] }, { "id": "spec-id-2", "description": "Second task description", - "status": "pending" + "status": "pending", + "dependsOn": ["spec-id-1"], + "points": 2 } ], "acceptanceCriteria": ["Spec-level AC 1", "Spec-level AC 2"] @@ -69,6 +95,11 @@ Create or update @.ralph-wiggum/implementation.json with this structure: - `priority`: Lower number = higher priority (1 = first to implement) - `status`: "pending" for unstarted, "in_progress" for active, "completed" for done - `context`: Include relevant code paths, dependencies, or notes for the build agent +- `dependsOn`: Optional list of spec IDs that must be completed before this spec is runnable +- `dependsOn` (tasks): Optional list of task IDs that must be completed before this task is runnable +- `points`: 1, 2, 3, 5, 8 story points for task sizing +- `pointsBudget`: Optional milestone target (not a cap) +- Make tasks coarse enough to justify a full build-loop iteration; merge trivial steps into their parent task rather than creating a new task ### 4. Create Missing Specs If functionality is needed but no spec exists: @@ -82,4 +113,4 @@ If functionality is needed but no spec exists: ### 5. Update Guardrails (if needed) If you discover project-specific rules that should be enforced, add them to the "Project-Specific Rules" section of @.ralph-wiggum/GUARDRAILS.md. 
-COMPLETION: When all specs are audited, have clear tasks/acceptance criteria, and implementation.json is created/updated, output exactly: DONE \ No newline at end of file +COMPLETION: When all specs are audited, have clear tasks/acceptance criteria, and implementation.json is created/updated, output exactly: DONE diff --git a/.ralph-wiggum/specs/example.md b/.ralph-wiggum/specs/example.md index 81b61e9..39890d1 100644 --- a/.ralph-wiggum/specs/example.md +++ b/.ralph-wiggum/specs/example.md @@ -15,6 +15,9 @@ Example: "The user should be able to use the UI/API to ## Tasks + - [ ] Task 1 - [ ] Task 2 - [ ] Task 3 @@ -28,10 +31,12 @@ Example: "The user should be able to use the UI/API to ## Success Metrics -## Testing Requirements -- [ ] Unit test changes -- [ ] Integration test changes -- [ ] End-to-end test changes +## Verification + +- [ ] Baseline: `bun run typecheck`, `bun run test`, `bun run build` +- [ ] Frontend (UI changes): Use `agent-browser` to navigate to the relevant screen, exercise the full user flow, and capture a screenshot for `.ralph-wiggum/PROGRESS.md` (example: "Open Settings → toggle X → verify Y updates") +- [ ] Backend (service/API changes): Add/update unit tests in `src/__tests__/` (example: "Task parsing returns expected JSON"); add integration tests when cross-module behavior changes (example: "CLI command runs end-to-end against a temp workspace"); run `bun run test` ## Notes diff --git a/src/domain/implementation.ts b/src/domain/implementation.ts index 0187971..f74d87f 100644 --- a/src/domain/implementation.ts +++ b/src/domain/implementation.ts @@ -1,9 +1,180 @@ import fse from "fs-extra"; -import type { Implementation as ImplementationData } from "../types"; +import type { + Implementation as ImplementationData, + SpecEntry, + StoryPoints, + TaskEntry, + TaskStatusType, +} from "../types"; import { getImplementationFile } from "../utils/paths"; import { Spec } from "./spec"; import type { Task } from "./task"; +function isRecord(value: 
unknown): value is Record { + return typeof value === "object" && value !== null; +} + +function asString(value: unknown): string | undefined { + return typeof value === "string" ? value : undefined; +} + +function asNumber(value: unknown): number | undefined { + return typeof value === "number" && Number.isFinite(value) + ? value + : undefined; +} + +function asStringArray(value: unknown): string[] | undefined { + if (!Array.isArray(value)) { + return undefined; + } + const strings = value.filter((v) => typeof v === "string"); + return strings.length === value.length ? strings : undefined; +} + +function asStoryPoints(value: unknown): StoryPoints | undefined { + return value === 1 || value === 2 || value === 3 || value === 5 || value === 8 + ? value + : undefined; +} + +function asTaskStatusType(value: unknown): TaskStatusType | undefined { + return value === "pending" || + value === "in_progress" || + value === "completed" || + value === "blocked" || + value === "failed" + ? value + : undefined; +} + +function normalizeTaskEntry( + raw: unknown, + specId: string, + taskIndex: number +): TaskEntry | null { + if (!isRecord(raw)) { + return null; + } + + const taskId = asString(raw.id) ?? `${specId}-${taskIndex + 1}`; + const description = asString(raw.description) ?? `Task ${taskIndex + 1}`; + const status = asTaskStatusType(raw.status) ?? 
"pending"; + + const task: TaskEntry = { id: taskId, description, status }; + + const acceptanceCriteria = asStringArray(raw.acceptanceCriteria); + if (acceptanceCriteria !== undefined) { + task.acceptanceCriteria = acceptanceCriteria; + } + + const dependsOn = asStringArray(raw.dependsOn); + if (dependsOn !== undefined) { + task.dependsOn = dependsOn; + } + + const points = asStoryPoints(raw.points); + if (points !== undefined) { + task.points = points; + } + + const blockedReason = asString(raw.blockedReason); + if (blockedReason !== undefined) { + task.blockedReason = blockedReason; + } + + const retryCount = asNumber(raw.retryCount); + if (retryCount !== undefined) { + task.retryCount = retryCount; + } + + const completedAt = asString(raw.completedAt); + if (completedAt !== undefined) { + task.completedAt = completedAt; + } + + return task; +} + +function normalizeSpecEntry(raw: unknown, index: number): SpecEntry | null { + if (!isRecord(raw)) { + return null; + } + + const id = asString(raw.id); + if (!id) { + return null; + } + + const tasksRaw = Array.isArray(raw.tasks) ? raw.tasks : []; + const tasks: TaskEntry[] = tasksRaw + .map((t, taskIndex) => normalizeTaskEntry(t, id, taskIndex)) + .filter((t): t is TaskEntry => t !== null); + + const spec: SpecEntry = { + id, + file: asString(raw.file) ?? "", + name: asString(raw.name) ?? id, + priority: asNumber(raw.priority) ?? index + 1, + status: asTaskStatusType(raw.status) ?? 
"pending", + tasks, + }; + + const context = asString(raw.context); + if (context !== undefined) { + spec.context = context; + } + + const dependsOn = asStringArray(raw.dependsOn); + if (dependsOn !== undefined) { + spec.dependsOn = dependsOn; + } + + const pointsBudget = asNumber(raw.pointsBudget); + if (pointsBudget !== undefined) { + spec.pointsBudget = pointsBudget; + } + + const pointsTotal = asNumber(raw.pointsTotal); + if (pointsTotal !== undefined) { + spec.pointsTotal = pointsTotal; + } + + const acceptanceCriteria = asStringArray(raw.acceptanceCriteria); + if (acceptanceCriteria !== undefined) { + spec.acceptanceCriteria = acceptanceCriteria; + } + + return spec; +} + +function normalizeImplementationData(raw: unknown): ImplementationData { + const now = new Date().toISOString(); + if (!isRecord(raw)) { + return { version: 1, updatedAt: now, updatedBy: "user", specs: [] }; + } + + const updatedByRaw = asString(raw.updatedBy); + const updatedBy = + updatedByRaw === "plan-mode" || + updatedByRaw === "build-mode" || + updatedByRaw === "user" + ? updatedByRaw + : "user"; + + const specsRaw = Array.isArray(raw.specs) ? raw.specs : []; + const specs = specsRaw + .map((entry, index) => normalizeSpecEntry(entry, index)) + .filter((spec): spec is SpecEntry => spec !== null); + + return { + version: asNumber(raw.version) ?? 1, + updatedAt: asString(raw.updatedAt) ?? now, + updatedBy, + specs, + }; +} + export class Implementation { private readonly _specs: Spec[]; private readonly _version: number; @@ -38,12 +209,37 @@ export class Implementation { * Finds the first pending task in the first non-completed spec. 
*/ get nextPendingTask(): { spec: Spec; task: Task } | undefined { + const completedSpecIds = new Set( + this._specs.filter((s) => s.isCompleted).map((s) => s.id) + ); + + const completedTaskIds = new Set(); for (const spec of this._specs) { + for (const task of spec.tasks) { + if (task.status === "completed") { + completedTaskIds.add(task.id); + } + } + } + + const specsByPriority = this._specs + .map((spec, index) => ({ spec, index })) + .sort((a, b) => { + const priorityDelta = a.spec.priority - b.spec.priority; + return priorityDelta !== 0 ? priorityDelta : a.index - b.index; + }) + .map(({ spec }) => spec); + + for (const spec of specsByPriority) { if (spec.isCompleted) { continue; } - const task = spec.nextPendingTask; + if (!spec.dependenciesSatisfied(completedSpecIds)) { + continue; + } + + const task = spec.nextRunnablePendingTask(completedTaskIds); if (task) { return { spec, task }; } @@ -51,6 +247,10 @@ export class Implementation { return undefined; } + get hasPendingTasks(): boolean { + return this._specs.some((s) => s.tasks.some((t) => t.status === "pending")); + } + /** * Get all completed specs. 
*/ @@ -136,7 +336,7 @@ export class Implementation { try { const content = await fse.readJson(implPath); - return new Implementation(content as ImplementationData); + return new Implementation(normalizeImplementationData(content)); } catch { return null; } diff --git a/src/domain/spec.ts b/src/domain/spec.ts index 9b15be9..8dd4443 100644 --- a/src/domain/spec.ts +++ b/src/domain/spec.ts @@ -1,4 +1,4 @@ -import type { SpecEntry } from "../types"; +import type { SpecEntry, TaskStatusType } from "../types"; import { Task } from "./task"; export class Spec { @@ -6,7 +6,10 @@ export class Spec { private readonly _file: string; private readonly _name: string; private readonly _priority: number; + private readonly _status: TaskStatusType; private readonly _context?: string; + private readonly _dependsOn: string[]; + private readonly _pointsBudget?: number; private readonly _tasks: Task[]; private readonly _acceptanceCriteria: string[]; @@ -15,7 +18,10 @@ export class Spec { this._file = entry.file; this._name = entry.name; this._priority = entry.priority; + this._status = entry.status; this._context = entry.context; + this._dependsOn = entry.dependsOn ?? []; + this._pointsBudget = entry.pointsBudget; this._tasks = entry.tasks.map((t) => Task.fromEntry(t)); this._acceptanceCriteria = entry.acceptanceCriteria ?? []; } @@ -36,10 +42,34 @@ export class Spec { return this._priority; } + get status(): TaskStatusType { + return this._status; + } + get context(): string | undefined { return this._context; } + get dependsOn(): string[] { + return this._dependsOn; + } + + get pointsBudget(): number | undefined { + return this._pointsBudget; + } + + get pointsTotal(): number | undefined { + let total = 0; + let sawAny = false; + for (const task of this._tasks) { + if (task.points !== undefined) { + total += task.points; + sawAny = true; + } + } + return sawAny ? 
total : undefined; + } + get tasks(): Task[] { return this._tasks; } @@ -49,16 +79,25 @@ export class Spec { } get isCompleted(): boolean { - return ( - this._tasks.length > 0 && - this._tasks.every((t) => t.status === "completed") - ); + if (this._tasks.length === 0) { + return this._status === "completed"; + } + + return this._tasks.every((t) => t.status === "completed"); } get nextPendingTask(): Task | undefined { return this._tasks.find((t) => t.status === "pending"); } + nextRunnablePendingTask( + completedTaskIds: ReadonlySet + ): Task | undefined { + return this._tasks.find( + (t) => t.status === "pending" && t.dependenciesSatisfied(completedTaskIds) + ); + } + get completedTasks(): Task[] { return this._tasks.filter((t) => t.status === "completed"); } @@ -70,6 +109,13 @@ export class Spec { return { completed, total, percentage }; } + dependenciesSatisfied(completedSpecIds: ReadonlySet): boolean { + if (this._dependsOn.length === 0) { + return true; + } + return this._dependsOn.every((id) => completedSpecIds.has(id)); + } + /** * Checks if all tasks are completed and returns the completion status. * Does not mutate state - just reports current completion status. 
@@ -106,6 +152,19 @@ export class Spec { entry.context = this._context; } + if (this._dependsOn.length > 0) { + entry.dependsOn = this._dependsOn; + } + + if (this._pointsBudget !== undefined) { + entry.pointsBudget = this._pointsBudget; + } + + const pointsTotal = this.pointsTotal; + if (pointsTotal !== undefined) { + entry.pointsTotal = pointsTotal; + } + if (this._acceptanceCriteria.length > 0) { entry.acceptanceCriteria = this._acceptanceCriteria; } diff --git a/src/domain/task.ts b/src/domain/task.ts index 773f7b2..05cf0ca 100644 --- a/src/domain/task.ts +++ b/src/domain/task.ts @@ -1,10 +1,12 @@ -import type { TaskEntry, TaskStatusType } from "../types"; +import type { StoryPoints, TaskEntry, TaskStatusType } from "../types"; export class Task { private readonly _id: string; private readonly _description: string; private _status: TaskStatusType; private readonly _acceptanceCriteria: string[]; + private readonly _dependsOn: string[]; + private readonly _points?: StoryPoints; private _blockedReason?: string; private _retryCount: number; private _completedAt?: string; @@ -14,6 +16,8 @@ export class Task { this._description = entry.description; this._status = entry.status; this._acceptanceCriteria = entry.acceptanceCriteria ?? []; + this._dependsOn = entry.dependsOn ?? []; + this._points = entry.points; this._blockedReason = entry.blockedReason; this._retryCount = entry.retryCount ?? 
0; this._completedAt = entry.completedAt; @@ -35,6 +39,14 @@ export class Task { return this._acceptanceCriteria; } + get dependsOn(): string[] { + return this._dependsOn; + } + + get points(): StoryPoints | undefined { + return this._points; + } + get blockedReason(): string | undefined { return this._blockedReason; } @@ -73,6 +85,13 @@ export class Task { this._blockedReason = undefined; } + dependenciesSatisfied(completedTaskIds: ReadonlySet): boolean { + if (this._dependsOn.length === 0) { + return true; + } + return this._dependsOn.every((id) => completedTaskIds.has(id)); + } + toJSON(): TaskEntry { const entry: TaskEntry = { id: this._id, @@ -84,6 +103,14 @@ export class Task { entry.acceptanceCriteria = this._acceptanceCriteria; } + if (this._dependsOn.length > 0) { + entry.dependsOn = this._dependsOn; + } + + if (this._points !== undefined) { + entry.points = this._points; + } + if (this._blockedReason !== undefined) { entry.blockedReason = this._blockedReason; } diff --git a/src/services/builder.ts b/src/services/builder.ts index 4237798..6b1fc19 100644 --- a/src/services/builder.ts +++ b/src/services/builder.ts @@ -185,97 +185,206 @@ export class Builder { } } - private async runLoop(): Promise { - if (!this.state) { - return; + private async loadImplementation(): Promise { + const impl = await Implementation.load(); + if (!impl) { + console.log(pc.red("No implementation.json found.")); + return null; } - while (true) { - const impl = await Implementation.load(); - if (!impl) { - console.log(pc.red("No implementation.json found.")); - break; - } + return impl; + } - const next = impl.nextPendingTask; - if (!next) { - await this.hooks?.emit("onLoopCompleted", this.buildPayload()); - break; + private getCompletedTaskIds(impl: Implementation): Set { + const completedTaskIds = new Set(); + for (const spec of impl.specs) { + for (const task of spec.tasks) { + if (task.status === "completed") { + completedTaskIds.add(task.id); + } } + } + return 
completedTaskIds; + } - const { spec, task } = next; - this.state.session.incrementIteration(); - await this.updateSessionState(); + private reportBlockedSpecs( + impl: Implementation, + completedSpecIds: ReadonlySet + ): void { + const blockedSpecs = impl.specs + .filter((s) => !s.isCompleted) + .filter((s) => s.nextPendingTask) + .filter((s) => !s.dependenciesSatisfied(completedSpecIds)) + .map( + (s) => + `${s.id} (missing: ${s.dependsOn.filter((d) => !completedSpecIds.has(d)).join(", ")})` + ); - this.logger?.startTaskLog(task.id); + if (blockedSpecs.length === 0) { + return; + } - await this.hooks?.emit( - "onIterationStarted", - this.buildPayload({ - taskDescription: task.description, - specName: spec.name, - logFile: this.logger?.logFile ?? undefined, - }) + console.log(pc.yellow("Pending specs blocked by spec dependencies:")); + for (const line of blockedSpecs) { + console.log(pc.gray(`- ${line}`)); + } + } + + private reportBlockedTasks( + impl: Implementation, + completedTaskIds: ReadonlySet + ): void { + const blockedTasks = impl.specs + .filter((s) => !s.isCompleted) + .flatMap((s) => + s.tasks + .filter((t) => t.status === "pending") + .filter((t) => !t.dependenciesSatisfied(completedTaskIds)) + .map((t) => ({ + specId: s.id, + taskId: t.id, + missing: t.dependsOn.filter((d) => !completedTaskIds.has(d)), + })) + ) + .map((t) => `${t.specId}:${t.taskId} (missing: ${t.missing.join(", ")})`); + + if (blockedTasks.length === 0) { + return; + } + + console.log(pc.yellow("Pending tasks blocked by task dependencies:")); + for (const line of blockedTasks) { + console.log(pc.gray(`- ${line}`)); + } + } + + private async handleNoRunnableTasks(impl: Implementation): Promise { + if (impl.isCompleted) { + await this.hooks?.emit("onLoopCompleted", this.buildPayload()); + return; + } + + if (!impl.hasPendingTasks) { + console.log( + pc.yellow( + "No runnable tasks found (all remaining tasks are blocked or failed)." 
+ ) ); - this.logger?.log(`Starting task: ${task.id} - ${task.description}`); + return; + } - task.markInProgress(); - await impl.save(); + const completedSpecIds = new Set(impl.completedSpecs.map((s) => s.id)); + const completedTaskIds = this.getCompletedTaskIds(impl); - const taskPrompt = generateTaskPrompt(spec, task); - const result = await this.agentRunner?.run({ - prompt: taskPrompt, - onSpawn: (child) => { - this.currentChild = child; - if (child.pid && this.state) { - this.state.session.setPid(child.pid); - } - this.updateSessionState().catch(() => { - // fire-and-forget - }); - }, - }); - - if (!result) { - console.log(pc.red(" ✗ Agent runner not initialized")); - break; - } + console.log(pc.yellow("No runnable pending tasks.")); + this.reportBlockedSpecs(impl, completedSpecIds); + this.reportBlockedTasks(impl, completedTaskIds); + } - if (result.status === "blocked") { - await this.hooks?.emit( - "onTaskBlocked", - this.buildPayload({ taskDescription: result.reason }) - ); - this.logger?.log(`Task blocked: ${result.reason}`); - task.block(result.reason || "Unknown"); - await impl.save(); - continue; - } + private async runTaskIteration( + impl: Implementation, + spec: Spec, + task: Task + ): Promise<"continue" | "stop"> { + if (!this.state) { + return "stop"; + } - if (result.status === "done") { - await this.hooks?.emit( - "onIterationSuccess", - this.buildPayload({ taskDescription: task.description }) - ); - this.logger?.log(`Task completed: ${task.id}`); - task.complete(); - await impl.save(); - if (spec.isCompleted) { - await this.hooks?.emit( - "onSpecCompleted", - this.buildPayload({ specName: spec.name }) - ); - this.logger?.log(`Spec completed: ${spec.name}`); + this.state.session.incrementIteration(); + await this.updateSessionState(); + + this.logger?.startTaskLog(task.id); + + await this.hooks?.emit( + "onIterationStarted", + this.buildPayload({ + taskDescription: task.description, + specName: spec.name, + logFile: this.logger?.logFile ?? 
undefined, + }) + ); + this.logger?.log(`Starting task: ${task.id} - ${task.description}`); + + task.markInProgress(); + await impl.save(); + + const taskPrompt = generateTaskPrompt(spec, task); + const result = await this.agentRunner?.run({ + prompt: taskPrompt, + onSpawn: (child) => { + this.currentChild = child; + if (child.pid && this.state) { + this.state.session.setPid(child.pid); } - } else { + this.updateSessionState().catch(() => { + // fire-and-forget + }); + }, + }); + + if (!result) { + console.log(pc.red(" ✗ Agent runner not initialized")); + return "stop"; + } + + if (result.status === "blocked") { + await this.hooks?.emit( + "onTaskBlocked", + this.buildPayload({ taskDescription: result.reason }) + ); + this.logger?.log(`Task blocked: ${result.reason}`); + task.block(result.reason || "Unknown"); + await impl.save(); + return "continue"; + } + + if (result.status === "done") { + await this.hooks?.emit( + "onIterationSuccess", + this.buildPayload({ taskDescription: task.description }) + ); + this.logger?.log(`Task completed: ${task.id}`); + task.complete(); + await impl.save(); + if (spec.isCompleted) { await this.hooks?.emit( - "onIterationFailure", - this.buildPayload({ taskDescription: task.description }) + "onSpecCompleted", + this.buildPayload({ specName: spec.name }) ); - this.logger?.log(`Task failed: ${task.id}`); - task.fail(); - await impl.save(); + this.logger?.log(`Spec completed: ${spec.name}`); } + return "continue"; + } + await this.hooks?.emit( + "onIterationFailure", + this.buildPayload({ taskDescription: task.description }) + ); + this.logger?.log(`Task failed: ${task.id}`); + task.fail(); + await impl.save(); + return "continue"; + } + + private async runLoop(): Promise { + if (!this.state) { + return; + } + while (true) { + const impl = await this.loadImplementation(); + if (!impl) { + return; + } + const next = impl.nextPendingTask; + if (!next) { + await this.handleNoRunnableTasks(impl); + return; + } + + const { spec, task } = 
next; + const result = await this.runTaskIteration(impl, spec, task); + if (result === "stop") { + return; + } console.log(pc.gray(`\n${"=".repeat(50)}\n`)); } } diff --git a/src/templates/prompts.ts b/src/templates/prompts.ts index a48de7c..cf524bf 100644 --- a/src/templates/prompts.ts +++ b/src/templates/prompts.ts @@ -13,17 +13,37 @@ You are an autonomous planning agent. Analyze specs and create a structured impl - Plan only — do NOT implement anything - Do NOT assume functionality is missing — confirm with code search first - Each spec should have clear tasks and acceptance criteria -- Treat \`src/lib\` as shared utilities — prefer consolidation over duplication +- Prefer consolidation over duplication (shared code usually lives in \`src/utils\`, \`src/services\`, \`src/domain\`) + +## Task Granularity (Critical) +A "task" is the unit of work for one build-loop iteration (read context → implement → verify). Optimize for fewer, larger tasks. +- Target 3–10 tasks per spec (rarely > 12) +- Each task should represent a meaningful deliverable slice and be finishable in one iteration +- Do NOT split into separate tasks for tiny edits (e.g., adding a few constants, renaming exports, wiring imports, updating a couple call sites) +- Split tasks only at real dependency/risk boundaries (e.g., new public API, schema change, multi-step rollout, complex UI flow) +- Do NOT create tasks that are only "run tests/typecheck/lint" — baseline verification already happens every loop; only add testing tasks when new/changed tests need to be written + +## Estimation (Story Points) +Estimate each task using story points: 1, 2, 3, 5, 8. +- Target task size: 2–5 points +- 1-point tasks should be merged into a nearby task unless there is a hard dependency boundary +- 8-point tasks must be split (too much context/verification for one loop) +- Specs can be large; the key is that each task stays loop-sized. 
Use \`pointsBudget\` as an optional milestone target (e.g., 20 points per phase), not a hard cap. ## Workflow ### 1. Audit Specs - Read all specs in \`.ralph-wiggum/specs/*\` - For each spec, verify tasks and acceptance criteria are clear and complete -- If a spec is missing details, update it with specific tasks and acceptance criteria +- If a spec is missing details, update it with clear milestone-level tasks and acceptance criteria (avoid micro-tasks) ### 2. Audit Codebase -- Use up to 500 parallel subagents to search the source code +- Subagents are for code exploration and context gathering only (facts, file locations, existing patterns). +- Subagent budget: + - 1 “map” pass: identify likely entrypoints + relevant files + - Up to 8 “area” passes: one subagent per code area (CLI, commands, services, domain, agents, utils) + - Up to 4 “verification” passes: locate relevant scripts, test locations, linters, and guardrails + - Hard cap: 13 subagents total - Compare implementation against specs - Look for: TODOs, placeholders, skipped tests, incomplete features, inconsistent patterns @@ -43,18 +63,23 @@ Create or update @.ralph-wiggum/implementation.json with this structure: "name": "Human Readable Name", "priority": 1, "status": "pending", - "context": "Brief context for this spec. Reference existing code locations.", + "dependsOn": ["other-spec-id"], + "pointsBudget": 20, "tasks": [ { "id": "spec-id-1", "description": "First task description", "status": "pending", + "dependsOn": ["other-spec-id-1"], + "points": 3, "acceptanceCriteria": ["Criteria 1", "Criteria 2"] }, { "id": "spec-id-2", "description": "Second task description", - "status": "pending" + "status": "pending", + "dependsOn": ["spec-id-1"], + "points": 2 } ], "acceptanceCriteria": ["Spec-level AC 1", "Spec-level AC 2"] @@ -68,7 +93,10 @@ Create or update @.ralph-wiggum/implementation.json with this structure: - Tasks get sequential IDs like \`{spec-id}-1\`, \`{spec-id}-2\`, etc. 
- \`priority\`: Lower number = higher priority (1 = first to implement) - \`status\`: "pending" for unstarted, "in_progress" for active, "completed" for done -- \`context\`: Include relevant code paths, dependencies, or notes for the build agent +- \`dependsOn\`: Optional list of spec IDs that must be completed before this spec is runnable +- \`points\`: 1, 2, 3, 5, 8 story points for task sizing +- \`pointsBudget\`: Optional milestone target (not a cap) +- Make tasks coarse enough to justify a full build-loop iteration; merge trivial steps into their parent task rather than creating a new task - Make sure to copy / paste relevant acceptance criteria from the spec file for each task. Some tasks may share the same acceptance criteria. ### 4. Create Missing Specs @@ -248,6 +276,9 @@ Example: "The user should be able to use the UI/API to ## Tasks + - [ ] Task 1 - [ ] Task 2 - [ ] Task 3 @@ -261,10 +292,12 @@ Example: "The user should be able to use the UI/API to ## Success Metrics -## Testing Requirements -- [ ] Unit test changes -- [ ] Integration test changes -- [ ] End-to-end test changes +## Verification + +- [ ] Baseline: \`bun run typecheck\`, \`bun run test\`, \`bun run build\` +- [ ] Frontend (UI changes): Use \`agent-browser\` to navigate to the relevant screen, exercise the full user flow, and capture a screenshot for \`.ralph-wiggum/PROGRESS.md\` (example: "Open Settings → toggle X → verify Y updates") +- [ ] Backend (service/API changes): Add/update unit tests in \`src/__tests__/\` (example: "Task parsing returns expected JSON"); add integration tests when cross-module behavior changes (example: "CLI command runs end-to-end against a temp workspace"); run \`bun run test\` ## Notes diff --git a/src/types.ts b/src/types.ts index fa271c7..0a308da 100644 --- a/src/types.ts +++ b/src/types.ts @@ -68,6 +68,8 @@ export interface AgentCommand { // Task-level orchestration types +export type StoryPoints = 1 | 2 | 3 | 5 | 8; + export type TaskStatusType = | 
"pending" | "in_progress" @@ -80,6 +82,8 @@ export interface TaskEntry { description: string; status: TaskStatusType; acceptanceCriteria?: string[]; + dependsOn?: string[]; + points?: StoryPoints; blockedReason?: string; retryCount?: number; completedAt?: string; @@ -92,6 +96,9 @@ export interface SpecEntry { priority: number; status: TaskStatusType; context?: string; + dependsOn?: string[]; + pointsBudget?: number; + pointsTotal?: number; tasks: TaskEntry[]; acceptanceCriteria?: string[]; }