yemyat · yemyat · Jan 21, 2026 · Jan 21, 2026 · Jan 22, 2026
diff --git a/.ralph-wiggum/PROMPT_plan.md b/.ralph-wiggum/PROMPT_plan.md
@@ -13,17 +13,37 @@ You are an autonomous planning agent. Analyze specs and create a structured impl
 - Plan only — do NOT implement anything
 - Do NOT assume functionality is missing — confirm with code search first
 - Each spec should have clear tasks and acceptance criteria
-- Treat `src/lib` as shared utilities — prefer consolidation over duplication
+- Prefer consolidation over duplication (shared code usually lives in `src/utils`, `src/services`, `src/domain`)
+
+## Task Granularity (Critical)
+A "task" is the unit of work for one build-loop iteration (read context → implement → verify). Optimize for fewer, larger tasks.
+- Target 3–10 tasks per spec (rarely > 12)
+- Each task should represent a meaningful deliverable slice and be finishable in one iteration
+- Do NOT split into separate tasks for tiny edits (e.g., adding a few constants, renaming exports, wiring imports, updating a couple call sites)
+- Split tasks only at real dependency/risk boundaries (e.g., new public API, schema change, multi-step rollout, complex UI flow)
+- Do NOT create tasks that are only "run tests/typecheck/lint" — baseline verification already happens every loop; only add testing tasks when new/changed tests need to be written
+
+## Estimation (Story Points)
+Estimate each task using story points: 1, 2, 3, 5, 8.
+- Target task size: 2–5 points
+- 1-point tasks should be merged into a nearby task unless there is a hard dependency boundary
+- 8-point tasks must be split (too much context/verification for one loop)
+- Specs can be large; the key is that each task stays loop-sized. Use `pointsBudget` as an optional milestone target (e.g., 20 points per phase), not a hard cap.
 
 ## Workflow
 
 ### 1. Audit Specs
 - Read all specs in `.ralph-wiggum/specs/*`
 - For each spec, verify tasks and acceptance criteria are clear and complete
-- If a spec is missing details, update it with specific tasks and acceptance criteria
+- If a spec is missing details, update it with clear milestone-level tasks and acceptance criteria (avoid micro-tasks)
 
 ### 2. Audit Codebase
-- Use up to 500 parallel subagents to search the source code
+- Subagents are for code exploration and context gathering only (facts, file locations, existing patterns).
+- Subagent budget:
+  - 1 “map” pass: identify likely entrypoints + relevant files
+  - Up to 8 “area” passes: one subagent per code area (CLI, commands, services, domain, agents, utils)
+  - Up to 4 “verification” passes: locate relevant scripts, test locations, linters, and guardrails
+  - Hard cap: 13 subagents total
 - Compare implementation against specs
 - Look for: TODOs, placeholders, skipped tests, incomplete features, inconsistent patterns
 
@@ -39,22 +59,28 @@ Create or update @.ralph-wiggum/implementation.json with this structure:
   "specs": [
     {
       "id": "spec-id-kebab-case",
-      "file": "specs/spec-file.md",
+      "file": ".ralph-wiggum/specs/spec-file.md",
       "name": "Human Readable Name",
       "priority": 1,
       "status": "pending",
       "context": "Brief context for this spec. Reference existing code locations.",
+      "dependsOn": ["other-spec-id"],
+      "pointsBudget": 20,
       "tasks": [
         {
           "id": "spec-id-1",
           "description": "First task description",
           "status": "pending",
+          "dependsOn": ["other-spec-id-1"],
+          "points": 3,
           "acceptanceCriteria": ["Criteria 1", "Criteria 2"]
         },
         {
           "id": "spec-id-2",
           "description": "Second task description",
-          "status": "pending"
+          "status": "pending",
+          "dependsOn": ["spec-id-1"],
+          "points": 2
         }
       ],
       "acceptanceCriteria": ["Spec-level AC 1", "Spec-level AC 2"]
@@ -69,6 +95,11 @@ Create or update @.ralph-wiggum/implementation.json with this structure:
 - `priority`: Lower number = higher priority (1 = first to implement)
 - `status`: "pending" for unstarted, "in_progress" for active, "completed" for done
 - `context`: Include relevant code paths, dependencies, or notes for the build agent
+- `dependsOn` (specs): Optional list of spec IDs that must be completed before this spec is runnable
+- `dependsOn` (tasks): Optional list of task IDs that must be completed before this task is runnable
+- `points`: Story-point estimate for a task; must be one of 1, 2, 3, 5, or 8
+- `pointsBudget`: Optional milestone target (not a cap)
+- Make tasks coarse enough to justify a full build-loop iteration; merge trivial steps into their parent task rather than creating a new task
 
 ### 4. Create Missing Specs
 If functionality is needed but no spec exists:
@@ -82,4 +113,4 @@ If functionality is needed but no spec exists:
 ### 5. Update Guardrails (if needed)
 If you discover project-specific rules that should be enforced, add them to the "Project-Specific Rules" section of @.ralph-wiggum/GUARDRAILS.md.
 
-COMPLETION: When all specs are audited, have clear tasks/acceptance criteria, and implementation.json is created/updated, output exactly: <STATUS>DONE</STATUS>
+COMPLETION: When all specs are audited, have clear tasks/acceptance criteria, and implementation.json is created/updated, output exactly: <STATUS>DONE</STATUS>
diff --git a/.ralph-wiggum/specs/example.md b/.ralph-wiggum/specs/example.md
@@ -15,6 +15,9 @@ Example: "The user should be able to use the UI/API to <perform a specific task>
 <!-- How are we going to solve the problem? Include the user journey. -->
 
 ## Tasks
+<!-- Keep tasks milestone-level: aim for 3–10 tasks that are each "one build-loop" sized.
+Avoid micro-tasks like "add 3 constants" or "rename an export" — list those as notes instead.
+Optional: Add a coarse estimate prefix like "(SP:3)" to help planning. -->
 - [ ] Task 1
 - [ ] Task 2
 - [ ] Task 3
@@ -28,10 +31,12 @@ Example: "The user should be able to use the UI/API to <perform a specific task>
 ## Success Metrics
 <!-- How will we know this feature is successful? Define measurable outcomes. -->
 
-## Testing Requirements
-- [ ] Unit test changes
-- [ ] Integration test changes
-- [ ] End-to-end test changes
+## Verification
+<!-- The baseline item below already runs in every build loop (typecheck/tests/build).
+Add further items only for verification that is truly extra (new tests, manual QA steps, migrations, deployments). -->
+- [ ] Baseline: `bun run typecheck`, `bun run test`, `bun run build`
+- [ ] Frontend (UI changes): Use `agent-browser` to navigate to the relevant screen, exercise the full user flow, and capture a screenshot for `.ralph-wiggum/PROGRESS.md` (example: "Open Settings → toggle X → verify Y updates")
+- [ ] Backend (service/API changes): Add/update unit tests in `src/__tests__/` (example: "Task parsing returns expected JSON"); add integration tests when cross-module behavior changes (example: "CLI command runs end-to-end against a temp workspace"); run `bun run test`
 
 ## Notes
 <!-- Implementation notes, edge cases, dependencies, security considerations -->
diff --git a/src/domain/implementation.ts b/src/domain/implementation.ts
@@ -1,9 +1,180 @@
 import fse from "fs-extra";
-import type { Implementation as ImplementationData } from "../types";
+import type {
+  Implementation as ImplementationData,
+  SpecEntry,
+  StoryPoints,
+  TaskEntry,
+  TaskStatusType,
+} from "../types";
 import { getImplementationFile } from "../utils/paths";
 import { Spec } from "./spec";
 import type { Task } from "./task";
 
+/**
+ * Type guard for a plain keyed object.
+ *
+ * Arrays are rejected explicitly: `typeof []` is "object", but an array is not
+ * a keyed record, and letting one through would turn a stray `[]` inside a
+ * `tasks` list into a placeholder task instead of being dropped.
+ *
+ * @param value - Untrusted value to inspect.
+ * @returns `true` when `value` is a non-null, non-array object.
+ */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+/** Returns `value` unchanged when it is a string primitive; otherwise `undefined`. */
+function asString(value: unknown): string | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  return value;
+}
+
+/**
+ * Returns `value` when it is a finite number; otherwise `undefined`.
+ * `NaN`, `Infinity` and `-Infinity` fail the `Number.isFinite` check.
+ */
+function asNumber(value: unknown): number | undefined {
+  if (typeof value !== "number") {
+    return undefined;
+  }
+  if (!Number.isFinite(value)) {
+    return undefined;
+  }
+  return value;
+}
+
+/**
+ * Returns a fresh copy of `value` when it is an array made up solely of
+ * strings; otherwise `undefined`. A single non-string element (or a hole)
+ * rejects the whole array rather than being filtered out.
+ */
+function asStringArray(value: unknown): string[] | undefined {
+  if (!Array.isArray(value)) {
+    return undefined;
+  }
+  const collected: string[] = [];
+  for (let i = 0; i < value.length; i += 1) {
+    const item: unknown = value[i];
+    if (typeof item !== "string") {
+      return undefined;
+    }
+    collected.push(item);
+  }
+  return collected;
+}
+
+/**
+ * Returns `value` when it is exactly one of the allowed story-point values
+ * (1, 2, 3, 5, 8); otherwise `undefined`.
+ */
+function asStoryPoints(value: unknown): StoryPoints | undefined {
+  if (
+    value === 1 ||
+    value === 2 ||
+    value === 3 ||
+    value === 5 ||
+    value === 8
+  ) {
+    return value;
+  }
+  return undefined;
+}
+
+/**
+ * Returns `value` when it is one of the recognised status strings
+ * ("pending", "in_progress", "completed", "blocked", "failed");
+ * otherwise `undefined`.
+ */
+function asTaskStatusType(value: unknown): TaskStatusType | undefined {
+  if (value === "pending" || value === "in_progress" || value === "completed") {
+    return value;
+  }
+  if (value === "blocked" || value === "failed") {
+    return value;
+  }
+  return undefined;
+}
+
+/**
+ * Coerces one raw `tasks[]` element into a `TaskEntry`.
+ *
+ * Required fields get defaults when missing or invalid; optional fields are
+ * copied only when they have the expected type and are otherwise omitted.
+ *
+ * @param raw - Untrusted value read from the implementation file.
+ * @param specId - ID of the owning spec; used to build a fallback task ID.
+ * @param taskIndex - Zero-based position of the element in the raw `tasks` array.
+ * @returns The normalized task, or `null` when `raw` fails the `isRecord` check.
+ */
+function normalizeTaskEntry(
+  raw: unknown,
+  specId: string,
+  taskIndex: number
+): TaskEntry | null {
+  if (!isRecord(raw)) {
+    return null;
+  }
+
+  const ordinal = taskIndex + 1;
+
+  // `||` rather than `??`: an empty-string id or description is as unusable as
+  // a missing one (normalizeSpecEntry likewise rejects an empty spec id), so
+  // both cases take the generated fallback.
+  const task: TaskEntry = {
+    id: asString(raw.id) || `${specId}-${ordinal}`,
+    description: asString(raw.description) || `Task ${ordinal}`,
+    status: asTaskStatusType(raw.status) ?? "pending",
+  };
+
+  const acceptanceCriteria = asStringArray(raw.acceptanceCriteria);
+  if (acceptanceCriteria !== undefined) {
+    task.acceptanceCriteria = acceptanceCriteria;
+  }
+
+  const dependsOn = asStringArray(raw.dependsOn);
+  if (dependsOn !== undefined) {
+    task.dependsOn = dependsOn;
+  }
+
+  const points = asStoryPoints(raw.points);
+  if (points !== undefined) {
+    task.points = points;
+  }
+
+  const blockedReason = asString(raw.blockedReason);
+  if (blockedReason !== undefined) {
+    task.blockedReason = blockedReason;
+  }
+
+  const retryCount = asNumber(raw.retryCount);
+  if (retryCount !== undefined) {
+    task.retryCount = retryCount;
+  }
+
+  const completedAt = asString(raw.completedAt);
+  if (completedAt !== undefined) {
+    task.completedAt = completedAt;
+  }
+
+  return task;
+}
+
+/**
+ * Coerces one raw `specs[]` element into a `SpecEntry`.
+ *
+ * @param raw - Untrusted value read from the implementation file.
+ * @param index - Zero-based position in the raw `specs` array; `index + 1`
+ *   is the fallback priority.
+ * @returns The normalized spec, or `null` when `raw` is not a record or has
+ *   no non-empty string `id`.
+ */
+function normalizeSpecEntry(raw: unknown, index: number): SpecEntry | null {
+  if (!isRecord(raw)) {
+    return null;
+  }
+
+  const specId = asString(raw.id);
+  if (specId === undefined || specId === "") {
+    return null;
+  }
+
+  // Task positions refer to the raw array, so a dropped entry still consumes
+  // its index when fallback task IDs are generated.
+  const rawTasks: unknown[] = Array.isArray(raw.tasks) ? raw.tasks : [];
+  const normalizedTasks: TaskEntry[] = [];
+  for (let position = 0; position < rawTasks.length; position += 1) {
+    const entry = normalizeTaskEntry(rawTasks[position], specId, position);
+    if (entry !== null) {
+      normalizedTasks.push(entry);
+    }
+  }
+
+  const result: SpecEntry = {
+    id: specId,
+    file: asString(raw.file) ?? "",
+    name: asString(raw.name) ?? specId,
+    priority: asNumber(raw.priority) ?? index + 1,
+    status: asTaskStatusType(raw.status) ?? "pending",
+    tasks: normalizedTasks,
+  };
+
+  // Optional fields are attached only when present with the expected type.
+  const contextText = asString(raw.context);
+  if (contextText !== undefined) {
+    result.context = contextText;
+  }
+
+  const specDependencies = asStringArray(raw.dependsOn);
+  if (specDependencies !== undefined) {
+    result.dependsOn = specDependencies;
+  }
+
+  const budget = asNumber(raw.pointsBudget);
+  if (budget !== undefined) {
+    result.pointsBudget = budget;
+  }
+
+  const total = asNumber(raw.pointsTotal);
+  if (total !== undefined) {
+    result.pointsTotal = total;
+  }
+
+  const criteria = asStringArray(raw.acceptanceCriteria);
+  if (criteria !== undefined) {
+    result.acceptanceCriteria = criteria;
+  }
+
+  return result;
+}
+
+/**
+ * Coerces the parsed implementation file into a well-formed
+ * `ImplementationData` object.
+ *
+ * A non-record input yields an empty version-1 document attributed to "user".
+ * Otherwise each top-level field is kept when it has the expected type and
+ * defaulted when it does not; spec entries that fail normalization are dropped.
+ *
+ * @param raw - Untrusted value, e.g. the result of reading the JSON file.
+ * @returns Normalized implementation data.
+ */
+function normalizeImplementationData(raw: unknown): ImplementationData {
+  const timestamp = new Date().toISOString();
+
+  if (!isRecord(raw)) {
+    return { version: 1, updatedAt: timestamp, updatedBy: "user", specs: [] };
+  }
+
+  // Unknown or missing authors are attributed to "user".
+  let author: "plan-mode" | "build-mode" | "user" = "user";
+  const candidate = asString(raw.updatedBy);
+  if (
+    candidate === "plan-mode" ||
+    candidate === "build-mode" ||
+    candidate === "user"
+  ) {
+    author = candidate;
+  }
+
+  const rawSpecs: unknown[] = Array.isArray(raw.specs) ? raw.specs : [];
+  const normalizedSpecs: SpecEntry[] = [];
+  for (let position = 0; position < rawSpecs.length; position += 1) {
+    const entry = normalizeSpecEntry(rawSpecs[position], position);
+    if (entry !== null) {
+      normalizedSpecs.push(entry);
+    }
+  }
+
+  return {
+    version: asNumber(raw.version) ?? 1,
+    updatedAt: asString(raw.updatedAt) ?? timestamp,
+    updatedBy: author,
+    specs: normalizedSpecs,
+  };
+}
+
 export class Implementation {
   private readonly _specs: Spec[];
   private readonly _version: number;
@@ -38,19 +209,48 @@ export class Implementation {
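    * Finds the next runnable pending task. Specs are visited in ascending
    * `priority` order (ties keep their original order); completed specs and
    * specs whose `dependenciesSatisfied` check fails are skipped.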
    */
   get nextPendingTask(): { spec: Spec; task: Task } | undefined {
+    const completedSpecIds = new Set( // IDs of completed specs, handed to each spec's dependency check
+      this._specs.filter((s) => s.isCompleted).map((s) => s.id)
+    );
+
+    const completedTaskIds = new Set<string>(); // filled from every spec, not only the one being examined
     for (const spec of this._specs) {
+      for (const task of spec.tasks) {
+        if (task.status === "completed") {
+          completedTaskIds.add(task.id);
+        }
+      }
+    }
+
+    const specsByPriority = this._specs // map() builds a new array, so sorting does not reorder _specs
+      .map((spec, index) => ({ spec, index }))
+      .sort((a, b) => {
+        const priorityDelta = a.spec.priority - b.spec.priority;
+        return priorityDelta !== 0 ? priorityDelta : a.index - b.index; // equal priorities keep original order
+      })
+      .map(({ spec }) => spec);
+
+    for (const spec of specsByPriority) {
       if (spec.isCompleted) {
         continue;
       }
 
-      const task = spec.nextPendingTask;
+      if (!spec.dependenciesSatisfied(completedSpecIds)) { // skip specs that fail their dependency check
+        continue;
+      }
+
+      const task = spec.nextRunnablePendingTask(completedTaskIds); // may find nothing; then try the next spec
       if (task) {
         return { spec, task };
       }
     }
     return undefined;
   }
 
+  /**
+   * Whether at least one task in any spec currently has status "pending".
+   */
+  get hasPendingTasks(): boolean {
+    for (const spec of this._specs) {
+      for (const task of spec.tasks) {
+        if (task.status === "pending") {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   /**
    * Get all completed specs.
    */
@@ -136,7 +336,7 @@ export class Implementation {
 
     try {
       const content = await fse.readJson(implPath);
-      return new Implementation(content as ImplementationData);
+      return new Implementation(normalizeImplementationData(content));
     } catch {
       return null;
     }