Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
92 changes: 92 additions & 0 deletions src/agents/anti-duplication.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/// <reference types="bun-types" />

import { describe, it, expect } from "bun:test"
import { buildAntiDuplicationSection } from "./dynamic-agent-prompt-builder"

describe("buildAntiDuplicationSection", () => {
it("#given no arguments #when building anti-duplication section #then returns comprehensive rule section", () => {
//#given: no special configuration needed

//#when: building the anti-duplication section
const result = buildAntiDuplicationSection()

//#then: should contain the anti-duplication rule with all key concepts
expect(result).toContain("Anti-Duplication Rule")
expect(result).toContain("CRITICAL")
expect(result).toContain("DO NOT perform the same search yourself")
})

it("#given no arguments #when building #then explicitly forbids manual re-search after delegation", () => {
//#given: no special configuration

//#when: building the section
const result = buildAntiDuplicationSection()

//#then: should explicitly list forbidden behaviors
expect(result).toContain("FORBIDDEN")
expect(result).toContain("manually grep/search for the same information")
expect(result).toContain("Re-doing the research")
})

it("#given no arguments #when building #then allows non-overlapping work", () => {
//#given: no special configuration

//#when: building the section
const result = buildAntiDuplicationSection()

//#then: should explicitly allow non-overlapping work
expect(result).toContain("ALLOWED")
expect(result).toContain("non-overlapping work")
expect(result).toContain("work that doesn't depend on the delegated research")
})

it("#given no arguments #when building #then includes wait-for-results instructions", () => {
//#given: no special configuration

//#when: building the section
const result = buildAntiDuplicationSection()

//#then: should include instructions for waiting properly
expect(result).toContain("Wait for Results Properly")
expect(result).toContain("End your response")
expect(result).toContain("Wait for the completion notification")
expect(result).toContain("background_output")
})

it("#given no arguments #when building #then explains why this matters", () => {
//#given: no special configuration

//#when: building the section
const result = buildAntiDuplicationSection()

//#then: should explain the purpose
expect(result).toContain("Why This Matters")
expect(result).toContain("Wasted tokens")
expect(result).toContain("Confusion")
expect(result).toContain("Efficiency")
})

it("#given no arguments #when building #then provides code examples", () => {
//#given: no special configuration

//#when: building the section
const result = buildAntiDuplicationSection()

//#then: should include examples
expect(result).toContain("Example")
expect(result).toContain("WRONG")
expect(result).toContain("CORRECT")
expect(result).toContain("task(subagent_type=")
})

it("#given no arguments #when building #then uses proper markdown formatting", () => {
//#given: no special configuration

//#when: building the section
const result = buildAntiDuplicationSection()

//#then: should be wrapped in Anti_Duplication tag
expect(result).toContain("<Anti_Duplication>")
expect(result).toContain("</Anti_Duplication>")
})
})
118 changes: 118 additions & 0 deletions src/agents/atlas/atlas-prompt.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { describe, test, expect } from "bun:test"
import { ATLAS_SYSTEM_PROMPT } from "./default"
import { ATLAS_GPT_SYSTEM_PROMPT } from "./gpt"
import { ATLAS_GEMINI_SYSTEM_PROMPT } from "./gemini"

describe("Atlas prompts auto-continue policy", () => {
test("default variant should forbid asking user for continuation confirmation", () => {
// given
const prompt = ATLAS_SYSTEM_PROMPT

// when
const lowerPrompt = prompt.toLowerCase()

// then
expect(lowerPrompt).toContain("auto-continue policy")
expect(lowerPrompt).toContain("never ask the user")
expect(lowerPrompt).toContain("should i continue")
expect(lowerPrompt).toContain("proceed to next task")
expect(lowerPrompt).toContain("approval-style")
expect(lowerPrompt).toContain("auto-continue immediately")
})

test("gpt variant should forbid asking user for continuation confirmation", () => {
// given
const prompt = ATLAS_GPT_SYSTEM_PROMPT

// when
const lowerPrompt = prompt.toLowerCase()

// then
expect(lowerPrompt).toContain("auto-continue policy")
expect(lowerPrompt).toContain("never ask the user")
expect(lowerPrompt).toContain("should i continue")
expect(lowerPrompt).toContain("proceed to next task")
expect(lowerPrompt).toContain("approval-style")
expect(lowerPrompt).toContain("auto-continue immediately")
})

test("gemini variant should forbid asking user for continuation confirmation", () => {
// given
const prompt = ATLAS_GEMINI_SYSTEM_PROMPT

// when
const lowerPrompt = prompt.toLowerCase()

// then
expect(lowerPrompt).toContain("auto-continue policy")
expect(lowerPrompt).toContain("never ask the user")
expect(lowerPrompt).toContain("should i continue")
expect(lowerPrompt).toContain("proceed to next task")
expect(lowerPrompt).toContain("approval-style")
expect(lowerPrompt).toContain("auto-continue immediately")
})

test("all variants should require immediate continuation after verification passes", () => {
// given
const prompts = [ATLAS_SYSTEM_PROMPT, ATLAS_GPT_SYSTEM_PROMPT, ATLAS_GEMINI_SYSTEM_PROMPT]

// when / then
for (const prompt of prompts) {
const lowerPrompt = prompt.toLowerCase()
expect(lowerPrompt).toMatch(/auto-continue immediately after verification/)
expect(lowerPrompt).toMatch(/immediately delegate next task/)
}
})

test("all variants should define when user interaction is actually needed", () => {
// given
const prompts = [ATLAS_SYSTEM_PROMPT, ATLAS_GPT_SYSTEM_PROMPT, ATLAS_GEMINI_SYSTEM_PROMPT]

// when / then
for (const prompt of prompts) {
const lowerPrompt = prompt.toLowerCase()
expect(lowerPrompt).toMatch(/only pause.*truly blocked/)
expect(lowerPrompt).toMatch(/plan needs clarification|blocked by external/)
}
})
})

describe("Atlas prompts plan path consistency", () => {
test("default variant should use .sisyphus/plans/{plan-name}.md path", () => {
// given
const prompt = ATLAS_SYSTEM_PROMPT

// when / then
expect(prompt).toContain(".sisyphus/plans/{plan-name}.md")
Copy link

@cubic-dev-ai cubic-dev-ai bot Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1: Custom agent: Opencode Compatibility

To maintain compatibility with Opencode's native plan tooling and permissions, plans should be stored in .opencode/plans/ rather than .sisyphus/plans/. Opencode explicitly whitelists .opencode/plans/*.md for plan-related operations.

Prompt for AI agents
Check if this issue is valid — if so, understand the root cause and fix it. At src/agents/atlas/atlas-prompt.test.ts, line 86:

<comment>To maintain compatibility with Opencode's native plan tooling and permissions, plans should be stored in `.opencode/plans/` rather than `.sisyphus/plans/`. Opencode explicitly whitelists `.opencode/plans/*.md` for plan-related operations.</comment>

<file context>
@@ -0,0 +1,118 @@
+    const prompt = ATLAS_SYSTEM_PROMPT
+
+    // when / then
+    expect(prompt).toContain(".sisyphus/plans/{plan-name}.md")
+    expect(prompt).not.toContain(".sisyphus/tasks/{plan-name}.yaml")
+    expect(prompt).not.toContain(".sisyphus/tasks/")
</file context>
Fix with Cubic

expect(prompt).not.toContain(".sisyphus/tasks/{plan-name}.yaml")
expect(prompt).not.toContain(".sisyphus/tasks/")
})

test("gpt variant should use .sisyphus/plans/{plan-name}.md path", () => {
// given
const prompt = ATLAS_GPT_SYSTEM_PROMPT

// when / then
expect(prompt).toContain(".sisyphus/plans/{plan-name}.md")
expect(prompt).not.toContain(".sisyphus/tasks/")
})

test("gemini variant should use .sisyphus/plans/{plan-name}.md path", () => {
// given
const prompt = ATLAS_GEMINI_SYSTEM_PROMPT

// when / then
expect(prompt).toContain(".sisyphus/plans/{plan-name}.md")
expect(prompt).not.toContain(".sisyphus/tasks/")
})

test("all variants should read plan file after verification", () => {
// given
const prompts = [ATLAS_SYSTEM_PROMPT, ATLAS_GPT_SYSTEM_PROMPT, ATLAS_GEMINI_SYSTEM_PROMPT]

// when / then
for (const prompt of prompts) {
expect(prompt).toMatch(/read[\s\S]*?\.sisyphus\/plans\//)
}
})
})
25 changes: 24 additions & 1 deletion src/agents/atlas/default.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,29 @@ Every \`task()\` prompt MUST include ALL 6 sections:
**If your prompt is under 30 lines, it's TOO SHORT.**
</delegation_system>

<auto_continue>
## AUTO-CONTINUE POLICY (STRICT)

**CRITICAL: NEVER ask the user "should I continue", "proceed to next task", or any approval-style questions between plan steps.**

**You MUST auto-continue immediately after verification passes:**
- After any delegation completes and passes verification → Immediately delegate next task
- Do NOT wait for user input, do NOT ask "should I continue"
- Only pause or ask if you are truly blocked by missing information, an external dependency, or a critical failure

**The only time you ask the user:**
- Plan needs clarification or modification before execution
- Blocked by an external dependency beyond your control
- Critical failure prevents any further progress

**Auto-continue examples:**
- Task A done → Verify → Pass → Immediately start Task B
- Task fails → Retry 3x → Still fails → Document → Move to next independent task
- NEVER: "Should I continue to the next task?"

**This is NOT optional. This is core to your role as orchestrator.**
</auto_continue>

<workflow>
## Step 0: Register Tracking

Expand Down Expand Up @@ -214,7 +237,7 @@ After EVERY delegation, complete ALL of these steps — no shortcuts:

After verification, READ the plan file directly — every time, no exceptions:
\`\`\`
Read(".sisyphus/tasks/{plan-name}.yaml")
Read(".sisyphus/plans/{plan-name}.md")
\`\`\`
Count remaining \`- [ ]\` tasks. This is your ground truth for what comes next.

Expand Down
23 changes: 23 additions & 0 deletions src/agents/atlas/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,29 @@ Every \`task()\` prompt MUST include ALL 6 sections:
**Minimum 30 lines per delegation prompt. Under 30 lines = the subagent WILL fail.**
</delegation_system>

<auto_continue>
## AUTO-CONTINUE POLICY (STRICT)

**CRITICAL: NEVER ask the user "should I continue", "proceed to next task", or any approval-style questions between plan steps.**

**You MUST auto-continue immediately after verification passes:**
- After any delegation completes and passes verification → Immediately delegate next task
- Do NOT wait for user input, do NOT ask "should I continue"
- Only pause or ask if you are truly blocked by missing information, an external dependency, or a critical failure

**The only time you ask the user:**
- Plan needs clarification or modification before execution
- Blocked by an external dependency beyond your control
- Critical failure prevents any further progress

**Auto-continue examples:**
- Task A done → Verify → Pass → Immediately start Task B
- Task fails → Retry 3x → Still fails → Document → Move to next independent task
- NEVER: "Should I continue to the next task?"

**This is NOT optional. This is core to your role as orchestrator.**
</auto_continue>

<workflow>
## Step 0: Register Tracking

Expand Down
26 changes: 25 additions & 1 deletion src/agents/atlas/gpt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ Complete ALL tasks in a work plan via \`task()\` until fully done.
</scope_and_design_constraints>

<uncertainty_and_ambiguity>
- If a task is ambiguous or underspecified:
- During initial plan analysis, if a task is ambiguous or underspecified:
- Ask 1-3 precise clarifying questions, OR
- State your interpretation explicitly and proceed with the simplest approach.
- Once execution has started, do NOT stop to ask for continuation or approval between steps.
- Never fabricate task details, file paths, or requirements.
- Prefer language like "Based on the plan..." instead of absolute claims.
- When unsure about parallelization, default to sequential execution.
Expand Down Expand Up @@ -134,6 +135,29 @@ Every \`task()\` prompt MUST include ALL 6 sections:
**Minimum 30 lines per delegation prompt.**
</delegation_system>

<auto_continue>
## AUTO-CONTINUE POLICY (STRICT)

**CRITICAL: NEVER ask the user "should I continue", "proceed to next task", or any approval-style questions between plan steps.**

**You MUST auto-continue immediately after verification passes:**
- After any delegation completes and passes verification → Immediately delegate next task
- Do NOT wait for user input, do NOT ask "should I continue"
- Only pause or ask if you are truly blocked by missing information, an external dependency, or a critical failure

**The only time you ask the user:**
- Plan needs clarification or modification before execution
- Blocked by an external dependency beyond your control
- Critical failure prevents any further progress

**Auto-continue examples:**
- Task A done → Verify → Pass → Immediately start Task B
- Task fails → Retry 3x → Still fails → Document → Move to next independent task
- NEVER: "Should I continue to the next task?"

**This is NOT optional. This is core to your role as orchestrator.**
</auto_continue>

<workflow>
## Step 0: Register Tracking

Expand Down
Loading
Loading