Skip to content

Commit 0515dd9

Browse files
FL4TLiN3 and claude authored
fix(e2e): improve test reliability and fix broken assertions (#398)
* fix(e2e): improve test reliability and fix broken assertions
  - Update streaming event names (startStreamingReasoning, completeStreamingReasoning)
  - Fix lazy-init.toml to use local e2e-mcp-server path instead of npx
  - Add --run-id option to runtime CLI
  - Refactor PDF/image tests to use flow-based assertions instead of flaky keyword matching
  - Add --config to publish.test.ts unpublish commands
  - Add infrastructure failure detection helper for Docker tests
  - Support additionalVolumes in Docker runtime for local package mounting
  All 142 E2E tests now pass reliably.
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* fix(core): add runId to RunParamsInput type
  - Add runId?: string to RunParamsInput for CLI --run-id option
  - Add changeset for E2E test reliability fixes
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* style: format streaming.test.ts
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* fix(core): add volume option to CommandOptions schema
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
* refactor(runtime): remove unused ReasoningDetail type export
  Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---------
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 6f854ae commit 0515dd9

29 files changed

+245
-104
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
---
2+
"@perstack/core": patch
3+
"@perstack/runtime": patch
4+
"@perstack/docker": patch
5+
"@perstack/e2e-mcp-server": patch
6+
"perstack": patch
7+
---
8+
9+
fix(e2e): improve test reliability and fix broken assertions
10+
11+
- Update streaming event names to match state-machine-redesign changes
12+
- Fix lazy-init.toml to use local e2e-mcp-server path
13+
- Add --run-id option to runtime CLI
14+
- Refactor PDF/image tests to use flow-based assertions
15+
- Add infrastructure failure detection for Docker tests
16+
- Support additionalVolumes in Docker runtime

apps/e2e-mcp-server/bin/server.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#!/usr/bin/env node
21
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
32
import { createServer } from "../src/server.js"
43

apps/e2e-mcp-server/tsup.config.ts

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,25 @@
11
import { defineConfig, type Options } from "tsup"
22
import { baseConfig } from "../../tsup.config.js"
3-
export const e2eMcpServerConfig: Options = {
3+
4+
// Library entry - normal external dependencies
5+
export const libConfig: Options = {
46
...baseConfig,
57
entry: {
6-
"bin/server": "bin/server.ts",
78
"src/index": "src/index.ts",
89
},
910
}
10-
export default defineConfig(e2eMcpServerConfig)
11+
12+
// Standalone server binary - bundle all dependencies for Docker execution
13+
export const serverConfig: Options = {
14+
...baseConfig,
15+
entry: {
16+
"bin/server": "bin/server.ts",
17+
},
18+
dts: false, // No types needed for binary
19+
noExternal: [/.*/], // Bundle all dependencies
20+
banner: {
21+
js: "#!/usr/bin/env node",
22+
},
23+
}
24+
25+
export default defineConfig([libConfig, serverConfig])

apps/perstack/src/run.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ export const runCommand = new Command()
5959
.option("-i, --interactive-tool-call-result", "Query is interactive tool call result")
6060
.option("--runtime <runtime>", "Execution runtime (docker, local, cursor, claude-code, gemini)")
6161
.option("--workspace <workspace>", "Workspace directory for Docker runtime")
62+
.option(
63+
"--volume <volume>",
64+
"Additional volume mount for Docker runtime (format: hostPath:containerPath:mode, can be specified multiple times)",
65+
(value: string, previous: string[]) => previous.concat(value),
66+
[] as string[],
67+
)
6268
.option(
6369
"--filter <types>",
6470
"Filter events by type (comma-separated, e.g., completeRun,stopRunByError)",
@@ -126,6 +132,7 @@ export const runCommand = new Command()
126132
eventListener,
127133
workspace: input.options.workspace,
128134
additionalEnvKeys: input.options.env,
135+
additionalVolumes: input.options.volume,
129136
})
130137
} catch (error) {
131138
if (error instanceof Error) {

apps/runtime/bin/cli.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ program
5454
"Timeout for each generation in milliseconds, default is 60000 (1 minute)",
5555
)
5656
.option("--job-id <jobId>", "Job ID for identifying the job")
57+
.option("--run-id <runId>", "Run ID for identifying the run")
5758
.option(
5859
"--env-path <path>",
5960
"Path to the environment file (can be specified multiple times), default is .env and .env.local",
@@ -98,6 +99,7 @@ program
9899
{
99100
setting: {
100101
jobId: input.options.jobId,
102+
runId: input.options.runId,
101103
expertKey: input.expertKey,
102104
input: { text: input.query },
103105
experts,

apps/runtime/src/helpers/thinking.ts

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,3 @@ export function extractThinkingText(reasoning: ReasoningPart[] | undefined): str
4848
.map((r) => r.text)
4949
.join("\n")
5050
}
51-
52-
// Re-export for backwards compatibility
53-
export type { ReasoningPart as ReasoningDetail }

e2e/experts/docker-attack-scenarios.toml

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@ pick = ["attemptCompletion", "think"]
2121

2222
[experts."attack-metadata".skills."attacker"]
2323
type = "mcpStdioSkill"
24-
command = "npx"
25-
packageName = "@perstack/e2e-mcp-server"
24+
command = "node"
25+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
2626
allowedDomains = ["api.anthropic.com"]
2727
lazyInit = false
2828

@@ -42,8 +42,8 @@ pick = ["attemptCompletion", "think"]
4242

4343
[experts."attack-ssrf".skills."attacker"]
4444
type = "mcpStdioSkill"
45-
command = "npx"
46-
packageName = "@perstack/e2e-mcp-server"
45+
command = "node"
46+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
4747
allowedDomains = ["api.anthropic.com"]
4848
lazyInit = false
4949

@@ -63,8 +63,8 @@ pick = ["attemptCompletion", "think"]
6363

6464
[experts."attack-filesystem".skills."attacker"]
6565
type = "mcpStdioSkill"
66-
command = "npx"
67-
packageName = "@perstack/e2e-mcp-server"
66+
command = "node"
67+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
6868
allowedDomains = ["api.anthropic.com"]
6969
lazyInit = false
7070

@@ -85,8 +85,8 @@ pick = ["attemptCompletion", "think"]
8585

8686
[experts."attack-symlink".skills."attacker"]
8787
type = "mcpStdioSkill"
88-
command = "npx"
89-
packageName = "@perstack/e2e-mcp-server"
88+
command = "node"
89+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
9090
allowedDomains = ["api.anthropic.com"]
9191
lazyInit = false
9292

@@ -106,8 +106,8 @@ pick = ["attemptCompletion", "think"]
106106

107107
[experts."attack-proxy".skills."attacker"]
108108
type = "mcpStdioSkill"
109-
command = "npx"
110-
packageName = "@perstack/e2e-mcp-server"
109+
command = "node"
110+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
111111
allowedDomains = ["api.anthropic.com"]
112112
lazyInit = false
113113

@@ -127,8 +127,8 @@ pick = ["attemptCompletion", "think"]
127127

128128
[experts."attack-env".skills."attacker"]
129129
type = "mcpStdioSkill"
130-
command = "npx"
131-
packageName = "@perstack/e2e-mcp-server"
130+
command = "node"
131+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
132132
allowedDomains = ["api.anthropic.com"]
133133
lazyInit = false
134134

@@ -148,8 +148,8 @@ pick = ["attemptCompletion", "think"]
148148

149149
[experts."attack-exfiltrate".skills."attacker"]
150150
type = "mcpStdioSkill"
151-
command = "npx"
152-
packageName = "@perstack/e2e-mcp-server"
151+
command = "node"
152+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
153153
allowedDomains = ["api.anthropic.com"]
154154
lazyInit = false
155155

@@ -169,8 +169,8 @@ pick = ["attemptCompletion", "think"]
169169

170170
[experts."attack-dns-exfil".skills."attacker"]
171171
type = "mcpStdioSkill"
172-
command = "npx"
173-
packageName = "@perstack/e2e-mcp-server"
172+
command = "node"
173+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
174174
allowedDomains = ["api.anthropic.com"]
175175
lazyInit = false
176176

@@ -190,8 +190,8 @@ pick = ["attemptCompletion", "think"]
190190

191191
[experts."attack-harvest-env".skills."attacker"]
192192
type = "mcpStdioSkill"
193-
command = "npx"
194-
packageName = "@perstack/e2e-mcp-server"
193+
command = "node"
194+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
195195
allowedDomains = ["api.anthropic.com"]
196196
lazyInit = false
197197

@@ -211,7 +211,7 @@ pick = ["attemptCompletion", "think"]
211211

212212
[experts."attack-allowed-domains".skills."attacker"]
213213
type = "mcpStdioSkill"
214-
command = "npx"
215-
packageName = "@perstack/e2e-mcp-server"
214+
command = "node"
215+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
216216
allowedDomains = ["api.anthropic.com", "httpbin.org"]
217217
lazyInit = false

e2e/experts/docker-security.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,14 @@ pick = ["attemptCompletion", "think"]
8787

8888
[experts."docker-security-multi-skill".skills."network-github"]
8989
type = "mcpStdioSkill"
90-
command = "npx"
91-
packageName = "@perstack/e2e-mcp-server"
90+
command = "node"
91+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
9292
allowedDomains = ["api.github.com"]
9393
lazyInit = false
9494

9595
[experts."docker-security-multi-skill".skills."network-httpbin"]
9696
type = "mcpStdioSkill"
97-
command = "npx"
98-
packageName = "@perstack/e2e-mcp-server"
97+
command = "node"
98+
args = ["/repo/apps/e2e-mcp-server/dist/bin/server.js"]
9999
allowedDomains = ["httpbin.org"]
100100
lazyInit = false

e2e/experts/lazy-init.toml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ lazyInit = false
2424
[experts."e2e-lazy-init-all-false".skills."attacker"]
2525
type = "mcpStdioSkill"
2626
description = "E2E MCP server (no lazy init)"
27-
command = "npx"
28-
packageName = "@perstack/e2e-mcp-server"
27+
command = "node"
28+
args = ["apps/e2e-mcp-server/dist/bin/server.js"]
2929
lazyInit = false
3030

3131
# Expert with multiple skills: one lazyInit=false (required), one lazyInit=true
@@ -49,6 +49,6 @@ lazyInit = false
4949
[experts."e2e-lazy-init-mixed".skills."attacker"]
5050
type = "mcpStdioSkill"
5151
description = "E2E MCP server (lazy init)"
52-
command = "npx"
53-
packageName = "@perstack/e2e-mcp-server"
52+
command = "node"
53+
args = ["apps/e2e-mcp-server/dist/bin/server.js"]
5454
lazyInit = true

e2e/lib/event-parser.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@ export type CheckpointState = {
1414
partialToolResults: ToolCallInfo[]
1515
}
1616

17+
// Note: callDelegate, callInteractiveTool, finishAllToolCalls were removed in state-machine-redesign
1718
const RELEVANT_EVENT_TYPES = [
1819
"startRun",
20+
"resumeFromStop",
1921
"callTools",
20-
"callDelegate",
21-
"callInteractiveTool",
2222
"stopRunByDelegate",
2323
"stopRunByInteractiveTool",
2424
"resumeToolCalls",
25-
"finishAllToolCalls",
2625
"completeRun",
2726
"resolveToolResults",
2827
] as const

0 commit comments

Comments
 (0)