diff --git a/.changeset/tool-output-truncation.md b/.changeset/tool-output-truncation.md new file mode 100644 index 00000000..ea25b8aa --- /dev/null +++ b/.changeset/tool-output-truncation.md @@ -0,0 +1,7 @@ +--- +"@perstack/core": patch +"@perstack/base": patch +"@perstack/runtime": patch +--- + +Add tool output truncation to prevent token overflow. Base skill tools (exec, readTextFile, writeTextFile, editTextFile) truncate outputs to 30K chars with middle-truncation. Runtime catch-all truncates MCP tool TextPart results. diff --git a/apps/base/src/tools/edit-text-file.test.ts b/apps/base/src/tools/edit-text-file.test.ts index c1e01869..0a4b02d2 100644 --- a/apps/base/src/tools/edit-text-file.test.ts +++ b/apps/base/src/tools/edit-text-file.test.ts @@ -1,5 +1,6 @@ import { afterEach, describe, expect, it } from "bun:test" import fs from "node:fs/promises" +import { MAX_TOOL_OUTPUT_CHARS } from "@perstack/core" import { validatePath } from "../lib/path.js" import { editTextFile } from "./edit-text-file.js" @@ -46,6 +47,23 @@ describe("editTextFile tool", () => { await fs.chmod(testFile, 0o644) }) + it("truncates echoed oldText and newText exceeding 30K characters", async () => { + const largeOldText = "C".repeat(50_000) + const largeNewText = "D".repeat(50_000) + await fs.writeFile(testFile, largeOldText) + const result = await editTextFile({ + path: testFile, + oldText: largeOldText, + newText: largeNewText, + }) + expect(result.oldText.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(result.newText.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(result.oldText).toContain("... [truncated:") + expect(result.newText).toContain("... 
[truncated:") + // File itself should contain the full new content + expect(await fs.readFile(testFile, "utf-8")).toBe(largeNewText) + }) + it("throws error if text not found in file", async () => { await fs.writeFile(testFile, "This is the content") await expect( diff --git a/apps/base/src/tools/edit-text-file.ts b/apps/base/src/tools/edit-text-file.ts index f94ec614..39250698 100644 --- a/apps/base/src/tools/edit-text-file.ts +++ b/apps/base/src/tools/edit-text-file.ts @@ -1,5 +1,6 @@ import { stat } from "node:fs/promises" import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { truncateText } from "@perstack/core" import { z } from "zod/v4" import { validatePath } from "../lib/path.js" import { safeReadFile, safeWriteFile } from "../lib/safe-file.js" @@ -18,8 +19,8 @@ export async function editTextFile(input: { path: string; newText: string; oldTe await applyFileEdit(validatedPath, newText, oldText) return { path: validatedPath, - newText, - oldText, + newText: truncateText(newText), + oldText: truncateText(oldText), } } diff --git a/apps/base/src/tools/exec.test.ts b/apps/base/src/tools/exec.test.ts index b866762d..73fc2337 100644 --- a/apps/base/src/tools/exec.test.ts +++ b/apps/base/src/tools/exec.test.ts @@ -1,4 +1,5 @@ import { describe, expect, it } from "bun:test" +import { MAX_TOOL_OUTPUT_CHARS } from "@perstack/core" import { exec } from "./exec.js" describe("exec tool", () => { @@ -82,6 +83,21 @@ describe("exec tool", () => { expect(result.output).toBe("CUSTOM_VALUE") }) + it("truncates output exceeding 30K characters", async () => { + const largeOutput = "X".repeat(50_000) + const result = await exec({ + command: process.execPath, + args: ["-e", `process.stdout.write("${largeOutput}")`], + env: {}, + cwd: process.cwd(), + stdout: true, + stderr: false, + timeout: 5000, + }) + expect(result.output.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(result.output).toContain("... 
[truncated:") + }) + it("handles args with a single flag", async () => { const result = await exec({ command: process.execPath, diff --git a/apps/base/src/tools/exec.ts b/apps/base/src/tools/exec.ts index 8993e566..a535bfa1 100644 --- a/apps/base/src/tools/exec.ts +++ b/apps/base/src/tools/exec.ts @@ -1,7 +1,7 @@ import { type ExecException, execFile } from "node:child_process" import { promisify } from "node:util" import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" -import { getFilteredEnv } from "@perstack/core" +import { getFilteredEnv, truncateText } from "@perstack/core" import { z } from "zod/v4" import { validatePath } from "../lib/path.js" import { successToolResult } from "../lib/tool-result.js" @@ -38,7 +38,7 @@ export async function exec(input: ExecInput) { if (!output.trim()) { output = "Command executed successfully, but produced no output." } - return { output } + return { output: truncateText(output) } } export function registerExec(server: McpServer) { @@ -85,10 +85,10 @@ export function registerExec(server: McpServer) { } const result: { error: string; stdout?: string; stderr?: string } = { error: message } if (stdout && input.stdout) { - result.stdout = stdout + result.stdout = truncateText(stdout) } if (stderr && input.stderr) { - result.stderr = stderr + result.stderr = truncateText(stderr) } return { content: [{ type: "text", text: JSON.stringify(result) }] } } diff --git a/apps/base/src/tools/read-text-file.test.ts b/apps/base/src/tools/read-text-file.test.ts index 2c12e301..be055ba7 100644 --- a/apps/base/src/tools/read-text-file.test.ts +++ b/apps/base/src/tools/read-text-file.test.ts @@ -1,5 +1,6 @@ import { afterEach, describe, expect, it } from "bun:test" import fs from "node:fs/promises" +import { MAX_TOOL_OUTPUT_CHARS } from "@perstack/core" import { readTextFile } from "./read-text-file.js" const testFile = "read-text-file.test.txt" @@ -65,6 +66,14 @@ describe("readTextFile tool", () => { await 
expect(readTextFile({ path: "nonexistent.txt" })).rejects.toThrow("does not exist") }) + it("truncates content exceeding 30K characters", async () => { + const largeContent = "A".repeat(50_000) + await fs.writeFile(testFile, largeContent, "utf-8") + const result = await readTextFile({ path: testFile }) + expect(result.content.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(result.content).toContain("... [truncated:") + }) + it("throws error if path validation fails", async () => { await expect(readTextFile({ path: "../../../etc/passwd" })).rejects.toThrow("Access denied") }) diff --git a/apps/base/src/tools/read-text-file.ts b/apps/base/src/tools/read-text-file.ts index 163bbfec..e70e8edf 100644 --- a/apps/base/src/tools/read-text-file.ts +++ b/apps/base/src/tools/read-text-file.ts @@ -1,5 +1,6 @@ import { stat } from "node:fs/promises" import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { truncateText } from "@perstack/core" import { z } from "zod/v4" import { validatePath } from "../lib/path.js" import { safeReadFile } from "../lib/safe-file.js" @@ -20,7 +21,7 @@ export async function readTextFile(input: { path: string; from?: number; to?: nu const content = selectedLines.join("\n") return { path, - content, + content: truncateText(content), from: fromLine, to: toLine, } diff --git a/apps/base/src/tools/write-text-file.test.ts b/apps/base/src/tools/write-text-file.test.ts index 6fb5f08e..1bf791f0 100644 --- a/apps/base/src/tools/write-text-file.test.ts +++ b/apps/base/src/tools/write-text-file.test.ts @@ -2,6 +2,7 @@ import { afterEach, describe, expect, it } from "bun:test" import { existsSync } from "node:fs" import fs from "node:fs/promises" import { join } from "node:path" +import { MAX_TOOL_OUTPUT_CHARS } from "@perstack/core" import { validatePath } from "../lib/path.js" import { writeTextFile } from "./write-text-file.js" @@ -47,6 +48,15 @@ describe("writeTextFile tool", () => { expect(result.text).toBe("") }) + 
it("truncates echoed text exceeding 30K characters", async () => { + const largeText = "B".repeat(50_000) + const result = await writeTextFile({ path: testFile, text: largeText }) + expect(result.text.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(result.text).toContain("... [truncated:") + // File itself should contain the full content + expect(await fs.readFile(testFile, "utf-8")).toBe(largeText) + }) + it("throws error if existing file is not writable", async () => { await fs.writeFile(testFile, "readonly content") await fs.chmod(testFile, 0o444) diff --git a/apps/base/src/tools/write-text-file.ts b/apps/base/src/tools/write-text-file.ts index e405c646..784554a0 100644 --- a/apps/base/src/tools/write-text-file.ts +++ b/apps/base/src/tools/write-text-file.ts @@ -1,6 +1,7 @@ import { mkdir, stat } from "node:fs/promises" import { dirname } from "node:path" import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js" +import { truncateText } from "@perstack/core" import { z } from "zod/v4" import { validatePath } from "../lib/path.js" import { safeWriteFile } from "../lib/safe-file.js" @@ -18,7 +19,7 @@ export async function writeTextFile(input: { path: string; text: string }) { await safeWriteFile(validatedPath, text) return { path: validatedPath, - text, + text: truncateText(text), } } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 81c4d897..3a095a0b 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -25,4 +25,5 @@ export * from "./utils/activity.js" export * from "./utils/env-filter.js" export * from "./utils/event-filter.js" export * from "./utils/expert-type.js" +export * from "./utils/truncate.js" export * from "./utils/zod-error.js" diff --git a/packages/core/src/utils/truncate.test.ts b/packages/core/src/utils/truncate.test.ts new file mode 100644 index 00000000..2fa5ee1b --- /dev/null +++ b/packages/core/src/utils/truncate.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } 
from "bun:test" +import { MAX_TOOL_OUTPUT_CHARS, truncateText } from "./truncate.js" + +describe("truncateText", () => { + it("returns text unchanged when under limit", () => { + const text = "short text" + expect(truncateText(text)).toBe(text) + }) + + it("returns empty string unchanged", () => { + expect(truncateText("")).toBe("") + }) + + it("truncates text over limit with marker", () => { + const text = "x".repeat(50_000) + const result = truncateText(text) + expect(result.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(result).toContain("... [truncated:") + expect(result).toContain("characters removed from middle of 50000 total") + }) + + it("keeps head ~80% and tail ~20%", () => { + const text = "H".repeat(25_000) + "T".repeat(25_000) + const result = truncateText(text) + const markerIndex = result.indexOf("\n\n... [truncated:") + const markerEnd = result.indexOf("] ...\n\n") + "] ...\n\n".length + const headLen = markerIndex + const tailLen = result.length - markerEnd + expect(headLen).toBeGreaterThan(tailLen * 3) + }) + + it("total output never exceeds maxChars", () => { + const text = "a".repeat(100_000) + const result = truncateText(text) + expect(result.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + }) + + it("respects custom maxChars", () => { + const text = "a".repeat(200) + const result = truncateText(text, 100) + expect(result.length).toBeLessThanOrEqual(100) + expect(result).toContain("... 
[truncated:") + }) + + it("returns text exactly at limit unchanged", () => { + const text = "a".repeat(MAX_TOOL_OUTPUT_CHARS) + expect(truncateText(text)).toBe(text) + }) +}) diff --git a/packages/core/src/utils/truncate.ts b/packages/core/src/utils/truncate.ts new file mode 100644 index 00000000..723819fc --- /dev/null +++ b/packages/core/src/utils/truncate.ts @@ -0,0 +1,32 @@ +export const MAX_TOOL_OUTPUT_CHARS = 30_000 + +/** + * Middle-truncates `text` so the result is at most `maxChars` characters long. + * About 80% of the remaining budget is kept from the head and the rest from the tail, + * joined by a marker reporting how many characters were actually dropped. + */ +export function truncateText(text: string, maxChars: number = MAX_TOOL_OUTPUT_CHARS): string { + if (text.length <= maxChars) return text + + const buildMarker = (removed: number): string => + `\n\n... [truncated: ${removed} characters removed from middle of ${text.length} total] ...\n\n` + + // The marker spends part of the budget too, so more than text.length - maxChars characters + // are dropped. Its length depends on the digit count of the number it reports, so rebuild + // until the length stops changing (bounded: the digit count can only grow). + let marker = buildMarker(text.length - maxChars) + for (let pass = 0; pass < 4; pass++) { + const settled = buildMarker(text.length - maxChars + marker.length) + const stable = settled.length === marker.length + marker = settled + if (stable) break + } + + const available = maxChars - marker.length + // No room for the marker plus content: plain head cut, clamped so a negative limit yields "". + if (available <= 0) return text.slice(0, Math.max(0, maxChars)) + + const head = Math.floor(available * 0.8) + const tail = available - head + return text.slice(0, head) + marker + text.slice(text.length - tail) +} diff --git a/packages/runtime/src/state-machine/states/resolving-tool-result.test.ts b/packages/runtime/src/state-machine/states/resolving-tool-result.test.ts index 297dd58e..eb3a7e8e 100644 --- a/packages/runtime/src/state-machine/states/resolving-tool-result.test.ts +++ b/packages/runtime/src/state-machine/states/resolving-tool-result.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from "bun:test" import { createId } from "@paralleldrive/cuid2" +import { MAX_TOOL_OUTPUT_CHARS } from "@perstack/core" import { createCheckpoint, createMockSkillManagerFromAdapters, @@ -105,6 +106,53 @@ describe("@perstack/runtime: StateMachineLogic['ResolvingToolResult']", () => { ) }) + it("truncates large textPart contents", async () => { + const setting = createRunSetting() + const checkpoint = createCheckpoint() + const largeText = "Z".repeat(50_000) + const step = createStep({ + toolCalls: [ + { + id: "tc_trunc", + skillName: "@perstack/base", + toolName: "exec", + args: { command: "echo" }, + }, + ], + toolResults: [ + { + id: "tc_trunc", +
skillName: "@perstack/base", + toolName: "exec", + result: [ + { + type: "textPart" as const, + text: largeText, + id: createId(), + }, + ], + }, + ], + }) + const result = await StateMachineLogics.ResolvingToolResult({ + setting, + checkpoint, + step, + eventListener: async () => {}, + skillManager: createMockSkillManagerFromAdapters({}), + llmExecutor: mockLLMExecutor, + }) + expect(result.type).toBe("finishToolCall") + if (result.type !== "finishToolCall") throw new Error("Expected finishToolCall") + const toolMessage = result.newMessages[0] + const toolResultPart = toolMessage.contents[0] + if (toolResultPart.type !== "toolResultPart") throw new Error("Expected toolResultPart") + const textPart = toolResultPart.contents[0] + if (textPart.type !== "textPart") throw new Error("Expected textPart") + expect(textPart.text.length).toBeLessThanOrEqual(MAX_TOOL_OUTPUT_CHARS) + expect(textPart.text).toContain("... [truncated:") + }) + it("filters non-text and non-image parts from result", async () => { const setting = createRunSetting() const checkpoint = createCheckpoint() diff --git a/packages/runtime/src/state-machine/states/resolving-tool-result.ts b/packages/runtime/src/state-machine/states/resolving-tool-result.ts index 9b988d76..c7c5f9e0 100644 --- a/packages/runtime/src/state-machine/states/resolving-tool-result.ts +++ b/packages/runtime/src/state-machine/states/resolving-tool-result.ts @@ -1,4 +1,4 @@ -import { finishToolCall, type RunEvent } from "@perstack/core" +import { finishToolCall, type RunEvent, truncateText } from "@perstack/core" import { createToolMessage } from "../../messages/message.js" import type { RunSnapshot } from "../machine.js" @@ -30,12 +30,16 @@ export async function resolvingToolResultLogic({ type: "toolResultPart" as const, toolCallId: toolResult.id, toolName: toolCall?.toolName ?? 
toolResult.toolName, - contents: toolResult.result.filter( - (part) => - part.type === "textPart" || - part.type === "imageInlinePart" || - part.type === "fileInlinePart", - ), + contents: toolResult.result + .filter( + (part) => + part.type === "textPart" || + part.type === "imageInlinePart" || + part.type === "fileInlinePart", + ) + .map((part) => + part.type === "textPart" ? { ...part, text: truncateText(part.text) } : part, + ), } })