From c710302d261c16be97bf7055abf191f5338e2aa7 Mon Sep 17 00:00:00 2001 From: Hashmeet Kaur Date: Sat, 28 Mar 2026 15:50:13 +0530 Subject: [PATCH 1/5] fixed browserOS agentic browser compatibility --- src/codex-client.ts | 165 ++++++++++++++++++++++++++++++++++++++++---- src/codex.ts | 78 ++++++++++++++++++++- src/index.ts | 87 +++++++++++++++++++++-- 3 files changed, 307 insertions(+), 23 deletions(-) diff --git a/src/codex-client.ts b/src/codex-client.ts index 2c95de6..8c4fff0 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -3,6 +3,9 @@ import { getCodexBinaryPath, type Message, type CodexStreamEvent, + type ParsedToolCall, + parseToolCalls, + buildToolInstructions, } from "./codex"; // Official V2 Types @@ -118,17 +121,73 @@ export class CodexClient { async *chatCompletionStream( messages: Message[], - options: { model: string }, + options: { model: string; tools?: any[]; tool_choice?: any }, ): AsyncGenerator { - // Format full prompt - let fullPrompt = ""; + const hasTools = options.tools && options.tools.length > 0; + + // --- Extract system messages into baseInstructions --- + const systemParts: string[] = []; + const nonSystemMessages: Message[] = []; for (const msg of messages) { - const roleName = msg.role.toUpperCase(); - const content = - typeof msg.content === "string" - ? msg.content - : JSON.stringify(msg.content); - fullPrompt += `[${roleName}]\n${content}\n\n`; + if (msg.role === "system") { + const content = + typeof msg.content === "string" + ? msg.content + : JSON.stringify(msg.content); + systemParts.push(content); + } else { + nonSystemMessages.push(msg); + } + } + + let baseInstructions = systemParts.join("\n\n") || undefined; + + // --- If tools are provided, inject tool definitions into instructions --- + if (hasTools) { + const toolBlock = buildToolInstructions( + options.tools!, + options.tool_choice, + ); + baseInstructions = (baseInstructions || "") + toolBlock; + } + + // --- Format conversation messages into prompt --- + let fullPrompt = ""; + for (const msg of nonSystemMessages) { + if (msg.role === "tool") { + // Tool result message from BrowserOS + const toolCallId = (msg as any).tool_call_id || "unknown"; + const toolName = (msg as any).name || "unknown"; + const content = + typeof msg.content === "string" + ? msg.content + : JSON.stringify(msg.content); + fullPrompt += `[TOOL_RESULT] (tool_call_id: ${toolCallId}, name: ${toolName})\n${content}\n\n`; + } else if (msg.role === "assistant" && (msg as any).tool_calls) { + // Assistant message that contained tool calls (history from previous turns) + const toolCalls = (msg as any).tool_calls as any[]; + let assistantContent = ""; + if (msg.content) { + assistantContent += + typeof msg.content === "string" + ? msg.content + : JSON.stringify(msg.content); + assistantContent += "\n"; + } + for (const tc of toolCalls) { + if (tc.type === "function" && tc.function) { + assistantContent += `{"name": "${tc.function.name}", "arguments": ${tc.function.arguments}}\n`; + } + } + fullPrompt += `[ASSISTANT]\n${assistantContent}\n`; + } else { + const roleName = msg.role.toUpperCase(); + const content = + typeof msg.content === "string" + ? msg.content + : JSON.stringify(msg.content); + fullPrompt += `[${roleName}]\n${content}\n\n`; + } } fullPrompt = (fullPrompt.trim() || "Please help me.") + "\n\n[ASSISTANT]\n"; @@ -137,6 +196,7 @@ export class CodexClient { cwd: process.cwd(), experimentalRawEvents: false, persistExtendedHistory: false, + ...(baseInstructions ? { baseInstructions } : {}), }; const startRes = (await this.request( @@ -166,10 +226,9 @@ export class CodexClient { input: input, cwd: process.cwd(), approvalPolicy: "never", - sandboxPolicy: { - type: "readOnly", - access: { type: "fullAccess" }, - }, + sandboxPolicy: hasTools + ? { type: "readOnly", access: { type: "fullAccess" } } + : { type: "dangerFullAccess" }, model: options.model, effort: "none" as any, summary: "none" as any, @@ -180,6 +239,7 @@ export class CodexClient { let turnDone = false; const eventQueue: CodexStreamEvent[] = []; let resolveNext: (() => void) | null = null; + let accumulatedText = ""; const cleanup = this.onEvent((event) => { if (event.type === "notification") { @@ -187,7 +247,12 @@ export class CodexClient { if (method === "item/agentMessage/delta") { const p = params as AgentMessageDeltaNotification; - eventQueue.push({ type: "message", text: p.delta }); + accumulatedText += p.delta; + if (!hasTools) { + // When no tools, stream text directly + eventQueue.push({ type: "message", text: p.delta }); + } + // When tools present, we buffer and parse at the end } else if ( method === "item/reasoning/textDelta" || method === "item/reasoning/summaryTextDelta" @@ -195,6 +260,26 @@ export class CodexClient { const p = params as ReasoningTextDeltaNotification; eventQueue.push({ type: "reasoning", text: p.delta }); } else if (method === "turn/completed") { + // If tools are present, check for tool calls in accumulated text + if (hasTools && accumulatedText) { + const toolCalls = parseToolCalls(accumulatedText); + if (toolCalls.length > 0) { + // Strip tool_call tags from text, emit remaining as content + const textWithoutToolCalls = accumulatedText + .replace(/[\s\S]*?<\/tool_call>/g, "") + .trim(); + if (textWithoutToolCalls) { + eventQueue.push({ + type: "message", + text: textWithoutToolCalls, + }); + } + eventQueue.push({ type: "tool_calls", calls: toolCalls }); + } else { + // No tool calls found, emit as plain message + eventQueue.push({ type: "message", text: accumulatedText }); + } + } turnDone = true; } else if (method === "error") { const p = params as ErrorNotification; @@ -202,12 +287,62 @@ export class CodexClient { p.error?.message || (p as any).message || "Unknown error"; eventQueue.push({ type: "error", text: errMsg }); turnDone = true; + } else if (method === "commandExecution/requestApproval") { + // Auto-approve command executions for agentic behavior + const approvalId = params?.approvalId; + if (approvalId) { + console.log( + `[CodexClient] Auto-approving command execution: ${params?.command || "unknown"}`, + ); + this.request("commandExecution/sendApproval", { + approvalId, + decision: "accept", + }).catch(() => {}); + } + } else if (method === "fileChange/requestApproval") { + // Auto-approve file changes for agentic behavior + const approvalId = params?.approvalId; + if (approvalId) { + console.log(`[CodexClient] Auto-approving file change`); + this.request("fileChange/sendApproval", { + approvalId, + decision: "accept", + }).catch(() => {}); + } + } else if (method === "commandExecution/outputDelta") { + // Surface command output as message text + if (params?.delta) { + accumulatedText += params.delta; + if (!hasTools) { + eventQueue.push({ type: "message", text: params.delta }); + } + } } } else if (event.type === "agent_message_content_delta") { - eventQueue.push({ type: "message", text: event.delta }); + accumulatedText += event.delta; + if (!hasTools) { + eventQueue.push({ type: "message", text: event.delta }); + } } else if (event.type === "reasoning_content_delta") { eventQueue.push({ type: "reasoning", text: event.delta }); } else if (event.type === "task_complete") { + if (hasTools && accumulatedText) { + const toolCalls = parseToolCalls(accumulatedText); + if (toolCalls.length > 0) { + const textWithoutToolCalls = accumulatedText + .replace(/[\s\S]*?<\/tool_call>/g, "") + .trim(); + if (textWithoutToolCalls) { + eventQueue.push({ + type: "message", + text: textWithoutToolCalls, + }); + } + eventQueue.push({ type: "tool_calls", calls: toolCalls }); + } else { + eventQueue.push({ type: "message", text: accumulatedText }); + } + } turnDone = true; } diff --git a/src/codex.ts b/src/codex.ts index feeda15..fcfe9c7 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -36,6 +36,17 @@ export interface CodexOptions { max_tokens?: number; reasoning_effort?: string; signal?: AbortSignal; + tools?: any[]; + tool_choice?: any; +} + +export interface ParsedToolCall { + id: string; + type: "function"; + function: { + name: string; + arguments: string; + }; } export async function execCodex( @@ -54,18 +65,81 @@ export async function execCodex( export type CodexStreamEvent = | { type: "reasoning"; text: string } | { type: "message"; text: string } - | { type: "error"; text: string }; + | { type: "error"; text: string } + | { type: "tool_calls"; calls: ParsedToolCall[] }; + +/** + * Parse ... blocks from model output text. + * Returns parsed tool calls, or empty array if none found. + */ +export function parseToolCalls(text: string): ParsedToolCall[] { + const calls: ParsedToolCall[] = []; + const regex = /([\s\S]*?)<\/tool_call>/g; + let match; + let callIndex = 0; + while ((match = regex.exec(text)) !== null) { + try { + const parsed = JSON.parse(match[1].trim()); + calls.push({ + id: `call_${Date.now()}_${callIndex++}`, + type: "function", + function: { + name: parsed.name || parsed.function?.name || "", + arguments: + typeof parsed.arguments === "string" + ? parsed.arguments + : JSON.stringify(parsed.arguments ?? parsed.parameters ?? {}), + }, + }); + } catch { + // Skip malformed tool calls + } + } + return calls; +} + +/** + * Build a tool-use instruction block from an OpenAI-format tools array. + * Injected into the model's system instructions so it knows which tools + * are available and the expected output format. + */ +export function buildToolInstructions(tools: any[], tool_choice?: any): string { + let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n{"name": "tool_name", "arguments": {"param": "value"}}\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`; + + for (const tool of tools) { + if (tool.type === "function" && tool.function) { + const fn = tool.function; + block += `### ${fn.name}\n`; + if (fn.description) block += `${fn.description}\n`; + if (fn.parameters) { + block += `Parameters: ${JSON.stringify(fn.parameters)}\n`; + } + block += `\n`; + } + } + + if (tool_choice && tool_choice !== "auto") { + if (typeof tool_choice === "object" && tool_choice.function?.name) { + block += `\nYou MUST use the tool "${tool_choice.function.name}" in your response.\n`; + } else if (tool_choice === "required") { + block += `\nYou MUST use at least one tool in your response.\n`; + } + } + + return block; +} export async function* execCodexStream( messages: Message[], options: CodexOptions = {}, ): AsyncGenerator { if (!options.model) { - // Default to a sane model if not provided options.model = "gpt-5.1"; } yield* codexClient.chatCompletionStream(messages, { model: options.model, + tools: options.tools, + tool_choice: options.tool_choice, }); } diff --git a/src/index.ts b/src/index.ts index 3283717..f4bd520 100644 --- a/src/index.ts +++ b/src/index.ts @@ -56,6 +56,8 @@ Bun.serve({ const temperature = body.temperature; const max_tokens = body.max_tokens; const reasoning_effort = body.reasoning_effort; + const tools = body.tools; + const tool_choice = body.tool_choice; const stream = body.stream === true; @@ -64,6 +66,9 @@ Bun.serve({ if (body.messages) { console.log(`[Proxy] Messages count: ${body.messages.length}`); } + if (tools) { + console.log(`[Proxy] Tools count: ${tools.length}`); + } if (stream) { const responseId = `chatcmpl-${Date.now()}`; @@ -81,6 +86,8 @@ Bun.serve({ max_tokens, reasoning_effort, signal: req.signal, + tools, + tool_choice, })) { if (req.signal.aborted) break; @@ -110,6 +117,60 @@ Bun.serve({ controller.enqueue( encoder.encode(`data: ${JSON.stringify(payload)}\n\n`), ); + } else if (event.type === "tool_calls") { + // Emit tool_calls in OpenAI streaming delta format + const toolCallsDeltas = event.calls.map((tc, idx) => ({ + index: idx, + id: tc.id, + type: "function" as const, + function: { + name: tc.function.name, + arguments: tc.function.arguments, + }, + })); + const payload = { + id: responseId, + object: "chat.completion.chunk", + created: createdTime, + model: model, + choices: [ + { + index: 0, + delta: { + role: "assistant", + tool_calls: toolCallsDeltas, + }, + finish_reason: null, + }, + ], + }; + controller.enqueue( + encoder.encode(`data: ${JSON.stringify(payload)}\n\n`), + ); + // Emit finish with tool_calls reason + const finishPayload = { + id: responseId, + object: "chat.completion.chunk", + created: createdTime, + model: model, + choices: [ + { + index: 0, + delta: {}, + finish_reason: "tool_calls", + }, + ], + }; + controller.enqueue( + encoder.encode( + `data: ${JSON.stringify(finishPayload)}\n\n`, + ), + ); + controller.enqueue(encoder.encode(`data: [DONE]\n\n`)); + try { + controller.close(); + } catch {} + return; // Don't emit the normal stop sequence } else if (event.type === "message") { const payload = { id: responseId, @@ -197,6 +258,7 @@ Bun.serve({ `[Proxy] Executing codex internally via stream buffer for non-streaming request...`, ); let finalMessage = ""; + let finalToolCalls: any[] | null = null; try { for await (const event of execCodexStream(messages, { @@ -205,10 +267,14 @@ Bun.serve({ max_tokens, reasoning_effort, signal: req.signal, + tools, + tool_choice, })) { if (req.signal.aborted) break; if (event.type === "message") { finalMessage += event.text; + } else if (event.type === "tool_calls") { + finalToolCalls = event.calls; } else if (event.type === "error") { finalMessage = `[Error] ${event.text}`; break; @@ -219,7 +285,7 @@ Bun.serve({ finalMessage = "Internal Server Error during execution."; } - if (!finalMessage) { + if (!finalMessage && !finalToolCalls) { finalMessage = "No response received."; } @@ -227,6 +293,18 @@ Bun.serve({ const createdTime = Math.floor(Date.now() / 1000); // Format an OpenAI-like response object + const assistantMessage: any = { + role: "assistant", + content: finalToolCalls ? finalMessage || null : finalMessage, + }; + + let finishReason = "stop"; + + if (finalToolCalls && finalToolCalls.length > 0) { + assistantMessage.tool_calls = finalToolCalls; + finishReason = "tool_calls"; + } + const openAiResponse = { id: responseId, object: "chat.completion", @@ -235,11 +313,8 @@ Bun.serve({ choices: [ { index: 0, - message: { - role: "assistant", - content: finalMessage, - }, - finish_reason: "stop", + message: assistantMessage, + finish_reason: finishReason, }, ], usage: { From 1476e2463cfbad30ecc2b79608efe3c735f5b8c0 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 13:54:08 +0000 Subject: [PATCH 2/5] feat: add browseros tool-calling mode Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd Co-authored-by: copsys <31281180+copsys@users.noreply.github.com> --- README.md | 11 +++++++++++ src/codex-client.ts | 18 +++++++++++++++++- src/codex.ts | 2 ++ src/index.ts | 6 ++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2b0f956..918bfd6 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,8 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c - **`temperature`** (number): Controls randomness (passed to the engine). - **`max_tokens`** (number): Limits the length of the generated response. - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`). +- **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients. +- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals. ## Quick Start @@ -70,6 +72,15 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \ - **Port**: Set via `PORT` environment variable (defaults to 8080). - **Models**: The proxy automatically queries your local Codex installation for available model slugs. +### BrowserOS Configuration + +If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set: + +- `browseros_mode: true` in your `/v1/chat/completions` request body +- keep sending `tools` and (optionally) `tool_choice` + +This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side. + ## Architecture This project uses a typed `CodexClient` that manages a persistent `codex app-server` background process. Communication happens over a high-speed JSON-RPC channel on `stdio`, ensuring that the model state remains warm and ready for immediate inference. diff --git a/src/codex-client.ts b/src/codex-client.ts index 8c4fff0..21ce40a 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -121,7 +121,12 @@ export class CodexClient { async *chatCompletionStream( messages: Message[], - options: { model: string; tools?: any[]; tool_choice?: any }, + options: { + model: string; + tools?: any[]; + tool_choice?: any; + browseros_mode?: boolean; + }, ): AsyncGenerator { const hasTools = options.tools && options.tools.length > 0; @@ -151,6 +156,17 @@ export class CodexClient { baseInstructions = (baseInstructions || "") + toolBlock; } + if (hasTools && options.browseros_mode) { + const browserOsToolModeInstructions = + `\n\n## BrowserOS Tool Execution Mode\n\n` + + `You are running as a tool-calling planner inside BrowserOS. ` + + `You can and must control the browser by emitting tool calls. ` + + `Do not claim you cannot access or control the browser/environment. ` + + `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` + + `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`; + baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions; + } + // --- Format conversation messages into prompt --- let fullPrompt = ""; for (const msg of nonSystemMessages) { diff --git a/src/codex.ts b/src/codex.ts index fcfe9c7..9528fca 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -38,6 +38,7 @@ export interface CodexOptions { signal?: AbortSignal; tools?: any[]; tool_choice?: any; + browseros_mode?: boolean; } export interface ParsedToolCall { @@ -141,5 +142,6 @@ export async function* execCodexStream( model: options.model, tools: options.tools, tool_choice: options.tool_choice, + browseros_mode: options.browseros_mode, }); } diff --git a/src/index.ts b/src/index.ts index f4bd520..2d0b979 100644 --- a/src/index.ts +++ b/src/index.ts @@ -58,6 +58,7 @@ Bun.serve({ const reasoning_effort = body.reasoning_effort; const tools = body.tools; const tool_choice = body.tool_choice; + const browseros_mode = body.browseros_mode === true; const stream = body.stream === true; @@ -69,6 +70,9 @@ Bun.serve({ if (tools) { console.log(`[Proxy] Tools count: ${tools.length}`); } + if (browseros_mode) { + console.log(`[Proxy] BrowserOS mode enabled`); + } if (stream) { const responseId = `chatcmpl-${Date.now()}`; @@ -88,6 +92,7 @@ Bun.serve({ signal: req.signal, tools, tool_choice, + browseros_mode, })) { if (req.signal.aborted) break; @@ -269,6 +274,7 @@ Bun.serve({ signal: req.signal, tools, tool_choice, + browseros_mode, })) { if (req.signal.aborted) break; if (event.type === "message") { From 27ad1519e7d21f95e5f4ddfa1a1d03ff3304a3fd Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sat, 28 Mar 2026 13:54:31 +0000 Subject: [PATCH 3/5] chore: address code review naming feedback Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd Co-authored-by: copsys <31281180+copsys@users.noreply.github.com> --- src/codex-client.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/codex-client.ts b/src/codex-client.ts index 21ce40a..dbf917c 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -157,14 +157,14 @@ export class CodexClient { } if (hasTools && options.browseros_mode) { - const browserOsToolModeInstructions = + const browserOSToolModeInstructions = `\n\n## BrowserOS Tool Execution Mode\n\n` + `You are running as a tool-calling planner inside BrowserOS. ` + `You can and must control the browser by emitting tool calls. ` + `Do not claim you cannot access or control the browser/environment. ` + `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` + `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`; - baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions; + baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions; } // --- Format conversation messages into prompt --- From 5be8919dda83ee2d238178c92c370b4466903d2e Mon Sep 17 00:00:00 2001 From: Chetan Khobragade <31281180+copsys@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:13:14 +0000 Subject: [PATCH 4/5] new feature --- README.md | 11 +++++++---- src/codex-client.ts | 6 ++++-- src/codex.ts | 24 +++++++++++++++++++++++- src/index.ts | 18 ++++++++++++++---- 4 files changed, 48 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 918bfd6..3daf0db 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c - **`max_tokens`** (number): Limits the length of the generated response. - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`). - **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients. -- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals. +- **`browseros_mode`** (boolean): Optional strict mode toggle for BrowserOS-like agentic clients. When tools are provided, this mode is **enabled by default** unless you explicitly set `browseros_mode: false`. ## Quick Start @@ -74,10 +74,13 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \ ### BrowserOS Configuration -If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set: +If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, verify: -- `browseros_mode: true` in your `/v1/chat/completions` request body -- keep sending `tools` and (optionally) `tool_choice` +- you are sending `tools` in your `/v1/chat/completions` request body (this auto-enables BrowserOS strict mode) +- optionally set `browseros_mode: true` explicitly for clarity +- keep sending `tool_choice` when your client supports it + +To disable strict BrowserOS behavior for non-agentic use-cases, set `browseros_mode: false`. This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side. diff --git a/src/codex-client.ts b/src/codex-client.ts index dbf917c..636f830 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -162,8 +162,10 @@ export class CodexClient { `You are running as a tool-calling planner inside BrowserOS. ` + `You can and must control the browser by emitting tool calls. ` + `Do not claim you cannot access or control the browser/environment. ` + - `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` + - `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`; + `If the request is actionable with available tools, your response MUST include at least one block. ` + + `Prefer tool-call-only output for action steps. ` + + `For shopping workflows, adding products to cart is permitted; avoid checkout/payment unless explicitly requested. ` + + `If an action needs multiple steps, emit only the next required tool call(s) for the current step.\n`; baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions; } diff --git a/src/codex.ts b/src/codex.ts index 9528fca..caa23c3 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -105,7 +105,19 @@ export function parseToolCalls(text: string): ParsedToolCall[] { * are available and the expected output format. */ export function buildToolInstructions(tools: any[], tool_choice?: any): string { - let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n{"name": "tool_name", "arguments": {"param": "value"}}\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`; + let block = + `\n\n## Available Tools\n\n` + + `You are an agentic planner operating through external tools. ` + + `When tools are available, your next action MUST be emitted as tool calls, not prose refusals.\n\n` + + `Tool call output format (required):\n` + + `{"name": "tool_name", "arguments": {"param": "value"}}\n\n` + + `IMPORTANT RULES:\n` + + `- If a user request is actionable with provided tools, emit one or more blocks.\n` + + `- Do not say you cannot access the browser/environment when browser tools are provided.\n` + + `- Keep normal text minimal. Prefer tool-call-only responses for action steps.\n` + + `- After tool results are returned, emit the next tool call(s) needed to continue.\n` + + `- For commerce tasks, adding an item to cart is allowed; do not attempt checkout/payment unless user explicitly requests it.\n\n` + + `Here are the tools:\n\n`; for (const tool of tools) { if (tool.type === "function" && tool.function) { @@ -116,6 +128,16 @@ export function buildToolInstructions(tools: any[], tool_choice?: any): string { block += `Parameters: ${JSON.stringify(fn.parameters)}\n`; } block += `\n`; + } else if (tool?.name) { + // Support alternate tool schemas used by some providers/agents. + block += `### ${tool.name}\n`; + if (tool.description) block += `${tool.description}\n`; + if (tool.input_schema) { + block += `Parameters: ${JSON.stringify(tool.input_schema)}\n`; + } else if (tool.parameters) { + block += `Parameters: ${JSON.stringify(tool.parameters)}\n`; + } + block += `\n`; } } diff --git a/src/index.ts b/src/index.ts index 2d0b979..ae61346 100644 --- a/src/index.ts +++ b/src/index.ts @@ -56,9 +56,12 @@ Bun.serve({ const temperature = body.temperature; const max_tokens = body.max_tokens; const reasoning_effort = body.reasoning_effort; - const tools = body.tools; + const tools = Array.isArray(body.tools) ? body.tools : undefined; const tool_choice = body.tool_choice; - const browseros_mode = body.browseros_mode === true; + // Default to BrowserOS-style strict tool mode whenever tools are supplied, + // unless callers explicitly disable it with browseros_mode: false. + const browseros_mode = + tools && tools.length > 0 ? body.browseros_mode !== false : false; const stream = body.stream === true; @@ -70,8 +73,15 @@ Bun.serve({ if (tools) { console.log(`[Proxy] Tools count: ${tools.length}`); } - if (browseros_mode) { - console.log(`[Proxy] BrowserOS mode enabled`); + if (tools && tools.length > 0) { + console.log( + `[Proxy] BrowserOS mode: ${browseros_mode ? "enabled" : "disabled"}`, + ); + if (body.browseros_mode === undefined && browseros_mode) { + console.log( + `[Proxy] BrowserOS mode auto-enabled because tools were provided`, + ); + } } if (stream) { From dda3e7453318a59658a516dcc87f0bde5423892a Mon Sep 17 00:00:00 2001 From: Chetan Khobragade <31281180+copsys@users.noreply.github.com> Date: Sat, 28 Mar 2026 15:19:07 +0000 Subject: [PATCH 5/5] fixed --- src/codex-client.ts | 3 ++ src/codex.ts | 82 ++++++++++++++++++++++++++++++++++++--------- src/index.ts | 3 +- 3 files changed, 71 insertions(+), 17 deletions(-) diff --git a/src/codex-client.ts b/src/codex-client.ts index 636f830..6e400b9 100644 --- a/src/codex-client.ts +++ b/src/codex-client.ts @@ -294,6 +294,9 @@ export class CodexClient { } eventQueue.push({ type: "tool_calls", calls: toolCalls }); } else { + console.warn( + `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`, + ); // No tool calls found, emit as plain message eventQueue.push({ type: "message", text: accumulatedText }); } diff --git a/src/codex.ts b/src/codex.ts index caa23c3..bf74e12 100644 --- a/src/codex.ts +++ b/src/codex.ts @@ -75,27 +75,77 @@ export type CodexStreamEvent = */ export function parseToolCalls(text: string): ParsedToolCall[] { const calls: ParsedToolCall[] = []; - const regex = /([\s\S]*?)<\/tool_call>/g; - let match; + const seen = new Set(); let callIndex = 0; - while ((match = regex.exec(text)) !== null) { + + const pushCall = (raw: any) => { + const name = raw?.name || raw?.toolName || raw?.function?.name || ""; + const argsRaw = + raw?.arguments ?? raw?.input ?? raw?.parameters ?? raw?.function?.arguments; + if (!name) return; + const args = + typeof argsRaw === "string" + ? argsRaw + : JSON.stringify(argsRaw ?? {}); + const key = `${name}::${args}`; + if (seen.has(key)) return; + seen.add(key); + calls.push({ + id: `call_${Date.now()}_${callIndex++}`, + type: "function", + function: { + name, + arguments: args, + }, + }); + }; + + // Format 1: explicit ... blocks. + const taggedRegex = /([\s\S]*?)<\/tool_call>/g; + let match; + while ((match = taggedRegex.exec(text)) !== null) { + try { + pushCall(JSON.parse(match[1].trim())); + } catch { + // Ignore malformed block. + } + } + + // Format 2: JSON fenced blocks that contain a single call, call list, or tool_calls. + const fencedJsonRegex = /```(?:json)?\s*([\s\S]*?)```/g; + while ((match = fencedJsonRegex.exec(text)) !== null) { + const candidate = match[1].trim(); try { - const parsed = JSON.parse(match[1].trim()); - calls.push({ - id: `call_${Date.now()}_${callIndex++}`, - type: "function", - function: { - name: parsed.name || parsed.function?.name || "", - arguments: - typeof parsed.arguments === "string" - ? parsed.arguments - : JSON.stringify(parsed.arguments ?? parsed.parameters ?? {}), - }, - }); + const parsed = JSON.parse(candidate); + if (Array.isArray(parsed)) { + for (const item of parsed) pushCall(item); + } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) { + for (const item of parsed.tool_calls) pushCall(item); + } else { + pushCall(parsed); + } } catch { - // Skip malformed tool calls + // Not valid JSON; ignore. } } + + // Format 3: whole response is a JSON object/array describing tool calls. + const trimmed = text.trim(); + if (trimmed.startsWith("{") || trimmed.startsWith("[")) { + try { + const parsed = JSON.parse(trimmed); + if (Array.isArray(parsed)) { + for (const item of parsed) pushCall(item); + } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) { + for (const item of parsed.tool_calls) pushCall(item); + } else { + pushCall(parsed); + } + } catch { + // Not parseable as JSON; ignore. + } + } + return calls; } diff --git a/src/index.ts b/src/index.ts index ae61346..d9d6d57 100644 --- a/src/index.ts +++ b/src/index.ts @@ -57,11 +57,12 @@ Bun.serve({ const max_tokens = body.max_tokens; const reasoning_effort = body.reasoning_effort; const tools = Array.isArray(body.tools) ? body.tools : undefined; - const tool_choice = body.tool_choice; // Default to BrowserOS-style strict tool mode whenever tools are supplied, // unless callers explicitly disable it with browseros_mode: false. const browseros_mode = tools && tools.length > 0 ? body.browseros_mode !== false : false; + const tool_choice = + body.tool_choice ?? (browseros_mode ? "required" : undefined); const stream = body.stream === true;