From c710302d261c16be97bf7055abf191f5338e2aa7 Mon Sep 17 00:00:00 2001
From: Hashmeet Kaur <hashmeet.kaur_matharu@bp.com>
Date: Sat, 28 Mar 2026 15:50:13 +0530
Subject: [PATCH 1/5] fix: BrowserOS agentic browser compatibility (add tools/tool_choice support)

---
 src/codex-client.ts | 165 ++++++++++++++++++++++++++++++++++++++++----
 src/codex.ts        |  78 ++++++++++++++++++++-
 src/index.ts        |  87 +++++++++++++++++++++--
 3 files changed, 307 insertions(+), 23 deletions(-)
diff --git a/src/codex-client.ts b/src/codex-client.ts
index 2c95de6..8c4fff0 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -3,6 +3,9 @@ import {
   getCodexBinaryPath,
   type Message,
   type CodexStreamEvent,
+  type ParsedToolCall,
+  parseToolCalls,
+  buildToolInstructions,
 } from "./codex";
 
 // Official V2 Types
@@ -118,17 +121,73 @@ export class CodexClient {
 
   async *chatCompletionStream(
     messages: Message[],
-    options: { model: string },
+    options: { model: string; tools?: any[]; tool_choice?: any },
   ): AsyncGenerator<CodexStreamEvent> {
-    // Format full prompt
-    let fullPrompt = "";
+    const hasTools = options.tools && options.tools.length > 0;
+
+    // --- Extract system messages into baseInstructions ---
+    const systemParts: string[] = [];
+    const nonSystemMessages: Message[] = [];
     for (const msg of messages) {
-      const roleName = msg.role.toUpperCase();
-      const content =
-        typeof msg.content === "string"
-          ? msg.content
-          : JSON.stringify(msg.content);
-      fullPrompt += `[${roleName}]\n${content}\n\n`;
+      if (msg.role === "system") {
+        const content =
+          typeof msg.content === "string"
+            ? msg.content
+            : JSON.stringify(msg.content);
+        systemParts.push(content);
+      } else {
+        nonSystemMessages.push(msg);
+      }
+    }
+
+    let baseInstructions = systemParts.join("\n\n") || undefined;
+
+    // --- If tools are provided, inject tool definitions into instructions ---
+    if (hasTools) {
+      const toolBlock = buildToolInstructions(
+        options.tools!,
+        options.tool_choice,
+      );
+      baseInstructions = (baseInstructions || "") + toolBlock;
+    }
+
+    // --- Format conversation messages into prompt ---
+    let fullPrompt = "";
+    for (const msg of nonSystemMessages) {
+      if (msg.role === "tool") {
+        // Tool result message from BrowserOS
+        const toolCallId = (msg as any).tool_call_id || "unknown";
+        const toolName = (msg as any).name || "unknown";
+        const content =
+          typeof msg.content === "string"
+            ? msg.content
+            : JSON.stringify(msg.content);
+        fullPrompt += `[TOOL_RESULT] (tool_call_id: ${toolCallId}, name: ${toolName})\n${content}\n\n`;
+      } else if (msg.role === "assistant" && (msg as any).tool_calls) {
+        // Assistant message that contained tool calls (history from previous turns)
+        const toolCalls = (msg as any).tool_calls as any[];
+        let assistantContent = "";
+        if (msg.content) {
+          assistantContent +=
+            typeof msg.content === "string"
+              ? msg.content
+              : JSON.stringify(msg.content);
+          assistantContent += "\n";
+        }
+        for (const tc of toolCalls) {
+          if (tc.type === "function" && tc.function) {
+            assistantContent += `<tool_call>{"name": "${tc.function.name}", "arguments": ${tc.function.arguments}}</tool_call>\n`;
+          }
+        }
+        fullPrompt += `[ASSISTANT]\n${assistantContent}\n`;
+      } else {
+        const roleName = msg.role.toUpperCase();
+        const content =
+          typeof msg.content === "string"
+            ? msg.content
+            : JSON.stringify(msg.content);
+        fullPrompt += `[${roleName}]\n${content}\n\n`;
+      }
     }
     fullPrompt = (fullPrompt.trim() || "Please help me.") + "\n\n[ASSISTANT]\n";
 
@@ -137,6 +196,7 @@ export class CodexClient {
       cwd: process.cwd(),
       experimentalRawEvents: false,
       persistExtendedHistory: false,
+      ...(baseInstructions ? { baseInstructions } : {}),
     };
 
     const startRes = (await this.request(
@@ -166,10 +226,9 @@ export class CodexClient {
         input: input,
         cwd: process.cwd(),
         approvalPolicy: "never",
-        sandboxPolicy: {
-          type: "readOnly",
-          access: { type: "fullAccess" },
-        },
+        sandboxPolicy: hasTools
+          ? { type: "readOnly", access: { type: "fullAccess" } }
+          : { type: "dangerFullAccess" },
         model: options.model,
         effort: "none" as any,
         summary: "none" as any,
@@ -180,6 +239,7 @@ export class CodexClient {
       let turnDone = false;
       const eventQueue: CodexStreamEvent[] = [];
       let resolveNext: (() => void) | null = null;
+      let accumulatedText = "";
 
       const cleanup = this.onEvent((event) => {
         if (event.type === "notification") {
@@ -187,7 +247,12 @@ export class CodexClient {
 
           if (method === "item/agentMessage/delta") {
             const p = params as AgentMessageDeltaNotification;
-            eventQueue.push({ type: "message", text: p.delta });
+            accumulatedText += p.delta;
+            if (!hasTools) {
+              // When no tools, stream text directly
+              eventQueue.push({ type: "message", text: p.delta });
+            }
+            // When tools present, we buffer and parse at the end
           } else if (
             method === "item/reasoning/textDelta" ||
             method === "item/reasoning/summaryTextDelta"
@@ -195,6 +260,26 @@ export class CodexClient {
             const p = params as ReasoningTextDeltaNotification;
             eventQueue.push({ type: "reasoning", text: p.delta });
           } else if (method === "turn/completed") {
+            // If tools are present, check for tool calls in accumulated text
+            if (hasTools && accumulatedText) {
+              const toolCalls = parseToolCalls(accumulatedText);
+              if (toolCalls.length > 0) {
+                // Strip tool_call tags from text, emit remaining as content
+                const textWithoutToolCalls = accumulatedText
+                  .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
+                  .trim();
+                if (textWithoutToolCalls) {
+                  eventQueue.push({
+                    type: "message",
+                    text: textWithoutToolCalls,
+                  });
+                }
+                eventQueue.push({ type: "tool_calls", calls: toolCalls });
+              } else {
+                // No tool calls found, emit as plain message
+                eventQueue.push({ type: "message", text: accumulatedText });
+              }
+            }
             turnDone = true;
           } else if (method === "error") {
             const p = params as ErrorNotification;
@@ -202,12 +287,62 @@ export class CodexClient {
               p.error?.message || (p as any).message || "Unknown error";
             eventQueue.push({ type: "error", text: errMsg });
             turnDone = true;
+          } else if (method === "commandExecution/requestApproval") {
+            // Auto-approve command executions for agentic behavior
+            const approvalId = params?.approvalId;
+            if (approvalId) {
+              console.log(
+                `[CodexClient] Auto-approving command execution: ${params?.command || "unknown"}`,
+              );
+              this.request("commandExecution/sendApproval", {
+                approvalId,
+                decision: "accept",
+              }).catch(() => {});
+            }
+          } else if (method === "fileChange/requestApproval") {
+            // Auto-approve file changes for agentic behavior
+            const approvalId = params?.approvalId;
+            if (approvalId) {
+              console.log(`[CodexClient] Auto-approving file change`);
+              this.request("fileChange/sendApproval", {
+                approvalId,
+                decision: "accept",
+              }).catch(() => {});
+            }
+          } else if (method === "commandExecution/outputDelta") {
+            // Surface command output as message text
+            if (params?.delta) {
+              accumulatedText += params.delta;
+              if (!hasTools) {
+                eventQueue.push({ type: "message", text: params.delta });
+              }
+            }
           }
         } else if (event.type === "agent_message_content_delta") {
-          eventQueue.push({ type: "message", text: event.delta });
+          accumulatedText += event.delta;
+          if (!hasTools) {
+            eventQueue.push({ type: "message", text: event.delta });
+          }
         } else if (event.type === "reasoning_content_delta") {
           eventQueue.push({ type: "reasoning", text: event.delta });
         } else if (event.type === "task_complete") {
+          if (hasTools && accumulatedText) {
+            const toolCalls = parseToolCalls(accumulatedText);
+            if (toolCalls.length > 0) {
+              const textWithoutToolCalls = accumulatedText
+                .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
+                .trim();
+              if (textWithoutToolCalls) {
+                eventQueue.push({
+                  type: "message",
+                  text: textWithoutToolCalls,
+                });
+              }
+              eventQueue.push({ type: "tool_calls", calls: toolCalls });
+            } else {
+              eventQueue.push({ type: "message", text: accumulatedText });
+            }
+          }
           turnDone = true;
         }
 
diff --git a/src/codex.ts b/src/codex.ts
index feeda15..fcfe9c7 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -36,6 +36,17 @@ export interface CodexOptions {
   max_tokens?: number;
   reasoning_effort?: string;
   signal?: AbortSignal;
+  tools?: any[];
+  tool_choice?: any;
+}
+
+export interface ParsedToolCall {
+  id: string;
+  type: "function";
+  function: {
+    name: string;
+    arguments: string;
+  };
 }
 
 export async function execCodex(
@@ -54,18 +65,81 @@ export async function execCodex(
 export type CodexStreamEvent =
   | { type: "reasoning"; text: string }
   | { type: "message"; text: string }
-  | { type: "error"; text: string };
+  | { type: "error"; text: string }
+  | { type: "tool_calls"; calls: ParsedToolCall[] };
+
+/**
+ * Parse <tool_call>...</tool_call> blocks from model output text.
+ * Returns parsed tool calls, or empty array if none found.
+ */
+export function parseToolCalls(text: string): ParsedToolCall[] {
+  const calls: ParsedToolCall[] = [];
+  const regex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
+  let match;
+  let callIndex = 0;
+  while ((match = regex.exec(text)) !== null) {
+    try {
+      const parsed = JSON.parse(match[1].trim());
+      calls.push({
+        id: `call_${Date.now()}_${callIndex++}`,
+        type: "function",
+        function: {
+          name: parsed.name || parsed.function?.name || "",
+          arguments:
+            typeof parsed.arguments === "string"
+              ? parsed.arguments
+              : JSON.stringify(parsed.arguments ?? parsed.parameters ?? {}),
+        },
+      });
+    } catch {
+      // Skip malformed tool calls
+    }
+  }
+  return calls;
+}
+
+/**
+ * Build a tool-use instruction block from an OpenAI-format tools array.
+ * Injected into the model's system instructions so it knows which tools
+ * are available and the expected output format.
+ */
+export function buildToolInstructions(tools: any[], tool_choice?: any): string {
+  let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n<tool_call>{"name": "tool_name", "arguments": {"param": "value"}}</tool_call>\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`;
+
+  for (const tool of tools) {
+    if (tool.type === "function" && tool.function) {
+      const fn = tool.function;
+      block += `### ${fn.name}\n`;
+      if (fn.description) block += `${fn.description}\n`;
+      if (fn.parameters) {
+        block += `Parameters: ${JSON.stringify(fn.parameters)}\n`;
+      }
+      block += `\n`;
+    }
+  }
+
+  if (tool_choice && tool_choice !== "auto") {
+    if (typeof tool_choice === "object" && tool_choice.function?.name) {
+      block += `\nYou MUST use the tool "${tool_choice.function.name}" in your response.\n`;
+    } else if (tool_choice === "required") {
+      block += `\nYou MUST use at least one tool in your response.\n`;
+    }
+  }
+
+  return block;
+}
 
 export async function* execCodexStream(
   messages: Message[],
   options: CodexOptions = {},
 ): AsyncGenerator<CodexStreamEvent, void, unknown> {
   if (!options.model) {
-    // Default to a sane model if not provided
     options.model = "gpt-5.1";
   }
 
   yield* codexClient.chatCompletionStream(messages, {
     model: options.model,
+    tools: options.tools,
+    tool_choice: options.tool_choice,
   });
 }
diff --git a/src/index.ts b/src/index.ts
index 3283717..f4bd520 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -56,6 +56,8 @@ Bun.serve({
         const temperature = body.temperature;
         const max_tokens = body.max_tokens;
         const reasoning_effort = body.reasoning_effort;
+        const tools = body.tools;
+        const tool_choice = body.tool_choice;
 
         const stream = body.stream === true;
 
@@ -64,6 +66,9 @@ Bun.serve({
         if (body.messages) {
           console.log(`[Proxy] Messages count: ${body.messages.length}`);
         }
+        if (tools) {
+          console.log(`[Proxy] Tools count: ${tools.length}`);
+        }
 
         if (stream) {
           const responseId = `chatcmpl-${Date.now()}`;
@@ -81,6 +86,8 @@ Bun.serve({
                   max_tokens,
                   reasoning_effort,
                   signal: req.signal,
+                  tools,
+                  tool_choice,
                 })) {
                   if (req.signal.aborted) break;
 
@@ -110,6 +117,60 @@ Bun.serve({
                       controller.enqueue(
                         encoder.encode(`data: ${JSON.stringify(payload)}\n\n`),
                       );
+                    } else if (event.type === "tool_calls") {
+                      // Emit tool_calls in OpenAI streaming delta format
+                      const toolCallsDeltas = event.calls.map((tc, idx) => ({
+                        index: idx,
+                        id: tc.id,
+                        type: "function" as const,
+                        function: {
+                          name: tc.function.name,
+                          arguments: tc.function.arguments,
+                        },
+                      }));
+                      const payload = {
+                        id: responseId,
+                        object: "chat.completion.chunk",
+                        created: createdTime,
+                        model: model,
+                        choices: [
+                          {
+                            index: 0,
+                            delta: {
+                              role: "assistant",
+                              tool_calls: toolCallsDeltas,
+                            },
+                            finish_reason: null,
+                          },
+                        ],
+                      };
+                      controller.enqueue(
+                        encoder.encode(`data: ${JSON.stringify(payload)}\n\n`),
+                      );
+                      // Emit finish with tool_calls reason
+                      const finishPayload = {
+                        id: responseId,
+                        object: "chat.completion.chunk",
+                        created: createdTime,
+                        model: model,
+                        choices: [
+                          {
+                            index: 0,
+                            delta: {},
+                            finish_reason: "tool_calls",
+                          },
+                        ],
+                      };
+                      controller.enqueue(
+                        encoder.encode(
+                          `data: ${JSON.stringify(finishPayload)}\n\n`,
+                        ),
+                      );
+                      controller.enqueue(encoder.encode(`data: [DONE]\n\n`));
+                      try {
+                        controller.close();
+                      } catch {}
+                      return; // Don't emit the normal stop sequence
                     } else if (event.type === "message") {
                       const payload = {
                         id: responseId,
@@ -197,6 +258,7 @@ Bun.serve({
           `[Proxy] Executing codex internally via stream buffer for non-streaming request...`,
         );
         let finalMessage = "";
+        let finalToolCalls: any[] | null = null;
 
         try {
           for await (const event of execCodexStream(messages, {
@@ -205,10 +267,14 @@ Bun.serve({
             max_tokens,
             reasoning_effort,
             signal: req.signal,
+            tools,
+            tool_choice,
           })) {
             if (req.signal.aborted) break;
             if (event.type === "message") {
               finalMessage += event.text;
+            } else if (event.type === "tool_calls") {
+              finalToolCalls = event.calls;
             } else if (event.type === "error") {
               finalMessage = `[Error] ${event.text}`;
               break;
@@ -219,7 +285,7 @@ Bun.serve({
           finalMessage = "Internal Server Error during execution.";
         }
 
-        if (!finalMessage) {
+        if (!finalMessage && !finalToolCalls) {
           finalMessage = "No response received.";
         }
 
@@ -227,6 +293,18 @@ Bun.serve({
         const createdTime = Math.floor(Date.now() / 1000);
 
         // Format an OpenAI-like response object
+        const assistantMessage: any = {
+          role: "assistant",
+          content: finalToolCalls ? finalMessage || null : finalMessage,
+        };
+
+        let finishReason = "stop";
+
+        if (finalToolCalls && finalToolCalls.length > 0) {
+          assistantMessage.tool_calls = finalToolCalls;
+          finishReason = "tool_calls";
+        }
+
         const openAiResponse = {
           id: responseId,
           object: "chat.completion",
@@ -235,11 +313,8 @@ Bun.serve({
           choices: [
             {
               index: 0,
-              message: {
-                role: "assistant",
-                content: finalMessage,
-              },
-              finish_reason: "stop",
+              message: assistantMessage,
+              finish_reason: finishReason,
             },
           ],
           usage: {

From 1476e2463cfbad30ecc2b79608efe3c735f5b8c0 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Mar 2026 13:54:08 +0000
Subject: [PATCH 2/5] feat: add browseros tool-calling mode

Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd

Co-authored-by: copsys <31281180+copsys@users.noreply.github.com>
---
 README.md           | 11 +++++++++++
 src/codex-client.ts | 18 +++++++++++++++++-
 src/codex.ts        |  2 ++
 src/index.ts        |  6 ++++++
 4 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2b0f956..918bfd6 100644
--- a/README.md
+++ b/README.md
@@ -35,6 +35,8 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c
 - **`temperature`** (number): Controls randomness (passed to the engine).
 - **`max_tokens`** (number): Limits the length of the generated response.
 - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`).
+- **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients.
+- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals.
 
 ## Quick Start
 
@@ -70,6 +72,15 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \
 - **Port**: Set via `PORT` environment variable (defaults to 8080).
 - **Models**: The proxy automatically queries your local Codex installation for available model slugs.
 
+### BrowserOS Configuration
+
+If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set:
+
+- `browseros_mode: true` in your `/v1/chat/completions` request body
+- keep sending `tools` and (optionally) `tool_choice`
+
+This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side.
+
 ## Architecture
 
 This project uses a typed `CodexClient` that manages a persistent `codex app-server` background process. Communication happens over a high-speed JSON-RPC channel on `stdio`, ensuring that the model state remains warm and ready for immediate inference.
diff --git a/src/codex-client.ts b/src/codex-client.ts
index 8c4fff0..21ce40a 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -121,7 +121,12 @@ export class CodexClient {
 
   async *chatCompletionStream(
     messages: Message[],
-    options: { model: string; tools?: any[]; tool_choice?: any },
+    options: {
+      model: string;
+      tools?: any[];
+      tool_choice?: any;
+      browseros_mode?: boolean;
+    },
   ): AsyncGenerator<CodexStreamEvent> {
     const hasTools = options.tools && options.tools.length > 0;
 
@@ -151,6 +156,17 @@ export class CodexClient {
       baseInstructions = (baseInstructions || "") + toolBlock;
     }
 
+    if (hasTools && options.browseros_mode) {
+      const browserOsToolModeInstructions =
+        `\n\n## BrowserOS Tool Execution Mode\n\n` +
+        `You are running as a tool-calling planner inside BrowserOS. ` +
+        `You can and must control the browser by emitting tool calls. ` +
+        `Do not claim you cannot access or control the browser/environment. ` +
+        `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` +
+        `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`;
+      baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions;
+    }
+
     // --- Format conversation messages into prompt ---
     let fullPrompt = "";
     for (const msg of nonSystemMessages) {
diff --git a/src/codex.ts b/src/codex.ts
index fcfe9c7..9528fca 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -38,6 +38,7 @@ export interface CodexOptions {
   signal?: AbortSignal;
   tools?: any[];
   tool_choice?: any;
+  browseros_mode?: boolean;
 }
 
 export interface ParsedToolCall {
@@ -141,5 +142,6 @@ export async function* execCodexStream(
     model: options.model,
     tools: options.tools,
     tool_choice: options.tool_choice,
+    browseros_mode: options.browseros_mode,
   });
 }
diff --git a/src/index.ts b/src/index.ts
index f4bd520..2d0b979 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -58,6 +58,7 @@ Bun.serve({
         const reasoning_effort = body.reasoning_effort;
         const tools = body.tools;
         const tool_choice = body.tool_choice;
+        const browseros_mode = body.browseros_mode === true;
 
         const stream = body.stream === true;
 
@@ -69,6 +70,9 @@ Bun.serve({
         if (tools) {
           console.log(`[Proxy] Tools count: ${tools.length}`);
         }
+        if (browseros_mode) {
+          console.log(`[Proxy] BrowserOS mode enabled`);
+        }
 
         if (stream) {
           const responseId = `chatcmpl-${Date.now()}`;
@@ -88,6 +92,7 @@ Bun.serve({
                   signal: req.signal,
                   tools,
                   tool_choice,
+                  browseros_mode,
                 })) {
                   if (req.signal.aborted) break;
 
@@ -269,6 +274,7 @@ Bun.serve({
             signal: req.signal,
             tools,
             tool_choice,
+            browseros_mode,
           })) {
             if (req.signal.aborted) break;
             if (event.type === "message") {

From 27ad1519e7d21f95e5f4ddfa1a1d03ff3304a3fd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 28 Mar 2026 13:54:31 +0000
Subject: [PATCH 3/5] chore: address code review naming feedback

Agent-Logs-Url: https://github.com/copsys/codex-app-proxy/sessions/41e79ae7-148a-4f6d-970c-4f5ad05386fd

Co-authored-by: copsys <31281180+copsys@users.noreply.github.com>
---
 src/codex-client.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/codex-client.ts b/src/codex-client.ts
index 21ce40a..dbf917c 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -157,14 +157,14 @@ export class CodexClient {
     }
 
     if (hasTools && options.browseros_mode) {
-      const browserOsToolModeInstructions =
+      const browserOSToolModeInstructions =
         `\n\n## BrowserOS Tool Execution Mode\n\n` +
         `You are running as a tool-calling planner inside BrowserOS. ` +
         `You can and must control the browser by emitting tool calls. ` +
         `Do not claim you cannot access or control the browser/environment. ` +
         `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` +
         `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`;
-      baseInstructions = (baseInstructions || "") + browserOsToolModeInstructions;
+      baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions;
     }
 
     // --- Format conversation messages into prompt ---

From 5be8919dda83ee2d238178c92c370b4466903d2e Mon Sep 17 00:00:00 2001
From: Chetan Khobragade <31281180+copsys@users.noreply.github.com>
Date: Sat, 28 Mar 2026 15:13:14 +0000
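Subject: [PATCH 4/5] feat: enable browseros_mode by default when tools are provided

Also tighten the injected tool-calling instructions, accept tool
definitions given as top-level `name` with `input_schema` or
`parameters`, and ignore a non-array `tools` value in the request body.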

---
 README.md           | 11 +++++++----
 src/codex-client.ts |  6 ++++--
 src/codex.ts        | 24 +++++++++++++++++++++++-
 src/index.ts        | 18 ++++++++++++++----
 4 files changed, 48 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 918bfd6..3daf0db 100644
--- a/README.md
+++ b/README.md
@@ -36,7 +36,7 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c
 - **`max_tokens`** (number): Limits the length of the generated response.
 - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`).
 - **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients.
-- **`browseros_mode`** (boolean): Optional mode for BrowserOS-like agentic clients. When `true` (and tools are provided), the proxy adds stronger tool-execution instructions so the model emits tool calls instead of environment-limitation refusals.
+- **`browseros_mode`** (boolean): Optional strict mode toggle for BrowserOS-like agentic clients. When tools are provided, this mode is **enabled by default** unless you explicitly set `browseros_mode: false`.
 
 ## Quick Start
 
@@ -74,10 +74,13 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \
 
 ### BrowserOS Configuration
 
-If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, set:
+If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, verify:
 
-- `browseros_mode: true` in your `/v1/chat/completions` request body
-- keep sending `tools` and (optionally) `tool_choice`
+- you are sending `tools` in your `/v1/chat/completions` request body (this auto-enables BrowserOS strict mode)
+- optionally set `browseros_mode: true` explicitly for clarity
+- keep sending `tool_choice` when your client supports it
+
+To disable strict BrowserOS behavior for non-agentic use-cases, set `browseros_mode: false`.
 
 This proxy mode is designed to use the proxy only for LLM/provider behavior while BrowserOS continues to execute the actual browser tools on its side.
 
diff --git a/src/codex-client.ts b/src/codex-client.ts
index dbf917c..636f830 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -162,8 +162,10 @@ export class CodexClient {
         `You are running as a tool-calling planner inside BrowserOS. ` +
         `You can and must control the browser by emitting tool calls. ` +
         `Do not claim you cannot access or control the browser/environment. ` +
-        `When a browser action is requested, respond with tool calls only (and brief coordinating text only when necessary). ` +
-        `If an action needs multiple steps, emit the next required tool call(s) for the current step.\n`;
+        `If the request is actionable with available tools, your response MUST include at least one <tool_call> block. ` +
+        `Prefer tool-call-only output for action steps. ` +
+        `For shopping workflows, adding products to cart is permitted; avoid checkout/payment unless explicitly requested. ` +
+        `If an action needs multiple steps, emit only the next required tool call(s) for the current step.\n`;
       baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions;
     }
 
diff --git a/src/codex.ts b/src/codex.ts
index 9528fca..caa23c3 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -105,7 +105,19 @@ export function parseToolCalls(text: string): ParsedToolCall[] {
  * are available and the expected output format.
  */
 export function buildToolInstructions(tools: any[], tool_choice?: any): string {
-  let block = `\n\n## Available Tools\n\nYou have access to the following tools to perform actions. You MUST use these tools to fulfill the user's request. Do NOT describe steps or give instructions — instead, call the appropriate tool.\n\nTo call a tool, output one or more tool calls in this exact format (you may output multiple for parallel execution):\n<tool_call>{"name": "tool_name", "arguments": {"param": "value"}}</tool_call>\n\nIMPORTANT RULES:\n- ALWAYS use tool calls to act. NEVER respond with step-by-step instructions when a tool can do the job.\n- You can call multiple tools in a single response.\n- After a tool call, wait for the result before proceeding.\n- If the user asks you to navigate somewhere, use the navigate tool. If they ask you to click, use the click tool. Etc.\n\nHere are the tools:\n\n`;
+  let block =
+    `\n\n## Available Tools\n\n` +
+    `You are an agentic planner operating through external tools. ` +
+    `When tools are available, your next action MUST be emitted as tool calls, not prose refusals.\n\n` +
+    `Tool call output format (required):\n` +
+    `<tool_call>{"name": "tool_name", "arguments": {"param": "value"}}</tool_call>\n\n` +
+    `IMPORTANT RULES:\n` +
+    `- If a user request is actionable with provided tools, emit one or more <tool_call> blocks.\n` +
+    `- Do not say you cannot access the browser/environment when browser tools are provided.\n` +
+    `- Keep normal text minimal. Prefer tool-call-only responses for action steps.\n` +
+    `- After tool results are returned, emit the next tool call(s) needed to continue.\n` +
+    `- For commerce tasks, adding an item to cart is allowed; do not attempt checkout/payment unless user explicitly requests it.\n\n` +
+    `Here are the tools:\n\n`;
 
   for (const tool of tools) {
     if (tool.type === "function" && tool.function) {
@@ -116,6 +128,16 @@ export function buildToolInstructions(tools: any[], tool_choice?: any): string {
         block += `Parameters: ${JSON.stringify(fn.parameters)}\n`;
       }
       block += `\n`;
+    } else if (tool?.name) {
+      // Support alternate tool schemas used by some providers/agents.
+      block += `### ${tool.name}\n`;
+      if (tool.description) block += `${tool.description}\n`;
+      if (tool.input_schema) {
+        block += `Parameters: ${JSON.stringify(tool.input_schema)}\n`;
+      } else if (tool.parameters) {
+        block += `Parameters: ${JSON.stringify(tool.parameters)}\n`;
+      }
+      block += `\n`;
     }
   }
 
diff --git a/src/index.ts b/src/index.ts
index 2d0b979..ae61346 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -56,9 +56,12 @@ Bun.serve({
         const temperature = body.temperature;
         const max_tokens = body.max_tokens;
         const reasoning_effort = body.reasoning_effort;
-        const tools = body.tools;
+        const tools = Array.isArray(body.tools) ? body.tools : undefined;
         const tool_choice = body.tool_choice;
-        const browseros_mode = body.browseros_mode === true;
+        // Default to BrowserOS-style strict tool mode whenever tools are supplied,
+        // unless callers explicitly disable it with browseros_mode: false.
+        const browseros_mode =
+          tools && tools.length > 0 ? body.browseros_mode !== false : false;
 
         const stream = body.stream === true;
 
@@ -70,8 +73,15 @@ Bun.serve({
         if (tools) {
           console.log(`[Proxy] Tools count: ${tools.length}`);
         }
-        if (browseros_mode) {
-          console.log(`[Proxy] BrowserOS mode enabled`);
+        if (tools && tools.length > 0) {
+          console.log(
+            `[Proxy] BrowserOS mode: ${browseros_mode ? "enabled" : "disabled"}`,
+          );
+          if (body.browseros_mode === undefined && browseros_mode) {
+            console.log(
+              `[Proxy] BrowserOS mode auto-enabled because tools were provided`,
+            );
+          }
         }
 
         if (stream) {

From dda3e7453318a59658a516dcc87f0bde5423892a Mon Sep 17 00:00:00 2001
From: Chetan Khobragade <31281180+copsys@users.noreply.github.com>
Date: Sat, 28 Mar 2026 15:19:07 +0000
Subject: [PATCH 5/5] Accept fenced/bare JSON tool calls; default tool_choice to required

---
 src/codex-client.ts |  3 ++
 src/codex.ts        | 82 ++++++++++++++++++++++++++++++++++++---------
 src/index.ts        |  3 +-
 3 files changed, 71 insertions(+), 17 deletions(-)

diff --git a/src/codex-client.ts b/src/codex-client.ts
index 636f830..6e400b9 100644
--- a/src/codex-client.ts
+++ b/src/codex-client.ts
@@ -294,6 +294,9 @@ export class CodexClient {
                 }
                 eventQueue.push({ type: "tool_calls", calls: toolCalls });
               } else {
+                console.warn(
+                  `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
+                );
                 // No tool calls found, emit as plain message
                 eventQueue.push({ type: "message", text: accumulatedText });
               }
diff --git a/src/codex.ts b/src/codex.ts
index caa23c3..bf74e12 100644
--- a/src/codex.ts
+++ b/src/codex.ts
@@ -75,27 +75,77 @@ export type CodexStreamEvent =
  */
 export function parseToolCalls(text: string): ParsedToolCall[] {
   const calls: ParsedToolCall[] = [];
-  const regex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
-  let match;
+  const seen = new Set<string>(); // "name::arguments" keys that were already emitted
   let callIndex = 0;
-  while ((match = regex.exec(text)) !== null) {
+
+  const pushCall = (raw: any) => {
+    const name = raw?.name || raw?.toolName || raw?.function?.name || "";
+    const argsRaw =
+      raw?.arguments ?? raw?.input ?? raw?.parameters ?? raw?.function?.arguments;
+    if (!name) return; // no recognizable name field: not treated as a tool call
+    const args =
+      typeof argsRaw === "string"
+        ? argsRaw // string arguments pass through unchanged (not re-validated as JSON)
+        : JSON.stringify(argsRaw ?? {});
+    const key = `${name}::${args}`; // identical name + serialized arguments are emitted only once
+    if (seen.has(key)) return;
+    seen.add(key);
+    calls.push({
+      id: `call_${Date.now()}_${callIndex++}`,
+      type: "function",
+      function: {
+        name,
+        arguments: args,
+      },
+    });
+  };
+
+  // Format 1: <tool_call>...</tool_call> blocks, each wrapping one JSON call object.
+  const taggedRegex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
+  let match;
+  while ((match = taggedRegex.exec(text)) !== null) {
+    try {
+      pushCall(JSON.parse(match[1].trim()));
+    } catch {
+      // Block body is not valid JSON; skip it and keep scanning.
+    }
+  }
+
+  // Format 2: fenced blocks (``` or ```json) holding a single call, a call list, or {tool_calls: [...]}.
+  const fencedJsonRegex = /```(?:json)?\s*([\s\S]*?)```/g;
+  while ((match = fencedJsonRegex.exec(text)) !== null) {
+    const candidate = match[1].trim();
     try {
-      const parsed = JSON.parse(match[1].trim());
-      calls.push({
-        id: `call_${Date.now()}_${callIndex++}`,
-        type: "function",
-        function: {
-          name: parsed.name || parsed.function?.name || "",
-          arguments:
-            typeof parsed.arguments === "string"
-              ? parsed.arguments
-              : JSON.stringify(parsed.arguments ?? parsed.parameters ?? {}),
-        },
-      });
+      const parsed = JSON.parse(candidate);
+      if (Array.isArray(parsed)) {
+        for (const item of parsed) pushCall(item);
+      } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) {
+        for (const item of parsed.tool_calls) pushCall(item);
+      } else {
+        pushCall(parsed);
+      }
     } catch {
-      // Skip malformed tool calls
+      // Fence body is not valid JSON; ignore it.
     }
   }
+
+  // Format 3: the entire trimmed response is one JSON object/array of tool calls.
+  const trimmed = text.trim();
+  if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
+    try {
+      const parsed = JSON.parse(trimmed);
+      if (Array.isArray(parsed)) {
+        for (const item of parsed) pushCall(item);
+      } else if (parsed?.tool_calls && Array.isArray(parsed.tool_calls)) {
+        for (const item of parsed.tool_calls) pushCall(item);
+      } else {
+        pushCall(parsed);
+      }
+    } catch {
+      // Starts like JSON but does not parse; ignore.
+    }
+  }
+
   return calls;
 }
 
diff --git a/src/index.ts b/src/index.ts
index ae61346..d9d6d57 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -57,11 +57,12 @@ Bun.serve({
         const max_tokens = body.max_tokens;
         const reasoning_effort = body.reasoning_effort;
         const tools = Array.isArray(body.tools) ? body.tools : undefined;
-        const tool_choice = body.tool_choice;
         // Default to BrowserOS-style strict tool mode whenever tools are supplied,
         // unless callers explicitly disable it with browseros_mode: false.
         const browseros_mode =
           tools && tools.length > 0 ? body.browseros_mode !== false : false;
+        const tool_choice =
+          body.tool_choice ?? (browseros_mode ? "required" : undefined);
 
         const stream = body.stream === true;