vkop007 · copsys · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026
diff --git a/README.md b/README.md
@@ -35,6 +35,8 @@ The proxy supports the following OpenAI-compatible parameters in the `/v1/chat/c
 - **`temperature`** (number): Controls randomness (passed to the engine).
 - **`max_tokens`** (number): Limits the length of the generated response.
 - **`reasoning_effort`** (string): For models with reasoning capabilities (e.g., `low`, `medium`, `high`).
+- **`tools` / `tool_choice`**: Standard OpenAI tool-calling fields used by agentic clients.
+- **`browseros_mode`** (boolean): Optional strict mode toggle for BrowserOS-like agentic clients. When tools are provided, this mode is **enabled by default** unless you explicitly set `browseros_mode: false`.
 
 ## Quick Start
 
@@ -70,6 +72,18 @@ curl -N -X POST http://localhost:8080/v1/chat/completions \
 - **Port**: Set via `PORT` environment variable (defaults to 8080).
 - **Models**: The proxy automatically queries your local Codex installation for available model slugs.
 
+### BrowserOS Configuration
+
+If your BrowserOS agent sends tool definitions but the model replies with text like _"I’m unable to control the browser from this environment."_, check the following:
+
+- Send `tools` in your `/v1/chat/completions` request body (this auto-enables BrowserOS strict mode).
+- Optionally, set `browseros_mode: true` explicitly for clarity.
+- Keep sending `tool_choice` when your client supports it.
+
+To disable strict BrowserOS behavior for non-agentic use cases, set `browseros_mode: false`.
+
+In this mode, the proxy provides only the LLM/provider behavior, while BrowserOS continues to execute the actual browser tools on its side.
+
 ## Architecture
 
 This project uses a typed `CodexClient` that manages a persistent `codex app-server` background process. Communication happens over a high-speed JSON-RPC channel on `stdio`, ensuring that the model state remains warm and ready for immediate inference.

diff --git a/src/codex-client.ts b/src/codex-client.ts
@@ -3,6 +3,9 @@ import {
   getCodexBinaryPath,
   type Message,
   type CodexStreamEvent,
+  type ParsedToolCall,
+  parseToolCalls,
+  buildToolInstructions,
 } from "./codex";
 
 // Official V2 Types
@@ -118,17 +121,91 @@ export class CodexClient {
 
   async *chatCompletionStream(
     messages: Message[],
-    options: { model: string },
+    options: {
+      model: string;
+      tools?: any[];
+      tool_choice?: any;
+      browseros_mode?: boolean;
+    },
   ): AsyncGenerator<CodexStreamEvent> {
-    // Format full prompt
-    let fullPrompt = "";
+    const hasTools = options.tools && options.tools.length > 0;
+
+    // --- Extract system messages into baseInstructions ---
+    const systemParts: string[] = [];
+    const nonSystemMessages: Message[] = [];
     for (const msg of messages) {
-      const roleName = msg.role.toUpperCase();
-      const content =
-        typeof msg.content === "string"
-          ? msg.content
-          : JSON.stringify(msg.content);
-      fullPrompt += `[${roleName}]\n${content}\n\n`;
+      if (msg.role === "system") {
+        const content =
+          typeof msg.content === "string"
+            ? msg.content
+            : JSON.stringify(msg.content);
+        systemParts.push(content);
+      } else {
+        nonSystemMessages.push(msg);
+      }
+    }
+
+    let baseInstructions = systemParts.join("\n\n") || undefined;
+
+    // --- If tools are provided, inject tool definitions into instructions ---
+    if (hasTools) {
+      const toolBlock = buildToolInstructions(
+        options.tools!,
+        options.tool_choice,
+      );
+      baseInstructions = (baseInstructions || "") + toolBlock;
+    }
+
+    if (hasTools && options.browseros_mode) {
+      const browserOSToolModeInstructions =
+        `\n\n## BrowserOS Tool Execution Mode\n\n` +
+        `You are running as a tool-calling planner inside BrowserOS. ` +
+        `You can and must control the browser by emitting tool calls. ` +
+        `Do not claim you cannot access or control the browser/environment. ` +
+        `If the request is actionable with available tools, your response MUST include at least one <tool_call> block. ` +
+        `Prefer tool-call-only output for action steps. ` +
+        `For shopping workflows, adding products to cart is permitted; avoid checkout/payment unless explicitly requested. ` +
+        `If an action needs multiple steps, emit only the next required tool call(s) for the current step.\n`;
+      baseInstructions = (baseInstructions || "") + browserOSToolModeInstructions;
+    }
+
+    // --- Format conversation messages into prompt ---
+    let fullPrompt = "";
+    for (const msg of nonSystemMessages) {
+      if (msg.role === "tool") {
+        // Tool result message from BrowserOS
+        const toolCallId = (msg as any).tool_call_id || "unknown";
+        const toolName = (msg as any).name || "unknown";
+        const content =
+          typeof msg.content === "string"
+            ? msg.content
+            : JSON.stringify(msg.content);
+        fullPrompt += `[TOOL_RESULT] (tool_call_id: ${toolCallId}, name: ${toolName})\n${content}\n\n`;
+      } else if (msg.role === "assistant" && (msg as any).tool_calls) {
+        // Assistant message that contained tool calls (history from previous turns)
+        const toolCalls = (msg as any).tool_calls as any[];
+        let assistantContent = "";
+        if (msg.content) {
+          assistantContent +=
+            typeof msg.content === "string"
+              ? msg.content
+              : JSON.stringify(msg.content);
+          assistantContent += "\n";
+        }
+        for (const tc of toolCalls) {
+          if (tc.type === "function" && tc.function) {
+            assistantContent += `<tool_call>{"name": "${tc.function.name}", "arguments": ${tc.function.arguments}}</tool_call>\n`;
+          }
+        }
+        fullPrompt += `[ASSISTANT]\n${assistantContent}\n`;
+      } else {
+        const roleName = msg.role.toUpperCase();
+        const content =
+          typeof msg.content === "string"
+            ? msg.content
+            : JSON.stringify(msg.content);
+        fullPrompt += `[${roleName}]\n${content}\n\n`;
+      }
     }
     fullPrompt = (fullPrompt.trim() || "Please help me.") + "\n\n[ASSISTANT]\n";
 
@@ -137,6 +214,7 @@ export class CodexClient {
       cwd: process.cwd(),
       experimentalRawEvents: false,
       persistExtendedHistory: false,
+      ...(baseInstructions ? { baseInstructions } : {}),
     };
 
     const startRes = (await this.request(
@@ -166,10 +244,9 @@ export class CodexClient {
         input: input,
         cwd: process.cwd(),
         approvalPolicy: "never",
-        sandboxPolicy: {
-          type: "readOnly",
-          access: { type: "fullAccess" },
-        },
+        sandboxPolicy: hasTools
+          ? { type: "readOnly", access: { type: "fullAccess" } }
+          : { type: "dangerFullAccess" },
         model: options.model,
         effort: "none" as any,
         summary: "none" as any,
@@ -180,34 +257,113 @@ export class CodexClient {
       let turnDone = false;
       const eventQueue: CodexStreamEvent[] = [];
       let resolveNext: (() => void) | null = null;
+      let accumulatedText = "";
 
       const cleanup = this.onEvent((event) => {
         if (event.type === "notification") {
           const { method, params } = event;
 
           if (method === "item/agentMessage/delta") {
             const p = params as AgentMessageDeltaNotification;
-            eventQueue.push({ type: "message", text: p.delta });
+            accumulatedText += p.delta;
+            if (!hasTools) {
+              // When no tools, stream text directly
+              eventQueue.push({ type: "message", text: p.delta });
+            }
+            // When tools present, we buffer and parse at the end
           } else if (
             method === "item/reasoning/textDelta" ||
             method === "item/reasoning/summaryTextDelta"
           ) {
             const p = params as ReasoningTextDeltaNotification;
             eventQueue.push({ type: "reasoning", text: p.delta });
           } else if (method === "turn/completed") {
+            // If tools are present, check for tool calls in accumulated text
+            if (hasTools && accumulatedText) {
+              const toolCalls = parseToolCalls(accumulatedText);
+              if (toolCalls.length > 0) {
+                // Strip tool_call tags from text, emit remaining as content
+                const textWithoutToolCalls = accumulatedText
+                  .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
+                  .trim();
+                if (textWithoutToolCalls) {
+                  eventQueue.push({
+                    type: "message",
+                    text: textWithoutToolCalls,
+                  });
+                }
+                eventQueue.push({ type: "tool_calls", calls: toolCalls });
+              } else {
+                console.warn(
+                  `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
+                );
+                // No tool calls found, emit as plain message
+                eventQueue.push({ type: "message", text: accumulatedText });
+              }
+            }
             turnDone = true;
-            // If tools are present, check for tool calls in accumulated text
-            if (hasTools && accumulatedText) {
-              const toolCalls = parseToolCalls(accumulatedText);
-              if (toolCalls.length > 0) {
-                // Strip tool_call tags from text, emit remaining as content
-                const textWithoutToolCalls = accumulatedText
-                  .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
-                  .trim();
-                if (textWithoutToolCalls) {
-                  eventQueue.push({
-                    type: "message",
-                    text: textWithoutToolCalls,
-                  });
-                }
-                eventQueue.push({ type: "tool_calls", calls: toolCalls });
-              } else {
-                console.warn(
-                  `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
-                );
-                // No tool calls found, emit as plain message
-                eventQueue.push({ type: "message", text: accumulatedText });
-              }
-            }
-            turnDone = true;
+            if (!turnDone) {
+              // If tools are present, check for tool calls in accumulated text
+              if (hasTools && accumulatedText) {
+                const toolCalls = parseToolCalls(accumulatedText);
+                if (toolCalls.length > 0) {
+                  // Strip tool_call tags from text, emit remaining as content
+                  const textWithoutToolCalls = accumulatedText
+                    .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
+                    .trim();
+                  if (textWithoutToolCalls) {
+                    eventQueue.push({
+                      type: "message",
+                      text: textWithoutToolCalls,
+                    });
+                  }
+                  eventQueue.push({ type: "tool_calls", calls: toolCalls });
+                } else {
+                  console.warn(
+                    `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
+                  );
+                  // No tool calls found, emit as plain message
+                  eventQueue.push({ type: "message", text: accumulatedText });
+                }
+              }
+              turnDone = true;
+            }
-            // If tools are present, check for tool calls in accumulated text
-            if (hasTools && accumulatedText) {
-              const toolCalls = parseToolCalls(accumulatedText);
-              if (toolCalls.length > 0) {
-                // Strip tool_call tags from text, emit remaining as content
-                const textWithoutToolCalls = accumulatedText
-                  .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
-                  .trim();
-                if (textWithoutToolCalls) {
-                  eventQueue.push({
-                    type: "message",
-                    text: textWithoutToolCalls,
-                  });
-                }
-                eventQueue.push({ type: "tool_calls", calls: toolCalls });
-              } else {
-                console.warn(
-                  `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
-                );
-                // No tool calls found, emit as plain message
-                eventQueue.push({ type: "message", text: accumulatedText });
-              }
-            }
-            turnDone = true;
+            if (!turnDone) {
+              // If tools are present, check for tool calls in accumulated text
+              if (hasTools && accumulatedText) {
+                const toolCalls = parseToolCalls(accumulatedText);
+                if (toolCalls.length > 0) {
+                  // Strip tool_call tags from text, emit remaining as content
+                  const textWithoutToolCalls = accumulatedText
+                    .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
+                    .trim();
+                  if (textWithoutToolCalls) {
+                    eventQueue.push({
+                      type: "message",
+                      text: textWithoutToolCalls,
+                    });
+                  }
+                  eventQueue.push({ type: "tool_calls", calls: toolCalls });
+                } else {
+                  console.warn(
+                    `[CodexClient] Tools provided but no tool calls parsed. Assistant preview: ${accumulatedText.slice(0, 300).replace(/\s+/g, " ")}`,
+                  );
+                  // No tool calls found, emit as plain message
+                  eventQueue.push({ type: "message", text: accumulatedText });
+                }
+              }
+              turnDone = true;
+            }
           } else if (method === "error") {
             const p = params as ErrorNotification;
             const errMsg =
               p.error?.message || (p as any).message || "Unknown error";
             eventQueue.push({ type: "error", text: errMsg });
             turnDone = true;
+          } else if (method === "commandExecution/requestApproval") {
+            // Auto-approve command executions for agentic behavior
+            const approvalId = params?.approvalId;
+            if (approvalId) {
+              console.log(
+                `[CodexClient] Auto-approving command execution: ${params?.command || "unknown"}`,
+              );
+              this.request("commandExecution/sendApproval", {
+                approvalId,
+                decision: "accept",
+              }).catch(() => {});
+            }
+          } else if (method === "fileChange/requestApproval") {
+            // Auto-approve file changes for agentic behavior
+            const approvalId = params?.approvalId;
+            if (approvalId) {
+              console.log(`[CodexClient] Auto-approving file change`);
+              this.request("fileChange/sendApproval", {
+                approvalId,
+                decision: "accept",
+              }).catch(() => {});
+            }
+          } else if (method === "commandExecution/outputDelta") {
+            // Surface command output as message text
+            if (params?.delta) {
+              accumulatedText += params.delta;
+              if (!hasTools) {
+                eventQueue.push({ type: "message", text: params.delta });
+              }
+            }
           }
         } else if (event.type === "agent_message_content_delta") {
-          eventQueue.push({ type: "message", text: event.delta });
+          accumulatedText += event.delta;
+          if (!hasTools) {
+            eventQueue.push({ type: "message", text: event.delta });
+          }
         } else if (event.type === "reasoning_content_delta") {
           eventQueue.push({ type: "reasoning", text: event.delta });
         } else if (event.type === "task_complete") {
+          if (hasTools && accumulatedText) {
+            const toolCalls = parseToolCalls(accumulatedText);
+            if (toolCalls.length > 0) {
+              const textWithoutToolCalls = accumulatedText
+                .replace(/<tool_call>[\s\S]*?<\/tool_call>/g, "")
+                .trim();
+              if (textWithoutToolCalls) {
+                eventQueue.push({
+                  type: "message",
+                  text: textWithoutToolCalls,
+                });
+              }
+              eventQueue.push({ type: "tool_calls", calls: toolCalls });
+            } else {
+              eventQueue.push({ type: "message", text: accumulatedText });
+            }
+          }
           turnDone = true;
         }