antinomyhq · tusharmath · Mar 26, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/crates/forge_app/src/lib.rs b/crates/forge_app/src/lib.rs
@@ -51,7 +51,7 @@ pub use services::*;
 pub use template_engine::*;
 pub use tool_resolver::*;
 pub use user::*;
-pub use utils::compute_hash;
+pub use utils::{compute_hash, is_binary_content_type};
 pub use walker::*;
 pub use workspace_status::*;
 pub mod domain {

diff --git a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap
@@ -159,6 +159,8 @@ Returns complete output including stdout, stderr, and exit code for diagnostic p
 
 Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.
 
+IMPORTANT: This tool only handles text-based content (HTML, JSON, XML, plain text, etc.). It will reject binary file downloads (.tar.gz, .zip, .bin, .deb, images, audio, video, etc.) with an error. To download binary files, use the `shell` tool with `curl -fLo <output_file> <url>` instead.
+
 ---
 
 ### followup

diff --git a/crates/forge_app/src/utils.rs b/crates/forge_app/src/utils.rs
@@ -239,6 +239,52 @@ pub fn enforce_strict_schema(schema: &mut serde_json::Value, strict_mode: bool)
     }
 }
 
+/// Returns true if the Content-Type header indicates binary (non-text) content.
+///
+/// Only the media type is inspected: parameters after `;` (e.g. `charset`,
+/// `name`) are ignored and matching is ASCII case-insensitive. A blank value
+/// is reported as non-binary.
+///
+/// # Arguments
+/// * `content_type` - The Content-Type header value (e.g., "text/html",
+///   "application/octet-stream")
+///
+/// # Examples
+///
+/// ```
+/// use forge_app::is_binary_content_type;
+///
+/// // Text content types are not binary
+/// assert!(!is_binary_content_type("text/html; charset=utf-8"));
+/// assert!(!is_binary_content_type("application/json"));
+///
+/// // Binary content types are detected
+/// assert!(is_binary_content_type("image/png"));
+/// assert!(is_binary_content_type("application/octet-stream"));
+/// ```
+pub fn is_binary_content_type(content_type: &str) -> bool {
+    // Substrings that identify a text-based media type outside `text/*`.
+    const TEXT_MARKERS: [&str; 10] = [
+        "json", "xml", "javascript", "ecmascript", "yaml", "toml", "csv", "html", "svg",
+        "markdown",
+    ];
+    // Binary formats whose names contain a text marker: OOXML documents (ZIP
+    // archives), compiled HTML Help and WAP binary XML.
+    const BINARY_EXCEPTIONS: [&str; 3] = ["openxmlformats", "ms-htmlhelp", "wbxml"];
+
+    // Ignore parameters so `name="data.json"` cannot mask a binary type.
+    let essence = content_type.split(';').next().unwrap_or("");
+    let media_type = essence.trim().to_ascii_lowercase();
+    if media_type.is_empty() || media_type.starts_with("text/") {
+        return false;
+    }
+    if BINARY_EXCEPTIONS.iter().any(|name| media_type.contains(name)) {
+        return true;
+    }
+    // No text marker (application/gzip, image/*, audio/*, ...) means binary.
+    !TEXT_MARKERS.iter().any(|marker| media_type.contains(marker))
+}
+
 #[cfg(test)]
 mod tests {
     use pretty_assertions::assert_eq;

diff --git a/...tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap b/...tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap
@@ -10,7 +10,7 @@ expression: prompt
 <tool>{"name":"patch","description":"Performs exact string replacements in files.\nUsage:\n- You must use your `{{tool_names.read}}` tool at least once in the conversation before editing. This tool will error if you attempt an edit without reading the file. \n- When editing text from `{{tool_names.read}}` tool output, ensure you preserve the exact indentation (tabs/spaces) as it appears AFTER the line number prefix. The line number prefix format is: 'line_number:'. Everything after that line_number: is the actual file content to match. Never include any part of the line number prefix in the old_string or new_string.\n- ALWAYS prefer editing existing files in the codebase. NEVER write new files unless explicitly required.\n- Only use emojis if the user explicitly requests it. Avoid adding emojis to files unless asked.\n- The edit will FAIL if `old_string` is not unique in the file. Either provide a larger string with more surrounding context to make it unique or use `replace_all` to change every instance of `old_string`. \n- Use `replace_all` for replacing and renaming strings across the file. This parameter is useful if you want to rename a variable for instance.","arguments":{"file_path":{"description":"The absolute path to the file to modify","type":"string","is_required":true},"new_string":{"description":"The text to replace it with (must be different from old_string)","type":"string","is_required":true},"old_string":{"description":"The text to replace","type":"string","is_required":true},"replace_all":{"description":"Replace all occurrences of old_string (default false)","type":"boolean","is_required":false}}}</tool>
 <tool>{"name":"undo","description":"Reverts the most recent file operation (create/modify/delete) on a specific file. Use this tool when you need to recover from incorrect file changes or if a revert is requested by the user.","arguments":{"path":{"description":"The absolute path of the file to revert to its previous state.","type":"string","is_required":true}}}</tool>
 <tool>{"name":"shell","description":"Executes shell commands. The `cwd` parameter sets the working directory for command execution. If not specified, defaults to `{{env.cwd}}`.\n\nCRITICAL: Do NOT use `cd` commands in the command string. This is FORBIDDEN. Always use the `cwd` parameter to set the working directory instead. Any use of `cd` in the command is redundant, incorrect, and violates the tool contract.\n\nIMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead.\n\nBefore executing the command, please follow these steps:\n\n1. Directory Verification:\n   - If the command will create new directories or files, first use `shell` with `ls` to verify the parent directory exists and is the correct location\n   - For example, before running \"mkdir foo/bar\", first use `ls foo` to check that \"foo\" exists and is the intended parent directory\n\n2. Command Execution:\n   - Always quote file paths that contain spaces with double quotes (e.g., python \"path with spaces/script.py\")\n   - Examples of proper quoting:\n     - mkdir \"/Users/name/My Documents\" (correct)\n     - mkdir /Users/name/My Documents (incorrect - will fail)\n     - python \"/path/with spaces/script.py\" (correct)\n     - python /path/with spaces/script.py (incorrect - will fail)\n   - After ensuring proper quoting, execute the command.\n   - Capture the output of the command.\n\nUsage notes:\n  - The command argument is required.\n  - It is very helpful if you write a clear, concise description of what this command does in 5-10 words.\n  - If the output exceeds {{env.stdoutMaxPrefixLength}} prefix lines or {{env.stdoutMaxSuffixLength}} suffix lines, or if a line exceeds {{env.stdoutMaxLineLength}} characters, it will be truncated and the full output will be written to a temporary file. 
You can use read with start_line/end_line to read specific sections or fs_search to search the full content. Because of this, you do NOT need to use `head`, `tail`, or other truncation commands to limit output - just run the command directly.\n  - Avoid using {{tool_names.shell}} with the `find`, `grep`, `cat`, `head`, `tail`, `sed`, `awk`, or `echo` commands, unless explicitly instructed or when these commands are truly necessary for the task. Instead, always prefer using the dedicated tools for these commands:\n    - File search: Use `{{tool_names.fs_search}}` (NOT find or ls)\n    - Content search: Use `{{tool_names.fs_search}}` with regex (NOT grep or rg)\n    - Read files: Use `{{tool_names.read}}` (NOT cat/head/tail)\n    - Edit files: Use `{{tool_names.patch}}`(NOT sed/awk)\n    - Write files: Use `{{tool_names.write}}` (NOT echo >/cat <<EOF)\n    - Communication: Output text directly (NOT echo/printf)\n  - When issuing multiple commands:\n    - If the commands are independent and can run in parallel, make multiple `{{tool_names.shell}}` tool calls in a single message. For example, if you need to run \"git status\" and \"git diff\", send a single message with two `{{tool_names.shell}}` tool calls in parallel.\n    - If the commands depend on each other and must run sequentially, use a single `{{tool_names.shell}}` call with '&&' to chain them together (e.g., `git add . && git commit -m \"message\" && git push`). For instance, if one operation must complete before another starts (like mkdir before cp, write before shell for git operations, or git add before git commit), run these operations sequentially instead.\n    - Use ';' only when you need to run commands sequentially but don't care if earlier commands fail\n    - DO NOT use newlines to separate commands (newlines are ok in quoted strings)\n  - DO NOT use `cd <directory> && <command>`. 
Use the `cwd` parameter to change directories instead.\n\nGood examples:\n  - With explicit cwd: cwd=\"/foo/bar\" with command: pytest tests\n\nBad example:\n  cd /foo/bar && pytest tests\n\nReturns complete output including stdout, stderr, and exit code for diagnostic purposes.","arguments":{"command":{"description":"The shell command to execute.","type":"string","is_required":true},"cwd":{"description":"The working directory where the command should be executed.\nIf not specified, defaults to the current working directory from the\nenvironment.","type":"string","is_required":false},"description":{"description":"Clear, concise description of what this command does. Recommended to be\n5-10 words for simple commands. For complex commands with pipes or\nmultiple operations, provide more context. Examples: \"Lists files in\ncurrent directory\", \"Installs package dependencies\", \"Compiles Rust\nproject with release optimizations\".","type":"string","is_required":false},"env":{"description":"Environment variable names to pass to command execution (e.g., [\"PATH\",\n\"HOME\", \"USER\"]). The system automatically reads the specified\nvalues and applies them during command execution.","type":"array","is_required":false},"keep_ansi":{"description":"Whether to preserve ANSI escape codes in the output.\nIf true, ANSI escape codes will be preserved in the output.\nIf false (default), ANSI escape codes will be stripped from the output.","type":"boolean","is_required":false}}}</tool>
-<tool>{"name":"fetch","description":"Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.","arguments":{"raw":{"description":"Get raw content without any markdown conversion (default: false)","type":"boolean","is_required":false},"url":{"description":"URL to fetch","type":"string","is_required":true}}}</tool>
+<tool>{"name":"fetch","description":"Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.\n\nIMPORTANT: This tool only handles text-based content (HTML, JSON, XML, plain text, etc.). It will reject binary file downloads (.tar.gz, .zip, .bin, .deb, images, audio, video, etc.) with an error. To download binary files, use the `shell` tool with `curl -fLo <output_file> <url>` instead.","arguments":{"raw":{"description":"Get raw content without any markdown conversion (default: false)","type":"boolean","is_required":false},"url":{"description":"URL to fetch","type":"string","is_required":true}}}</tool>
 <tool>{"name":"followup","description":"Use this tool when you encounter ambiguities, need clarification, or require more details to proceed effectively. Use this tool judiciously to maintain a balance between gathering necessary information and avoiding excessive back-and-forth.","arguments":{"multiple":{"description":"If true, allows selecting multiple options; if false (default), only one\noption can be selected","type":"boolean","is_required":false},"option1":{"description":"First option to choose from","type":"string","is_required":false},"option2":{"description":"Second option to choose from","type":"string","is_required":false},"option3":{"description":"Third option to choose from","type":"string","is_required":false},"option4":{"description":"Fourth option to choose from","type":"string","is_required":false},"option5":{"description":"Fifth option to choose from","type":"string","is_required":false},"question":{"description":"Question to ask the user","type":"string","is_required":true}}}</tool>
 <tool>{"name":"plan","description":"Creates a new plan file with the specified name, version, and content. Use this tool to create structured project plans, task breakdowns, or implementation strategies that can be tracked and referenced throughout development sessions.","arguments":{"content":{"description":"The content to write to the plan file. This should be the complete\nplan content in markdown format.","type":"string","is_required":true},"plan_name":{"description":"The name of the plan (will be used in the filename)","type":"string","is_required":true},"version":{"description":"The version of the plan (e.g., \"v1\", \"v2\", \"1.0\")","type":"string","is_required":true}}}</tool>
 <tool>{"name":"skill","description":"Fetches detailed information about a specific skill. Use this tool to load skill content and instructions when you need to understand how to perform a specialized task. Skills provide domain-specific knowledge, workflows, and best practices. Only invoke skills that are listed in the available skills section. Do not invoke a skill that is already active.","arguments":{"name":{"description":"The name of the skill to fetch (e.g., \"pdf\", \"code_review\")","type":"string","is_required":true}}}</tool>

diff --git a/crates/forge_domain/src/tools/descriptions/net_fetch.md b/crates/forge_domain/src/tools/descriptions/net_fetch.md
@@ -1 +1,3 @@
-Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.
+Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.
+
+IMPORTANT: This tool only handles text-based content (HTML, JSON, XML, plain text, etc.). It will reject binary file downloads (.tar.gz, .zip, .bin, .deb, images, audio, video, etc.) with an error. To download binary files, use the `shell` tool with `curl -fLo <output_file> <url>` instead.
diff --git a/...repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap b/...repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap
@@ -474,7 +474,7 @@ expression: actual.tools
       "type": "object"
     },
     "strict": true,
-    "description": "Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access."
+    "description": "Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.\n\nIMPORTANT: This tool only handles text-based content (HTML, JSON, XML, plain text, etc.). It will reject binary file downloads (.tar.gz, .zip, .bin, .deb, images, audio, video, etc.) with an error. To download binary files, use the `shell` tool with `curl -fLo <output_file> <url>` instead."
   },
   {
     "type": "function",
-Original file line number
+Diff line change
@@ Expand Up @@
     Retrieves content from URLs as markdown or raw text. Enables access to current online information including websites, APIs and documentation. Use for obtaining up-to-date information beyond training data, verifying facts, or retrieving specific online content. Handles HTTP/HTTPS and converts HTML to readable markdown by default. Cannot access private/restricted resources requiring authentication. Respects robots.txt and may be blocked by anti-scraping measures. For large pages, returns the first 40,000 characters and stores the complete content in a temporary file for subsequent access.
+    IMPORTANT: This tool only handles text-based content (HTML, JSON, XML, plain text, etc.). It will reject binary file downloads (.tar.gz, .zip, .bin, .deb, images, audio, video, etc.) with an error. To download binary files, use the `shell` tool with `curl -fLo <output_file> <url>` instead.
     ---
     ### followup
@@ Expand Down @@