diff --git a/crates/forge_app/src/agent_executor.rs b/crates/forge_app/src/agent_executor.rs index 4c5ed94ff2..98d31473e4 100644 --- a/crates/forge_app/src/agent_executor.rs +++ b/crates/forge_app/src/agent_executor.rs @@ -36,12 +36,15 @@ impl AgentExecutor { } /// Executes an agent tool call by creating a new chat request for the - /// specified agent. + /// specified agent. If conversation_id is provided, the agent will reuse + /// that conversation, maintaining context across invocations. Otherwise, + /// a new conversation is created. pub async fn execute( &self, agent_id: AgentId, task: String, ctx: &ToolCallContext, + conversation_id: Option, ) -> anyhow::Result { ctx.send_tool_input( TitleFormat::debug(format!( @@ -52,13 +55,23 @@ impl AgentExecutor { ) .await?; - // Create a new conversation for agent execution - let conversation = Conversation::generate().title(task.clone()); - self.services - .conversation_service() - .upsert_conversation(conversation.clone()) - .await?; - // Execute the request through the ForgeApp + // Reuse existing conversation if provided, otherwise create a new one + let conversation = if let Some(cid) = conversation_id { + let conversation_id = forge_domain::ConversationId::parse(&cid) + .map_err(|_| Error::ConversationNotFound { id: cid.clone() })?; + self.services + .conversation_service() + .find_conversation(&conversation_id) + .await? + .ok_or(Error::ConversationNotFound { id: cid })? 
+ } else { + let conversation = Conversation::generate().title(task.clone()); + self.services + .conversation_service() + .upsert_conversation(conversation.clone()) + .await?; + conversation + }; let app = crate::ForgeApp::new(self.services.clone()); let mut response_stream = app .chat( diff --git a/crates/forge_app/src/dto/anthropic/transforms/capitalize_tool_names.rs b/crates/forge_app/src/dto/anthropic/transforms/capitalize_tool_names.rs index 4e8e6f693c..273b8c6aa7 100644 --- a/crates/forge_app/src/dto/anthropic/transforms/capitalize_tool_names.rs +++ b/crates/forge_app/src/dto/anthropic/transforms/capitalize_tool_names.rs @@ -21,6 +21,7 @@ impl Transformer for CapitalizeToolNames { tool.name = match tool.name.as_str() { "read" => "Read".to_string(), "write" => "Write".to_string(), + "task" => "Task".to_string(), _ => tool.name.clone(), }; } diff --git a/crates/forge_app/src/error.rs b/crates/forge_app/src/error.rs index df579c09d1..7c99f4810d 100644 --- a/crates/forge_app/src/error.rs +++ b/crates/forge_app/src/error.rs @@ -40,6 +40,9 @@ pub enum Error { #[error("Agent '{0}' not found")] AgentNotFound(forge_domain::AgentId), + #[error("Conversation '{id}' not found")] + ConversationNotFound { id: String }, + #[error("No active provider configured")] NoActiveProvider, diff --git a/crates/forge_app/src/fmt/fmt_input.rs b/crates/forge_app/src/fmt/fmt_input.rs index 931bcdcbfd..ef2d75ebd5 100644 --- a/crates/forge_app/src/fmt/fmt_input.rs +++ b/crates/forge_app/src/fmt/fmt_input.rs @@ -125,6 +125,9 @@ impl FormatContent for ToolCatalog { .into(), ), ToolCatalog::TodoRead(_) => Some(TitleFormat::debug("Read Todos").into()), + ToolCatalog::Task(input) => { + Some(TitleFormat::debug("Task").sub_title(&input.agent_id).into()) + } } } } diff --git a/crates/forge_app/src/orch.rs b/crates/forge_app/src/orch.rs index b189e95146..e25c69567c 100644 --- a/crates/forge_app/src/orch.rs +++ b/crates/forge_app/src/orch.rs @@ -6,6 +6,7 @@ use async_recursion::async_recursion; use 
derive_setters::Setters; use forge_domain::{Agent, *}; use forge_template::Element; +use futures::future::join_all; use tokio::sync::Notify; use tracing::warn; @@ -53,13 +54,39 @@ impl Orchestrator { // Helper function to get all tool results from a vector of tool calls #[async_recursion] - async fn execute_tool_calls<'a>( + async fn execute_tool_calls( &mut self, tool_calls: &[ToolCallFull], tool_context: &ToolCallContext, ) -> anyhow::Result> { - // Always process tool calls sequentially - let mut tool_call_records = Vec::with_capacity(tool_calls.len()); + let task_tool_name = ToolKind::Task.name(); + + // Use a case-insensitive comparison since the model may send "Task" or "task". + let is_task = |tc: &ToolCallFull| { + tc.name + .as_str() + .eq_ignore_ascii_case(task_tool_name.as_str()) + }; + + // Partition into task tool calls (run in parallel) and all others (run + // sequentially). Use a case-insensitive comparison since the model may + // send "Task" or "task". + let is_task_call = + |tc: &&ToolCallFull| tc.name.as_str().to_lowercase() == task_tool_name.as_str(); + let (task_calls, other_calls): (Vec<_>, Vec<_>) = tool_calls.iter().partition(is_task_call); + + // Execute task tool calls in parallel — mirrors how direct agent-as-tool calls + // work. + let task_results: Vec<(ToolCallFull, ToolResult)> = join_all( + task_calls + .iter() + .map(|tc| self.services.call(&self.agent, tool_context, (*tc).clone())), + ) + .await + .into_iter() + .zip(task_calls.iter()) + .map(|(result, tc)| ((*tc).clone(), result)) + .collect(); let system_tools = self .tool_definitions @@ -67,13 +94,17 @@ impl Orchestrator { .map(|tool| &tool.name) .collect::>(); - for tool_call in tool_calls { + // Process non-task tool calls sequentially (preserving UI notifier handshake + // and hooks). 
+ let mut other_results: Vec<(ToolCallFull, ToolResult)> = + Vec::with_capacity(other_calls.len()); + for tool_call in &other_calls { // Send the start notification for system tools and not agent as a tool let is_system_tool = system_tools.contains(&tool_call.name); if is_system_tool { let notifier = Arc::new(Notify::new()); self.send(ChatResponse::ToolCallStart { - tool_call: tool_call.clone(), + tool_call: (*tool_call).clone(), notifier: notifier.clone(), }) .await?; @@ -87,7 +118,7 @@ impl Orchestrator { let toolcall_start_event = LifecycleEvent::ToolcallStart(EventData::new( self.agent.clone(), self.agent.model.clone(), - ToolcallStartPayload::new(tool_call.clone()), + ToolcallStartPayload::new((*tool_call).clone()), )); self.hook .handle(&toolcall_start_event, &mut self.conversation) @@ -96,14 +127,14 @@ impl Orchestrator { // Execute the tool let tool_result = self .services - .call(&self.agent, tool_context, tool_call.clone()) + .call(&self.agent, tool_context, (*tool_call).clone()) .await; // Fire the ToolcallEnd lifecycle event (fires on both success and failure) let toolcall_end_event = LifecycleEvent::ToolcallEnd(EventData::new( self.agent.clone(), self.agent.model.clone(), - ToolcallEndPayload::new(tool_call.clone(), tool_result.clone()), + ToolcallEndPayload::new((*tool_call).clone(), tool_result.clone()), )); self.hook .handle(&toolcall_end_event, &mut self.conversation) @@ -114,11 +145,23 @@ impl Orchestrator { self.send(ChatResponse::ToolCallEnd(tool_result.clone())) .await?; } - // Ensure all tool calls and results are recorded - // Adding task completion records is critical for compaction to work correctly - tool_call_records.push((tool_call.clone(), tool_result)); + other_results.push(((*tool_call).clone(), tool_result)); } + // Reconstruct results in the original order of tool_calls. 
+ let mut task_iter = task_results.into_iter(); + let mut other_iter = other_results.into_iter(); + let tool_call_records = tool_calls + .iter() + .map(|tc| { + if is_task(tc) { + task_iter.next().expect("task result count mismatch") + } else { + other_iter.next().expect("other result count mismatch") + } + }) + .collect(); + Ok(tool_call_records) } diff --git a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap index c2def812f4..c6395656aa 100644 --- a/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap +++ b/crates/forge_app/src/snapshots/forge_app__tool_registry__all_rendered_tool_descriptions.snap @@ -391,3 +391,75 @@ Retrieves the current todo list for this coding session. Use this tool to check ## Output Returns all current todos with their IDs, content, and status (`pending`, `in_progress`, `completed`). If no todos exist yet, returns an empty list. + +--- + +### task + +Launch a new agent to handle complex, multi-step tasks autonomously. + +The task tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it. + +Available agent types and the tools they have access to: +- **sage**: Specialized in researching codebases + - Tools: read, fs_search, sem_search, fetch +- **debug**: Specialized in debugging issues + - Tools: read, shell, fs_search, sem_search, fetch + +When using the task tool, you must specify a agent_id parameter to select which agent type to use. 
+ +When NOT to use the task tool: +- If you want to read a specific file path, use the read or fs_search tool instead of the task tool, to find the match more quickly +- If you are searching for a specific class definition like "class Foo", use the fs_search tool instead, to find the match more quickly +- If you are searching for code within a specific file or set of 2-3 files, use the read tool instead of the task tool, to find the match more quickly +- Other tasks that are not related to the agent descriptions above + + +Usage notes: +- Always include a short description (3-5 words) summarizing what the agent will do +- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses +- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result. +- Agents can be resumed using the \`session_id\` parameter by passing the agent ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context. +- When the agent is done, it will return a single message back to you along with its agent ID. You can use this ID to resume the agent later if needed for follow-up work. +- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need. +- Agents with "access to current context" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., "investigate the error discussed above") instead of repeating information. The agent will receive all prior messages and understand the context. 
+- The agent's outputs should generally be trusted +- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent +- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement. +- If the user specifies that they want you to run agents "in parallel", you MUST send a single message with multiple task tool use content blocks. For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls. + +Example usage: + + +"test-runner": use this agent after you are done writing code to run tests +"greeting-responder": use this agent when to respond to user greetings with a friendly joke + + + +user: "Please write a function that checks if a number is prime" +assistant: Sure let me write a function that checks if a number is prime +assistant: First let me use the write tool to write a function that checks if a number is prime +assistant: I'm going to use the write tool to write the following code: + +function isPrime(n) { + if (n <= 1) return false + for (let i = 2; i * i <= n; i++) { + if (n % i === 0) return false + } + return true +} + + +Since a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests + +assistant: Now let me use the test-runner agent to run the tests +assistant: Uses the task tool to launch the test-runner agent + + + +user: "Hello" + +Since the user is greeting, use the greeting-responder agent to respond with a friendly joke + +assistant: "I'm going to use the task tool to launch the greeting-responder agent" + diff --git a/crates/forge_app/src/system_prompt.rs b/crates/forge_app/src/system_prompt.rs index b8131ed2d8..bb5a040287 100644 --- a/crates/forge_app/src/system_prompt.rs +++ 
b/crates/forge_app/src/system_prompt.rs @@ -110,6 +110,8 @@ impl SystemPrompt { model: None, tool_names, extensions, + agents: Vec::new(), /* Empty for system prompt (agents list is for tool + * descriptions only) */ }; let static_block = TemplateEngine::default() diff --git a/crates/forge_app/src/tool_executor.rs b/crates/forge_app/src/tool_executor.rs index b96bf2c708..32ee41e1b0 100644 --- a/crates/forge_app/src/tool_executor.rs +++ b/crates/forge_app/src/tool_executor.rs @@ -315,6 +315,10 @@ impl< let todos = context.get_todos()?; ToolOperation::TodoRead { output: todos } } + ToolCatalog::Task(_) => { + // Task tools are handled in ToolRegistry before reaching here + unreachable!("Task tool should be handled in ToolRegistry") + } }) } diff --git a/crates/forge_app/src/tool_registry.rs b/crates/forge_app/src/tool_registry.rs index 60cc9e8bd4..7537d8729e 100644 --- a/crates/forge_app/src/tool_registry.rs +++ b/crates/forge_app/src/tool_registry.rs @@ -105,6 +105,29 @@ impl ToolRegistry { // First, try to call a Forge tool if ToolCatalog::contains(&input.name) { let tool_input: ToolCatalog = ToolCatalog::try_from(input)?; + + // Special handling for Task tool - delegate to AgentExecutor + if let ToolCatalog::Task(task_input) = tool_input { + let executor = self.agent_executor.clone(); + let session_id = task_input.session_id.clone(); + let agent_id = task_input.agent_id.clone(); + // NOTE: Agents should not timeout + let outputs = join_all(task_input.tasks.into_iter().map(|task| { + let session_id = session_id.clone(); + let agent_id = agent_id.clone(); + let executor = executor.clone(); + async move { + executor + .execute(AgentId::new(&agent_id), task, context, session_id) + .await + } + })) + .await + .into_iter() + .collect::>>()?; + return Ok(ToolOutput::from(outputs.into_iter())); + } + let env = self.services.get_environment(); if let Some(content) = tool_input.to_content(&env) { context.send(content).await?; @@ -142,14 +165,20 @@ impl ToolRegistry { // 
Handle agent delegation tool calls let agent_input = AgentInput::try_from(&input)?; let executor = self.agent_executor.clone(); + let agent_name = input.name.as_str().to_string(); // NOTE: Agents should not timeout - let outputs = - join_all(agent_input.tasks.into_iter().map(|task| { - executor.execute(AgentId::new(input.name.as_str()), task, context) - })) - .await - .into_iter() - .collect::>>()?; + let outputs = join_all(agent_input.tasks.into_iter().map(|task| { + let agent_name = agent_name.clone(); + let executor = executor.clone(); + async move { + executor + .execute(AgentId::new(&agent_name), task, context, None) + .await + } + })) + .await + .into_iter() + .collect::>>()?; Ok(ToolOutput::from(outputs.into_iter())) } else if self.mcp_executor.contains_tool(&input.name).await? { let output = self @@ -212,6 +241,12 @@ impl ToolRegistry { let mcp_tools = self.services.get_mcp_servers().await?; let agent_tools = self.agent_executor.agent_definitions().await?; + // Get agents for template rendering in Task tool description + let agents = self.services.get_agents().await?; + + // Get current agent ID to filter it out from Task tool agent list + let current_agent_id = self.services.get_active_agent_id().await.ok().flatten(); + // Check if current working directory is indexed let environment = self.services.get_environment(); let cwd = environment.cwd.clone(); @@ -226,6 +261,8 @@ impl ToolRegistry { is_indexed && is_authenticated, &environment, model, + agents, + current_agent_id.as_ref(), )) .agents(agent_tools) .mcp(mcp_tools)) @@ -237,6 +274,8 @@ impl ToolRegistry { sem_search_supported: bool, env: &Environment, model: Option, + agents: Vec, + current_agent_id: Option<&AgentId>, ) -> Vec { use crate::TemplateEngine; @@ -258,11 +297,22 @@ impl ToolRegistry { }) .collect(); + // Filter out current agent to prevent self-delegation + let filtered_agents = if let Some(current_id) = current_agent_id { + agents + .into_iter() + .filter(|agent| agent.id != *current_id) 
+ .collect() + } else { + agents + }; + // Create template data with environment nested under "env" let ctx = SystemContext { env: Some(env.clone()), model, tool_names, + agents: filtered_agents, ..Default::default() }; @@ -378,7 +428,7 @@ mod tests { use pretty_assertions::assert_eq; use crate::error::Error; - use crate::tool_registry::ToolRegistry; + use crate::tool_registry::{ToolRegistry, create_test_agents}; fn agent() -> Agent { // only allow read and search tools for this agent @@ -631,7 +681,8 @@ mod tests { fn test_sem_search_included_when_supported() { use fake::{Fake, Faker}; let env: Environment = Faker.fake(); - let actual = ToolRegistry::<()>::get_system_tools(true, &env, None); + let actual = + ToolRegistry::<()>::get_system_tools(true, &env, None, create_test_agents(), None); assert!(actual.iter().any(|t| t.name.as_str() == "sem_search")); } @@ -639,11 +690,49 @@ mod tests { fn test_sem_search_filtered_when_not_supported() { use fake::{Fake, Faker}; let env: Environment = Faker.fake(); - let actual = ToolRegistry::<()>::get_system_tools(false, &env, None); + let actual = + ToolRegistry::<()>::get_system_tools(false, &env, None, create_test_agents(), None); assert!(actual.iter().all(|t| t.name.as_str() != "sem_search")); } } +#[cfg(test)] +fn create_test_agents() -> Vec { + use forge_domain::{Agent, AgentId, ModelId, ProviderId, ToolName}; + + vec![ + Agent::new( + AgentId::new("sage"), + ProviderId::ANTHROPIC, + ModelId::new("claude-3-5-sonnet-20241022"), + ) + .id(AgentId::new("sage")) + .title("Research Agent") + .description("Specialized in researching codebases") + .tools(vec![ + ToolName::new("read"), + ToolName::new("fs_search"), + ToolName::new("sem_search"), + ToolName::new("fetch"), + ]), + Agent::new( + AgentId::new("debug"), + ProviderId::ANTHROPIC, + ModelId::new("claude-3-5-sonnet-20241022"), + ) + .id(AgentId::new("debug")) + .title("Debug Agent") + .description("Specialized in debugging issues") + .tools(vec![ + ToolName::new("read"), 
+ ToolName::new("shell"), + ToolName::new("fs_search"), + ToolName::new("sem_search"), + ToolName::new("fetch"), + ]), + ] +} + #[cfg(test)] fn create_test_model( id: &str, @@ -671,7 +760,7 @@ fn test_template_rendering_in_tool_descriptions() { env.max_search_lines = 1000; env.max_line_length = 2000; - let actual = ToolRegistry::<()>::get_system_tools(true, &env, None); + let actual = ToolRegistry::<()>::get_system_tools(true, &env, None, create_test_agents(), None); let fs_search_tool = actual .iter() .find(|t| t.name.as_str() == "fs_search") @@ -703,7 +792,13 @@ fn test_dynamic_tool_description_with_vision_model() { env.max_image_size = 5000; // Set fixed value for deterministic test let vision_model = create_test_model("gpt-4o", vec![InputModality::Text, InputModality::Image]); - let tools_with_vision = ToolRegistry::<()>::get_system_tools(true, &env, Some(vision_model)); + let tools_with_vision = ToolRegistry::<()>::get_system_tools( + true, + &env, + Some(vision_model), + create_test_agents(), + None, + ); let read_tool = tools_with_vision .iter() .find(|t| t.name.as_str() == "read") @@ -722,7 +817,13 @@ fn test_dynamic_tool_description_with_text_only_model() { env.max_image_size = 5000; // Set fixed value for deterministic test let text_only_model = create_test_model("gpt-3.5-turbo", vec![InputModality::Text]); - let tools_text_only = ToolRegistry::<()>::get_system_tools(true, &env, Some(text_only_model)); + let tools_text_only = ToolRegistry::<()>::get_system_tools( + true, + &env, + Some(text_only_model), + create_test_agents(), + None, + ); let read_tool = tools_text_only .iter() .find(|t| t.name.as_str() == "read") @@ -868,7 +969,8 @@ fn test_dynamic_tool_description_without_model() { env.max_line_length = 2000; // When no model is provided, should default to showing minimal capabilities - let tools_no_model = ToolRegistry::<()>::get_system_tools(true, &env, None); + let tools_no_model = + ToolRegistry::<()>::get_system_tools(true, &env, None, 
create_test_agents(), None); let read_tool = tools_no_model .iter() .find(|t| t.name.as_str() == "read") @@ -891,7 +993,7 @@ fn test_all_rendered_tool_descriptions() { env.stdout_max_suffix_length = 200; env.stdout_max_line_length = 2000; - let tools = ToolRegistry::<()>::get_system_tools(true, &env, None); + let tools = ToolRegistry::<()>::get_system_tools(true, &env, None, create_test_agents(), None); // Verify all tools have rendered descriptions (no template syntax left) for tool in &tools { diff --git a/crates/forge_app/src/tool_resolver.rs b/crates/forge_app/src/tool_resolver.rs index 8993ae0bb5..2f3d118a6a 100644 --- a/crates/forge_app/src/tool_resolver.rs +++ b/crates/forge_app/src/tool_resolver.rs @@ -15,6 +15,7 @@ fn deprecated_tool_aliases() -> HashMap<&'static str, ToolName> { ("search", ToolName::new("fs_search")), ("Read", ToolName::new("read")), ("Write", ToolName::new("write")), + ("Task", ToolName::new("task")), ]) } @@ -410,4 +411,23 @@ mod tests { assert!(ToolResolver::is_allowed(&fixture, &ToolName::new("write"))); assert!(ToolResolver::is_allowed(&fixture, &ToolName::new("Write"))); } + + #[test] + fn test_capitalized_task_alias() { + // Test that capitalized "Task" resolves to "task" + let all_tool_definitions = vec![ToolDefinition::new("task").description("Task Tool")]; + + let _tool_resolver = ToolResolver::new(all_tool_definitions); + + let fixture = Agent::new( + AgentId::new("test-agent"), + ProviderId::ANTHROPIC, + ModelId::new("claude-3-5-sonnet-20241022"), + ) + .tools(vec![ToolName::new("task")]); + + // Both lowercase and capitalized should be allowed + assert!(ToolResolver::is_allowed(&fixture, &ToolName::new("task"))); + assert!(ToolResolver::is_allowed(&fixture, &ToolName::new("Task"))); + } } diff --git a/crates/forge_app/src/transformers/strip_working_dir.rs b/crates/forge_app/src/transformers/strip_working_dir.rs index 078bcacbaf..ad47257cc1 100644 --- a/crates/forge_app/src/transformers/strip_working_dir.rs +++ 
b/crates/forge_app/src/transformers/strip_working_dir.rs @@ -84,6 +84,7 @@ impl Transformer for StripWorkingDir { | SummaryTool::Followup { .. } | SummaryTool::Plan { .. } | SummaryTool::Skill { .. } + | SummaryTool::Task { .. } | SummaryTool::Mcp { .. } | SummaryTool::TodoWrite { .. } | SummaryTool::TodoRead => { diff --git a/crates/forge_app/src/transformers/trim_context_summary.rs b/crates/forge_app/src/transformers/trim_context_summary.rs index 333a14b9fb..9207c77b5b 100644 --- a/crates/forge_app/src/transformers/trim_context_summary.rs +++ b/crates/forge_app/src/transformers/trim_context_summary.rs @@ -33,6 +33,8 @@ enum Operation<'a> { Plan(&'a str), /// Skill loading by name Skill(&'a str), + /// Task delegation to an agent + Task(&'a str), /// MCP tool call by name Mcp(&'a str), /// Todo operation - each todo_write is unique and won't be deduplicated @@ -56,6 +58,7 @@ fn to_op(tool: &SummaryTool) -> Operation<'_> { SummaryTool::Followup { question } => Operation::Followup(question), SummaryTool::Plan { plan_name } => Operation::Plan(plan_name), SummaryTool::Skill { name } => Operation::Skill(name), + SummaryTool::Task { agent_id } => Operation::Task(agent_id), SummaryTool::Mcp { name } => Operation::Mcp(name), SummaryTool::TodoWrite { .. 
} => Operation::Todo, SummaryTool::TodoRead => Operation::Todo, diff --git a/crates/forge_domain/src/agent.rs b/crates/forge_domain/src/agent.rs index 586b4572f5..dd53753a13 100644 --- a/crates/forge_domain/src/agent.rs +++ b/crates/forge_domain/src/agent.rs @@ -1,5 +1,6 @@ use derive_setters::Setters; use merge::Merge; +use serde::{Deserialize, Serialize}; use crate::{ AgentDefinition, AgentId, Compact, Error, EventContext, MaxTokens, ModelId, ProviderId, @@ -9,7 +10,7 @@ use crate::{ /// Runtime agent representation with required model and provider /// Created by converting AgentDefinition with resolved defaults -#[derive(Debug, Clone, PartialEq, Setters)] +#[derive(Debug, Clone, PartialEq, Setters, Serialize, Deserialize)] #[setters(strip_option, into)] pub struct Agent { /// Flag to enable/disable tool support for this agent. @@ -40,6 +41,7 @@ pub struct Agent { pub user_prompt: Option>, /// Tools that the agent can use + #[serde(skip_serializing_if = "Option::is_none")] pub tools: Option>, /// Maximum number of turns the agent can take diff --git a/crates/forge_domain/src/compact/summary.rs b/crates/forge_domain/src/compact/summary.rs index 371087419c..b3247cd15d 100644 --- a/crates/forge_domain/src/compact/summary.rs +++ b/crates/forge_domain/src/compact/summary.rs @@ -191,6 +191,7 @@ pub enum SummaryTool { Followup { question: String }, Plan { plan_name: String }, Skill { name: String }, + Task { agent_id: String }, Mcp { name: String }, TodoWrite { changes: Vec }, TodoRead, @@ -402,6 +403,7 @@ fn extract_tool_info(call: &ToolCallFull, current_todos: &[Todo]) -> Option Some(SummaryTool::TodoRead), + ToolCatalog::Task(input) => Some(SummaryTool::Task { agent_id: input.agent_id }), }; } diff --git a/crates/forge_domain/src/system_context.rs b/crates/forge_domain/src/system_context.rs index 2cb11b99b1..5b60bc1645 100644 --- a/crates/forge_domain/src/system_context.rs +++ b/crates/forge_domain/src/system_context.rs @@ -2,7 +2,7 @@ use derive_setters::Setters; 
use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; -use crate::{Environment, File, Model, Skill}; +use crate::{Agent, Environment, File, Model, Skill}; /// Statistics for a file extension #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] @@ -103,4 +103,8 @@ pub struct SystemContext { /// top `limit` extensions as defined in the `Extension` struct. #[serde(skip_serializing_if = "Option::is_none")] pub extensions: Option, + + /// List of available agents for task delegation + #[serde(skip_serializing_if = "Vec::is_empty")] + pub agents: Vec, } diff --git a/crates/forge_domain/src/tools/catalog.rs b/crates/forge_domain/src/tools/catalog.rs index d17aefa00b..0fa070b197 100644 --- a/crates/forge_domain/src/tools/catalog.rs +++ b/crates/forge_domain/src/tools/catalog.rs @@ -55,6 +55,8 @@ pub enum ToolCatalog { Skill(SkillFetch), TodoWrite(TodoWrite), TodoRead(TodoRead), + #[serde(alias = "Task")] + Task(TaskInput), } /// Input structure for agent tool calls. This serves as the generic schema @@ -69,6 +71,28 @@ pub struct AgentInput { pub tasks: Vec, } +/// Input structure for the Task tool - delegates work to specialized agents +#[derive(Default, Debug, Clone, Serialize, Deserialize, JsonSchema, ToolDescription, PartialEq)] +#[tool_description_file = "crates/forge_domain/src/tools/descriptions/task.md"] +pub struct TaskInput { + /// A list of clear and detailed descriptions of the tasks to be performed + /// by the agent in parallel. Provide sufficient context and specific + /// requirements to enable the agent to understand and execute the work + /// accurately. + pub tasks: Vec, + + /// The ID of the specialized agent to delegate to (e.g., "sage", "forge", + /// "muse") + pub agent_id: String, + + /// Optional session ID to continue an existing agent session. If not + /// provided, a new stateless session will be created. Use this to + /// maintain context across multiple task invocations with the same + /// agent. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub session_id: Option, +} + fn default_true() -> bool { true } @@ -767,6 +791,7 @@ impl ToolDescription for ToolCatalog { ToolCatalog::Skill(v) => v.description(), ToolCatalog::TodoWrite(v) => v.description(), ToolCatalog::TodoRead(v) => v.description(), + ToolCatalog::Task(v) => v.description(), } } } @@ -822,6 +847,7 @@ impl ToolCatalog { ToolCatalog::Write(_) => r#gen.into_root_schema_for::(), ToolCatalog::Plan(_) => r#gen.into_root_schema_for::(), ToolCatalog::Skill(_) => r#gen.into_root_schema_for::(), + ToolCatalog::Task(_) => r#gen.into_root_schema_for::(), ToolCatalog::TodoWrite(_) => r#gen.into_root_schema_for::(), ToolCatalog::TodoRead(_) => r#gen.into_root_schema_for::(), }; @@ -939,7 +965,8 @@ impl ToolCatalog { | ToolCatalog::Plan(_) | ToolCatalog::Skill(_) | ToolCatalog::TodoWrite(_) - | ToolCatalog::TodoRead(_) => None, + | ToolCatalog::TodoRead(_) + | ToolCatalog::Task(_) => None, } } diff --git a/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap b/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap index 940884ef49..96403701f6 100644 --- a/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap +++ b/crates/forge_domain/src/tools/definition/snapshots/forge_domain__tools__definition__usage__tests__tool_usage.snap @@ -16,3 +16,4 @@ expression: prompt {"name":"skill","description":"Fetches detailed information about a specific skill. Use this tool to load skill content and instructions when you need to understand how to perform a specialized task. Skills provide domain-specific knowledge, workflows, and best practices. Only invoke skills that are listed in the available skills section. 
Do not invoke a skill that is already active.","arguments":{"name":{"description":"The name of the skill to fetch (e.g., \"pdf\", \"code_review\")","type":"string","is_required":true}}} {"name":"todo_write","description":"Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.\nIt also helps the user understand the progress of the task and overall progress of their requests.\n\n## How It Works\n\nEach call sends only the items that changed — you do not need to repeat the whole list.\n\nEach item has two required fields:\n- `content`: The task description. This is the **unique key** — the server matches on content to decide whether to add or update.\n- `status`: One of `pending`, `in_progress`, `completed`, or `cancelled`.\n\n**Rules:**\n- Item with this `content` does **not** exist yet → **added** as a new task.\n- Item with this `content` already exists → its `status` is **updated**.\n- `status: cancelled` → the item is **removed** from the list entirely.\n- Items you do not mention are **left unchanged**.\n\nIDs are managed internally by the system and are never exposed to you.\n\n## When to Use This Tool\nUse this tool proactively in these scenarios:\n\n1. Complex multi-step tasks - When a task requires 3 or more distinct steps or actions\n2. Non-trivial and complex tasks - Tasks that require careful planning or multiple operations\n3. User explicitly requests todo list - When the user directly asks you to use the todo list\n4. User provides multiple tasks - When users provide a list of things to be done (numbered or comma-separated)\n5. After receiving new instructions - Immediately capture user requirements as todos\n6. When you start working on a task - Mark it as in_progress BEFORE beginning work. Ideally you should only have one todo as in_progress at a time\n7. 
After completing a task - Mark it as completed and add any new follow-up tasks discovered during implementation\n\n## When NOT to Use This Tool\n\nSkip using this tool when:\n1. There is only a single, straightforward task\n2. The task is trivial and tracking it provides no organizational benefit\n3. The task can be completed in less than 3 trivial steps\n4. The task is purely conversational or informational\n\nNOTE that you should not use this tool if there is only one trivial task to do. In this case you are better off just doing the task directly.\n\n## Examples of When to Use the Todo List\n\n\nUser: I want to add a dark mode toggle to the application settings. Make sure you run the tests and build when you're done!\nAssistant: I'll help add a dark mode toggle to your application settings. Let me create a todo list to track this implementation.\n*Creates todo list with the following items:*\n1. Creating dark mode toggle component in Settings page\n2. Adding dark mode state management (context/store)\n3. Implementing CSS-in-JS styles for dark theme\n4. Updating existing components to support theme switching\n5. Running tests and build process, addressing any failures or errors that occur\n*Begins working on the first task*\n\n\nThe assistant used the todo list because:\n1. Adding dark mode is a multi-step feature requiring UI, state management, and styling changes\n2. The user explicitly requested tests and build be run afterward\n3. The assistant inferred that tests and build need to pass by adding \"Ensure tests and build succeed\" as the final task\n\n\n\n\nUser: Help me rename the function getCwd to getCurrentWorkingDirectory across my project\nAssistant: Let me first search through your codebase to find all occurrences of 'getCwd'.\n*Uses grep or search tools to locate all instances of getCwd in the codebase*\nAssistant: I've found 15 instances of 'getCwd' across 8 different files. 
Let me create a todo list to track these changes.\n*Creates todo list with specific items for each file that needs updating*\n\n\nThe assistant used the todo list because:\n1. First, the assistant searched to understand the scope of the task\n2. Upon finding multiple occurrences across different files, it determined this was a complex task with multiple steps\n3. The todo list helps ensure every instance is tracked and updated systematically\n4. This approach prevents missing any occurrences and maintains code consistency\n\n\n\n\n\nUser: I need to implement these features for my e-commerce site: user registration, product catalog, shopping cart, and checkout flow.\nAssistant: I'll help implement these features. First, let's add all the features to the todo list.\n*Creates a todo list breaking down each feature into specific tasks based on the project architecture*\nAssistant: Let's start with user registration. This will involve creating database models, API endpoints, and frontend forms that integrate with your existing authentication system.\n\n\nThe assistant used the todo list because:\n1. The user provided multiple complex features to implement in a comma separated list\n2. The todo list helps organize these large features into manageable tasks\n3. This approach allows for tracking progress across the entire implementation\n\n\n\n\nUser: Can you help optimize my React application? It's rendering slowly and has performance issues.\nAssistant: I'll help optimize your React application. First, let me examine your codebase to identify potential performance bottlenecks.\n*Reviews component structure, render patterns, state management, and data fetching*\nAssistant: After analyzing your codebase, I've identified several performance issues. 
Let me create a todo list to track our optimization efforts.\n*Creates todo list with items like: 1) Implementing memoization for expensive calculations in ProductList, 2) Adding virtualization for long lists in Dashboard, 3) Optimizing image loading in Gallery component, 4) Fixing state update loops in ShoppingCart, 5) Reviewing bundle size and implementing code splitting*\nLet's start by implementing memoization for the expensive calculations in your ProductList component.\n\n\nThe assistant used the todo list because:\n1. First, the assistant examined the codebase to identify specific performance issues\n2. Based on this analysis, it identified multiple optimization opportunities\n3. Performance optimization is a non-trivial task requiring multiple steps\n4. The todo list helps methodically track improvements across different components\n5. This systematic approach ensures all performance bottlenecks are addressed\n\n\n\n## Examples of When NOT to Use the Todo List\n\n\nUser: How do I print 'Hello World' in Python?\nAssistant: In Python, you can print \"Hello World\" with this simple code:\n\npython\nprint(\"Hello World\")\n\nThis will output the text \"Hello World\" to the console when executed.\n\n\nThe assistant did not use the todo list because this is a single, trivial task that can be completed in one step. There's no need to track multiple tasks or steps for such a straightforward request.\n\n\n\n\nUser: What does the git status command do?\nAssistant: The git status command shows the current state of your working directory and staging area. It displays which changes have been staged, which haven't, and which files aren't being tracked by Git. This is a fundamental command for understanding the current state of your repository before making commits.\n\n\nThe assistant did not use the todo list because this is an informational request with no actual coding task to complete. 
The user is simply asking for an explanation, not for the assistant to perform multiple steps or tasks.\n\n\n\n\nUser: Can you add a comment to the calculateTotal function to explain what it does?\nAssistant: Sure, let me add a comment to the calculateTotal function to explain what it does.\n* Uses the patch tool to add a comment to the calculateTotal function *\n\n\nThe assistant did not use the todo list because this is a single, straightforward task confined to one location in the code. Adding a comment doesn't require tracking multiple steps or systematic organization.\n\n\n\n\nUser: Run npm install for me and tell me what happens.\nAssistant: I'll run the npm install command for you.\n\n*Executes: npm install*\n\nThe command completed successfully. Here's the output:\n[Output of npm install command]\n\nAll dependencies have been installed according to your package.json file.\n\n\nThe assistant did not use the todo list because this is a single command execution with immediate results. There are no multiple steps to track or organize, making the todo list unnecessary for this straightforward task.\n\n\n\n## Task States and Management\n\n1. **Task States**: Use these states to track progress:\n - `pending`: Task not yet started\n - `in_progress`: Currently working on (limit to ONE task at a time)\n - `completed`: Task finished successfully\n - `cancelled`: Task is no longer relevant — this removes it from the list\n\n2. **Task Management**:\n - Only send the items that changed — do not repeat unchanged items\n - Mark tasks `in_progress` BEFORE beginning work\n - Mark tasks `completed` IMMEDIATELY after finishing (don't batch completions)\n - Exactly ONE task must be `in_progress` at any time\n - Use `cancelled` to remove tasks that are no longer relevant\n - Complete current tasks before starting new ones\n\n3. 
**Task Completion Requirements**:\n - ONLY mark a task as `completed` when you have FULLY accomplished it\n - If you encounter errors, blockers, or cannot finish, keep the task as `in_progress`\n - When blocked, create a new task describing what needs to be resolved\n - Never mark a task as `completed` if:\n - Tests are failing\n - Implementation is partial\n - You encountered unresolved errors\n - You couldn't find necessary files or dependencies\n\n4. **Task Breakdown**:\n - Create specific, actionable items\n - Break complex tasks into smaller, manageable steps\n - Use clear, descriptive task names\n\nWhen in doubt, use this tool. Being proactive with task management demonstrates attentiveness and ensures you complete all requirements successfully.","arguments":{"todos":{"description":"List of todo items to create or update. Each item must have `content`\nand `status`. The server matches on `content` — if an item with the\nsame content exists it is updated; otherwise a new item is added.\nSet `status` to `cancelled` to remove an item.","type":"array","is_required":true}}} {"name":"todo_read","description":"Retrieves the current todo list for this coding session. Use this tool to check existing todos before making updates, or to review the current state of tasks at any point during the session.\n\n## When to Use This Tool\n\n- Before calling `todo_write`, to understand which tasks already exist and avoid duplicates\n- When you need to know what tasks are pending, in progress, or completed\n- To resume work after a break and understand the current state of tasks\n- When the user asks about the current task list or progress\n\n## Output\n\nReturns all current todos with their IDs, content, and status (`pending`, `in_progress`, `completed`). If no todos exist yet, returns an empty list.","arguments":{}} +{"name":"task","description":"Launch a new agent to handle complex, multi-step tasks autonomously. 
\n\nThe {{tool_names.task}} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it.\n\nAvailable agent types and the tools they have access to:\n{{#each agents}}\n- **{{id}}**{{#if description}}: {{description}}{{/if}}{{#if tools}}\n - Tools: {{#each tools}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}}\n{{/each}}\n\nWhen using the {{tool_names.task}} tool, you must specify an agent_id parameter to select which agent type to use.\n\nWhen NOT to use the {{tool_names.task}} tool:\n- If you want to read a specific file path, use the {{tool_names.read}} or {{tool_names.fs_search}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- If you are searching for a specific class definition like \"class Foo\", use the {{tool_names.fs_search}} tool instead, to find the match more quickly\n- If you are searching for code within a specific file or set of 2-3 files, use the {{tool_names.read}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- Other tasks that are not related to the agent descriptions above\n\n\nUsage notes:\n- Always include a short description (3-5 words) summarizing what the agent will do\n- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses\n- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.\n- Agents can be resumed using the `session_id` parameter by passing the session ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. 
When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context.\n- When the agent is done, it will return a single message back to you along with its session ID. You can use this ID to resume the agent later if needed for follow-up work.\n- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need.\n- Agents with \"access to current context\" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., \"investigate the error discussed above\") instead of repeating information. The agent will receive all prior messages and understand the context.\n- The agent's outputs should generally be trusted\n- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent\n- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.\n- If the user specifies that they want you to run agents \"in parallel\", you MUST send a single message with multiple {{tool_names.task}} tool use content blocks. 
For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls.\n\nExample usage:\n\n\n\"test-runner\": use this agent after you are done writing code to run tests\n\"greeting-responder\": use this agent to respond to user greetings with a friendly joke\n\n\n\nuser: \"Please write a function that checks if a number is prime\"\nassistant: Sure let me write a function that checks if a number is prime\nassistant: First let me use the {{tool_names.write}} tool to write a function that checks if a number is prime\nassistant: I'm going to use the {{tool_names.write}} tool to write the following code:\n\nfunction isPrime(n) {\n if (n <= 1) return false\n for (let i = 2; i * i <= n; i++) {\n if (n % i === 0) return false\n }\n return true\n}\n\n\nSince a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests\n\nassistant: Now let me use the test-runner agent to run the tests\nassistant: Uses the {{tool_names.task}} tool to launch the test-runner agent\n\n\n\nuser: \"Hello\"\n\nSince the user is greeting, use the greeting-responder agent to respond with a friendly joke\n\nassistant: \"I'm going to use the {{tool_names.task}} tool to launch the greeting-responder agent\"\n","arguments":{"agent_id":{"description":"The ID of the specialized agent to delegate to (e.g., \"sage\", \"forge\",\n\"muse\")","type":"string","is_required":true},"session_id":{"description":"Optional session ID to continue an existing agent session. If not\nprovided, a new stateless session will be created. Use this to\nmaintain context across multiple task invocations with the same\nagent.","type":"string","is_required":false},"tasks":{"description":"A list of clear and detailed descriptions of the tasks to be performed\nby the agent in parallel. 
Provide sufficient context and specific\nrequirements to enable the agent to understand and execute the work\naccurately.","type":"array","is_required":true}}} diff --git a/crates/forge_domain/src/tools/descriptions/task.md b/crates/forge_domain/src/tools/descriptions/task.md new file mode 100644 index 0000000000..9042583adb --- /dev/null +++ b/crates/forge_domain/src/tools/descriptions/task.md @@ -0,0 +1,67 @@ +Launch a new agent to handle complex, multi-step tasks autonomously. + +The {{tool_names.task}} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. Each agent type has specific capabilities and tools available to it. + +Available agent types and the tools they have access to: +{{#each agents}} +- **{{id}}**{{#if description}}: {{description}}{{/if}}{{#if tools}} + - Tools: {{#each tools}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}} +{{/each}} + +When using the {{tool_names.task}} tool, you must specify an agent_id parameter to select which agent type to use. + +When NOT to use the {{tool_names.task}} tool: +- If you want to read a specific file path, use the {{tool_names.read}} or {{tool_names.fs_search}} tool instead of the {{tool_names.task}} tool, to find the match more quickly +- If you are searching for a specific class definition like "class Foo", use the {{tool_names.fs_search}} tool instead, to find the match more quickly +- If you are searching for code within a specific file or set of 2-3 files, use the {{tool_names.read}} tool instead of the {{tool_names.task}} tool, to find the match more quickly +- Other tasks that are not related to the agent descriptions above + + +Usage notes: +- Always include a short description (3-5 words) summarizing what the agent will do +- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses +- When the agent is done, it will return a single message back to you. 
The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result. +- Agents can be resumed using the `session_id` parameter by passing the session ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context. +- When the agent is done, it will return a single message back to you along with its session ID. You can use this ID to resume the agent later if needed for follow-up work. +- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need. +- Agents with "access to current context" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., "investigate the error discussed above") instead of repeating information. The agent will receive all prior messages and understand the context. +- The agent's outputs should generally be trusted +- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent +- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement. +- If the user specifies that they want you to run agents "in parallel", you MUST send a single message with multiple {{tool_names.task}} tool use content blocks. For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls. 
+ +Example usage: + + +"test-runner": use this agent after you are done writing code to run tests +"greeting-responder": use this agent to respond to user greetings with a friendly joke + + + +user: "Please write a function that checks if a number is prime" +assistant: Sure let me write a function that checks if a number is prime +assistant: First let me use the {{tool_names.write}} tool to write a function that checks if a number is prime +assistant: I'm going to use the {{tool_names.write}} tool to write the following code: + +function isPrime(n) { + if (n <= 1) return false + for (let i = 2; i * i <= n; i++) { + if (n % i === 0) return false + } + return true +} + + +Since a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests + +assistant: Now let me use the test-runner agent to run the tests +assistant: Uses the {{tool_names.task}} tool to launch the test-runner agent + + + +user: "Hello" + +Since the user is greeting, use the greeting-responder agent to respond with a friendly joke + +assistant: "I'm going to use the {{tool_names.task}} tool to launch the greeting-responder agent" + \ No newline at end of file diff --git a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap index ce2da28a9c..f04f699e86 100644 --- a/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap +++ b/crates/forge_domain/src/tools/snapshots/forge_domain__tools__catalog__tests__tool_definition_json.snap @@ -402,3 +402,30 @@ expression: tools "title": "TodoRead", "type": "object" } +{ + "title": "TaskInput", + "description": "Input structure for the Task tool - delegates work to specialized agents", + "type": "object", + "properties": { + "agent_id": { + "description": "The ID of the specialized agent to delegate to (e.g., 
\"sage\", \"forge\",\n\"muse\")", + "type": "string" + }, + "session_id": { + "description": "Optional session ID to continue an existing agent session. If not\nprovided, a new stateless session will be created. Use this to\nmaintain context across multiple task invocations with the same\nagent.", + "type": "string", + "nullable": true + }, + "tasks": { + "description": "A list of clear and detailed descriptions of the tasks to be performed\nby the agent in parallel. Provide sufficient context and specific\nrequirements to enable the agent to understand and execute the work\naccurately.", + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "tasks", + "agent_id" + ] +} diff --git a/crates/forge_repo/src/agents/forge.md b/crates/forge_repo/src/agents/forge.md index 9d18f9918e..3e519e8b08 100644 --- a/crates/forge_repo/src/agents/forge.md +++ b/crates/forge_repo/src/agents/forge.md @@ -5,8 +5,8 @@ description: "Hands-on implementation agent that executes software development t reasoning: enabled: true tools: + - task - sem_search - - sage - fs_search - read - write @@ -128,6 +128,22 @@ Choose tools based on the nature of the task: - **Research Agent**: For deep architectural analysis, tracing complex flows across multiple files, or understanding system design decisions. +- When doing file search, prefer to use the {{tool_names.task}} tool in order to reduce context usage. +- You should proactively use the {{tool_names.task}} tool with specialized agents when the task at hand matches the agent's description. +- You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. 
Never use placeholders or guess missing parameters in tool calls. +- If the user specifies that they want you to run tools "in parallel", you MUST send a single message with multiple tool use content blocks. For example, if you need to launch multiple agents in parallel, send a single message with multiple {{tool_names.task}} tool calls. +- Use specialized tools instead of shell commands when possible. For file operations, use dedicated tools: {{tool_names.read}} for reading files instead of cat/head/tail, {{tool_names.patch}} for editing instead of sed/awk, and {{tool_names.write}} for creating files instead of echo redirection. Reserve {{tool_names.shell}} exclusively for actual system commands and terminal operations that require shell execution. +- VERY IMPORTANT: When exploring the codebase to gather context or to answer a question that is not a needle query for a specific file/class/function, it is CRITICAL that you use the {{tool_names.task}} tool instead of running search commands directly. + + +user: Where are errors from the client handled? +assistant: [Uses the {{tool_names.task}} tool to find the files that handle client errors instead of using {{tool_names.fs_search}} or {{tool_names.sem_search}} directly] + + +user: What is the codebase structure? 
+assistant: [Uses the {{tool_names.task}} tool] + + ## Code Output Guidelines: - Only output code when explicitly requested diff --git a/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap b/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap index 55a46263c1..af0a3b6035 100644 --- a/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap +++ b/crates/forge_repo/src/provider/openai_responses/snapshots/forge_repo__provider__openai_responses__request__tests__openai_responses_all_catalog_tools.snap @@ -675,5 +675,46 @@ expression: actual.tools }, "strict": true, "description": "Retrieves the current todo list for this coding session. Use this tool to check existing todos before making updates, or to review the current state of tasks at any point during the session.\n\n## When to Use This Tool\n\n- Before calling `todo_write`, to understand which tasks already exist and avoid duplicates\n- When you need to know what tasks are pending, in progress, or completed\n- To resume work after a break and understand the current state of tasks\n- When the user asks about the current task list or progress\n\n## Output\n\nReturns all current todos with their IDs, content, and status (`pending`, `in_progress`, `completed`). If no todos exist yet, returns an empty list." 
+ }, + { + "type": "function", + "name": "task", + "parameters": { + "additionalProperties": false, + "description": "Input structure for the Task tool - delegates work to specialized agents", + "properties": { + "agent_id": { + "description": "The ID of the specialized agent to delegate to (e.g., \"sage\", \"forge\",\n\"muse\")", + "type": "string" + }, + "session_id": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "description": "Optional session ID to continue an existing agent session. If not\nprovided, a new stateless session will be created. Use this to\nmaintain context across multiple task invocations with the same\nagent." + }, + "tasks": { + "description": "A list of clear and detailed descriptions of the tasks to be performed\nby the agent in parallel. Provide sufficient context and specific\nrequirements to enable the agent to understand and execute the work\naccurately.", + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": [ + "agent_id", + "session_id", + "tasks" + ], + "title": "TaskInput", + "type": "object" + }, + "strict": true, + "description": "Launch a new agent to handle complex, multi-step tasks autonomously. \n\nThe {{tool_names.task}} tool launches specialized agents (subprocesses) that autonomously handle complex tasks. 
Each agent type has specific capabilities and tools available to it.\n\nAvailable agent types and the tools they have access to:\n{{#each agents}}\n- **{{id}}**{{#if description}}: {{description}}{{/if}}{{#if tools}}\n - Tools: {{#each tools}}{{this}}{{#unless @last}}, {{/unless}}{{/each}}{{/if}}\n{{/each}}\n\nWhen using the {{tool_names.task}} tool, you must specify an agent_id parameter to select which agent type to use.\n\nWhen NOT to use the {{tool_names.task}} tool:\n- If you want to read a specific file path, use the {{tool_names.read}} or {{tool_names.fs_search}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- If you are searching for a specific class definition like \"class Foo\", use the {{tool_names.fs_search}} tool instead, to find the match more quickly\n- If you are searching for code within a specific file or set of 2-3 files, use the {{tool_names.read}} tool instead of the {{tool_names.task}} tool, to find the match more quickly\n- Other tasks that are not related to the agent descriptions above\n\n\nUsage notes:\n- Always include a short description (3-5 words) summarizing what the agent will do\n- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses\n- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result.\n- Agents can be resumed using the `session_id` parameter by passing the session ID from a previous invocation. When resumed, the agent continues with its full previous context preserved. When NOT resuming, each invocation starts fresh and you should provide a detailed task description with all necessary context.\n- When the agent is done, it will return a single message back to you along with its session ID. 
You can use this ID to resume the agent later if needed for follow-up work.\n- Provide clear, detailed prompts so the agent can work autonomously and return exactly the information you need.\n- Agents with \"access to current context\" can see the full conversation history before the tool call. When using these agents, you can write concise prompts that reference earlier context (e.g., \"investigate the error discussed above\") instead of repeating information. The agent will receive all prior messages and understand the context.\n- The agent's outputs should generally be trusted\n- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent\n- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement.\n- If the user specifies that they want you to run agents \"in parallel\", you MUST send a single message with multiple {{tool_names.task}} tool use content blocks. 
For example, if you need to launch both a build-validator agent and a test-runner agent in parallel, send a single message with both tool calls.\n\nExample usage:\n\n\n\"test-runner\": use this agent after you are done writing code to run tests\n\"greeting-responder\": use this agent to respond to user greetings with a friendly joke\n\n\n\nuser: \"Please write a function that checks if a number is prime\"\nassistant: Sure let me write a function that checks if a number is prime\nassistant: First let me use the {{tool_names.write}} tool to write a function that checks if a number is prime\nassistant: I'm going to use the {{tool_names.write}} tool to write the following code:\n\nfunction isPrime(n) {\n if (n <= 1) return false\n for (let i = 2; i * i <= n; i++) {\n if (n % i === 0) return false\n }\n return true\n}\n\n\nSince a significant piece of code was written and the task was completed, now use the test-runner agent to run the tests\n\nassistant: Now let me use the test-runner agent to run the tests\nassistant: Uses the {{tool_names.task}} tool to launch the test-runner agent\n\n\n\nuser: \"Hello\"\n\nSince the user is greeting, use the greeting-responder agent to respond with a friendly joke\n\nassistant: \"I'm going to use the {{tool_names.task}} tool to launch the greeting-responder agent\"\n" } ]