diff --git a/crates/bashkit/src/scripted_tool/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs index dffd408c..ff71fdf9 100644 --- a/crates/bashkit/src/scripted_tool/execute.rs +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -145,6 +145,210 @@ impl Builtin for ToolBuiltinAdapter { } } +// ============================================================================ +// HelpBuiltin — runtime schema introspection +// ============================================================================ + +/// Snapshot of a tool definition for the `help` and `discover` builtins. +#[derive(Clone)] +struct ToolDefSnapshot { + name: String, + description: String, + input_schema: serde_json::Value, + tags: Vec, + category: Option, +} + +/// Built-in `help` command for runtime tool schema introspection. +/// +/// Modes: +/// - `help --list` — list all tool names + descriptions +/// - `help ` — human-readable usage +/// - `help --json` — machine-readable JSON schema +struct HelpBuiltin { + tools: Vec, +} + +#[async_trait] +impl Builtin for HelpBuiltin { + async fn execute(&self, ctx: Context<'_>) -> Result { + let args = ctx.args; + + if args.is_empty() || (args.len() == 1 && args[0] == "--list") { + // List all tools + let mut out = String::new(); + for t in &self.tools { + out.push_str(&format!("{:<20} {}\n", t.name, t.description)); + } + return Ok(ExecResult::ok(out)); + } + + // Find the tool name (first non-flag arg) + let tool_name = args.iter().find(|a| !a.starts_with("--")); + let json_mode = args.iter().any(|a| a == "--json"); + + let Some(tool_name) = tool_name else { + return Ok(ExecResult::err( + "usage: help [--list] [] [--json]".to_string(), + 1, + )); + }; + + let Some(tool) = self.tools.iter().find(|t| t.name == *tool_name) else { + return Ok(ExecResult::err( + format!("help: unknown tool: {tool_name}"), + 1, + )); + }; + + if json_mode { + // Machine-readable JSON output + let obj = serde_json::json!({ + "name": tool.name, + "description": 
tool.description, + "input_schema": tool.input_schema, + }); + let json_str = serde_json::to_string_pretty(&obj).unwrap_or_default(); + return Ok(ExecResult::ok(format!("{json_str}\n"))); + } + + // Human-readable output + let mut out = format!("{} - {}\n", tool.name, tool.description); + if let Some(usage) = usage_from_schema(&tool.input_schema) { + out.push_str(&format!("Usage: {} {}\n", tool.name, usage)); + } + Ok(ExecResult::ok(out)) + } +} + +// ============================================================================ +// DiscoverBuiltin — progressive tool discovery +// ============================================================================ + +/// Built-in `discover` command for exploring large tool sets. +struct DiscoverBuiltin { + tools: Vec, +} + +impl DiscoverBuiltin { + fn filter_tools(&self, args: &[String]) -> (Vec<&ToolDefSnapshot>, bool) { + let json_mode = args.iter().any(|a| a == "--json"); + + if args.iter().any(|a| a == "--categories") { + return (Vec::new(), json_mode); + } + + if let Some(pos) = args.iter().position(|a| a == "--category") { + let cat = args.get(pos + 1).map(|s| s.as_str()).unwrap_or(""); + let filtered: Vec<&ToolDefSnapshot> = self + .tools + .iter() + .filter(|t| t.category.as_deref() == Some(cat)) + .collect(); + return (filtered, json_mode); + } + + if let Some(pos) = args.iter().position(|a| a == "--tag") { + let tag = args.get(pos + 1).map(|s| s.as_str()).unwrap_or(""); + let filtered: Vec<&ToolDefSnapshot> = self + .tools + .iter() + .filter(|t| t.tags.iter().any(|tg| tg == tag)) + .collect(); + return (filtered, json_mode); + } + + if let Some(pos) = args.iter().position(|a| a == "--search") { + let keyword = args + .get(pos + 1) + .map(|s| s.to_lowercase()) + .unwrap_or_default(); + let filtered: Vec<&ToolDefSnapshot> = self + .tools + .iter() + .filter(|t| { + t.name.to_lowercase().contains(&keyword) + || t.description.to_lowercase().contains(&keyword) + }) + .collect(); + return (filtered, json_mode); + } + + 
(self.tools.iter().collect(), json_mode) + } +} + +#[async_trait] +impl Builtin for DiscoverBuiltin { + async fn execute(&self, ctx: Context<'_>) -> Result { + let args = ctx.args; + + if args.is_empty() { + return Ok(ExecResult::err( + "usage: discover --categories | --category | --tag | --search [--json]".to_string(), + 1, + )); + } + + let json_mode = args.iter().any(|a| a == "--json"); + + // --categories + if args.iter().any(|a| a == "--categories") { + let mut cats: std::collections::BTreeMap = + std::collections::BTreeMap::new(); + for t in &self.tools { + if let Some(ref cat) = t.category { + *cats.entry(cat.clone()).or_insert(0) += 1; + } + } + if json_mode { + let arr: Vec = cats + .iter() + .map(|(name, count)| serde_json::json!({"category": name, "count": count})) + .collect(); + let json_str = + serde_json::to_string_pretty(&arr).unwrap_or_else(|_| "[]".to_string()); + return Ok(ExecResult::ok(format!("{json_str}\n"))); + } + let mut out = String::new(); + for (name, count) in &cats { + let plural = if *count == 1 { "tool" } else { "tools" }; + out.push_str(&format!("{name} ({count} {plural})\n")); + } + return Ok(ExecResult::ok(out)); + } + + let (filtered, _) = self.filter_tools(args); + + if json_mode { + let arr: Vec = filtered + .iter() + .map(|t| { + let mut obj = serde_json::json!({ + "name": t.name, + "description": t.description, + }); + if !t.tags.is_empty() { + obj["tags"] = serde_json::json!(t.tags); + } + if let Some(ref cat) = t.category { + obj["category"] = serde_json::json!(cat); + } + obj + }) + .collect(); + let json_str = serde_json::to_string_pretty(&arr).unwrap_or_else(|_| "[]".to_string()); + return Ok(ExecResult::ok(format!("{json_str}\n"))); + } + + let mut out = String::new(); + for t in &filtered { + out.push_str(&format!("{:<20} {}\n", t.name, t.description)); + } + Ok(ExecResult::ok(out)) + } +} + // ============================================================================ // ScriptedTool — internal helpers // 
============================================================================ @@ -169,6 +373,29 @@ impl ScriptedTool { builder = builder.builtin(name, builtin); } + // Register the help and discover builtins + let snapshots: Vec = self + .tools + .iter() + .map(|t| ToolDefSnapshot { + name: t.def.name.clone(), + description: t.def.description.clone(), + input_schema: t.def.input_schema.clone(), + tags: t.def.tags.clone(), + category: t.def.category.clone(), + }) + .collect(); + builder = builder.builtin( + "help".to_string(), + Box::new(HelpBuiltin { + tools: snapshots.clone(), + }), + ); + builder = builder.builtin( + "discover".to_string(), + Box::new(DiscoverBuiltin { tools: snapshots }), + ); + builder.build() } @@ -242,10 +469,22 @@ impl ScriptedTool { prompt.push_str("Output: {stdout, stderr, exit_code}\n\n"); prompt.push_str("## Available tool commands\n\n"); - for t in &self.tools { - prompt.push_str(&format!("- `{}`: {}\n", t.def.name, t.def.description)); - if let Some(usage) = usage_from_schema(&t.def.input_schema) { - prompt.push_str(&format!(" Usage: `{} {}`\n", t.def.name, usage)); + + if self.compact_prompt { + // Compact mode: names + one-liners, defer details to `help` + for t in &self.tools { + prompt.push_str(&format!( + "- `{}`: {} (use `help {}` for params)\n", + t.def.name, t.def.description, t.def.name + )); + } + } else { + // Full mode: include usage hints from schema + for t in &self.tools { + prompt.push_str(&format!("- `{}`: {}\n", t.def.name, t.def.description)); + if let Some(usage) = usage_from_schema(&t.def.input_schema) { + prompt.push_str(&format!(" Usage: `{} {}`\n", t.def.name, usage)); + } } } @@ -258,6 +497,13 @@ impl ScriptedTool { - Standard builtins (echo, grep, sed, awk, etc.) 
are available\n", ); + if self.compact_prompt { + prompt.push_str( + "- Use `help ` for full usage, `help --json` for schema\n\ + - Use `help --list` to see all available tools\n", + ); + } + prompt } } @@ -364,6 +610,7 @@ impl Tool for ScriptedTool { #[cfg(test)] mod tests { use super::*; + use crate::ToolDef; #[test] fn test_parse_flags_key_value() { @@ -455,6 +702,155 @@ mod tests { ); } + // -- HelpBuiltin tests -- + + fn build_help_test_tool() -> ScriptedTool { + ScriptedTool::builder("test_api") + .short_description("Test API") + .tool( + ToolDef::new("get_user", "Fetch user by ID").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"} + } + })), + |_args: &super::ToolArgs| Ok("{\"id\":1}\n".to_string()), + ) + .tool( + ToolDef::new("list_orders", "List orders for user").with_schema( + serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer"}, + "limit": {"type": "integer"} + } + }), + ), + |_args: &super::ToolArgs| Ok("[]\n".to_string()), + ) + .build() + } + + #[tokio::test] + async fn test_help_list() { + let mut tool = build_help_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "help --list".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("get_user")); + assert!(resp.stdout.contains("Fetch user by ID")); + assert!(resp.stdout.contains("list_orders")); + } + + #[tokio::test] + async fn test_help_tool_human_readable() { + let mut tool = build_help_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "help get_user".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("get_user - Fetch user by ID")); + assert!(resp.stdout.contains("--id ")); + } + + #[tokio::test] + async fn test_help_tool_json() { + let mut tool = build_help_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "help get_user --json".to_string(), 
+ timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + let parsed: serde_json::Value = + serde_json::from_str(resp.stdout.trim()).expect("should be valid JSON"); + assert_eq!(parsed["name"], "get_user"); + assert_eq!(parsed["description"], "Fetch user by ID"); + assert!(parsed["input_schema"]["properties"]["id"].is_object()); + } + + #[tokio::test] + async fn test_help_unknown_tool() { + let mut tool = build_help_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "help nonexistent".to_string(), + timeout_ms: None, + }) + .await; + assert_ne!(resp.exit_code, 0); + assert!(resp.stderr.contains("unknown tool")); + } + + #[tokio::test] + async fn test_help_no_args_lists_all() { + let mut tool = build_help_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "help".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("get_user")); + assert!(resp.stdout.contains("list_orders")); + } + + #[tokio::test] + async fn test_help_json_pipe_jq() { + let mut tool = build_help_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "help get_user --json | jq -r '.name'".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "get_user"); + } + + #[tokio::test] + async fn test_compact_prompt_omits_usage() { + let tool = ScriptedTool::builder("compact_test") + .compact_prompt(true) + .tool( + ToolDef::new("get_user", "Fetch user").with_schema(serde_json::json!({ + "type": "object", + "properties": { "id": {"type": "integer"} } + })), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .build(); + let sp = tool.system_prompt(); + assert!(sp.contains("use `help get_user` for params")); + assert!(!sp.contains("Usage: `get_user --id `")); + assert!(sp.contains("help --json")); + } + + #[tokio::test] + async fn test_non_compact_prompt_has_usage() { + let tool = ScriptedTool::builder("full_test") + .tool( + 
ToolDef::new("get_user", "Fetch user").with_schema(serde_json::json!({ + "type": "object", + "properties": { "id": {"type": "integer"} } + })), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .build(); + let sp = tool.system_prompt(); + assert!(sp.contains("Usage: `get_user --id `")); + assert!(!sp.contains("use `help get_user` for params")); + } + #[tokio::test] async fn test_error_uses_display_not_debug() { use super::ScriptedTool; @@ -481,4 +877,189 @@ mod tests { ); } } + + // -- DiscoverBuiltin tests -- + + fn build_discover_test_tool() -> ScriptedTool { + ScriptedTool::builder("big_api") + .short_description("Big API") + .tool( + ToolDef::new("create_charge", "Create a payment charge") + .with_category("payments") + .with_tags(&["billing", "write"]), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .tool( + ToolDef::new("refund", "Issue a refund") + .with_category("payments") + .with_tags(&["billing", "write"]), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .tool( + ToolDef::new("get_user", "Fetch user by ID") + .with_category("users") + .with_tags(&["read"]), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .tool( + ToolDef::new("delete_user", "Delete a user account") + .with_category("users") + .with_tags(&["admin", "write"]), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .tool( + ToolDef::new("get_inventory", "Check inventory levels").with_category("inventory"), + |_args: &super::ToolArgs| Ok("ok\n".to_string()), + ) + .build() + } + + #[tokio::test] + async fn test_discover_categories() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --categories".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("payments (2 tools)")); + assert!(resp.stdout.contains("users (2 tools)")); + assert!(resp.stdout.contains("inventory (1 tool)")); + } + + #[tokio::test] + async fn 
test_discover_category_filter() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --category payments".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("create_charge")); + assert!(resp.stdout.contains("refund")); + assert!(!resp.stdout.contains("get_user")); + } + + #[tokio::test] + async fn test_discover_tag_filter() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --tag admin".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("delete_user")); + assert!(!resp.stdout.contains("create_charge")); + } + + #[tokio::test] + async fn test_discover_search() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --search user".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("get_user")); + assert!(resp.stdout.contains("delete_user")); + assert!(!resp.stdout.contains("create_charge")); + } + + #[tokio::test] + async fn test_discover_search_case_insensitive() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --search REFUND".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("refund")); + } + + #[tokio::test] + async fn test_discover_categories_json() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --categories --json".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + let arr: Vec = + serde_json::from_str(resp.stdout.trim()).expect("valid JSON"); + assert!( + arr.iter() + .any(|v| v["category"] == "payments" && v["count"] == 2) + ); + } + + #[tokio::test] + async fn 
test_discover_category_json() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --category payments --json".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + let arr: Vec = + serde_json::from_str(resp.stdout.trim()).expect("valid JSON"); + assert_eq!(arr.len(), 2); + assert!(arr.iter().any(|v| v["name"] == "create_charge")); + } + + #[tokio::test] + async fn test_discover_no_args_shows_usage() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover".to_string(), + timeout_ms: None, + }) + .await; + assert_ne!(resp.exit_code, 0); + assert!(resp.stderr.contains("usage:")); + } + + #[tokio::test] + async fn test_discover_tag_json() { + let mut tool = build_discover_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "discover --tag billing --json".to_string(), + timeout_ms: None, + }) + .await; + assert_eq!(resp.exit_code, 0); + let arr: Vec = + serde_json::from_str(resp.stdout.trim()).expect("valid JSON"); + assert_eq!(arr.len(), 2); + assert!(arr.iter().all(|v| { + v["tags"] + .as_array() + .expect("tags array") + .contains(&serde_json::json!("billing")) + })); + } + + #[tokio::test] + async fn test_tooldef_with_tags_and_category() { + let def = ToolDef::new("test", "A test tool") + .with_tags(&["admin", "billing"]) + .with_category("payments"); + assert_eq!(def.tags, vec!["admin", "billing"]); + assert_eq!(def.category.as_deref(), Some("payments")); + } } diff --git a/crates/bashkit/src/scripted_tool/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs index e99f2a81..35ec8931 100644 --- a/crates/bashkit/src/scripted_tool/mod.rs +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -137,6 +137,10 @@ pub struct ToolDef { pub description: String, /// JSON Schema describing accepted arguments. Empty object if unspecified. pub input_schema: serde_json::Value, + /// Categorical tags for discovery (e.g. 
`["admin", "billing"]`). + pub tags: Vec, + /// Grouping category for discovery (e.g. `"payments"`). + pub category: Option, } impl ToolDef { @@ -146,6 +150,8 @@ impl ToolDef { name: name.into(), description: description.into(), input_schema: serde_json::Value::Object(Default::default()), + tags: Vec::new(), + category: None, } } @@ -154,6 +160,18 @@ impl ToolDef { self.input_schema = schema; self } + + /// Add categorical tags for discovery filtering. + pub fn with_tags(mut self, tags: &[&str]) -> Self { + self.tags = tags.iter().map(|s| s.to_string()).collect(); + self + } + + /// Set the grouping category for discovery. + pub fn with_category(mut self, category: &str) -> Self { + self.category = Some(category.to_string()); + self + } } // ============================================================================ @@ -243,6 +261,7 @@ pub struct ScriptedToolBuilder { tools: Vec, limits: Option, env_vars: Vec<(String, String)>, + compact_prompt: bool, } impl ScriptedToolBuilder { @@ -253,6 +272,7 @@ impl ScriptedToolBuilder { tools: Vec::new(), limits: None, env_vars: Vec::new(), + compact_prompt: false, } } @@ -290,6 +310,16 @@ impl ScriptedToolBuilder { self } + /// Emit compact `system_prompt()` that omits full schemas and adds help tip. + /// + /// When enabled, `system_prompt()` lists only tool names + one-liners and + /// instructs the LLM to use `help ` / `help --json` for details. + /// Default: `false` (full schemas in prompt, backward compatible). + pub fn compact_prompt(mut self, compact: bool) -> Self { + self.compact_prompt = compact; + self + } + /// Build the [`ScriptedTool`]. 
pub fn build(self) -> ScriptedTool { let short_desc = self @@ -302,6 +332,7 @@ impl ScriptedToolBuilder { tools: self.tools, limits: self.limits, env_vars: self.env_vars, + compact_prompt: self.compact_prompt, } } } @@ -329,6 +360,7 @@ pub struct ScriptedTool { pub(crate) tools: Vec, pub(crate) limits: Option, pub(crate) env_vars: Vec<(String, String)>, + pub(crate) compact_prompt: bool, } impl ScriptedTool { diff --git a/specs/014-scripted-tool-orchestration.md b/specs/014-scripted-tool-orchestration.md index 3e251ec3..93ddcb07 100644 --- a/specs/014-scripted-tool-orchestration.md +++ b/specs/014-scripted-tool-orchestration.md @@ -25,16 +25,23 @@ pub struct ToolDef { pub name: String, pub description: String, pub input_schema: serde_json::Value, // JSON Schema, empty object if unset + pub tags: Vec, // categorical tags for discovery + pub category: Option, // grouping category for discovery } impl ToolDef { pub fn new(name: impl Into, description: impl Into) -> Self; pub fn with_schema(self, schema: serde_json::Value) -> Self; + pub fn with_tags(self, tags: &[&str]) -> Self; + pub fn with_category(self, category: &str) -> Self; } ``` Standard OpenAPI fields: `name`, `description`, `input_schema`. Schema is optional — defaults to `{}`. +Tags and category are optional metadata for progressive discovery. Tags are free-form labels +(e.g. `["admin", "billing"]`), category is a grouping key (e.g. `"payments"`). + ### ToolArgs — parsed arguments passed to callbacks ```rust @@ -115,6 +122,48 @@ Implements the `Tool` trait. On each `execute()`: Reusable — multiple `execute()` calls share the same `Arc` instances. +### Built-in `help` command + +Registered automatically alongside user tools. 
Provides runtime schema introspection: + +```bash +help --list # List all tool names + descriptions (also the default when no arguments are given) +help get_user # Human-readable usage +help get_user --json # Machine-readable JSON (pipeable to jq) +``` + +JSON output includes `name`, `description`, and `input_schema` — letting LLMs discover +enum values, required fields, etc. at runtime without loading all schemas into context. + +### Compact prompt mode + +`ScriptedToolBuilder::compact_prompt(true)` switches `system_prompt()` to a compact form +that lists only tool names + one-liners, deferring full schemas to `help`: + +```rust +ScriptedTool::builder("api") + .compact_prompt(true) + .tool(...) + .build() +``` + +This reduces context window usage for large tool sets (50+). Default: `false` (full +schemas in prompt, backward compatible). + +### Built-in `discover` command + +Registered automatically alongside `help`. Provides progressive tool discovery for large tool sets: + +```bash +discover --categories # List all categories with tool counts +discover --category payments # List tools in a category +discover --tag admin # Filter by tag +discover --search user # Search name + description (case-insensitive) +discover --category payments --json # Any mode supports --json output +``` + +Tools appear in `--category` and `--tag` results (and are counted by `--categories`) only when the matching metadata is set via `ToolDef::with_category()` / `ToolDef::with_tags()`; `--search` matches on name and description, so it covers every tool regardless of metadata. + ### LLM integration `system_prompt()` generates markdown with available tool commands, input schemas (when present), and tips. Example output: 
Example output: @@ -184,9 +233,11 @@ Run: `cargo run --example scripted_tool --features scripted_tool` ## Test coverage -35 unit tests covering: -- Builder configuration (name, description, defaults) +50 unit tests covering: +- Builder configuration (name, description, defaults, compact_prompt) - Introspection (help, system_prompt, schemas, schema rendering) +- Help builtin (--list, human-readable, --json, unknown tool, jq piping, compact vs full prompt) +- Discover builtin (--categories, --category, --tag, --search, --json, no-args usage, case-insensitive search, tag JSON, ToolDef with_tags/with_category) - Flag parsing (`--key value`, `--key=value`, boolean flags, type coercion) - Single tool execution - Pipeline with jq