From 70316dc64a3873585db8e5b7d38153ab5e4c6810 Mon Sep 17 00:00:00 2001 From: Vijayan Sankar Date: Mon, 9 Feb 2026 12:10:13 +0530 Subject: [PATCH] feat: Add Ollama and local model support for skill generation and evaluation This commit adds comprehensive support for using Ollama and other local LLM providers (like LM Studio) with upskill's generate and eval commands. ## Changes ### Core Features - Add --base-url and --provider flags to both generate and eval commands - Monkey patch FastAgent's ModelFactory to handle unknown model names when GENERIC_BASE_URL is set - Auto-detect 'generic' provider when --base-url is provided - Set dummy API keys to bypass authentication checks when using local models ### Generation Improvements - Update prompt to be more explicit for smaller models - Add code fence stripping for models that wrap output in markdown blocks - Pass model parameter through RequestParams to all FastAgent calls - Support model override for all generation functions (generate_skill, generate_tests, improve_skill, refine_skill) ### Evaluation Improvements - Add environment variable configuration for eval command - Format model strings correctly for generic provider - Support loading skills from ./skills/ directory ### Bug Fixes - Fix classmethod monkey patch to properly access __func__ - Fix model formatting logic for eval_model parameter - Add {{agentSkills}} placeholder to skill_gen agent card to enable skill loading ## Usage Examples Generate skill with Ollama: upskill generate "parse YAML" --model llama3.2:latest \ --base-url http://localhost:11434/v1 --no-eval Evaluate skill with local model: upskill eval ./skills/my-skill --model qwen2.5-coder:7b \ --base-url http://localhost:11434/v1 --tests tests.json ## Technical Details The implementation uses FastAgent's generic provider support with environment variables: - GENERIC_BASE_URL: Points to local API endpoint (e.g., http://localhost:11434/v1) - GENERIC_API_KEY: Set to "local" (required but unused by Ollama) - ANTHROPIC_API_KEY: Set to "dummy" to bypass startup checks The monkey patch catches ModelConfigError for unknown models and falls back to generic provider when GENERIC_BASE_URL is configured. ## Limitations Test case generation with --eval-model in generate command may not work due to FastAgent's structured() method not properly respecting model overrides. Workaround: use --no-eval and provide test cases manually with --tests flag. --- src/upskill/agent_cards/skill_gen.md | 2 + src/upskill/cli.py | 148 +++++++++++++++++++++++++-- src/upskill/generate.py | 36 ++++++- 3 files changed, 172 insertions(+), 14 deletions(-) diff --git a/src/upskill/agent_cards/skill_gen.md b/src/upskill/agent_cards/skill_gen.md index b0ab3b0..c299237 100644 --- a/src/upskill/agent_cards/skill_gen.md +++ b/src/upskill/agent_cards/skill_gen.md @@ -6,6 +6,8 @@ You generate "skills" - instruction documents that teach AI coding agents how to When given a task description, create a skill document with clear instructions, examples, and best practices that will help an AI agent complete that type of task reliably. +{{agentSkills}} + ## Example 1: Git Commit Skill Task: "Write good git commit messages" diff --git a/src/upskill/cli.py b/src/upskill/cli.py index 6e8c680..aaed7d0 100644 --- a/src/upskill/cli.py +++ b/src/upskill/cli.py @@ -18,6 +18,36 @@ from rich.table import Table from rich.tree import Tree +# Monkey patch FastAgent to support local models (Ollama) +# This allows using models like 'llama3.2' without them being in FastAgent's known list +try: + from fast_agent.llm.model_factory import ModelFactory, ModelConfig, Provider, ModelConfigError + import os + + # Get the original __func__ to access the unbound classmethod + _original_parse_model_string_func = ModelFactory.parse_model_string.__func__ + + @classmethod + def _patched_parse_model_string(cls, model_string: str, aliases: dict[str, str] | None = None) -> ModelConfig: + try: + return _original_parse_model_string_func(cls, model_string, aliases) + except ModelConfigError: + # Fallback for generic provider if configured via environment + # This enables support for local models via Ollama/LM Studio + if os.environ.get("GENERIC_BASE_URL"): + return ModelConfig( + provider=Provider.GENERIC, + model_name=model_string, + ) + raise + + ModelFactory.parse_model_string = _patched_parse_model_string +except Exception as e: + # Don't crash if patching fails, just print warning + import sys + print(f"Warning: Failed to patch FastAgent for local model support: {e}", file=sys.stderr) + + from upskill.config import Config from upskill.evaluate import evaluate_skill, get_failure_descriptions from upskill.generate import generate_skill, generate_tests, improve_skill, refine_skill @@ -48,9 +78,14 @@ @asynccontextmanager async def _fast_agent_context() -> AsyncIterator[object]: + # Try to load skills from ./skills directory + skills_dir = Path("./skills") + skills_directory = skills_dir if skills_dir.exists() else None + fast = FastAgent( "upskill", ignore_unknown_args=True, + skills_directory=skills_directory, ) @fast.agent() @@ -184,6 +219,14 @@ def main(): @click.option("-o", "--output", type=click.Path(), help="Output directory for skill") @click.option("--no-eval", is_flag=True, help="Skip eval and refinement") @click.option("--eval-model", help="Model to evaluate skill on (different from generation model)") +@click.option( + "--provider", + type=click.Choice(["anthropic", "openai", "generic"]), + help="API provider (auto-detected as 'generic' when --base-url is provided)", +) +@click.option( + "--base-url", help="Custom API endpoint for local models (e.g., http://localhost:11434/v1)", +) @click.option("--runs-dir", type=click.Path(), help="Directory for run logs (default: ./runs)") @click.option("--log-runs/--no-log-runs", default=True, help="Log run data (default: enabled)") def generate( @@ -195,6 +238,8 @@ def generate( output: str | None, no_eval: bool, eval_model: str | None, + provider: str | None, + base_url: str | None, runs_dir: str | None, log_runs: bool, ): @@ -220,10 +265,18 @@ def generate( upskill generate "extract patterns" --from trace.json - # Evaluate on a local model (Ollama): + # Generate with Ollama (local model): + + upskill generate "parse YAML" --model llama3.2:latest \\ + --base-url http://localhost:11434/v1 + + upskill generate "document code" --model qwen2.5-coder:latest \\ + --provider generic --base-url http://localhost:11434/v1 + + # Evaluate on a different model: upskill generate "parse YAML" --eval-model llama3.2:latest \\ - --eval-base-url http://localhost:11434/v1 + --base-url http://localhost:11434/v1 upskill generate "document code" --no-log-runs """ @@ -247,12 +300,15 @@ def generate( output, no_eval, eval_model, + provider, + base_url, runs_dir, log_runs, ) ) + async def _generate_async( task: str, examples: list[str] | None, @@ -262,13 +318,45 @@ async def _generate_async( output: str | None, no_eval: bool, eval_model: str | None, + provider: str | None, + base_url: str | None, runs_dir: str | None, log_runs: bool, ): """Async implementation of generate command.""" + import os + config = Config.load() gen_model = model or config.model + # Configure generic provider for Ollama/local models + if base_url: + os.environ["GENERIC_BASE_URL"] = base_url + # Set a dummy API key if not already set (required by some providers) + if "GENERIC_API_KEY" not in os.environ: + os.environ["GENERIC_API_KEY"] = "local" + + # Also set dummy Anthropic key if missing to bypass startup checks + # This allows using local models without needing an Anthropic key + if "ANTHROPIC_API_KEY" not in os.environ: + os.environ["ANTHROPIC_API_KEY"] = "dummy" + + # Auto-detect provider as generic if base_url is provided + if not provider: + provider = "generic" + + # Format model string for FastAgent + # For non-generic providers, prepend provider prefix if not already present + # For generic provider (local models), DON'T prepend - let the monkey patch handle it + if provider and gen_model and provider != "generic": + known_providers = ["anthropic", "openai", "generic", "google", "bedrock", "vertex"] + has_provider_prefix = any(gen_model.startswith(f"{p}.") for p in known_providers) + + # Only prepend provider prefix for non-generic providers when it's missing + if not has_provider_prefix: + gen_model = f"{provider}.{gen_model}" + + # Setup run logging if enabled batch_id = None batch_folder = None @@ -306,7 +394,7 @@ async def _generate_async( task=task, examples=examples, generator=agent.skill_gen, - model=model, + model=gen_model, ) # Improve existing skill elif from_skill: @@ -319,7 +407,7 @@ async def _generate_async( existing_skill, instructions=task, generator=agent.skill_gen, - model=model, + model=gen_model, ) else: console.print(f"Generating skill with {gen_model}...", style="dim") @@ -327,14 +415,14 @@ async def _generate_async( task=task, examples=examples, generator=agent.skill_gen, - model=model, + model=gen_model, ) if no_eval: _save_and_display(skill, output, config) return console.print("Generating test cases...", style="dim") - test_cases = await generate_tests(task, generator=agent.test_gen, model=model) + test_cases = await generate_tests(task, generator=agent.test_gen, model=gen_model) # Eval loop with refinement (on generation model) prev_success_rate = 0.0 @@ -435,13 +523,22 @@ async def _generate_async( skill, failures, generator=agent.skill_gen, - model=model, + model=gen_model, ) # If eval_model specified, also eval on that model eval_results = None if eval_model: - console.print(f"Evaluating on {eval_model}...", style="dim") + # Format eval_model with provider prefix if needed (skip for generic provider) + formatted_eval_model = eval_model + if provider and provider != "generic": + known_providers = ["anthropic", "openai", "generic", "google", "bedrock", "vertex"] + has_provider_prefix = any(eval_model.startswith(f"{p}.") for p in known_providers) + + if not has_provider_prefix: + formatted_eval_model = f"{provider}.{eval_model}" + + console.print(f"Evaluating on {formatted_eval_model}...", style="dim") # Create run folder for eval model run_folder = None @@ -462,7 +559,7 @@ async def _generate_async( skill, test_cases, evaluator=agent.evaluator, - model=eval_model, + model=formatted_eval_model, ) # Log eval run results (both baseline and with-skill) @@ -704,10 +801,27 @@ async def _eval_async( runs_dir: str | None, ): """Async implementation of eval command.""" + import os from upskill.evaluate import run_test config = Config.load() skill_dir = Path(skill_path) + + # Configure generic provider for Ollama/local models + if base_url: + os.environ["GENERIC_BASE_URL"] = base_url + # Set a dummy API key if not already set (required by some providers) + if "GENERIC_API_KEY" not in os.environ: + os.environ["GENERIC_API_KEY"] = "local" + + # Also set dummy Anthropic key if missing to bypass startup checks + # This allows using local models without needing an Anthropic key + if "ANTHROPIC_API_KEY" not in os.environ: + os.environ["ANTHROPIC_API_KEY"] = "dummy" + + # Auto-detect provider as generic if base_url is provided + if not provider: + provider = "generic" try: skill = Skill.load(skill_dir) @@ -718,6 +832,22 @@ async def _eval_async( # Use default model if none specified if not models: models = [config.effective_eval_model] + + # Format model strings for FastAgent if provider is specified + # This must happen AFTER models is set to default if None + # For generic provider, DON'T prepend prefix - the monkey patch handles it + if models and provider and provider != "generic": + formatted_models = [] + for model in models: + known_providers = ["anthropic", "openai", "generic", "google", "bedrock", "vertex"] + has_provider_prefix = any(model.startswith(f"{p}.") for p in known_providers) + + # Only prepend provider prefix if not already present + if not has_provider_prefix: + formatted_models.append(f"{provider}.{model}") + else: + formatted_models.append(model) + models = formatted_models is_benchmark_mode = len(models) > 1 or num_runs > 1 diff --git a/src/upskill/generate.py b/src/upskill/generate.py index 4b6e054..dcc945f 100644 --- a/src/upskill/generate.py +++ b/src/upskill/generate.py @@ -2,9 +2,12 @@ from __future__ import annotations +import os from datetime import UTC, datetime +from fast_agent.agents.llm_agent import LlmAgent from fast_agent.interfaces import AgentProtocol +from fast_agent.llm.request_params import RequestParams from fast_agent.skills.registry import SkillManifest from upskill.manifest_utils import parse_skill_manifest_text @@ -109,14 +112,31 @@ async def generate_skill( ) -> Skill: """Generate a skill from a task description using FastAgent.""" - prompt = f"Create a skill document that teaches an AI agent how to: {task}" + prompt = f"Task: {task}\n\nOutput ONLY the complete skill document with YAML frontmatter and markdown body. Do NOT explain or describe what the document should contain - OUTPUT THE ACTUAL DOCUMENT DIRECTLY." if examples: prompt += "\n\nExample input/output pairs for this task:\n" + "\n".join( f"- {ex}" for ex in examples ) - skill_text = await generator.send(prompt) + # Pass model to FastAgent if specified + request_params = RequestParams(model=model) if model else None + skill_text = await generator.send(prompt, request_params=request_params) + + # Strip markdown code fences if present (common with smaller models) + skill_text = skill_text.strip() + + # Check if wrapped in code fences (```...```) + if skill_text.startswith("```"): + lines = skill_text.split("\n") + # Remove opening fence (could be ```markdown, ```yaml, or just ```) + if lines and lines[0].startswith("```"): + lines = lines[1:] + # Remove closing fence (and any trailing empty lines) + while lines and (lines[-1].strip() == "```" or lines[-1].strip() == ""): + lines = lines[:-1] + skill_text = "\n".join(lines).strip() + manifest, error = parse_skill_manifest_text(skill_text) if manifest is None: raise ValueError(f"Skill generator did not return valid SKILL.md: {error}") @@ -137,7 +157,9 @@ async def generate_tests( prompt = TEST_GENERATION_PROMPT.replace(TASK_PLACEHOLDER, task) - result, _ = await generator.structured(prompt, TestCaseSuite) + # Pass model to FastAgent if specified + request_params = RequestParams(model=model) if model else None + result, _ = await generator.structured(prompt, TestCaseSuite, request_params=request_params) if result is None: raise ValueError("Test generator did not return structured test cases.") @@ -182,7 +204,9 @@ async def refine_skill( Do not wrap the output in code fences. """ - skill_text = await generator.send(prompt) + # Pass model to FastAgent if specified + request_params = RequestParams(model=model) if model else None + skill_text = await generator.send(prompt, request_params=request_params) manifest, error = parse_skill_manifest_text(skill_text) if manifest is None: raise ValueError(f"Skill refinement did not return valid SKILL.md: {error}") @@ -252,7 +276,9 @@ async def improve_skill( ) - skill_text = await generator.send(prompt) + # Pass model to FastAgent if specified + request_params = RequestParams(model=model) if model else None + skill_text = await generator.send(prompt, request_params=request_params) manifest, error = parse_skill_manifest_text(skill_text) if manifest is None: raise ValueError(f"Skill improvement did not return valid SKILL.md: {error}")