Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/upskill/agent_cards/skill_gen.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ You generate "skills" - instruction documents that teach AI coding agents how to

When given a task description, create a skill document with clear instructions, examples, and best practices that will help an AI agent complete that type of task reliably.

{{agentSkills}}

## Example 1: Git Commit Skill

Task: "Write good git commit messages"
Expand Down
148 changes: 139 additions & 9 deletions src/upskill/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,36 @@
from rich.table import Table
from rich.tree import Tree

# Local-model (Ollama / LM Studio) support for FastAgent.
#
# FastAgent's ModelFactory rejects model strings it cannot parse (for example
# 'llama3.2').  The wrapper installed below retries such strings as a
# GENERIC-provider model, but only when GENERIC_BASE_URL is set in the
# environment at call time.
try:
    from fast_agent.llm.model_factory import ModelFactory, ModelConfig, Provider, ModelConfigError
    import os

    # Keep the plain function behind the classmethod so the wrapper can call
    # it with an explicit ``cls``.
    _original_parse_model_string_func = ModelFactory.parse_model_string.__func__

    @classmethod
    def _patched_parse_model_string(cls, model_string: str, aliases: dict[str, str] | None = None) -> ModelConfig:
        """Parse ``model_string``, falling back to the generic provider.

        The original parser is always tried first.  If it raises
        ``ModelConfigError`` and ``GENERIC_BASE_URL`` is set, a
        ``ModelConfig`` for ``Provider.GENERIC`` carrying the unmodified
        model string is returned; otherwise the original error propagates.
        """
        try:
            return _original_parse_model_string_func(cls, model_string, aliases)
        except ModelConfigError:
            # No generic endpoint configured: surface the original error.
            if not os.environ.get("GENERIC_BASE_URL"):
                raise
            return ModelConfig(
                provider=Provider.GENERIC,
                model_name=model_string,
            )

    ModelFactory.parse_model_string = _patched_parse_model_string
except Exception as e:
    # Best effort: a failed patch must not prevent the CLI from starting, so
    # report the problem on stderr and carry on.
    import sys
    print(f"Warning: Failed to patch FastAgent for local model support: {e}", file=sys.stderr)


from upskill.config import Config
from upskill.evaluate import evaluate_skill, get_failure_descriptions
from upskill.generate import generate_skill, generate_tests, improve_skill, refine_skill
Expand Down Expand Up @@ -48,9 +78,14 @@

@asynccontextmanager
async def _fast_agent_context() -> AsyncIterator[object]:
# Try to load skills from ./skills directory
skills_dir = Path("./skills")
skills_directory = skills_dir if skills_dir.exists() else None

fast = FastAgent(
"upskill",
ignore_unknown_args=True,
skills_directory=skills_directory,
)

@fast.agent()
Expand Down Expand Up @@ -184,6 +219,14 @@ def main():
@click.option("-o", "--output", type=click.Path(), help="Output directory for skill")
@click.option("--no-eval", is_flag=True, help="Skip eval and refinement")
@click.option("--eval-model", help="Model to evaluate skill on (different from generation model)")
@click.option(
"--provider",
type=click.Choice(["anthropic", "openai", "generic"]),
help="API provider (auto-detected as 'generic' when --base-url is provided)",
)
@click.option(
"--base-url", help="Custom API endpoint for local models (e.g., http://localhost:11434/v1)",
)
@click.option("--runs-dir", type=click.Path(), help="Directory for run logs (default: ./runs)")
@click.option("--log-runs/--no-log-runs", default=True, help="Log run data (default: enabled)")
def generate(
Expand All @@ -195,6 +238,8 @@ def generate(
output: str | None,
no_eval: bool,
eval_model: str | None,
provider: str | None,
base_url: str | None,
runs_dir: str | None,
log_runs: bool,
):
Expand All @@ -220,10 +265,18 @@ def generate(

upskill generate "extract patterns" --from trace.json

# Evaluate on a local model (Ollama):
# Generate with Ollama (local model):

upskill generate "parse YAML" --model llama3.2:latest \\
--base-url http://localhost:11434/v1

upskill generate "document code" --model qwen2.5-coder:latest \\
--provider generic --base-url http://localhost:11434/v1

# Evaluate on a different model:

upskill generate "parse YAML" --eval-model llama3.2:latest \\
--eval-base-url http://localhost:11434/v1
--base-url http://localhost:11434/v1

upskill generate "document code" --no-log-runs
"""
Expand All @@ -247,12 +300,15 @@ def generate(
output,
no_eval,
eval_model,
provider,
base_url,
runs_dir,
log_runs,
)
)



async def _generate_async(
task: str,
examples: list[str] | None,
Expand All @@ -262,13 +318,45 @@ async def _generate_async(
output: str | None,
no_eval: bool,
eval_model: str | None,
provider: str | None,
base_url: str | None,
runs_dir: str | None,
log_runs: bool,
):
"""Async implementation of generate command."""
import os

config = Config.load()
gen_model = model or config.model

# Configure generic provider for Ollama/local models
if base_url:
os.environ["GENERIC_BASE_URL"] = base_url
# Set a dummy API key if not already set (required by some providers)
if "GENERIC_API_KEY" not in os.environ:
os.environ["GENERIC_API_KEY"] = "local"

# Also set dummy Anthropic key if missing to bypass startup checks
# This allows using local models without needing an Anthropic key
if "ANTHROPIC_API_KEY" not in os.environ:
os.environ["ANTHROPIC_API_KEY"] = "dummy"

# Auto-detect provider as generic if base_url is provided
if not provider:
provider = "generic"

# Format model string for FastAgent
# For non-generic providers, prepend provider prefix if not already present
# For generic provider (local models), DON'T prepend - let the monkey patch handle it
if provider and gen_model and provider != "generic":
known_providers = ["anthropic", "openai", "generic", "google", "bedrock", "vertex"]
has_provider_prefix = any(gen_model.startswith(f"{p}.") for p in known_providers)

# Only prepend provider prefix for non-generic providers when it's missing
if not has_provider_prefix:
gen_model = f"{provider}.{gen_model}"


# Setup run logging if enabled
batch_id = None
batch_folder = None
Expand Down Expand Up @@ -306,7 +394,7 @@ async def _generate_async(
task=task,
examples=examples,
generator=agent.skill_gen,
model=model,
model=gen_model,
)
# Improve existing skill
elif from_skill:
Expand All @@ -319,22 +407,22 @@ async def _generate_async(
existing_skill,
instructions=task,
generator=agent.skill_gen,
model=model,
model=gen_model,
)
else:
console.print(f"Generating skill with {gen_model}...", style="dim")
skill = await generate_skill(
task=task,
examples=examples,
generator=agent.skill_gen,
model=model,
model=gen_model,
)
if no_eval:
_save_and_display(skill, output, config)
return

console.print("Generating test cases...", style="dim")
test_cases = await generate_tests(task, generator=agent.test_gen, model=model)
test_cases = await generate_tests(task, generator=agent.test_gen, model=gen_model)

# Eval loop with refinement (on generation model)
prev_success_rate = 0.0
Expand Down Expand Up @@ -435,13 +523,22 @@ async def _generate_async(
skill,
failures,
generator=agent.skill_gen,
model=model,
model=gen_model,
)

# If eval_model specified, also eval on that model
eval_results = None
if eval_model:
console.print(f"Evaluating on {eval_model}...", style="dim")
# Format eval_model with provider prefix if needed (skip for generic provider)
formatted_eval_model = eval_model
if provider and provider != "generic":
known_providers = ["anthropic", "openai", "generic", "google", "bedrock", "vertex"]
has_provider_prefix = any(eval_model.startswith(f"{p}.") for p in known_providers)

if not has_provider_prefix:
formatted_eval_model = f"{provider}.{eval_model}"

console.print(f"Evaluating on {formatted_eval_model}...", style="dim")

# Create run folder for eval model
run_folder = None
Expand All @@ -462,7 +559,7 @@ async def _generate_async(
skill,
test_cases,
evaluator=agent.evaluator,
model=eval_model,
model=formatted_eval_model,
)

# Log eval run results (both baseline and with-skill)
Expand Down Expand Up @@ -704,10 +801,27 @@ async def _eval_async(
runs_dir: str | None,
):
"""Async implementation of eval command."""
import os
from upskill.evaluate import run_test

config = Config.load()
skill_dir = Path(skill_path)

# Configure generic provider for Ollama/local models
if base_url:
os.environ["GENERIC_BASE_URL"] = base_url
# Set a dummy API key if not already set (required by some providers)
if "GENERIC_API_KEY" not in os.environ:
os.environ["GENERIC_API_KEY"] = "local"

# Also set dummy Anthropic key if missing to bypass startup checks
# This allows using local models without needing an Anthropic key
if "ANTHROPIC_API_KEY" not in os.environ:
os.environ["ANTHROPIC_API_KEY"] = "dummy"

# Auto-detect provider as generic if base_url is provided
if not provider:
provider = "generic"

try:
skill = Skill.load(skill_dir)
Expand All @@ -718,6 +832,22 @@ async def _eval_async(
# Use default model if none specified
if not models:
models = [config.effective_eval_model]

# Format model strings for FastAgent if provider is specified
# This must happen AFTER models is set to default if None
# For generic provider, DON'T prepend prefix - the monkey patch handles it
if models and provider and provider != "generic":
formatted_models = []
for model in models:
known_providers = ["anthropic", "openai", "generic", "google", "bedrock", "vertex"]
has_provider_prefix = any(model.startswith(f"{p}.") for p in known_providers)

# Only prepend provider prefix if not already present
if not has_provider_prefix:
formatted_models.append(f"{provider}.{model}")
else:
formatted_models.append(model)
models = formatted_models

is_benchmark_mode = len(models) > 1 or num_runs > 1

Expand Down
36 changes: 31 additions & 5 deletions src/upskill/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

from __future__ import annotations

import os
from datetime import UTC, datetime

from fast_agent.agents.llm_agent import LlmAgent
from fast_agent.interfaces import AgentProtocol
from fast_agent.llm.request_params import RequestParams
from fast_agent.skills.registry import SkillManifest

from upskill.manifest_utils import parse_skill_manifest_text
Expand Down Expand Up @@ -109,14 +112,31 @@ async def generate_skill(
) -> Skill:
"""Generate a skill from a task description using FastAgent."""

prompt = f"Create a skill document that teaches an AI agent how to: {task}"
prompt = f"Task: {task}\n\nOutput ONLY the complete skill document with YAML frontmatter and markdown body. Do NOT explain or describe what the document should contain - OUTPUT THE ACTUAL DOCUMENT DIRECTLY."
if examples:
prompt += "\n\nExample input/output pairs for this task:\n" + "\n".join(
f"- {ex}" for ex in examples
)


skill_text = await generator.send(prompt)
# Pass model to FastAgent if specified
request_params = RequestParams(model=model) if model else None
skill_text = await generator.send(prompt, request_params=request_params)

# Strip markdown code fences if present (common with smaller models)
skill_text = skill_text.strip()

# Check if wrapped in code fences (```...```)
if skill_text.startswith("```"):
lines = skill_text.split("\n")
# Remove opening fence (could be ```markdown, ```yaml, or just ```)
if lines and lines[0].startswith("```"):
lines = lines[1:]
# Remove closing fence (and any trailing empty lines)
while lines and (lines[-1].strip() == "```" or lines[-1].strip() == ""):
lines = lines[:-1]
skill_text = "\n".join(lines).strip()

manifest, error = parse_skill_manifest_text(skill_text)
if manifest is None:
raise ValueError(f"Skill generator did not return valid SKILL.md: {error}")
Expand All @@ -137,7 +157,9 @@ async def generate_tests(

prompt = TEST_GENERATION_PROMPT.replace(TASK_PLACEHOLDER, task)

result, _ = await generator.structured(prompt, TestCaseSuite)
# Pass model to FastAgent if specified
request_params = RequestParams(model=model) if model else None
result, _ = await generator.structured(prompt, TestCaseSuite, request_params=request_params)
if result is None:
raise ValueError("Test generator did not return structured test cases.")

Expand Down Expand Up @@ -182,7 +204,9 @@ async def refine_skill(
Do not wrap the output in code fences.
"""

skill_text = await generator.send(prompt)
# Pass model to FastAgent if specified
request_params = RequestParams(model=model) if model else None
skill_text = await generator.send(prompt, request_params=request_params)
manifest, error = parse_skill_manifest_text(skill_text)
if manifest is None:
raise ValueError(f"Skill refinement did not return valid SKILL.md: {error}")
Expand Down Expand Up @@ -252,7 +276,9 @@ async def improve_skill(
)


skill_text = await generator.send(prompt)
# Pass model to FastAgent if specified
request_params = RequestParams(model=model) if model else None
skill_text = await generator.send(prompt, request_params=request_params)
manifest, error = parse_skill_manifest_text(skill_text)
if manifest is None:
raise ValueError(f"Skill improvement did not return valid SKILL.md: {error}")
Expand Down