From d100c498b96f435daba78e4b17ee7a19e479c877 Mon Sep 17 00:00:00 2001 From: sysradium Date: Sun, 1 Feb 2026 23:42:00 +0100 Subject: [PATCH 1/2] docs: clarify local eval model prefixing and LM Studio usage --- README.md | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7066642..54cebcc 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,11 @@ upskill eval ./skills/my-skill/ \ -m "unsloth/GLM-4.7-Flash-GGUF:Q4_0" \ --base-url http://localhost:8080/v1 +# Evaluate on local model (LM Studio server) +upskill eval ./skills/your-skill/ \ + --base-url http://127.0.0.1:1234/v1 \ + --model "generic.qwen2.5-7b-instruct-1m" + # Skip baseline (just test with skill) upskill eval ./skills/my-skill/ --no-baseline @@ -388,8 +393,16 @@ max_refine_attempts: 3 # Refinement iterations Place in your project directory to customize FastAgent settings: ```yaml +# Model format follows FastAgent provider convention: +# <provider>.<model>.<reasoning_effort?> +# Examples: anthropic.claude-sonnet-4-20250514, openai.gpt-4.1, generic.llama3.2:latest default_model: sonnet +# Generic provider for local OpenAI-compatible endpoints +generic: + api_key: "local" + base_url: "${GENERIC_BASE_URL:http://localhost:11434/v1}" + logger: progress_display: true show_chat: false @@ -472,6 +485,8 @@ upskill uses FastAgent model format: <provider>.<model>.<reasoning_effort?> ``` +This provider-prefix convention comes from FastAgent LLM providers docs (`https://fast-agent.ai/models/llm_providers/`) and is the canonical format. Use provider-prefixed model names explicitly, especially for local models. + **Examples:** - `sonnet` - Anthropic Claude Sonnet (alias) - `haiku` - Anthropic Claude Haiku (alias) @@ -486,6 +501,15 @@ upskill uses FastAgent model format: upskill supports local models through any OpenAI-compatible endpoint (Ollama, llama.cpp, vLLM, etc.). 
+**With LM Studio (OpenAI-compatible server):** + +```bash +# In LM Studio, start the local server (default port 1234), then run: +upskill eval ./skills/your-skill/ \ + --base-url http://127.0.0.1:1234/v1 \ + --model "generic.qwen2.5-7b-instruct-1m" +``` + **Quick start with Ollama:** ```bash @@ -494,7 +518,7 @@ ollama serve # Evaluate with a local model upskill eval ./skills/my-skill/ \ - --model llama3.2:latest \ + --model generic.llama3.2:latest \ --base-url http://localhost:11434/v1 ``` @@ -506,7 +530,7 @@ upskill eval ./skills/my-skill/ \ # Evaluate with the local model upskill eval ./skills/my-skill/ \ - --model my-model \ + --model generic.my-model \ --base-url http://localhost:8080/v1 ``` From ab2f8d9d03b550894455f44a69d2cd375fcd2376 Mon Sep 17 00:00:00 2001 From: sysradium Date: Sun, 1 Feb 2026 23:42:46 +0100 Subject: [PATCH 2/2] fix: honor eval base URL via generic provider env --- fastagent.config.yaml | 2 +- src/upskill/cli.py | 10 ++++++++-- uv.lock | 4 ++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/fastagent.config.yaml b/fastagent.config.yaml index 67747f3..aba56e7 100644 --- a/fastagent.config.yaml +++ b/fastagent.config.yaml @@ -12,7 +12,7 @@ default_model: kimi # Override with GENERIC_BASE_URL and GENERIC_API_KEY environment variables generic: api_key: "local" - base_url: "http://localhost:11434/v1" + base_url: "${GENERIC_BASE_URL:http://localhost:11434/v1}" # MCP timeline display settings mcp_timeline: diff --git a/src/upskill/cli.py b/src/upskill/cli.py index 6e8c680..dd5797c 100644 --- a/src/upskill/cli.py +++ b/src/upskill/cli.py @@ -3,6 +3,7 @@ import asyncio import json +import os import sys from collections.abc import AsyncIterator from contextlib import asynccontextmanager @@ -47,7 +48,10 @@ @asynccontextmanager -async def _fast_agent_context() -> AsyncIterator[object]: +async def _fast_agent_context(base_url: str | None = None) -> AsyncIterator[object]: + if base_url: + os.environ["GENERIC_BASE_URL"] = base_url + fast 
= FastAgent( "upskill", ignore_unknown_args=True, @@ -721,7 +725,7 @@ async def _eval_async( is_benchmark_mode = len(models) > 1 or num_runs > 1 - async with _fast_agent_context() as agent: + async with _fast_agent_context(base_url=base_url) as agent: # Load test cases test_cases: list[TestCase] = [] if tests: @@ -812,6 +816,7 @@ async def _eval_async( tc, evaluator=agent.evaluator, skill=skill, + model=model, ) except Exception as e: console.print(f" [red]Test error: {e}[/red]") @@ -1222,6 +1227,7 @@ async def _benchmark_async( tc, evaluator=agent.evaluator, skill=skill, + model=model, ) except Exception as e: console.print(f" [red]Test error: {e}[/red]") diff --git a/uv.lock b/uv.lock index dd46d91..f06bb73 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 3 +revision = 2 requires-python = ">=3.13.5, <3.14" [[package]] @@ -1727,7 +1727,7 @@ wheels = [ [[package]] name = "upskill" -version = "0.2.0" +version = "0.2.1" source = { editable = "." } dependencies = [ { name = "click" },