From d100c498b96f435daba78e4b17ee7a19e479c877 Mon Sep 17 00:00:00 2001
From: sysradium <sysradium@users.noreply.github.com>
Date: Sun, 1 Feb 2026 23:42:00 +0100
Subject: [PATCH 1/2] docs: clarify local eval model prefixing and LM Studio
 usage

---
 README.md | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 7066642..54cebcc 100644
--- a/README.md
+++ b/README.md
@@ -158,6 +158,11 @@ upskill eval ./skills/my-skill/ \
     -m "unsloth/GLM-4.7-Flash-GGUF:Q4_0" \
     --base-url http://localhost:8080/v1
 
+# Evaluate on local model (LM Studio server)
+upskill eval ./skills/your-skill/ \
+    --base-url http://127.0.0.1:1234/v1 \
+    --model "generic.qwen2.5-7b-instruct-1m"
+
 # Skip baseline (just test with skill)
 upskill eval ./skills/my-skill/ --no-baseline
 
@@ -388,8 +393,16 @@ max_refine_attempts: 3          # Refinement iterations
 Place in your project directory to customize FastAgent settings:
 
 ```yaml
+# Model format follows the FastAgent provider convention (aliases such as `sonnet` are also accepted):
+# <provider>.<model_string>.<reasoning_effort?>
+# Examples: anthropic.claude-sonnet-4-20250514, openai.gpt-4.1, generic.llama3.2:latest
 default_model: sonnet
 
+# Generic provider for local OpenAI-compatible endpoints
+generic:
+  api_key: "local"
+  base_url: "${GENERIC_BASE_URL:http://localhost:11434/v1}"
+
 logger:
   progress_display: true
   show_chat: false
@@ -472,6 +485,8 @@ upskill uses FastAgent model format:
 <provider>.<model>.<reasoning_effort?>
 ```
 
+This provider-prefix convention comes from the FastAgent LLM providers docs (`https://fast-agent.ai/models/llm_providers/`) and is the canonical format. Use provider-prefixed model names explicitly, especially for local models.
+
 **Examples:**
 - `sonnet` - Anthropic Claude Sonnet (alias)
 - `haiku` - Anthropic Claude Haiku (alias)
@@ -486,6 +501,15 @@ upskill uses FastAgent model format:
 
 upskill supports local models through any OpenAI-compatible endpoint (Ollama, llama.cpp, vLLM, etc.).
 
+**With LM Studio (OpenAI-compatible server):**
+
+```bash
+# In LM Studio, start the local server (default port 1234), then run:
+upskill eval ./skills/your-skill/ \
+    --base-url http://127.0.0.1:1234/v1 \
+    --model "generic.qwen2.5-7b-instruct-1m"
+```
+
 **Quick start with Ollama:**
 
 ```bash
@@ -494,7 +518,7 @@ ollama serve
 
 # Evaluate with a local model
 upskill eval ./skills/my-skill/ \
-    --model llama3.2:latest \
+    --model generic.llama3.2:latest \
     --base-url http://localhost:11434/v1
 ```
 
@@ -506,7 +530,7 @@ upskill eval ./skills/my-skill/ \
 
 # Evaluate with the local model
 upskill eval ./skills/my-skill/ \
-    --model my-model \
+    --model generic.my-model \
     --base-url http://localhost:8080/v1
 ```
 

From ab2f8d9d03b550894455f44a69d2cd375fcd2376 Mon Sep 17 00:00:00 2001
From: sysradium <sysradium@users.noreply.github.com>
Date: Sun, 1 Feb 2026 23:42:46 +0100
Subject: [PATCH 2/2] fix: honor eval base URL via generic provider env

---
 fastagent.config.yaml |  2 +-
 src/upskill/cli.py    | 10 ++++++++--
 uv.lock               |  4 ++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/fastagent.config.yaml b/fastagent.config.yaml
index 67747f3..aba56e7 100644
--- a/fastagent.config.yaml
+++ b/fastagent.config.yaml
@@ -12,7 +12,7 @@ default_model: kimi
 # Override with GENERIC_BASE_URL and GENERIC_API_KEY environment variables
 generic:
   api_key: "local"
-  base_url: "http://localhost:11434/v1"
+  base_url: "${GENERIC_BASE_URL:http://localhost:11434/v1}"
 
 # MCP timeline display settings
 mcp_timeline:
diff --git a/src/upskill/cli.py b/src/upskill/cli.py
index 6e8c680..dd5797c 100644
--- a/src/upskill/cli.py
+++ b/src/upskill/cli.py
@@ -3,6 +3,7 @@
 
 import asyncio
 import json
+import os
 import sys
 from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager
@@ -47,7 +48,10 @@
 
 
 @asynccontextmanager
-async def _fast_agent_context() -> AsyncIterator[object]:
+async def _fast_agent_context(base_url: str | None = None) -> AsyncIterator[object]:
+    if base_url:
+        os.environ["GENERIC_BASE_URL"] = base_url
+
     fast = FastAgent(
         "upskill",
         ignore_unknown_args=True,
@@ -721,7 +725,7 @@ async def _eval_async(
 
     is_benchmark_mode = len(models) > 1 or num_runs > 1
 
-    async with _fast_agent_context() as agent:
+    async with _fast_agent_context(base_url=base_url) as agent:
         # Load test cases
         test_cases: list[TestCase] = []
         if tests:
@@ -812,6 +816,7 @@ async def _eval_async(
                                 tc,
                                 evaluator=agent.evaluator,
                                 skill=skill,
+                                model=model,
                             )
                         except Exception as e:
                             console.print(f"  [red]Test error: {e}[/red]")
@@ -1222,6 +1227,7 @@ async def _benchmark_async(
                             tc,
                             evaluator=agent.evaluator,
                             skill=skill,
+                            model=model,
                         )
                     except Exception as e:
                         console.print(f"  [red]Test error: {e}[/red]")
diff --git a/uv.lock b/uv.lock
index dd46d91..f06bb73 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.13.5, <3.14"
 
 [[package]]
@@ -1727,7 +1727,7 @@ wheels = [
 
 [[package]]
 name = "upskill"
-version = "0.2.0"
+version = "0.2.1"
 source = { editable = "." }
 dependencies = [
     { name = "click" },