From 98a5f7c05f9a72b8c4e71505721e203ac569e924 Mon Sep 17 00:00:00 2001 From: markalexwatson Date: Sat, 21 Feb 2026 12:21:34 +0000 Subject: [PATCH 1/3] feat(ai): upgrade Gemini Flash from 2.0 to 2.5 The old gemini-2.0-flash-exp:free model was experimental. Gemini 2.5 Flash has built-in reasoning capabilities and is now generally available on OpenRouter at $0.30/M input, $2.50/M output (~90% cheaper than Claude). Co-Authored-By: Claude Opus 4.6 --- .claude/skills/ai-setup/SKILL.md | 12 ++++++------ .scripts/lib/llm-client.cjs | 2 +- 06-Resources/Dex_System/AI_Model_Options.md | 4 ++-- System/scripts/configure-ai-models.sh | 8 ++++---- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.claude/skills/ai-setup/SKILL.md b/.claude/skills/ai-setup/SKILL.md index e8d9e865..12a363b1 100644 --- a/.claude/skills/ai-setup/SKILL.md +++ b/.claude/skills/ai-setup/SKILL.md @@ -164,7 +164,7 @@ These are AI models from other companies that cost much less: |-------|-----------------|---------|----------| | **Kimi K2.5** | ~80% cheaper | ⭐⭐⭐⭐ Great | General tasks, thinking | | **DeepSeek V3** | ~95% cheaper | ⭐⭐⭐⭐ Great | Coding, analysis | -| **Gemini Flash** | ~97% cheaper | ⭐⭐⭐ Good | Long documents | +| **Gemini 2.5 Flash** | ~90% cheaper | ⭐⭐⭐⭐ Great | Long documents, reasoning | **My recommendation:** Start with **Kimi K2.5** — it's the closest to Claude in quality. @@ -251,12 +251,12 @@ Generate `~/.pi/agent/models.json`: "cost": { "input": 0.14, "output": 0.28, "cacheRead": 0, "cacheWrite": 0 } }, { - "id": "google/gemini-2.0-flash-exp:free", - "name": "Gemini Flash (Free tier)", + "id": "google/gemini-2.5-flash", + "name": "Gemini 2.5 Flash (Budget)", "input": ["text", "image"], "contextWindow": 1048576, "maxTokens": 8192, - "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 } + "cost": { "input": 0.3, "output": 2.5, "cacheRead": 0, "cacheWrite": 0 } } ] } @@ -272,7 +272,7 @@ Say: I've added three models you can switch to anytime: - **Kimi K2.5** — Best quality budget option - **DeepSeek V3** — Super cheap, great for coding -- **Gemini Flash** — Google's free tier (with limits) +- **Gemini 2.5 Flash** — Google's budget option, great for reasoning **How to use them:** @@ -739,7 +739,7 @@ Want me to help you switch to a smaller model? Some alternatives: -1. **Gemini Flash Free Tier** — Google offers free usage (with limits) +1. **Gemini 2.5 Flash** — Google's budget model, very affordable I can set that up instead 2. **Offline Only** — Skip budget cloud, just use local models diff --git a/.scripts/lib/llm-client.cjs b/.scripts/lib/llm-client.cjs index bbfa73a7..921dd43c 100755 --- a/.scripts/lib/llm-client.cjs +++ b/.scripts/lib/llm-client.cjs @@ -76,7 +76,7 @@ async function generateWithGemini(prompt, options = {}) { const genAI = new GoogleGenerativeAI(GEMINI_API_KEY); const model = genAI.getGenerativeModel({ - model: options.model || 'gemini-2.0-flash-thinking-exp-1219', + model: options.model || 'gemini-2.5-flash', generationConfig: { maxOutputTokens: options.maxOutputTokens || 4096, temperature: options.temperature || 1.0, diff --git a/06-Resources/Dex_System/AI_Model_Options.md b/06-Resources/Dex_System/AI_Model_Options.md index f991da5d..db3ed1cc 100644 --- a/06-Resources/Dex_System/AI_Model_Options.md +++ b/06-Resources/Dex_System/AI_Model_Options.md @@ -23,7 +23,7 @@ Dex can use different AI models depending on your needs: Other companies make AI models that cost much less than Claude: - **Kimi K2.5** (Moonshot AI) — 80% cheaper, similar quality - **DeepSeek V3** — 95% cheaper, great for coding -- **Gemini Flash** (Google) — 97% cheaper, handles long documents +- **Gemini 2.5 Flash** (Google) — 90% cheaper, handles long documents ### Why Use Them? @@ -168,7 +168,7 @@ Run `/ai-status` to see: | Claude Sonnet | `claude-sonnet-4-20250514` | | Kimi K2.5 | `moonshotai/kimi-k2.5` | | DeepSeek V3 | `deepseek/deepseek-chat` | -| Gemini Flash | `google/gemini-2.0-flash-exp:free` | +| Gemini 2.5 Flash | `google/gemini-2.5-flash` | | Qwen (Offline) | `qwen2.5:14b` | --- diff --git a/System/scripts/configure-ai-models.sh b/System/scripts/configure-ai-models.sh index 89a67e64..676c4f87 100755 --- a/System/scripts/configure-ai-models.sh +++ b/System/scripts/configure-ai-models.sh @@ -54,12 +54,12 @@ if [[ -n "$OPENROUTER_KEY" ]]; then "cost": { "input": 0.14, "output": 0.28, "cacheRead": 0, "cacheWrite": 0 } }, { - "id": "google/gemini-2.0-flash-exp:free", - "name": "Gemini Flash (Free)", + "id": "google/gemini-2.5-flash", + "name": "Gemini 2.5 Flash (Budget)", "input": ["text", "image"], "contextWindow": 1048576, "maxTokens": 8192, - "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 } + "cost": { "input": 0.3, "output": 2.5, "cacheRead": 0, "cacheWrite": 0 } } ] } @@ -130,7 +130,7 @@ if [[ -n "$OPENROUTER_KEY" ]]; then echo "☁️ Budget Cloud (OpenRouter):" echo " - Kimi K2.5" echo " - DeepSeek V3" - echo " - Gemini Flash (free tier)" + echo " - Gemini 2.5 Flash" fi if command -v ollama &> /dev/null && ollama list 2>/dev/null | grep -q "$OLLAMA_MODEL"; then From 014a29cf282d49886731deb5da7536d88ad71fe2 Mon Sep 17 00:00:00 2001 From: markalexwatson Date: Sat, 21 Feb 2026 12:29:26 +0000 Subject: [PATCH 2/3] fix(hooks): replace macOS afplay with pw-play for Linux Co-Authored-By: Claude Opus 4.6 --- .claude/settings.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.claude/settings.json b/.claude/settings.json index 2084c1a1..eecc63d9 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -43,7 +43,7 @@ "hooks": [ { "type": "command", - "command": "afplay /System/Library/Sounds/Ping.aiff" + "command": "pw-play /usr/share/sounds/freedesktop/stereo/complete.oga" } ] } @@ -54,7 +54,7 @@ "hooks": [ { "type": "command", - "command": "afplay /System/Library/Sounds/Ping.aiff" + "command": "pw-play /usr/share/sounds/freedesktop/stereo/complete.oga" } ] } From 9c4820780f4d36277b857146f8164c2790f6b256 Mon Sep 17 00:00:00 2001 From: markalexwatson Date: Sat, 21 Feb 2026 20:06:13 +0000 Subject: [PATCH 3/3] feat(ai): configure budget cloud, offline, and smart routing models Set up three-tier AI model strategy: Gemini 2.5 Flash (budget via OpenRouter), Llama 3.3 70B (offline via Ollama), and smart routing rules to automatically select the right model per task. Co-Authored-By: Claude Opus 4.6 --- System/user-profile.yaml | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/System/user-profile.yaml b/System/user-profile.yaml index 21c33382..a43c6928 100644 --- a/System/user-profile.yaml +++ b/System/user-profile.yaml @@ -110,3 +110,25 @@ quarterly_planning: # Current quarter dates (auto-calculated) quarter_start_date: "2026-01-01" quarter_end_date: "2026-03-31" + +# AI Model Configuration +ai_models: + budget_cloud: + enabled: true + provider: openrouter + primary_model: google/gemini-2.5-flash + configured_date: 2026-02-21 + offline: + enabled: true + runner: ollama + model: llama3.3:latest + configured_date: 2026-02-21 + smart_routing: + enabled: true + rules: + complex_tasks: + model: claude-sonnet + routine_tasks: + model: google/gemini-2.5-flash + offline: + model: llama3.3:latest