From 98a5f7c05f9a72b8c4e71505721e203ac569e924 Mon Sep 17 00:00:00 2001
From: markalexwatson <mark.a.watson@gmail.com>
Date: Sat, 21 Feb 2026 12:21:34 +0000
Subject: [PATCH 1/3] feat(ai): upgrade Gemini Flash from 2.0 to 2.5

The old gemini-2.0-flash-exp:free model (OpenRouter) and the
gemini-2.0-flash-thinking-exp-1219 default in llm-client.cjs were
experimental. Gemini 2.5 Flash has built-in reasoning capabilities and
is now generally available on OpenRouter at $0.30/M input tokens and
$2.50/M output tokens (~90% cheaper than Claude).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude/skills/ai-setup/SKILL.md            | 12 ++++++------
 .scripts/lib/llm-client.cjs                 |  2 +-
 06-Resources/Dex_System/AI_Model_Options.md |  4 ++--
 System/scripts/configure-ai-models.sh       |  8 ++++----
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/.claude/skills/ai-setup/SKILL.md b/.claude/skills/ai-setup/SKILL.md
index e8d9e865..12a363b1 100644
--- a/.claude/skills/ai-setup/SKILL.md
+++ b/.claude/skills/ai-setup/SKILL.md
@@ -164,7 +164,7 @@ These are AI models from other companies that cost much less:
 |-------|-----------------|---------|----------|
 | **Kimi K2.5** | ~80% cheaper | ⭐⭐⭐⭐ Great | General tasks, thinking |
 | **DeepSeek V3** | ~95% cheaper | ⭐⭐⭐⭐ Great | Coding, analysis |
-| **Gemini Flash** | ~97% cheaper | ⭐⭐⭐ Good | Long documents |
+| **Gemini 2.5 Flash** | ~90% cheaper | ⭐⭐⭐⭐ Great | Long documents, reasoning |
 
 **My recommendation:** Start with **Kimi K2.5** — it's the closest to Claude in quality.
 
@@ -251,12 +251,12 @@ Generate `~/.pi/agent/models.json`:
           "cost": { "input": 0.14, "output": 0.28, "cacheRead": 0, "cacheWrite": 0 }
         },
         {
-          "id": "google/gemini-2.0-flash-exp:free",
-          "name": "Gemini Flash (Free tier)",
+          "id": "google/gemini-2.5-flash",
+          "name": "Gemini 2.5 Flash (Budget)",
           "input": ["text", "image"],
           "contextWindow": 1048576,
           "maxTokens": 8192,
-          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+          "cost": { "input": 0.3, "output": 2.5, "cacheRead": 0, "cacheWrite": 0 }
         }
       ]
     }
@@ -272,7 +272,7 @@ Say:
 I've added three models you can switch to anytime:
 - **Kimi K2.5** — Best quality budget option
 - **DeepSeek V3** — Super cheap, great for coding
-- **Gemini Flash** — Google's free tier (with limits)
+- **Gemini 2.5 Flash** — Google's budget option, great for reasoning
 
 **How to use them:**
 
@@ -739,7 +739,7 @@ Want me to help you switch to a smaller model?
 
 Some alternatives:
 
-1. **Gemini Flash Free Tier** — Google offers free usage (with limits)
+1. **Gemini 2.5 Flash** — Google's budget model, very affordable
    I can set that up instead
 
 2. **Offline Only** — Skip budget cloud, just use local models
diff --git a/.scripts/lib/llm-client.cjs b/.scripts/lib/llm-client.cjs
index bbfa73a7..921dd43c 100755
--- a/.scripts/lib/llm-client.cjs
+++ b/.scripts/lib/llm-client.cjs
@@ -76,7 +76,7 @@ async function generateWithGemini(prompt, options = {}) {
   const genAI = new GoogleGenerativeAI(GEMINI_API_KEY);
   
   const model = genAI.getGenerativeModel({
-    model: options.model || 'gemini-2.0-flash-thinking-exp-1219',
+    model: options.model || 'gemini-2.5-flash',
     generationConfig: {
       maxOutputTokens: options.maxOutputTokens || 4096,
       temperature: options.temperature || 1.0,
diff --git a/06-Resources/Dex_System/AI_Model_Options.md b/06-Resources/Dex_System/AI_Model_Options.md
index f991da5d..db3ed1cc 100644
--- a/06-Resources/Dex_System/AI_Model_Options.md
+++ b/06-Resources/Dex_System/AI_Model_Options.md
@@ -23,7 +23,7 @@ Dex can use different AI models depending on your needs:
 Other companies make AI models that cost much less than Claude:
 - **Kimi K2.5** (Moonshot AI) — 80% cheaper, similar quality
 - **DeepSeek V3** — 95% cheaper, great for coding
-- **Gemini Flash** (Google) — 97% cheaper, handles long documents
+- **Gemini 2.5 Flash** (Google) — 90% cheaper, handles long documents
 
 ### Why Use Them?
 
@@ -168,7 +168,7 @@ Run `/ai-status` to see:
 | Claude Sonnet | `claude-sonnet-4-20250514` |
 | Kimi K2.5 | `moonshotai/kimi-k2.5` |
 | DeepSeek V3 | `deepseek/deepseek-chat` |
-| Gemini Flash | `google/gemini-2.0-flash-exp:free` |
+| Gemini 2.5 Flash | `google/gemini-2.5-flash` |
 | Qwen (Offline) | `qwen2.5:14b` |
 
 ---
diff --git a/System/scripts/configure-ai-models.sh b/System/scripts/configure-ai-models.sh
index 89a67e64..676c4f87 100755
--- a/System/scripts/configure-ai-models.sh
+++ b/System/scripts/configure-ai-models.sh
@@ -54,12 +54,12 @@ if [[ -n "$OPENROUTER_KEY" ]]; then
           "cost": { "input": 0.14, "output": 0.28, "cacheRead": 0, "cacheWrite": 0 }
         },
         {
-          "id": "google/gemini-2.0-flash-exp:free",
-          "name": "Gemini Flash (Free)",
+          "id": "google/gemini-2.5-flash",
+          "name": "Gemini 2.5 Flash (Budget)",
           "input": ["text", "image"],
           "contextWindow": 1048576,
           "maxTokens": 8192,
-          "cost": { "input": 0, "output": 0, "cacheRead": 0, "cacheWrite": 0 }
+          "cost": { "input": 0.3, "output": 2.5, "cacheRead": 0, "cacheWrite": 0 }
         }
       ]
     }
@@ -130,7 +130,7 @@ if [[ -n "$OPENROUTER_KEY" ]]; then
     echo "☁️  Budget Cloud (OpenRouter):"
     echo "   - Kimi K2.5"
     echo "   - DeepSeek V3"
-    echo "   - Gemini Flash (free tier)"
+    echo "   - Gemini 2.5 Flash"
 fi
 
 if command -v ollama &> /dev/null && ollama list 2>/dev/null | grep -q "$OLLAMA_MODEL"; then

From 014a29cf282d49886731deb5da7536d88ad71fe2 Mon Sep 17 00:00:00 2001
From: markalexwatson <mark.a.watson@gmail.com>
Date: Sat, 21 Feb 2026 12:29:26 +0000
Subject: [PATCH 2/3] fix(hooks): replace macOS afplay with pw-play for Linux

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .claude/settings.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.claude/settings.json b/.claude/settings.json
index 2084c1a1..eecc63d9 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -43,7 +43,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "afplay /System/Library/Sounds/Ping.aiff"
+            "command": "pw-play /usr/share/sounds/freedesktop/stereo/complete.oga"
           }
         ]
       }
@@ -54,7 +54,7 @@
         "hooks": [
           {
             "type": "command",
-            "command": "afplay /System/Library/Sounds/Ping.aiff"
+            "command": "pw-play /usr/share/sounds/freedesktop/stereo/complete.oga"
           }
         ]
       }

From 9c4820780f4d36277b857146f8164c2790f6b256 Mon Sep 17 00:00:00 2001
From: markalexwatson <mark.a.watson@gmail.com>
Date: Sat, 21 Feb 2026 20:06:13 +0000
Subject: [PATCH 3/3] feat(ai): configure budget cloud, offline, and smart
 routing models

Set up a three-tier AI model strategy in user-profile.yaml: Claude
Sonnet for complex tasks, Gemini 2.5 Flash (budget cloud via
OpenRouter) for routine tasks, and Llama 3.3 70B (offline via Ollama),
with smart routing rules that select the right model per task.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 System/user-profile.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/System/user-profile.yaml b/System/user-profile.yaml
index 21c33382..a43c6928 100644
--- a/System/user-profile.yaml
+++ b/System/user-profile.yaml
@@ -110,3 +110,25 @@ quarterly_planning:
   # Current quarter dates (auto-calculated)
   quarter_start_date: "2026-01-01"
   quarter_end_date: "2026-03-31"
+
+# AI Model Configuration
+ai_models:
+  budget_cloud:
+    enabled: true
+    provider: openrouter
+    primary_model: google/gemini-2.5-flash
+    configured_date: "2026-02-21"  # quoted: keep a string, like quarter_*_date
+  offline:
+    enabled: true
+    runner: ollama
+    model: llama3.3:latest
+    configured_date: "2026-02-21"  # quoted: keep a string, like quarter_*_date
+  smart_routing:
+    enabled: true
+    rules:
+      complex_tasks:
+        model: claude-sonnet
+      routine_tasks:
+        model: google/gemini-2.5-flash
+      offline:
+        model: llama3.3:latest