From 44df21a0ccf8a47548a1dcd4abab6c3e9d398eb2 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Thu, 5 Mar 2026 22:35:48 +0800
Subject: [PATCH 01/21] fix api key management

---
 .../src/vedana_backoffice/states/chat.py   | 20 +++++--
 .../src/vedana_backoffice/states/common.py | 57 ++++++++++++++++++-
 .../src/vedana_backoffice/states/eval.py   | 24 ++++----
 3 files changed, 82 insertions(+), 19 deletions(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
index a9702e2e..adeb4f7d 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
@@ -1,6 +1,5 @@
 import asyncio
 import logging
-import os
 import traceback
 from datetime import datetime
 from typing import Any, Dict, Tuple
@@ -16,7 +15,15 @@
 from vedana_etl.app import app as etl_app
 from vedana_etl.pipeline import get_data_model_pipeline
 
-from vedana_backoffice.states.common import MemLogger, get_vedana_app, load_openrouter_models, DEBUG_MODE, datapipe_log_capture, DebugState
+from vedana_backoffice.states.common import (
+    MemLogger,
+    get_vedana_app,
+    load_openrouter_models,
+    DEBUG_MODE,
+    datapipe_log_capture,
+    DebugState,
+    resolve_api_key,
+)
 from vedana_backoffice.states.jims import ThreadViewState
 
@@ -263,7 +270,7 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[
         pipeline.model = f"{self.provider}/{self.model}"
         pipeline.enable_filtering = self.enable_dm_filtering
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
-        api_key = os.environ.get("OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY")
+        api_key = resolve_api_key(self.provider)
 
         ctx = await ctl.make_context(llm_settings=LLMSettings(model=self.model, model_api_key=api_key))
 
@@ -306,9 +313,10 @@ def send(self):
         if not user_text:
             return
 
-        env_key = "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY"
-        if not os.environ.get(env_key):
-            yield DebugState.open_dialog()
+        api_key = resolve_api_key(self.provider)
+        if not api_key:
+            if DEBUG_MODE:
+                yield DebugState.open_dialog()
             return
 
         self._append_message("user", user_text)
         self.input_text = ""
         self.is_running = True
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
index 99b63c23..64d77ae4 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
@@ -4,7 +4,7 @@
 import io
 import logging
 import os
-from typing import Iterable
+from typing import Iterable, Optional
 
 import httpx
 import reflex as rx
@@ -23,6 +23,51 @@
 HAS_OPENAI_KEY = bool(os.environ.get("OPENAI_API_KEY"))
 HAS_OPENROUTER_KEY = bool(os.environ.get("OPENROUTER_API_KEY"))
 
+# Runtime-scoped API keys configured from the backoffice (debug UI).
+# Not persisted to environment variables.
+RUNTIME_OPENAI_API_KEY: Optional[str] = None
+RUNTIME_OPENROUTER_API_KEY: Optional[str] = None
+
+
+def set_runtime_api_keys(openai_api_key: Optional[str] = None, openrouter_api_key: Optional[str] = None) -> None:
+    """Update in-process API key overrides used by backoffice (chat/eval)."""
+    global RUNTIME_OPENAI_API_KEY, RUNTIME_OPENROUTER_API_KEY
+
+    if openai_api_key is not None:
+        openai_api_key = openai_api_key.strip()
+        if openai_api_key:
+            RUNTIME_OPENAI_API_KEY = openai_api_key
+
+    if openrouter_api_key is not None:
+        openrouter_api_key = openrouter_api_key.strip()
+        if openrouter_api_key:
+            RUNTIME_OPENROUTER_API_KEY = openrouter_api_key
+
+
+def resolve_api_key(provider: str, page_api_key: Optional[str] = None) -> Optional[str]:
+    """
+    Resolve the effective API key for a given provider.
+    """
+    provider = (provider or "openai").lower()
+
+    if page_api_key:
+        key = page_api_key.strip()
+        if key:
+            return key
+
+    if provider == "openrouter":
+        if RUNTIME_OPENROUTER_API_KEY:
+            return RUNTIME_OPENROUTER_API_KEY
+        if llm_settings.openrouter_api_key:
+            return llm_settings.openrouter_api_key
+        return None
+
+    if RUNTIME_OPENAI_API_KEY:
+        return RUNTIME_OPENAI_API_KEY
+    if llm_settings.model_api_key:
+        return llm_settings.model_api_key
+    return None
+
 
 def _filter_chat_capable_models(models: Iterable[dict]) -> list[str]:
     """Filter models that support text chat with tool calls."""
@@ -50,6 +95,7 @@
 async def load_openrouter_models() -> list[str]:
     if not DEBUG_MODE:
         return []
+    api_key = resolve_api_key("openrouter")
     try:
         async with httpx.AsyncClient(timeout=15) as client:
             resp = await client.get(f"{llm_settings.openrouter_api_base_url}/models")
@@ -157,8 +203,13 @@ def set_openrouter_api_key(self, value: str) -> None:
 
     def save_api_keys(self) -> None:
         if not self.debug_mode:
             return
-        os.environ["OPENAI_API_KEY"] = self.openai_api_key
-        os.environ["OPENROUTER_API_KEY"] = self.openrouter_api_key
+        set_runtime_api_keys(
+            openai_api_key=self.openai_api_key,
+            openrouter_api_key=self.openrouter_api_key,
+        )
+        # Keep defaults in sync so reopening the dialog reflects the last saved values.
+        self.default_openai_api_key = self.openai_api_key
+        self.default_openrouter_api_key = self.openrouter_api_key
         self.api_key_saved = bool(self.openai_api_key or self.openrouter_api_key)
         self.show_api_key_dialog = False
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index e4dab886..b67774a1 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -22,7 +22,14 @@
 from vedana_core.settings import settings as core_settings
 from vedana_etl.app import app as etl_app
 
-from vedana_backoffice.states.common import get_vedana_app, load_openrouter_models, datapipe_log_capture, DebugState
+from vedana_backoffice.states.common import (
+    get_vedana_app,
+    load_openrouter_models,
+    datapipe_log_capture,
+    DebugState,
+    resolve_api_key,
+    DEBUG_MODE,
+)
 from vedana_backoffice.util import safe_render_value
 
@@ -1705,9 +1712,7 @@ async def _run_question_thread(
         pipeline.enable_filtering = self.enable_dm_filtering
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
 
-        api_key = (os.environ.get(
-            "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY"
-        ) or "").strip() or None
+        api_key = resolve_api_key(self.provider)
 
         ctx = await ctl.make_context(llm_settings=LLMSettings(model=resolved_model, model_api_key=api_key))
         events = await ctl.run_pipeline_with_context(pipeline, ctx)
@@ -1737,9 +1742,7 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca
         except Exception:
             logging.warning(f"Failed to set judge model {resolved_judge_model}")
 
-        api_key = (os.environ.get(
-            "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY"
-        ) or "").strip()
+        api_key = resolve_api_key(self.provider)
         if api_key:
             provider.model_api_key = api_key
 
@@ -1811,9 +1814,10 @@ def run_selected_tests(self):
             self.error_message = "Judge prompt not loaded. Refresh judge config first."
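             # A minimal sketch (an editorial reading of patch 01) of the precedence
             # resolve_api_key() in common.py applies before this gating runs:
             #
             #     key = (page_api_key or "").strip() or None   # explicit page key wins
             #     key = key or RUNTIME_OPENAI_API_KEY          # debug-dialog override
             #     key = key or llm_settings.model_api_key      # env-backed settings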
             return
 
-        env_key = "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY"
-        if not os.environ.get(env_key):
-            yield DebugState.open_dialog()
+        api_key = resolve_api_key(self.provider)
+        if not api_key:
+            if DEBUG_MODE:
+                yield DebugState.open_dialog()
             return
 
         test_run_name = self.test_run_name.strip() or ""
 
         # Initialize run state

From 78ee35d436504504a29cf0569d70fba9cfa34628 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Thu, 5 Mar 2026 23:01:38 +0800
Subject: [PATCH 02/21] gate runtime api key overrides behind debug mode

---
 libs/vedana-backoffice/src/vedana_backoffice/states/common.py | 4 ++--
 libs/vedana-backoffice/src/vedana_backoffice/states/eval.py   | 1 -
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
index 64d77ae4..14a379ff 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
@@ -56,13 +56,13 @@ def resolve_api_key(provider: str, page_api_key: Optional[str] = None) -> Option
             return key
 
     if provider == "openrouter":
-        if RUNTIME_OPENROUTER_API_KEY:
+        if DEBUG_MODE and RUNTIME_OPENROUTER_API_KEY:
             return RUNTIME_OPENROUTER_API_KEY
         if llm_settings.openrouter_api_key:
             return llm_settings.openrouter_api_key
         return None
 
-    if RUNTIME_OPENAI_API_KEY:
+    if DEBUG_MODE and RUNTIME_OPENAI_API_KEY:
         return RUNTIME_OPENAI_API_KEY
     if llm_settings.model_api_key:
         return llm_settings.model_api_key
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index b67774a1..cdca80cf 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -3,7 +3,6 @@
 import hashlib
 import json
 import logging
-import os
 import statistics
 import traceback
 from dataclasses import asdict, dataclass

From 51b3a5a7ffbc372b864f74917c8104efb700713e Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 00:48:52 +0800
Subject: [PATCH 03/21] drop unused api key lookup in load_openrouter_models

---
 libs/vedana-backoffice/src/vedana_backoffice/states/common.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
index 14a379ff..11856e47 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
@@ -95,7 +95,6 @@ def _filter_chat_capable_models(models: Iterable[dict]) -> list[str]:
 async def load_openrouter_models() -> list[str]:
     if not DEBUG_MODE:
         return []
-    api_key = resolve_api_key("openrouter")
     try:
         async with httpx.AsyncClient(timeout=15) as client:
             resp = await client.get(f"{llm_settings.openrouter_api_base_url}/models")

From 8b3445f9176217898f110a2b8515efbb7ae2ad12 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 14:22:05 +0800
Subject: [PATCH 04/21] set up provider in LLMProvider and settings

---
 libs/jims-core/src/jims_core/llms/llm_provider.py | 18 ++++++++----------
 .../src/vedana_backoffice/states/chat.py          |  6 +++++-
 .../src/vedana_backoffice/states/eval.py          | 17 ++++++-----------
 3 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/libs/jims-core/src/jims_core/llms/llm_provider.py b/libs/jims-core/src/jims_core/llms/llm_provider.py
index a0e5dd46..4fcb1001 100644
--- a/libs/jims-core/src/jims_core/llms/llm_provider.py
+++ b/libs/jims-core/src/jims_core/llms/llm_provider.py
@@ -17,6 +17,8 @@ class LLMSettings(BaseSettings):
         extra="ignore",
     )
 
+    provider: str = "openai"  # "openai" / "openrouter"
+    model_api_key: str | None = None
     model: str = "gpt-4.1-nano"
     embeddings_model: str = "text-embedding-3-large"
     embeddings_dim: int = 1024
@@ -25,10 +27,6 @@ class LLMSettings(BaseSettings):
     embeddings_max_tokens_per_batch: int = 200000
 
     # passable api_keys; if None, defaults to env vars
-    model_api_key: str | None = None
-    embeddings_model_api_key: str | None = None
-
-    # openrouter_api_key: str | None = None
     openrouter_api_base_url: str = "https://openrouter.ai/api/v1"
@@ -76,10 +74,10 @@ class LLMProvider:
     def __init__(self, settings: LLMSettings | None = None) -> None:
         self._settings = settings or env_settings
-        self.model = self._settings.model
+        self.provider = self._settings.provider
+        self.model = f"{self.provider}/{self._settings.model}"
         self.model_api_key = self._settings.model_api_key
-        self.embeddings_model = self._settings.embeddings_model
-        self.embeddings_model_api_key = self._settings.embeddings_model_api_key
+        self.embeddings_model = f"{self.provider}/{self._settings.embeddings_model}"
         self.embeddings_dim = self._settings.embeddings_dim
         self.max_batch_size = self._settings.embeddings_max_batch_size
         self.max_tokens_per_batch = self._settings.embeddings_max_tokens_per_batch
@@ -135,7 +133,7 @@ async def create_embedding(self, text: str) -> list[float]:
             model=self.embeddings_model,
             input=[text],
             dimensions=self.embeddings_dim,
-            api_key=self.embeddings_model_api_key,
+            api_key=self.model_api_key,
         )
         self.observe_create_embedding(response)
         return response.data[0]["embedding"]
@@ -170,7 +168,7 @@ async def create_embeddings(self, texts: list[str]) -> list[list[float]]:
                 model=self.embeddings_model,
                 input=batch,
                 dimensions=self.embeddings_dim,
-                api_key=self.embeddings_model_api_key,
+                api_key=self.model_api_key,
             )
             self.observe_create_embedding(response)
             results.extend(d["embedding"] for d in response.data)
@@ -181,7 +179,7 @@ def create_embedding_sync(self, text: str) -> list[float]:
             model=self.embeddings_model,
             input=[text],
             dimensions=self.embeddings_dim,
-            api_key=self.embeddings_model_api_key,
+            api_key=self.model_api_key,
         )
         self.observe_create_embedding(response)
         return response.data[0]["embedding"]
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
index adeb4f7d..d6a265d5 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
@@ -272,7 +272,11 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
         api_key = resolve_api_key(self.provider)
 
-        ctx = await ctl.make_context(llm_settings=LLMSettings(model=self.model, model_api_key=api_key))
+        ctx = await ctl.make_context(llm_settings=LLMSettings(
+            provider=self.provider,
+            model=self.model,
+            model_api_key=api_key,
+        ))
 
         events = await ctl.run_pipeline_with_context(pipeline, ctx)
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index cdca80cf..7c8a2bde 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -1713,7 +1713,11 @@ async def _run_question_thread(
 
         api_key = resolve_api_key(self.provider)
 
-        ctx = await ctl.make_context(llm_settings=LLMSettings(model=resolved_model, model_api_key=api_key))
+        ctx = await ctl.make_context(llm_settings=LLMSettings(
+            provider=self.provider,
+            model=resolved_model,
+            model_api_key=api_key,
+        ))
         events = await ctl.run_pipeline_with_context(pipeline, ctx)
 
         answer: str = ""
@@ -1734,16 +1738,7 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca
         if not judge_prompt:
             return "fail", "Judge prompt not loaded", 0, 0.0
 
-        provider = LLMProvider()
-        resolved_judge_model = f"{self.provider}/{self.judge_model}"
-        try:
-            provider.set_model(resolved_judge_model)
-        except Exception:
-            logging.warning(f"Failed to set judge model {resolved_judge_model}")
-
-        api_key = resolve_api_key(self.provider)
-        if api_key:
-            provider.model_api_key = api_key
+        provider = LLMProvider(settings=LLMSettings(provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=resolve_api_key(self.provider)))
 
         class JudgeResult(BaseModel):
             test_status: str = Field(description="pass / fail")

From 181d64997d30e4f0e384a94775941476a5a7c6ea Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 14:40:42 +0800
Subject: [PATCH 05/21] fix runtime api_key setting

---
 .../src/vedana_backoffice/states/chat.py   | 11 +--
 .../src/vedana_backoffice/states/common.py | 89 ++++++-------------
 .../src/vedana_backoffice/states/eval.py   | 15 +---
 3 files changed, 33 insertions(+), 82 deletions(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
index d6a265d5..ff8ebd3c 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
@@ -9,7 +9,7 @@
 import reflex as rx
 from datapipe.compute import Catalog, run_pipeline
 from jims_core.thread.thread_controller import ThreadController
-from jims_core.llms.llm_provider import LLMSettings
+from jims_core.llms.llm_provider import LLMSettings, env_settings as llm_settings
 from jims_core.util import uuid7
 from vedana_core.settings import settings as core_settings
 from vedana_etl.app import app as etl_app
@@ -22,7 +22,6 @@
     DEBUG_MODE,
     datapipe_log_capture,
     DebugState,
-    resolve_api_key,
 )
 from vedana_backoffice.states.jims import ThreadViewState
 
@@ -270,7 +269,7 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[
         pipeline.model = f"{self.provider}/{self.model}"
         pipeline.enable_filtering = self.enable_dm_filtering
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
-        api_key = resolve_api_key(self.provider)
+        api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
 
         ctx = await ctl.make_context(llm_settings=LLMSettings(
             provider=self.provider,
             model=self.model,
             model_api_key=api_key,
         ))
@@ -317,12 +316,6 @@ def send(self):
         if not user_text:
             return
 
-        api_key = resolve_api_key(self.provider)
-        if not api_key:
-            if DEBUG_MODE:
-                yield DebugState.open_dialog()
-            return
-
         self._append_message("user", user_text)
         self.input_text = ""
         self.is_running = True
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
index 11856e47..9fd10b5a 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
@@ -4,7 +4,7 @@
 import io
 import logging
 import os
-from typing import Iterable, Optional
+from typing import Iterable
 
 import httpx
 import reflex as rx
@@ -20,53 +20,6 @@
 EVAL_ENABLED = bool(os.environ.get("GRIST_TEST_SET_DOC_ID"))
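 # Note: DEBUG_MODE below is the switch that patch 02 uses to gate the runtime
 # key overrides in resolve_api_key(); either VEDANA_BACKOFFICE_DEBUG or DEBUG
 # set to "true"/"1" enables it.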
 DEBUG_MODE = (os.environ.get("VEDANA_BACKOFFICE_DEBUG", "").lower() in ("true", "1")
               or os.environ.get("DEBUG", "").lower() in ("true", "1"))
 
-HAS_OPENAI_KEY = bool(os.environ.get("OPENAI_API_KEY"))
-HAS_OPENROUTER_KEY = bool(os.environ.get("OPENROUTER_API_KEY"))
-
-# Runtime-scoped API keys configured from the backoffice (debug UI).
-# Not persisted to environment variables.
-RUNTIME_OPENAI_API_KEY: Optional[str] = None
-RUNTIME_OPENROUTER_API_KEY: Optional[str] = None
-
-
-def set_runtime_api_keys(openai_api_key: Optional[str] = None, openrouter_api_key: Optional[str] = None) -> None:
-    """Update in-process API key overrides used by backoffice (chat/eval)."""
-    global RUNTIME_OPENAI_API_KEY, RUNTIME_OPENROUTER_API_KEY
-
-    if openai_api_key is not None:
-        openai_api_key = openai_api_key.strip()
-        if openai_api_key:
-            RUNTIME_OPENAI_API_KEY = openai_api_key
-
-    if openrouter_api_key is not None:
-        openrouter_api_key = openrouter_api_key.strip()
-        if openrouter_api_key:
-            RUNTIME_OPENROUTER_API_KEY = openrouter_api_key
-
-
-def resolve_api_key(provider: str, page_api_key: Optional[str] = None) -> Optional[str]:
-    """
-    Resolve the effective API key for a given provider.
-    """
-    provider = (provider or "openai").lower()
-
-    if page_api_key:
-        key = page_api_key.strip()
-        if key:
-            return key
-
-    if provider == "openrouter":
-        if DEBUG_MODE and RUNTIME_OPENROUTER_API_KEY:
-            return RUNTIME_OPENROUTER_API_KEY
-        if llm_settings.openrouter_api_key:
-            return llm_settings.openrouter_api_key
-        return None
-
-    if DEBUG_MODE and RUNTIME_OPENAI_API_KEY:
-        return RUNTIME_OPENAI_API_KEY
-    if llm_settings.model_api_key:
-        return llm_settings.model_api_key
-    return None
 
 
 def _filter_chat_capable_models(models: Iterable[dict]) -> list[str]:
@@ -134,45 +87,53 @@ class DebugState(rx.State):
     debug_mode: bool = DEBUG_MODE
 
     show_api_key_dialog: bool = False
     default_openai_api_key: str = os.environ.get("OPENAI_API_KEY", "")
     default_openrouter_api_key: str = os.environ.get("OPENROUTER_API_KEY", "")
-    openai_api_key: str = ""
-    openrouter_api_key: str = ""
+    runtime_openai_api_key: str = ""
+    runtime_openrouter_api_key: str = ""
     api_key_saved: bool = False
 
     @rx.var
     def openai_key_empty(self) -> bool:
-        return not self.openai_api_key and not os.environ.get("OPENAI_API_KEY")
+        return not self.runtime_openai_api_key and not self.default_openai_api_key
 
     @rx.var
     def openrouter_key_empty(self) -> bool:
-        return not self.openrouter_api_key and not os.environ.get("OPENROUTER_API_KEY")
+        return not self.runtime_openrouter_api_key and not self.default_openrouter_api_key
 
     def set_openai_api_key(self, value: str) -> None:
-        self.openai_api_key = value
+        self.runtime_openai_api_key = value
 
     def set_openrouter_api_key(self, value: str) -> None:
-        self.openrouter_api_key = value
+        self.runtime_openrouter_api_key = value
 
     def save_api_keys(self) -> None:
         if not self.debug_mode:
             return
-        set_runtime_api_keys(
-            openai_api_key=self.openai_api_key,
-            openrouter_api_key=self.openrouter_api_key,
-        )
-        # Keep defaults in sync so reopening the dialog reflects the last saved values.
-        self.default_openai_api_key = self.openai_api_key
-        self.default_openrouter_api_key = self.openrouter_api_key
-        self.api_key_saved = bool(self.openai_api_key or self.openrouter_api_key)
+        self.api_key_saved = bool(self.runtime_openai_api_key or self.runtime_openrouter_api_key)
         self.show_api_key_dialog = False
 
     def close_dialog(self) -> None:
         self.show_api_key_dialog = False
 
     def open_dialog(self) -> None:
-        self.openai_api_key = self.default_openai_api_key
-        self.openrouter_api_key = self.default_openrouter_api_key
+        self.openai_api_key = self.default_openai_api_key or self.runtime_openai_api_key
+        self.openrouter_api_key = self.default_openrouter_api_key or self.runtime_openrouter_api_key
         self.show_api_key_dialog = True
 
+    def resolve_api_key(self, provider: str) -> str | None:
+        if provider == "openai":
+            if self.runtime_openai_api_key:
+                return self.runtime_openai_api_key
+            if llm_settings.provider == "openai":
+                return llm_settings.model_api_key
+            if self.default_openai_api_key:
+                return self.default_openai_api_key
+        elif provider == "openrouter":
+            if self.runtime_openrouter_api_key:
+                return self.runtime_openrouter_api_key
+            if llm_settings.provider == "openrouter":
+                return llm_settings.model_api_key
+            if self.default_openrouter_api_key:
+                return self.default_openrouter_api_key
+        return None
+
 
 class TelegramBotState(rx.State):
     """State for Telegram bot information."""
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index 7c8a2bde..b6656493 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -14,7 +14,7 @@
 import sqlalchemy as sa
 from datapipe.compute import run_steps
 from jims_core.db import ThreadDB, ThreadEventDB
-from jims_core.llms.llm_provider import LLMProvider, LLMSettings
+from jims_core.llms.llm_provider import LLMProvider, LLMSettings, env_settings as llm_settings
 from jims_core.thread.thread_controller import ThreadController
 from jims_core.util import uuid7
 from pydantic import BaseModel, Field
@@ -26,7 +26,6 @@
     load_openrouter_models,
     datapipe_log_capture,
     DebugState,
-    resolve_api_key,
     DEBUG_MODE,
 )
 from vedana_backoffice.util import safe_render_value
@@ -1710,8 +1709,7 @@ async def _run_question_thread(
         pipeline.model = resolved_model
         pipeline.enable_filtering = self.enable_dm_filtering
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
-
-        api_key = resolve_api_key(self.provider)
+        api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
 
         ctx = await ctl.make_context(llm_settings=LLMSettings(
             provider=self.provider,
             model=resolved_model,
             model_api_key=api_key,
         ))
         events = await ctl.run_pipeline_with_context(pipeline, ctx)
@@ -1737,8 +1735,9 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca
         judge_prompt = self.judge_prompt
         if not judge_prompt:
             return "fail", "Judge prompt not loaded", 0, 0.0
+        api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
 
-        provider = LLMProvider(settings=LLMSettings(provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=resolve_api_key(self.provider)))
+        provider = LLMProvider(settings=LLMSettings(provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=api_key))
 
         class JudgeResult(BaseModel):
             test_status: str = Field(description="pass / fail")
@@ -1811,9 +1807,6 @@ def run_selected_tests(self):
             self.error_message = "Judge prompt not loaded. Refresh judge config first."
             return
 
-        api_key = resolve_api_key(self.provider)
-        if not api_key:
-            if DEBUG_MODE:
-                yield DebugState.open_dialog()
-            return
-
         test_run_name = self.test_run_name.strip() or ""
 
         # Initialize run state

From 3c609ed69ec1568335b1c57af2ccde50a7471699 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 16:05:57 +0800
Subject: [PATCH 06/21] bind api key dialog to runtime state and await debug state

---
 libs/jims-core/src/jims_core/llms/llm_provider.py |  2 +-
 .../src/vedana_backoffice/states/chat.py          | 38 +++++++++++--------
 .../src/vedana_backoffice/states/common.py        |  8 +---
 .../src/vedana_backoffice/ui.py                   |  2 +
 4 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/libs/jims-core/src/jims_core/llms/llm_provider.py b/libs/jims-core/src/jims_core/llms/llm_provider.py
index 4fcb1001..69ecf410 100644
--- a/libs/jims-core/src/jims_core/llms/llm_provider.py
+++ b/libs/jims-core/src/jims_core/llms/llm_provider.py
@@ -192,7 +192,7 @@ def create_embeddings_sync(self, texts: list[str]) -> list[list[float]]:
                 model=self.embeddings_model,
                 input=batch,
                 dimensions=self.embeddings_dim,
-                api_key=self.embeddings_model_api_key,
+                api_key=self.model_api_key,
             )
             self.observe_create_embedding(response)
             results.extend(d["embedding"] for d in response.data)
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
index ff8ebd3c..9a367de1 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
@@ -8,20 +8,21 @@
 import orjson as json
 import reflex as rx
 from datapipe.compute import Catalog, run_pipeline
+from jims_core.llms.llm_provider import LLMSettings
+from jims_core.llms.llm_provider import env_settings as llm_settings
 from jims_core.thread.thread_controller import ThreadController
-from jims_core.llms.llm_provider import LLMSettings, env_settings as llm_settings
 from jims_core.util import uuid7
 from vedana_core.settings import settings as core_settings
 from vedana_etl.app import app as etl_app
 from vedana_etl.pipeline import get_data_model_pipeline
 
 from vedana_backoffice.states.common import (
+    DEBUG_MODE,
+    DebugState,
     MemLogger,
+    datapipe_log_capture,
     get_vedana_app,
     load_openrouter_models,
-    DEBUG_MODE,
-    datapipe_log_capture,
-    DebugState,
 )
 from vedana_backoffice.states.jims import ThreadViewState
 
@@ -51,9 +52,7 @@ class ChatState(rx.State):
         "gpt-4o-mini",
         "o4-mini",
     )
-    openai_models: list[str] = list(
-        set(list(_default_models) + [core_settings.model, core_settings.filter_model])
-    )
+    openai_models: list[str] = list(set(list(_default_models) + [core_settings.model, core_settings.filter_model]))
     openrouter_models: list[str] = []
     openrouter_models_loaded: bool = False
     model_selection_allowed: bool = DEBUG_MODE
@@ -269,14 +268,21 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[
         pipeline.model = f"{self.provider}/{self.model}"
         pipeline.enable_filtering = self.enable_dm_filtering
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
-        api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
-
-        ctx = await ctl.make_context(llm_settings=LLMSettings(
-            provider=self.provider,
-            model=self.model,
-            model_api_key=api_key,
-        ))
-
+
+        if DEBUG_MODE:
+            debug_state = await self.get_state(DebugState)
+            api_key = debug_state.resolve_api_key(self.provider)
+        else:
+            api_key = llm_settings.model_api_key
+
+        ctx = await ctl.make_context(
+            llm_settings=LLMSettings(
+                provider=self.provider,
+                model=self.model,
+                model_api_key=api_key,
+            )
+        )
 
         events = await ctl.run_pipeline_with_context(pipeline, ctx)
 
         answer: str = ""
@@ -355,9 +361,11 @@ def reload_data_model(self):
     @rx.event(background=True)  # type: ignore[operator]
     async def reload_data_model_background(self):
         try:
+
             def _run_dm_pipeline():
                 with datapipe_log_capture():
                     run_pipeline(etl_app.ds, Catalog({}), get_data_model_pipeline())
+
             await asyncio.to_thread(_run_dm_pipeline)
             async with self:
                 va = await get_vedana_app()
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
index 9fd10b5a..df84940c 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py
@@ -1,14 +1,14 @@
 import asyncio
-from async_lru import alru_cache
-from contextlib import contextmanager
 import io
 import logging
 import os
+from contextlib import contextmanager
 from typing import Iterable
 
 import httpx
 import reflex as rx
 import requests
+from async_lru import alru_cache
 from jims_core.llms.llm_provider import env_settings as llm_settings
 from vedana_core.app import VedanaApp, make_vedana_app
@@ -155,8 +155,6 @@ def set_openrouter_api_key(self, value: str) -> None:
 
     def save_api_keys(self) -> None:
         if not self.debug_mode:
             return
-        self.default_openai_api_key = self.openai_api_key
-        self.default_openrouter_api_key = self.openrouter_api_key
         self.api_key_saved = bool(self.runtime_openai_api_key or self.runtime_openrouter_api_key)
         self.show_api_key_dialog = False
 
     def close_dialog(self) -> None:
         self.show_api_key_dialog = False
 
     def open_dialog(self) -> None:
-        self.openai_api_key = self.default_openai_api_key or self.runtime_openai_api_key
-        self.openrouter_api_key = self.default_openrouter_api_key or self.runtime_openrouter_api_key
         self.show_api_key_dialog = True
 
     def resolve_api_key(self, provider: str) -> str | None:
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/ui.py b/libs/vedana-backoffice/src/vedana_backoffice/ui.py
index 24c08cb1..24c5cd09 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/ui.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/ui.py
@@ -79,6 +79,7 @@ def api_key_setup_dialog() -> rx.Component:
                     rx.text("OpenAI API Key", font_size="2", font_weight="500"),
                     rx.input(
                         placeholder="sk-...",
+                        value=DebugState.runtime_openai_api_key,
                         on_change=DebugState.set_openai_api_key,
                         type="password",
                         width="100%",
@@ -96,6 +97,7 @@
                     rx.text("OpenRouter API Key", font_size="2", font_weight="500"),
                     rx.input(
                         placeholder="sk-or-...",
+                        value=DebugState.runtime_openrouter_api_key,
                         on_change=DebugState.set_openrouter_api_key,
                         type="password",
                         width="100%",

From 6151128398239115f0ee6491e70bdd7dd35bf01a Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 20:57:24 +0800
Subject: [PATCH 07/21] raise when no api key can be resolved in debug mode

---
 .../src/vedana_backoffice/states/chat.py |  2 ++
 .../src/vedana_backoffice/states/eval.py | 27 ++++++++++++++-----
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
index 9a367de1..cfc099dc 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py
@@ -272,6 +272,8 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[
 
         if DEBUG_MODE:
             debug_state = await self.get_state(DebugState)
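             # self.get_state(DebugState) is the Reflex way to read another state
             # object from an event handler; resolving the key per request means a
             # key just saved in the debug dialog takes effect immediately.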
             api_key = debug_state.resolve_api_key(self.provider)
+            if not api_key:
+                raise ValueError(f"API key not found for {self.provider}/{self.model}")
         else:
             api_key = llm_settings.model_api_key
 
diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index b6656493..5e4ae23a 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -1709,13 +1709,22 @@ async def _run_question_thread(
         pipeline.model = resolved_model
         pipeline.enable_filtering = self.enable_dm_filtering
         pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}"
-        api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
 
-        ctx = await ctl.make_context(llm_settings=LLMSettings(
-            provider=self.provider,
-            model=resolved_model,
-            model_api_key=api_key,
-        ))
+        if DEBUG_MODE:
+            debug_state = await self.get_state(DebugState)
+            api_key = debug_state.resolve_api_key(self.provider)
+            if not api_key:
+                raise ValueError(f"API key not found for {self.provider}/{resolved_model}")
+        else:
+            api_key = llm_settings.model_api_key
+
+        ctx = await ctl.make_context(
+            llm_settings=LLMSettings(
+                provider=self.provider,
+                model=resolved_model,
+                model_api_key=api_key,
+            )
+        )
         events = await ctl.run_pipeline_with_context(pipeline, ctx)
 
         answer: str = ""
@@ -1737,7 +1746,11 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca
 
         api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
 
-        provider = LLMProvider(settings=LLMSettings(provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=api_key))
+        provider = LLMProvider(
+            settings=LLMSettings(
+                provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=api_key
+            )
+        )
 
         class JudgeResult(BaseModel):
             test_status: str = Field(description="pass / fail")

From 9596a83b3c8d30d76d91958a2d1c941207f1cdc1 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 21:17:22 +0800
Subject: [PATCH 08/21] resolve judge api key via debug state

---
 .../src/vedana_backoffice/states/eval.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index 5e4ae23a..c7f5a13f 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -1744,7 +1744,14 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca
         judge_prompt = self.judge_prompt
         if not judge_prompt:
             return "fail", "Judge prompt not loaded", 0, 0.0
-        api_key = llm_settings.model_api_key if not DEBUG_MODE else DebugState.resolve_api_key(self.provider)
+
+        if DEBUG_MODE:
+            debug_state = await self.get_state(DebugState)
+            api_key = debug_state.resolve_api_key(self.provider)
+            if not api_key:
+                raise ValueError(f"API key not found for {self.provider}/{self.judge_model}")
+        else:
+            api_key = llm_settings.model_api_key
 
         provider = LLMProvider(
             settings=LLMSettings(
                 provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=api_key
             )
         )

From e165509f1ef332b17c922cbd05abb6a1fb8907e9 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 21:25:18 +0800
Subject: [PATCH 09/21] add gds loading on eval page load

---
 libs/vedana-backoffice/src/vedana_backoffice/states/eval.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index c7f5a13f..7083d40a 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -1987,6 +1987,7 @@ def load_eval_data(self):
         self.tests_page = 0  # Reset to first page
         yield
         yield EvalState.load_eval_data_background()
+        yield EvalState.refresh_golden_dataset_background()
 
     @rx.event(background=True)  # type: ignore[operator]
     async def load_eval_data_background(self):

From bf69d15b020e5ddc38f20a2ffc0683088e1534dd Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Fri, 6 Mar 2026 21:51:09 +0800
Subject: [PATCH 10/21] add eval judge default prompt

---
 .../src/vedana_backoffice/states/eval.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
index 7083d40a..e8c61325 100644
--- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
+++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py
@@ -173,6 +173,18 @@ class CompareRow:
 }
 
 
+eval_judge_prompt_template = """\
+You are a strict evaluation judge. Compare the model's answer with the golden answer and the expected retrieval context.
+Consider whether the model's answer is factually aligned and sufficiently complete.
+Use the provided technical info (retrieval queries) only as hints for whether the context seems adequate.
+Return a JSON object with fields: test_status in {'pass','fail'}, comment, errors.
+
+In comments return answer scoring from 1 to 10, where:
+1 – totally wrong answer
+10 – totally correct answer
+"""
+
+
 class EvalState(rx.State):
     """State holder for evaluation workflow."""
 
@@ -577,7 +589,7 @@ async def _load_judge_config(self) -> None:
         vedana_app = await get_vedana_app()
         dm_pt = await vedana_app.data_model.prompt_templates()
 
-        judge_prompt = dm_pt.get("eval_judge_prompt")
+        judge_prompt = dm_pt.get("eval_judge_prompt", eval_judge_prompt_template)
 
         if judge_prompt:
             text_b = bytearray(judge_prompt, "utf-8")
@@ -1824,7 +1836,7 @@ def run_selected_tests(self):
-            self.error_message = "Judge prompt not loaded. Refresh judge config first."
+            self.error_message = "Judge prompt not loaded. Refresh data model first."
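+            # With eval_judge_prompt_template now the dm_pt.get() fallback in
+            # _load_judge_config(), this branch should only be reachable before
+            # the first data-model load.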
             return
 
         test_run_name = self.test_run_name.strip() or ""
 
         # Initialize run state

From 293d2a4c0168cc0a2d5961cd7379275fd6b66010 Mon Sep 17 00:00:00 2001
From: Timur Sheidaev
Date: Wed, 11 Mar 2026 20:28:34 +0700
Subject: [PATCH 11/21] WIP: litellm_api_key

---
 .../src/jims_core/llms/llm_provider.py      |  18 +-
 .../src/jims_core/thread/thread_context.py  |   2 +-
 .../src/vedana_backoffice/pages/chat.py     |  25 +--
 .../src/vedana_backoffice/pages/eval.py     |  29 +---
 .../src/vedana_backoffice/states/chat.py    | 124 ++++----------
 .../src/vedana_backoffice/states/common.py  | 124 ++++++--------
 .../src/vedana_backoffice/states/etl.py     |   2 +-
 .../src/vedana_backoffice/states/eval.py    | 154 +++++-------------
 .../src/vedana_backoffice/ui.py             |  40 ++---
 9 files changed, 163 insertions(+), 355 deletions(-)

diff --git a/libs/jims-core/src/jims_core/llms/llm_provider.py b/libs/jims-core/src/jims_core/llms/llm_provider.py
index 69ecf410..61299da2 100644
--- a/libs/jims-core/src/jims_core/llms/llm_provider.py
+++ b/libs/jims-core/src/jims_core/llms/llm_provider.py
@@ -17,8 +17,6 @@ class LLMSettings(BaseSettings):
         extra="ignore",
     )
 
-    provider: str = "openai"  # "openai" / "openrouter"
-    model_api_key: str | None = None
     model: str = "gpt-4.1-nano"
     embeddings_model: str = "text-embedding-3-large"
     embeddings_dim: int = 1024
@@ -26,9 +24,6 @@ class LLMSettings(BaseSettings):
     embeddings_max_batch_size: int = 2048
     embeddings_max_tokens_per_batch: int = 200000
 
-    # passable api_keys; if None, defaults to env vars
-    openrouter_api_base_url: str = "https://openrouter.ai/api/v1"
-
 
 env_settings = LLMSettings()  # type: ignore
@@ -74,10 +69,8 @@ class LLMProvider:
     def __init__(self, settings: LLMSettings | None = None) -> None:
         self._settings = settings or env_settings
-        self.provider = self._settings.provider
-        self.model = f"{self.provider}/{self._settings.model}"
-        self.model_api_key = self._settings.model_api_key
-        self.embeddings_model = f"{self.provider}/{self._settings.embeddings_model}"
+        self.model = self._settings.model
+        self.embeddings_model = self._settings.embeddings_model
         self.embeddings_dim = self._settings.embeddings_dim
         self.max_batch_size = self._settings.embeddings_max_batch_size
         self.max_tokens_per_batch = self._settings.embeddings_max_tokens_per_batch
@@ -133,7 +126,6 @@ async def create_embedding(self, text: str) -> list[float]:
             model=self.embeddings_model,
             input=[text],
             dimensions=self.embeddings_dim,
-            api_key=self.model_api_key,
         )
         self.observe_create_embedding(response)
         return response.data[0]["embedding"]
@@ -168,7 +160,6 @@ async def create_embeddings(self, texts: list[str]) -> list[list[float]]:
                 model=self.embeddings_model,
                 input=batch,
                 dimensions=self.embeddings_dim,
-                api_key=self.model_api_key,
             )
             self.observe_create_embedding(response)
             results.extend(d["embedding"] for d in response.data)
@@ -179,7 +170,6 @@ def create_embedding_sync(self, text: str) -> list[float]:
             model=self.embeddings_model,
             input=[text],
             dimensions=self.embeddings_dim,
-            api_key=self.model_api_key,
        )
         self.observe_create_embedding(response)
         return response.data[0]["embedding"]
@@ -192,7 +182,6 @@ def create_embeddings_sync(self, texts: list[str]) -> list[list[float]]:
                 model=self.embeddings_model,
                 input=batch,
                 dimensions=self.embeddings_dim,
-                api_key=self.model_api_key,
             )
             self.observe_create_embedding(response)
             results.extend(d["embedding"] for d in response.data)
@@ -208,7 +197,6 @@ async def chat_completion_structured[T: BaseModel](
             model=self.model,
             messages=list(messages),
             response_format=response_format,
-            api_key=self.model_api_key,
         )
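 
         # Note on these removals: with no explicit api_key argument, litellm falls
         # back to its standard resolution -- provider env vars such as
         # OPENAI_API_KEY, or the module-level litellm.api_key that save_api_key()
         # in states/common.py sets later in this patch.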
assert isinstance(completion, litellm.ModelResponse) @@ -232,7 +220,6 @@ async def chat_completion_plain( model=self.model, messages=list(messages), caching=use_cache, - api_key=self.model_api_key, ) assert isinstance(completion, litellm.ModelResponse) @@ -256,7 +243,6 @@ async def chat_completion_with_tools( model=self.model, messages=list(messages), tools=tools, - api_key=self.model_api_key, ) assert isinstance(completion, litellm.ModelResponse) diff --git a/libs/jims-core/src/jims_core/thread/thread_context.py b/libs/jims-core/src/jims_core/thread/thread_context.py index 244bb17b..3cf589d5 100644 --- a/libs/jims-core/src/jims_core/thread/thread_context.py +++ b/libs/jims-core/src/jims_core/thread/thread_context.py @@ -111,7 +111,7 @@ def context(self, conversation_length: int = 20) -> list[CommunicationEvent]: result.append(CommunicationEvent(**event.event_data)) comm_counter += 1 if event.event_type.startswith("context."): - result.append(CommunicationEvent(**event.event_data)) + result.append(CommunicationEvent(**event.event_data)) if comm_counter > conversation_length: break diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py index 20f771ee..b21aa1ad 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py @@ -2,7 +2,7 @@ from vedana_backoffice.components.ui_chat import render_message_bubble from vedana_backoffice.states.chat import ChatState -from vedana_backoffice.states.common import AppVersionState +from vedana_backoffice.states.common import AppVersionState, DebugState from vedana_backoffice.ui import app_header @@ -111,7 +111,7 @@ def page() -> rx.Component: rx.cond( ChatState.enable_dm_filtering & AppVersionState.debug_mode, rx.select( - items=ChatState.dm_filter_available_models, + items=DebugState.available_models, value=ChatState.dm_filter_model, on_change=ChatState.set_dm_filter_model, width="16em", @@ -157,21 +157,12 @@ def page() -> rx.Component: ), rx.cond( ChatState.model_selection_allowed, - rx.hstack( - rx.select( - items=["openai", "openrouter"], - value=ChatState.provider, - on_change=ChatState.set_provider, - width="10em", - placeholder="Provider", - ), - rx.select( - items=ChatState.available_models, - value=ChatState.model, - on_change=ChatState.set_model, - width="16em", - placeholder="Select model", - ), + rx.select( + items=DebugState.available_models, + value=ChatState.model, + on_change=ChatState.set_model, + width="20em", + placeholder="Select model", ), rx.badge(ChatState.model, variant="surface", color_scheme="gray", size="3"), ), diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py index d45b2c3c..3df69a48 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py @@ -1,6 +1,6 @@ import reflex as rx -from vedana_backoffice.states.common import AppVersionState +from vedana_backoffice.states.common import AppVersionState, DebugState from vedana_backoffice.states.eval import EvalState, RunSummary from vedana_backoffice.states.chat import ChatState from vedana_backoffice.ui import app_header @@ -175,7 +175,7 @@ def _judge_card() -> rx.Component: rx.cond( AppVersionState.debug_mode, rx.select( - items=EvalState.judge_available_models, + items=DebugState.available_models, value=EvalState.judge_model, on_change=EvalState.set_judge_model, 
width="100%", @@ -235,25 +235,12 @@ def _pipeline_card() -> rx.Component: rx.text("Pipeline model", weight="medium", width="100%"), rx.cond( ChatState.model_selection_allowed, - rx.hstack( - rx.select( - items=["openai", "openrouter"], - value=EvalState.provider, - on_change=EvalState.set_provider, - width="100%", - placeholder="Provider", - ), - rx.select( - items=EvalState.available_models_view, - value=EvalState.pipeline_model, - on_change=EvalState.set_pipeline_model, - width="100%", - placeholder="Select model", - ), - spacing="2", - align="center", - wrap="wrap", + rx.select( + items=DebugState.available_models, + value=EvalState.pipeline_model, + on_change=EvalState.set_pipeline_model, width="100%", + placeholder="Select model", ), rx.text(EvalState.pipeline_model, size="3"), ), @@ -270,7 +257,7 @@ def _pipeline_card() -> rx.Component: rx.cond( AppVersionState.debug_mode, rx.select( - items=EvalState.dm_filter_available_models, + items=DebugState.available_models, value=EvalState.dm_filter_model, on_change=EvalState.set_dm_filter_model, width="100%", diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index cfc099dc..aeb74d74 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -9,7 +9,6 @@ import reflex as rx from datapipe.compute import Catalog, run_pipeline from jims_core.llms.llm_provider import LLMSettings -from jims_core.llms.llm_provider import env_settings as llm_settings from jims_core.thread.thread_controller import ThreadController from jims_core.util import uuid7 from vedana_core.settings import settings as core_settings @@ -22,7 +21,6 @@ MemLogger, datapipe_log_capture, get_vedana_app, - load_openrouter_models, ) from vedana_backoffice.states.jims import ThreadViewState @@ -36,104 +34,52 @@ class ChatState(rx.State): chat_thread_id: str = "" data_model_text: str = "" is_refreshing_dm: bool = False - provider: str = "openai" # default llm provider model: str = core_settings.model - _default_models: tuple[str, ...] 
= ( - "gpt-5.1-chat-latest", - "gpt-5.1", - "gpt-5-chat-latest", - "gpt-5", - "gpt-5-mini", - "gpt-5-nano", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4o", - "gpt-4o-mini", - "o4-mini", - ) - openai_models: list[str] = list(set(list(_default_models) + [core_settings.model, core_settings.filter_model])) - openrouter_models: list[str] = [] - openrouter_models_loaded: bool = False + default_models: list[str] = list({core_settings.model, core_settings.filter_model}) + available_models: list[str] = default_models + model_selection_allowed: bool = DEBUG_MODE enable_dm_filtering: bool = core_settings.enable_dm_filtering dm_filter_model: str = core_settings.filter_model - def _models_for_provider(self, provider: str) -> list[str]: - """Return the list of model names for the given provider (openai or openrouter).""" - if provider == "openrouter": - if self.openrouter_models: - return list(self.openrouter_models) - if self.openrouter_models_loaded: - return list(self.openai_models) - return list(self.openai_models) - return list(self.openai_models) - - @rx.var - def available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - - @rx.var - def dm_filter_available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - async def mount(self): - """Load OpenRouter models (fetches on first call, cached thereafter).""" - self.openrouter_models = await load_openrouter_models() - self.openrouter_models_loaded = True - self._sync_available_models() - self._sync_dm_filter_model() + if DEBUG_MODE: + yield DebugState.load_available_models() + + @rx.event(background=True) # type: ignore[operator] + async def refresh_model_list(self) -> None: + async with self: + self.available_models = await self.get_var_value(DebugState.available_models) + self._sync_model() + self._sync_dm_filter_model() def set_input(self, value: str) -> None: self.input_text = value def set_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) - if value in models: - self.model = value + self.model = value def set_enable_dm_filtering(self, value: bool) -> None: self.enable_dm_filtering = value - def set_provider(self, value: str) -> None: - self.provider = value - if self.provider == "openai": # reset defaults when changing back - self.model = core_settings.model - self.dm_filter_model = core_settings.filter_model - self._sync_available_models() - self._sync_dm_filter_model() - def set_dm_filter_model(self, value: str) -> None: - if value in self.dm_filter_available_models: - self.dm_filter_model = value - - def _sync_available_models(self) -> None: - """ - Recompute available_models based on selected provider, and realign - the selected model if it is no longer valid. 
- """ - - if self.provider == "openrouter": - models = self.openrouter_models - if not models: - if self.openrouter_models_loaded: - self.provider = "openai" - models = self.openai_models - else: - models = self.available_models or self.openai_models - else: - models = self.openai_models - - self.available_models = list(models) + self.dm_filter_model = value + def _sync_model(self) -> None: + """Realign selected model when model list changes.""" if self.model not in self.available_models and self.available_models: - self.model = self.available_models[0] + if core_settings.model in self.available_models: + self.model = core_settings.model + else: + self.model = self.available_models[0] def _sync_dm_filter_model(self) -> None: - """Realign selected filter model when provider or model list changes.""" - models = self._models_for_provider(self.provider) - if self.dm_filter_model not in models and models: - self.dm_filter_model = models[0] + """Realign selected filter model when model list changes.""" + if self.dm_filter_model not in self.available_models and self.available_models: + if core_settings.filter_model in self.available_models: + self.dm_filter_model = core_settings.filter_model + else: + self.dm_filter_model = self.available_models[0] def toggle_details_by_id(self, message_id: str) -> None: for idx, m in enumerate(self.messages): @@ -265,24 +211,12 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[ pipeline = vedana_app.pipeline pipeline.logger = mem_logger - pipeline.model = f"{self.provider}/{self.model}" + pipeline.model = self.model pipeline.enable_filtering = self.enable_dm_filtering - pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}" - - if DEBUG_MODE: - debug_state = await self.get_state(DebugState) - api_key = debug_state.resolve_api_key(self.provider) - if not api_key: - raise ValueError(f"API key not found for {self.provider}/{self.model}") - else: - api_key = llm_settings.model_api_key + pipeline.filter_model = self.dm_filter_model ctx = await ctl.make_context( - llm_settings=LLMSettings( - provider=self.provider, - model=self.model, - model_api_key=api_key, - ) + llm_settings=LLMSettings(model=self.model) ) events = await ctl.run_pipeline_with_context(pipeline, ctx) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py index df84940c..fab85df1 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py @@ -3,13 +3,10 @@ import logging import os from contextlib import contextmanager -from typing import Iterable -import httpx +import litellm import reflex as rx import requests -from async_lru import alru_cache -from jims_core.llms.llm_provider import env_settings as llm_settings from vedana_core.app import VedanaApp, make_vedana_app vedana_app: VedanaApp | None = None @@ -22,41 +19,22 @@ or os.environ.get("DEBUG", "").lower() in ("true", "1")) -def _filter_chat_capable_models(models: Iterable[dict]) -> list[str]: - """Filter models that support text chat with tool calls.""" - result: list[str] = [] - for m in models: - model_id = str(m.get("id", "")).strip() - if not model_id: - continue - - architecture = m.get("architecture", {}) - has_chat = bool( - architecture - and "text" in architecture.get("input_modalities", []) - and "text" in architecture.get("output_modalities", []) +async def load_litellm_models( + *, + provider: str | None = None, + 
check_provider_endpoint: bool = False, +) -> list[str]: + def _fetch() -> list[str]: + raw = litellm.get_valid_models( + custom_llm_provider=provider, + check_provider_endpoint=check_provider_endpoint, ) - has_tools = "tools" in m.get("supported_parameters", []) - - if has_chat and has_tools: - result.append(model_id) + result: list[str] = [ + model if (provider is None or model.startswith(provider)) else f"{provider}/{model}" for model in raw + ] + return sorted(set(result)) - return result - - -@alru_cache -async def load_openrouter_models() -> list[str]: - if not DEBUG_MODE: - return [] - try: - async with httpx.AsyncClient(timeout=15) as client: - resp = await client.get(f"{llm_settings.openrouter_api_base_url}/models") - resp.raise_for_status() - models = resp.json().get("data", []) - return sorted(_filter_chat_capable_models(models)) - except Exception as exc: - logging.warning(f"Failed to fetch OpenRouter models: {exc}") - return [] + return await asyncio.to_thread(_fetch) # type: ignore[return-value] async def get_vedana_app(): @@ -68,6 +46,7 @@ async def get_vedana_app(): class DatapipeStepError(RuntimeError): """Raised when a datapipe step fails without propagating the exception.""" + pass @@ -132,31 +111,53 @@ class DebugState(rx.State): debug_mode: bool = DEBUG_MODE show_api_key_dialog: bool = False - default_openai_api_key: str = os.environ.get("OPENAI_API_KEY", "") - default_openrouter_api_key: str = os.environ.get("OPENROUTER_API_KEY", "") - runtime_openai_api_key: str = "" - runtime_openrouter_api_key: str = "" + runtime_model_api_key: str = "" + runtime_model_provider: str | None = None api_key_saved: bool = False + available_models: list[str] = [] @rx.var - def openai_key_empty(self) -> bool: - return not self.runtime_openai_api_key and not self.default_openai_api_key + def provider_options(self) -> list[str]: + return ["openai", "openrouter", "anthropic", "cohere", "xai"] - @rx.var - def openrouter_key_empty(self) -> bool: - return not self.runtime_openrouter_api_key and not self.default_openrouter_api_key + @rx.event(background=True) # type: ignore[operator] + async def load_available_models(self) -> None: + if not self.debug_mode: + return + models = await load_litellm_models( + provider=self.runtime_model_provider, + check_provider_endpoint=True if self.runtime_model_provider else False, + ) + async with self: + self.available_models = models + from vedana_backoffice.states.chat import ChatState + from vedana_backoffice.states.eval import EvalState - def set_openai_api_key(self, value: str) -> None: - self.runtime_openai_api_key = value + yield ChatState.refresh_model_list() + yield EvalState.refresh_model_list() - def set_openrouter_api_key(self, value: str) -> None: - self.runtime_openrouter_api_key = value + def set_model_api_key(self, value: str) -> None: + self.runtime_model_api_key = value - def save_api_keys(self) -> None: + def set_model_provider(self, value: str) -> None: + self.runtime_model_provider = value + + def save_api_key(self): if not self.debug_mode: return - self.api_key_saved = bool(self.runtime_openai_api_key or self.runtime_openrouter_api_key) - self.show_api_key_dialog = False + key = self.runtime_model_api_key.strip() + if key: + litellm.api_key = key + self.api_key_saved = True + self.show_api_key_dialog = False + else: + litellm.api_key = None + self.api_key_saved = False + self.show_api_key_dialog = False + self.available_models = [] + self.runtime_model_provider = None + # Background refresh will repopulate available_models and notify 
Chat/Eval. + yield DebugState.load_available_models() def close_dialog(self) -> None: self.show_api_key_dialog = False @@ -164,23 +165,6 @@ def close_dialog(self) -> None: def open_dialog(self) -> None: self.show_api_key_dialog = True - def resolve_api_key(self, provider: str) -> str | None: - if provider == "openai": - if self.runtime_openai_api_key: - return self.runtime_openai_api_key - if llm_settings.provider == "openai": - return llm_settings.model_api_key - if self.default_openai_api_key: - return self.default_openai_api_key - elif provider == "openrouter": - if self.runtime_openrouter_api_key: - return self.runtime_openrouter_api_key - if llm_settings.provider == "openrouter": - return llm_settings.model_api_key - if self.default_openrouter_api_key: - return self.default_openrouter_api_key - return None - class TelegramBotState(rx.State): """State for Telegram bot information.""" diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py b/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py index 4e0262ea..2728c608 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py @@ -1273,7 +1273,7 @@ def _load_preview_page(self) -> None: @staticmethod def _preview_select_exprs(columns: list[str], table_alias: str | None = None) -> str: """Build SELECT expressions replacing heavy embedding vectors with a placeholder.""" - prefix = f'{table_alias}.' if table_alias else "" + prefix = f"{table_alias}." if table_alias else "" exprs: list[str] = [] for col in columns: if str(col) == "embedding": diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index e8c61325..fbadcf64 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -14,7 +14,7 @@ import sqlalchemy as sa from datapipe.compute import run_steps from jims_core.db import ThreadDB, ThreadEventDB -from jims_core.llms.llm_provider import LLMProvider, LLMSettings, env_settings as llm_settings +from jims_core.llms.llm_provider import LLMProvider, LLMSettings from jims_core.thread.thread_controller import ThreadController from jims_core.util import uuid7 from pydantic import BaseModel, Field @@ -22,11 +22,9 @@ from vedana_etl.app import app as etl_app from vedana_backoffice.states.common import ( - get_vedana_app, - load_openrouter_models, - datapipe_log_capture, DebugState, - DEBUG_MODE, + datapipe_log_capture, + get_vedana_app, ) from vedana_backoffice.util import safe_render_value @@ -196,52 +194,16 @@ class EvalState(rx.State): selected_question_ids: list[str] = [] test_run_name: str = "" selected_scenario: str = "all" # Filter by scenario - judge_model: str = core_settings.judge_model judge_prompt_id: str = "" judge_prompt: str = "" - provider: str = "openai" pipeline_model: str = core_settings.model embeddings_model: str = core_settings.embeddings_model embeddings_dim: int = core_settings.embeddings_dim enable_dm_filtering: bool = core_settings.enable_dm_filtering - _default_models: tuple[str, ...] 
= ( - "gpt-5.1-chat-latest", - "gpt-5.1", - "gpt-5-chat-latest", - "gpt-5", - "gpt-5-mini", - "gpt-5-nano", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4o", - "gpt-4o-mini", - "o4-mini", - ) - openai_models: list[str] = list( - set([core_settings.model, core_settings.filter_model, core_settings.judge_model] + list(_default_models)) - ) - openrouter_models: list[str] = [] + available_models: list[str] = list({core_settings.model, core_settings.filter_model, core_settings.judge_model}) dm_filter_model: str = core_settings.filter_model dm_id: str = "" - - def _models_for_provider(self, provider: str) -> list[str]: - """Return the list of model names for the given provider (openai or openrouter).""" - if provider == "openrouter": - return list(self.openrouter_models) - return list(self.openai_models) - - @rx.var - def available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - - @rx.var - def dm_filter_available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - - @rx.var - def judge_available_models(self) -> list[str]: - return self._models_for_provider(self.provider) + judge_model: str = core_settings.judge_model tests_rows: list[dict[str, Any]] = [] tests_cost_total: float = 0.0 @@ -402,17 +364,13 @@ def can_compare_runs(self) -> bool: and not self.compare_loading ) - @rx.var - def available_models_view(self) -> list[str]: - return self.available_models - @rx.var def dm_filter_model_display(self) -> str: - return f"{self.provider}/{self.dm_filter_model}" + return self.dm_filter_model @rx.var def judge_model_display(self) -> str: - return f"{self.provider}/{self.judge_model}" + return self.judge_model def toggle_question_selection(self, question: str, checked: bool) -> None: question = str(question or "").strip() @@ -476,7 +434,7 @@ def set_test_run_name(self, value: str) -> None: self.test_run_name = str(value or "").strip() def set_pipeline_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) + models = self._available_models() if value in models: self.pipeline_model = value @@ -484,28 +442,15 @@ def set_enable_dm_filtering(self, value: bool) -> None: self.enable_dm_filtering = value def set_dm_filter_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) + models = self._available_models() if value in models: self.dm_filter_model = value def set_judge_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) + models = self._available_models() if value in models: self.judge_model = value - async def set_provider(self, value: str) -> None: - self.provider = str(value or "openai") - if self.provider == "openrouter" and not self.openrouter_models: - self.openrouter_models = await load_openrouter_models() - else: - # When switching back to OpenAI, reset models to settings defaults - self.pipeline_model = core_settings.model - self.dm_filter_model = core_settings.filter_model - self.judge_model = core_settings.judge_model - self._sync_available_models() - self._sync_dm_filter_model() - self._sync_judge_model() - def set_compare_run_a(self, value: str) -> None: self.compare_run_a = str(value or "").strip() @@ -521,28 +466,42 @@ def _prune_selection(self) -> None: self.selected_question_ids = [q for q in (self.selected_question_ids or []) if q in valid] def _sync_available_models(self) -> None: - """Realign selected pipeline model when provider or model list changes.""" - if self.provider == "openrouter" and not self.openrouter_models: - 
self.provider = "openai" - models = self._models_for_provider(self.provider) + """Realign selected pipeline model when model list changes.""" + models = self.available_models if self.pipeline_model not in models and models: - self.pipeline_model = models[0] + if core_settings.model in models: + self.pipeline_model = core_settings.model + else: + self.pipeline_model = models[0] def _sync_dm_filter_model(self) -> None: - """Realign selected filter model when provider or model list changes.""" - models = self._models_for_provider(self.provider) + """Realign selected filter model when model list changes.""" + models = self.available_models if self.dm_filter_model not in models and models: - self.dm_filter_model = models[0] + if core_settings.filter_model in models: + self.dm_filter_model = core_settings.filter_model + else: + self.dm_filter_model = models[0] def _sync_judge_model(self) -> None: - """Realign selected judge model when provider or model list changes.""" - models = self._models_for_provider(self.provider) + """Realign selected judge model when model list changes.""" + models = self.available_models if self.judge_model not in models and models: - self.judge_model = models[0] + if core_settings.judge_model in models: + self.judge_model = core_settings.judge_model + else: + self.judge_model = models[0] + + @rx.event(background=True) # type: ignore[operator] + async def refresh_model_list(self) -> None: + async with self: + self.available_models = await self.get_var_value(DebugState.available_models) + self._sync_available_models() + self._sync_dm_filter_model() + self._sync_judge_model() def _resolved_pipeline_model(self) -> str: - provider = self.provider or "openai" - return f"{provider}/{self.pipeline_model}" + return self.pipeline_model def get_eval_gds_from_grist(self): step = next((s for s in etl_app.steps if s._name == "get_eval_gds_from_grist"), None) @@ -1674,7 +1633,6 @@ def _build_thread_config( "judge_model": self.judge_model, "judge_prompt_id": self.judge_prompt_id, "pipeline_model": resolved_model, - "pipeline_provider": self.provider, "embeddings_model": self.embeddings_model, "embeddings_dim": self.embeddings_dim, "dm_id": self.dm_id, @@ -1720,23 +1678,9 @@ async def _run_question_thread( resolved_model = self._resolved_pipeline_model() pipeline.model = resolved_model pipeline.enable_filtering = self.enable_dm_filtering - pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}" + pipeline.filter_model = self.dm_filter_model - if DEBUG_MODE: - debug_state = await self.get_state(DebugState) - api_key = debug_state.resolve_api_key(self.provider) - if not api_key: - raise ValueError(f"API key not found for {self.provider}/{resolved_model}") - else: - api_key = llm_settings.model_api_key - - ctx = await ctl.make_context( - llm_settings=LLMSettings( - provider=self.provider, - model=resolved_model, - model_api_key=api_key, - ) - ) + ctx = await ctl.make_context(llm_settings=LLMSettings(model=resolved_model)) events = await ctl.run_pipeline_with_context(pipeline, ctx) answer: str = "" @@ -1749,7 +1693,9 @@ async def _run_question_thread( return str(thread_id), answer, technical_info - async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_calls: str) -> tuple[str, str, int, float]: + async def _judge_answer( + self, question_row: dict[str, Any], answer: str, tool_calls: str + ) -> tuple[str, str, int, float]: """Judge model answer with current judge prompt/model and rating. Returns (status, comment, rating, judge_cost). 
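         Example return value (illustrative only): ("pass", "Matches the golden answer", 5, 0.0042).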
""" @@ -1757,19 +1703,7 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca if not judge_prompt: return "fail", "Judge prompt not loaded", 0, 0.0 - if DEBUG_MODE: - debug_state = await self.get_state(DebugState) - api_key = debug_state.resolve_api_key(self.provider) - if not api_key: - raise ValueError(f"API key not found for {self.provider}/{self.judge_model}") - else: - api_key = llm_settings.model_api_key - - provider = LLMProvider( - settings=LLMSettings( - provider=self.provider, model=f"{self.provider}/{self.judge_model}", model_api_key=api_key - ) - ) + provider = LLMProvider(settings=LLMSettings(model=self.judge_model)) class JudgeResult(BaseModel): test_status: str = Field(description="pass / fail") @@ -2005,7 +1939,7 @@ def load_eval_data(self): async def load_eval_data_background(self): try: async with self: - self.openrouter_models = await load_openrouter_models() + self.available_models = await self.get_var_value(DebugState.available_models) self._sync_available_models() self._sync_dm_filter_model() self._sync_judge_model() diff --git a/libs/vedana-backoffice/src/vedana_backoffice/ui.py b/libs/vedana-backoffice/src/vedana_backoffice/ui.py index 24c5cd09..f068833f 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/ui.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/ui.py @@ -53,14 +53,14 @@ def debug_badge() -> rx.Component: }, on_click=DebugState.open_dialog, ), - content="Click to enter or reset your OpenAI / OpenRouter API key (required for chat and eval model selection)", + content="Click to set your LiteLLM API key for debug mode model selection", ), rx.fragment(), ) def api_key_setup_dialog() -> rx.Component: - """Dialog to prompt for OpenAI and OpenRouter API keys; highlights missing fields.""" + """Dialog to prompt for a runtime API key used by LiteLLM in debug mode.""" _missing_style = { "border": "2px solid #d97706", "border_radius": "6px", @@ -71,41 +71,33 @@ def api_key_setup_dialog() -> rx.Component: rx.dialog.content( rx.dialog.title("API Key Setup"), rx.dialog.description( - "Add API keys for the providers you use.", + "Paste your LiteLLM-compatible API key here and click 'Save' to use compatible models. 
Remove the key to revert to default key and models.", margin_bottom="1em", ), rx.vstack( rx.vstack( - rx.text("OpenAI API Key", font_size="2", font_weight="500"), - rx.input( - placeholder="sk-...", - value=DebugState.runtime_openai_api_key, - on_change=DebugState.set_openai_api_key, - type="password", + rx.text("Provider", font_size="2", font_weight="500"), + rx.select( + items=DebugState.provider_options, + value=DebugState.runtime_model_provider, + on_change=DebugState.set_model_provider, width="100%", - style=rx.cond( - DebugState.openai_key_empty, - {**_normal_style, **_missing_style}, - _normal_style, - ), + placeholder="Select provider", + style=_normal_style, ), spacing="2", width="100%", align="start", ), rx.vstack( - rx.text("OpenRouter API Key", font_size="2", font_weight="500"), + rx.text("Runtime API Key", font_size="2", font_weight="500"), rx.input( - placeholder="sk-or-...", - value=DebugState.runtime_openrouter_api_key, - on_change=DebugState.set_openrouter_api_key, + placeholder="sk-...", + value=DebugState.runtime_model_api_key, + on_change=DebugState.set_model_api_key, type="password", width="100%", - style=rx.cond( - DebugState.openrouter_key_empty, - {**_normal_style, **_missing_style}, - _normal_style, - ), + style=_normal_style, ), spacing="2", width="100%", @@ -123,7 +115,7 @@ def api_key_setup_dialog() -> rx.Component: rx.button( "Save", color_scheme="blue", - on_click=DebugState.save_api_keys, + on_click=DebugState.save_api_key, ), justify="end", spacing="3", From 62b243d4b70c101dae3f543c9d6999d61f425359 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 00:23:20 +0700 Subject: [PATCH 12/21] * --- .../src/vedana_backoffice/states/chat.py | 2 +- .../src/vedana_backoffice/states/eval.py | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index aeb74d74..589784a6 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -49,7 +49,7 @@ async def mount(self): @rx.event(background=True) # type: ignore[operator] async def refresh_model_list(self) -> None: async with self: - self.available_models = await self.get_var_value(DebugState.available_models) + self.available_models = await self.get_var_value(DebugState.available_models) # type: ignore[arg-type] self._sync_model() self._sync_dm_filter_model() diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index fbadcf64..ad92c4fb 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -434,21 +434,18 @@ def set_test_run_name(self, value: str) -> None: self.test_run_name = str(value or "").strip() def set_pipeline_model(self, value: str) -> None: - models = self._available_models() - if value in models: + if value in self.available_models: self.pipeline_model = value def set_enable_dm_filtering(self, value: bool) -> None: self.enable_dm_filtering = value def set_dm_filter_model(self, value: str) -> None: - models = self._available_models() - if value in models: + if value in self.available_models: self.dm_filter_model = value def set_judge_model(self, value: str) -> None: - models = self._available_models() - if value in models: + if value in self.available_models: self.judge_model = value def 
set_compare_run_a(self, value: str) -> None: @@ -495,7 +492,7 @@ def _sync_judge_model(self) -> None: @rx.event(background=True) # type: ignore[operator] async def refresh_model_list(self) -> None: async with self: - self.available_models = await self.get_var_value(DebugState.available_models) + self.available_models = await self.get_var_value(DebugState.available_models) # type: ignore[arg-type] self._sync_available_models() self._sync_dm_filter_model() self._sync_judge_model() From ba42eb40729d445777db7067547fd27733599d3a Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 00:30:50 +0700 Subject: [PATCH 13/21] *: typing --- libs/vedana-backoffice/src/vedana_backoffice/states/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py index fab85df1..7c52befd 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py @@ -121,7 +121,7 @@ def provider_options(self) -> list[str]: return ["openai", "openrouter", "anthropic", "cohere", "xai"] @rx.event(background=True) # type: ignore[operator] - async def load_available_models(self) -> None: + async def load_available_models(self): if not self.debug_mode: return models = await load_litellm_models( From 8d30a48729aec3b884aded0a012aeca8839e0ce3 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 00:33:21 +0700 Subject: [PATCH 14/21] edit docs in .env.example --- apps/vedana/.env.example | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/vedana/.env.example b/apps/vedana/.env.example index 3ac6bff8..34bc7dc4 100644 --- a/apps/vedana/.env.example +++ b/apps/vedana/.env.example @@ -24,8 +24,11 @@ JIMS_DB_CONN_URI="postgresql://postgres:postgres@db:5432" # Main LLM model MODEL="gpt-4.1-mini" -# Embeddings are provided via LiteLLM as well. OpenRouter's embedding models are not supported there at the moment +# Embeddings are provided via LiteLLM as well. +# You can use OpenAI directly: EMBEDDINGS_MODEL="text-embedding-3-large" +# Or route embeddings through OpenRouter (requires OPENROUTER_API_KEY): +# EMBEDDINGS_MODEL="openrouter/openai/text-embedding-3-large" # Note: changing dimensions of embeddings requires an SQL migration. 
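 # For example, text-embedding-3-large natively produces 3072-dim vectors; the 1024
 # below assumes the provider supports requesting reduced embedding dimensions.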
EMBEDDINGS_DIM=1024 EMBEDDINGS_MAX_BATCH_SIZE=2048 From 12dbb7a9412dd639f75bbf41580b47abcfa1f838 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 14:23:39 +0700 Subject: [PATCH 15/21] default routing with openrouter --- libs/vedana-backoffice/pyproject.toml | 2 +- .../src/vedana_backoffice/states/chat.py | 4 +++ .../src/vedana_backoffice/states/eval.py | 27 ++++++++++--------- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/libs/vedana-backoffice/pyproject.toml b/libs/vedana-backoffice/pyproject.toml index 5a409efd..77c60297 100644 --- a/libs/vedana-backoffice/pyproject.toml +++ b/libs/vedana-backoffice/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ - "reflex>=0.8.26,<0.9.0", + "reflex>=0.8.27,<0.9.0", "orjson>=3.11.3", "vedana-core", "vedana-etl", diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index 589784a6..76110568 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -70,6 +70,8 @@ def _sync_model(self) -> None: if self.model not in self.available_models and self.available_models: if core_settings.model in self.available_models: self.model = core_settings.model + elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: + self.model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default else: self.model = self.available_models[0] @@ -78,6 +80,8 @@ def _sync_dm_filter_model(self) -> None: if self.dm_filter_model not in self.available_models and self.available_models: if core_settings.filter_model in self.available_models: self.dm_filter_model = core_settings.filter_model + elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: + self.dm_filter_model = "openrouter/openrouter/free" else: self.dm_filter_model = self.available_models[0] diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index ad92c4fb..194b1b23 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -464,30 +464,33 @@ def _prune_selection(self) -> None: def _sync_available_models(self) -> None: """Realign selected pipeline model when model list changes.""" - models = self.available_models - if self.pipeline_model not in models and models: - if core_settings.model in models: + if self.pipeline_model not in self.available_models and self.available_models: + if core_settings.model in self.available_models: self.pipeline_model = core_settings.model + elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: + self.pipeline_model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default else: - self.pipeline_model = models[0] + self.pipeline_model = self.available_models[0] def _sync_dm_filter_model(self) -> None: """Realign selected filter model when model list changes.""" - models = self.available_models - if self.dm_filter_model not in models and models: - if core_settings.filter_model in models: + if self.dm_filter_model not in self.available_models and self.available_models: + if core_settings.filter_model in self.available_models: self.dm_filter_model = 
core_settings.filter_model + elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: + self.dm_filter_model = "openrouter/openrouter/free" else: - self.dm_filter_model = models[0] + self.dm_filter_model = self.available_models[0] def _sync_judge_model(self) -> None: """Realign selected judge model when model list changes.""" - models = self.available_models - if self.judge_model not in models and models: - if core_settings.judge_model in models: + if self.judge_model not in self.available_models and self.available_models: + if core_settings.judge_model in self.available_models: self.judge_model = core_settings.judge_model + elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: + self.judge_model = "openrouter/openrouter/free" else: - self.judge_model = models[0] + self.judge_model = self.available_models[0] @rx.event(background=True) # type: ignore[operator] async def refresh_model_list(self) -> None: From ac82dde434e05066695da012ddf1d449d0700556 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 15:18:20 +0700 Subject: [PATCH 16/21] fix openai models fetch --- .../src/vedana_backoffice/pages/eval.py | 2 +- .../pages/jims_thread_list_page.py | 16 ++++++++++++++-- .../src/vedana_backoffice/states/common.py | 6 +++++- .../src/vedana_backoffice/states/eval.py | 8 ++++++++ .../src/vedana_backoffice/vedana_backoffice.py | 2 +- 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py index 3df69a48..a211ff39 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py @@ -1,8 +1,8 @@ import reflex as rx +from vedana_backoffice.states.chat import ChatState from vedana_backoffice.states.common import AppVersionState, DebugState from vedana_backoffice.states.eval import EvalState, RunSummary -from vedana_backoffice.states.chat import ChatState from vedana_backoffice.ui import app_header diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py index 965a11a5..e38a467f 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py @@ -687,7 +687,13 @@ def priority_badge(value: str) -> rx.Component: # type: ignore[valid-type] rx.text(ThreadListState.rows_display, size="2", color="gray"), rx.spacer(), rx.hstack( - rx.button("⏮", variant="soft", size="1", on_click=ThreadListState.first_page, disabled=~ThreadListState.has_prev_page), # type: ignore[operator] + rx.button( + "⏮", + variant="soft", + size="1", + on_click=ThreadListState.first_page, + disabled=~ThreadListState.has_prev_page, + ), # type: ignore[operator] rx.button( "← Prev", variant="soft", @@ -703,7 +709,13 @@ def priority_badge(value: str) -> rx.Component: # type: ignore[valid-type] on_click=ThreadListState.next_page, disabled=~ThreadListState.has_next_page, ), # type: ignore[operator] - rx.button("⏭", variant="soft", size="1", on_click=ThreadListState.last_page, disabled=~ThreadListState.has_next_page), # type: ignore[operator] + rx.button( + "⏭", + variant="soft", + size="1", + on_click=ThreadListState.last_page, + disabled=~ThreadListState.has_next_page, + ), # type: ignore[operator] 
spacing="2", align="center", ), diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py index 7c52befd..943334a5 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py @@ -126,10 +126,14 @@ async def load_available_models(self): return models = await load_litellm_models( provider=self.runtime_model_provider, - check_provider_endpoint=True if self.runtime_model_provider else False, + # check_provider is not necessary in most cases + # check_provider_endpoint=True if self.runtime_model_provider and self.runtime_model_provider == "openrouter" else False, ) async with self: self.available_models = models + if not models and not self.api_key_saved: + self.show_api_key_dialog = True + from vedana_backoffice.states.chat import ChatState from vedana_backoffice.states.eval import EvalState diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index 194b1b23..dfd41781 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -23,6 +23,8 @@ from vedana_backoffice.states.common import ( DebugState, + DEBUG_MODE, + EVAL_ENABLED, datapipe_log_capture, get_vedana_app, ) @@ -1935,6 +1937,12 @@ def load_eval_data(self): yield EvalState.load_eval_data_background() yield EvalState.refresh_golden_dataset_background() + async def mount(self): + if EVAL_ENABLED: + # yield EvalState.load_eval_data_background() + if DEBUG_MODE: + yield DebugState.load_available_models() + @rx.event(background=True) # type: ignore[operator] async def load_eval_data_background(self): try: diff --git a/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py b/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py index 717f688a..368ecdc2 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py @@ -15,4 +15,4 @@ app.add_page(etl_page, route="/etl", title="ETL", on_load=EtlState.load_pipeline_metadata) app.add_page(chat_page, route="/chat", title="Chat", on_load=ChatState.reset_session) app.add_page(jims_thread_list_page, route="/jims", title="JIMS", on_load=ThreadListState.get_data) -app.add_page(eval_page, route="/eval", title="Evaluation", on_load=EvalState.load_eval_data) +app.add_page(eval_page, route="/eval", title="Evaluation", on_load=EvalState.mount) From cf2adf0333f46432a37170614898fa42889e7b12 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 15:45:42 +0700 Subject: [PATCH 17/21] better model selection between providers --- .../src/vedana_backoffice/states/chat.py | 26 ++++++++----- .../src/vedana_backoffice/states/eval.py | 39 ++++++++++++------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index 76110568..c427a123 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -68,22 +68,28 @@ def set_dm_filter_model(self, value: str) -> None: def _sync_model(self) -> None: """Realign selected model when model list changes.""" if self.model not in self.available_models and self.available_models: - if core_settings.model in self.available_models: 
- self.model = core_settings.model - elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: - self.model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default + for model in self.available_models: + if model.endswith(core_settings.model): # or model.rsplit("/", 1)[-1] == core_settings.model + self.model = model + break else: - self.model = self.available_models[0] + if "openrouter/openrouter/free" in self.available_models: + self.model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default + else: + self.model = self.available_models[0] def _sync_dm_filter_model(self) -> None: """Realign selected filter model when model list changes.""" if self.dm_filter_model not in self.available_models and self.available_models: - if core_settings.filter_model in self.available_models: - self.dm_filter_model = core_settings.filter_model - elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: - self.dm_filter_model = "openrouter/openrouter/free" + for model in self.available_models: + if model.endswith(core_settings.filter_model): + self.dm_filter_model = model + break else: - self.dm_filter_model = self.available_models[0] + if "openrouter/openrouter/free" in self.available_models: # openrouter provider + self.dm_filter_model = "openrouter/openrouter/free" + else: + self.dm_filter_model = self.available_models[0] def toggle_details_by_id(self, message_id: str) -> None: for idx, m in enumerate(self.messages): diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index dfd41781..25825783 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -467,32 +467,41 @@ def _prune_selection(self) -> None: def _sync_available_models(self) -> None: """Realign selected pipeline model when model list changes.""" if self.pipeline_model not in self.available_models and self.available_models: - if core_settings.model in self.available_models: - self.pipeline_model = core_settings.model - elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: - self.pipeline_model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default + for model in self.available_models: + if model.endswith(core_settings.model): + self.pipeline_model = model + break else: - self.pipeline_model = self.available_models[0] + if "openrouter/openrouter/free" in self.available_models: # openrouter provider + self.pipeline_model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default + else: + self.pipeline_model = self.available_models[0] def _sync_dm_filter_model(self) -> None: """Realign selected filter model when model list changes.""" if self.dm_filter_model not in self.available_models and self.available_models: - if core_settings.filter_model in self.available_models: - self.dm_filter_model = core_settings.filter_model - elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: - self.dm_filter_model = "openrouter/openrouter/free" + for model in self.available_models: + if model.endswith(core_settings.filter_model): + self.dm_filter_model = model + break else: - self.dm_filter_model = 
self.available_models[0] + if "openrouter/openrouter/free" in self.available_models: + self.dm_filter_model = "openrouter/openrouter/free" + else: + self.dm_filter_model = self.available_models[0] def _sync_judge_model(self) -> None: """Realign selected judge model when model list changes.""" if self.judge_model not in self.available_models and self.available_models: - if core_settings.judge_model in self.available_models: - self.judge_model = core_settings.judge_model - elif self.available_models[0].startswith("openrouter") and "openrouter/openrouter/free" in self.available_models: - self.judge_model = "openrouter/openrouter/free" + for model in self.available_models: + if model.endswith(core_settings.judge_model): + self.judge_model = model + break else: - self.judge_model = self.available_models[0] + if "openrouter/openrouter/free" in self.available_models: + self.judge_model = "openrouter/openrouter/free" + else: + self.judge_model = self.available_models[0] @rx.event(background=True) # type: ignore[operator] async def refresh_model_list(self) -> None: From e6ba873220bce649b3d3f7990b702ba72afe4984 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Fri, 13 Mar 2026 23:36:43 +0700 Subject: [PATCH 18/21] fix None response_cost from litellm in observe() breaking pipeline --- libs/jims-core/src/jims_core/llms/llm_provider.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/jims-core/src/jims_core/llms/llm_provider.py b/libs/jims-core/src/jims_core/llms/llm_provider.py index 61299da2..58d3c060 100644 --- a/libs/jims-core/src/jims_core/llms/llm_provider.py +++ b/libs/jims-core/src/jims_core/llms/llm_provider.py @@ -100,7 +100,7 @@ def observe_completion(self, completion: litellm.ModelResponse) -> None: prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, cached_tokens=cached_tokens, - request_cost=completion._hidden_params.get("response_cost", 0), # or litellm.completion_cost(completion) + request_cost=completion._hidden_params.get("response_cost") or 0, # or litellm.completion_cost(completion) ) llm_calls_total.labels(completion.model).inc() @@ -116,7 +116,7 @@ def observe_create_embedding(self, res: litellm.EmbeddingResponse) -> None: if usage is not None: self.usage[model].observe( prompt_tokens=usage.prompt_tokens, - request_cost=res._hidden_params.get("response_cost", 0), # or litellm.completion_cost(res) + request_cost=res._hidden_params.get("response_cost") or 0, # or litellm.completion_cost(res) ) llm_usage_prompt_tokens_total.labels(model).inc(usage.prompt_tokens) From a89b7ebe47572f35bb6c54dd7085bb8d31ac3368 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Tue, 17 Mar 2026 17:00:19 +0800 Subject: [PATCH 19/21] WIP: resolve embeddings model in backoffice --- .../src/vedana_backoffice/pages/chat.py | 17 +++++++++++ .../src/vedana_backoffice/pages/eval.py | 21 +++++++++++--- .../src/vedana_backoffice/states/chat.py | 5 +++- .../src/vedana_backoffice/states/common.py | 27 +++++++++++++++++ .../src/vedana_backoffice/states/eval.py | 29 ++++++++++++------- 5 files changed, 84 insertions(+), 15 deletions(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py index b21aa1ad..94aff6c7 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py @@ -120,6 +120,23 @@ def page() -> rx.Component: rx.text(ChatState.dm_filter_model, size="1", color="gray"), ), rx.spacer(), + rx.cond( + 
AppVersionState.debug_mode, + rx.cond( + DebugState.embeddings_model_available, + rx.text( + f"Embeddings: {DebugState.embeddings_model}", + size="1", + color="gray", + ), + rx.text( + f"Embeddings: {DebugState.default_embeddings_model} (unavailable for provider)", + size="1", + color="red", + ), + ), + rx.fragment(), + ), rx.hstack( rx.cond( ChatState.chat_thread_id != "", diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py index a211ff39..fb8f6fa6 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py @@ -21,7 +21,11 @@ def _selection_and_actions() -> rx.Component: color_scheme="blue", on_click=EvalState.run_selected_tests, loading=EvalState.is_running, - disabled=rx.cond(EvalState.can_run, False, True), # type: ignore[arg-type] + disabled=rx.cond( + DebugState.debug_mode & ~DebugState.embeddings_model_available, + True, + rx.cond((EvalState.selected_count > 0) & ~EvalState.is_running, False, True), # type: ignore[arg-type] + ), ), rx.spacer(), rx.button( @@ -278,9 +282,18 @@ def _pipeline_card() -> rx.Component: ), rx.box( rx.text("Embeddings", weight="medium"), - rx.text( - rx.cond(EvalState.embeddings_model != "", EvalState.embeddings_model, "—"), - size="3", + rx.cond( + AppVersionState.debug_mode, + rx.cond( + DebugState.embeddings_model_available, + rx.text(DebugState.embeddings_model, size="3"), + rx.text( + f"{EvalState.default_embeddings_model} (unavailable for provider)", + size="3", + color="red", + ), + ), + rx.text(EvalState.default_embeddings_model, size="3"), ), rx.text( EvalState.embeddings_dim_label, diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index c427a123..11fb8b4f 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -226,7 +226,10 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[ pipeline.filter_model = self.dm_filter_model ctx = await ctl.make_context( - llm_settings=LLMSettings(model=self.model) + llm_settings=LLMSettings( + model=self.model, + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) + ) ) events = await ctl.run_pipeline_with_context(pipeline, ctx) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py index 943334a5..2b2d5235 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py @@ -8,6 +8,7 @@ import reflex as rx import requests from vedana_core.app import VedanaApp, make_vedana_app +from vedana_core.settings import settings as core_settings vedana_app: VedanaApp | None = None @@ -113,6 +114,7 @@ class DebugState(rx.State): show_api_key_dialog: bool = False runtime_model_api_key: str = "" runtime_model_provider: str | None = None + default_embeddings_model: str = core_settings.embeddings_model api_key_saved: bool = False available_models: list[str] = [] @@ -120,6 +122,31 @@ class DebugState(rx.State): def provider_options(self) -> list[str]: return ["openai", "openrouter", "anthropic", "cohere", "xai"] + @rx.var + def embeddings_model(self) -> bool: + """embeddings model is fixed so its availability/correct name can be resolved here.""" + 
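+        # Resolution sketch (descriptive comments only; behavior as implemented below):
+        #   1. prefer a fetched model whose last path segment matches the configured
+        #      embeddings model, and return its fully qualified name;
+        #   2. otherwise, for openrouter (whose model list omits embeddings models),
+        #      re-prefix the configured name with "openrouter/...";
+        #   3. otherwise return None, i.e. unavailable for the selected provider.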
models = self.available_models + if not models: + return core_settings.embeddings_model + for m in models: + if m.rsplit("/", 1)[-1] == core_settings.embeddings_model: + return m + else: + _model, embeddings_model_provider, _1, _2 = litellm.get_llm_provider(core_settings.embeddings_model) + + if self.runtime_model_provider == "openrouter": # openrouter does not list embeddings models in its model list + if embeddings_model_provider == "openrouter": + return core_settings.embeddings_model + elif core_settings.embeddings_model.startswith(embeddings_model_provider): + return f"openrouter/{core_settings.embeddings_model}" + elif embeddings_model_provider: + return f"openrouter/{embeddings_model_provider}/{core_settings.embeddings_model}" + return None + + @rx.var + def embeddings_model_available(self) -> bool: + return self.embeddings_model is not None + @rx.event(background=True) # type: ignore[operator] async def load_available_models(self): if not self.debug_mode: diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index 25825783..23543b9f 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -199,7 +199,7 @@ class EvalState(rx.State): judge_prompt_id: str = "" judge_prompt: str = "" pipeline_model: str = core_settings.model - embeddings_model: str = core_settings.embeddings_model + default_embeddings_model: str = core_settings.embeddings_model embeddings_dim: int = core_settings.embeddings_dim enable_dm_filtering: bool = core_settings.enable_dm_filtering available_models: list[str] = list({core_settings.model, core_settings.filter_model, core_settings.judge_model}) @@ -307,10 +307,6 @@ def all_selected(self) -> bool: rows = len(self.eval_gds_rows_with_selection) # Use filtered count return 0 < rows == self.selected_count - @rx.var - def can_run(self) -> bool: - return (self.selected_count > 0) and (not self.is_running) - @rx.var def cost_label(self) -> str: if self.tests_cost_total > 0: @@ -1182,7 +1178,7 @@ async def _build_eval_meta_payload(self, vedana_app, test_run_id: str, test_run_ ) run_config = RunConfig( pipeline_model=self._resolved_pipeline_model(), - embeddings_model=self.embeddings_model, + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model), embeddings_dim=self.embeddings_dim, ) return { @@ -1644,7 +1640,7 @@ def _build_thread_config( "judge_model": self.judge_model, "judge_prompt_id": self.judge_prompt_id, "pipeline_model": resolved_model, - "embeddings_model": self.embeddings_model, + "embeddings_model": core_settings.embeddings_model, "embeddings_dim": self.embeddings_dim, "dm_id": self.dm_id, } @@ -1691,7 +1687,15 @@ async def _run_question_thread( pipeline.enable_filtering = self.enable_dm_filtering pipeline.filter_model = self.dm_filter_model - ctx = await ctl.make_context(llm_settings=LLMSettings(model=resolved_model)) + async with self: + pipeline_embeddings_model = core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) + + ctx = await ctl.make_context( + llm_settings=LLMSettings( + model=resolved_model, + embeddings_model=pipeline_embeddings_model, + ) + ) events = await ctl.run_pipeline_with_context(pipeline, ctx) answer: str = "" @@ -1714,7 +1718,12 @@ async def _judge_answer( if not judge_prompt: return "fail", "Judge prompt not loaded", 0, 0.0 - provider = 
LLMProvider(settings=LLMSettings(model=self.judge_model)) + provider = LLMProvider( + settings=LLMSettings( + model=self.judge_model, + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) + ) + ) class JudgeResult(BaseModel): test_status: str = Field(description="pass / fail") @@ -1846,7 +1855,7 @@ async def _run_one(question: str) -> dict[str, Any]: "judge_prompt_id": self.judge_prompt_id, "dm_id": self.dm_id, "pipeline_model": resolved_pipeline_model, - "embeddings_model": self.embeddings_model, + "embeddings_model": core_settings.embeddings_model, "embeddings_dim": self.embeddings_dim, "test_run_id": test_run_id, "test_run_name": test_run_name, From 2da875b7ce959e09d4bb492655ce67e1ac5b8615 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Tue, 17 Mar 2026 22:15:54 +0800 Subject: [PATCH 20/21] * --- libs/vedana-backoffice/src/vedana_backoffice/states/eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index 23543b9f..138fdec4 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -549,7 +549,7 @@ async def _load_eval_questions(self) -> None: self._prune_selection() async def _load_judge_config(self) -> None: - self.judge_model = core_settings.judge_model + self.judge_model = core_settings.judge_model if not self.judge_model else self.judge_model self.judge_prompt_id = "" self.judge_prompt = "" From af8d575c5031bbbfb023504bef1cbb65a3ea2061 Mon Sep 17 00:00:00 2001 From: Timur Sheidaev Date: Tue, 17 Mar 2026 22:27:34 +0800 Subject: [PATCH 21/21] typing --- .../src/vedana_backoffice/states/chat.py | 2 +- .../src/vedana_backoffice/states/common.py | 4 ++-- .../src/vedana_backoffice/states/eval.py | 8 ++++---- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index 11fb8b4f..20454839 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -228,7 +228,7 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[ ctx = await ctl.make_context( llm_settings=LLMSettings( model=self.model, - embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) # type: ignore[arg-type] ) ) diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py index 2b2d5235..d948c834 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py @@ -38,7 +38,7 @@ def _fetch() -> list[str]: return await asyncio.to_thread(_fetch) # type: ignore[return-value] -async def get_vedana_app(): +async def get_vedana_app() -> VedanaApp: global vedana_app if vedana_app is None: vedana_app = await make_vedana_app() @@ -123,7 +123,7 @@ def provider_options(self) -> list[str]: return ["openai", "openrouter", "anthropic", "cohere", "xai"] @rx.var - def embeddings_model(self) -> bool: + def embeddings_model(self) -> str | None: """embeddings model is 
fixed so its availability/correct name can be resolved here.""" models = self.available_models if not models: diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index 138fdec4..e4804a48 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -1178,7 +1178,7 @@ async def _build_eval_meta_payload(self, vedana_app, test_run_id: str, test_run_ ) run_config = RunConfig( pipeline_model=self._resolved_pipeline_model(), - embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model), + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model), # type: ignore[arg-type] embeddings_dim=self.embeddings_dim, ) return { @@ -1688,12 +1688,12 @@ async def _run_question_thread( pipeline.filter_model = self.dm_filter_model async with self: - pipeline_embeddings_model = core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) + pipeline_embeddings_model = core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) # type: ignore[arg-type] ctx = await ctl.make_context( llm_settings=LLMSettings( model=resolved_model, - embeddings_model=pipeline_embeddings_model, + embeddings_model=pipeline_embeddings_model ) ) events = await ctl.run_pipeline_with_context(pipeline, ctx) @@ -1721,7 +1721,7 @@ async def _judge_answer( provider = LLMProvider( settings=LLMSettings( model=self.judge_model, - embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) # type: ignore[arg-type] ) )
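
A minimal, self-contained sketch of the flow this series converges on (runtime key ->
litellm model listing -> provider-qualified model names). It is not part of any patch;
the provider name and API key below are placeholders, and only litellm is assumed.

    import asyncio

    import litellm


    async def list_models_for(provider: str | None, api_key: str | None) -> list[str]:
        # Mirrors DebugState.save_api_key(): a runtime key overrides the env-provided key.
        litellm.api_key = api_key

        def _fetch() -> list[str]:
            # get_valid_models() is synchronous, hence the to_thread below.
            raw = litellm.get_valid_models(custom_llm_provider=provider)
            # Qualify bare names with the provider prefix, as load_litellm_models() does.
            return sorted(
                {m if (provider is None or m.startswith(provider)) else f"{provider}/{m}" for m in raw}
            )

        return await asyncio.to_thread(_fetch)


    if __name__ == "__main__":
        print(asyncio.run(list_models_for("openrouter", api_key="sk-or-placeholder"))[:5])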