diff --git a/apps/vedana/.env.example b/apps/vedana/.env.example index 3ac6bff8..34bc7dc4 100644 --- a/apps/vedana/.env.example +++ b/apps/vedana/.env.example @@ -24,8 +24,11 @@ JIMS_DB_CONN_URI="postgresql://postgres:postgres@db:5432" # Main LLM model MODEL="gpt-4.1-mini" -# Embeddings are provided via LiteLLM as well. OpenRouter's embedding models are not supported there at the moment +# Embeddings are provided via LiteLLM as well. +# You can use OpenAI directly: EMBEDDINGS_MODEL="text-embedding-3-large" +# Or route embeddings through OpenRouter (requires OPENROUTER_API_KEY): +# EMBEDDINGS_MODEL="openrouter/openai/text-embedding-3-large" # Note: changing dimensions of embeddings requires an SQL migration. EMBEDDINGS_DIM=1024 EMBEDDINGS_MAX_BATCH_SIZE=2048 diff --git a/libs/jims-core/src/jims_core/llms/llm_provider.py b/libs/jims-core/src/jims_core/llms/llm_provider.py index a0e5dd46..58d3c060 100644 --- a/libs/jims-core/src/jims_core/llms/llm_provider.py +++ b/libs/jims-core/src/jims_core/llms/llm_provider.py @@ -24,13 +24,6 @@ class LLMSettings(BaseSettings): embeddings_max_batch_size: int = 2048 embeddings_max_tokens_per_batch: int = 200000 - # passable api_keys; if None, defaults to env vars - model_api_key: str | None = None - embeddings_model_api_key: str | None = None - - # openrouter_api_key: str | None = None - openrouter_api_base_url: str = "https://openrouter.ai/api/v1" - env_settings = LLMSettings() # type: ignore @@ -77,9 +70,7 @@ class LLMProvider: def __init__(self, settings: LLMSettings | None = None) -> None: self._settings = settings or env_settings self.model = self._settings.model - self.model_api_key = self._settings.model_api_key self.embeddings_model = self._settings.embeddings_model - self.embeddings_model_api_key = self._settings.embeddings_model_api_key self.embeddings_dim = self._settings.embeddings_dim self.max_batch_size = self._settings.embeddings_max_batch_size self.max_tokens_per_batch = self._settings.embeddings_max_tokens_per_batch @@ -109,7 +100,7 @@ def observe_completion(self, completion: litellm.ModelResponse) -> None: prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, cached_tokens=cached_tokens, - request_cost=completion._hidden_params.get("response_cost", 0), # or litellm.completion_cost(completion) + request_cost=completion._hidden_params.get("response_cost") or 0, # or litellm.completion_cost(completion) ) llm_calls_total.labels(completion.model).inc() @@ -125,7 +116,7 @@ def observe_create_embedding(self, res: litellm.EmbeddingResponse) -> None: if usage is not None: self.usage[model].observe( prompt_tokens=usage.prompt_tokens, - request_cost=res._hidden_params.get("response_cost", 0), # or litellm.completion_cost(res) + request_cost=res._hidden_params.get("response_cost") or 0, # or litellm.completion_cost(res) ) llm_usage_prompt_tokens_total.labels(model).inc(usage.prompt_tokens) @@ -135,7 +126,6 @@ async def create_embedding(self, text: str) -> list[float]: model=self.embeddings_model, input=[text], dimensions=self.embeddings_dim, - api_key=self.embeddings_model_api_key, ) self.observe_create_embedding(response) return response.data[0]["embedding"] @@ -170,7 +160,6 @@ async def create_embeddings(self, texts: list[str]) -> list[list[float]]: model=self.embeddings_model, input=batch, dimensions=self.embeddings_dim, - api_key=self.embeddings_model_api_key, ) self.observe_create_embedding(response) results.extend(d["embedding"] for d in response.data) @@ -181,7 +170,6 @@ def create_embedding_sync(self, text: str) -> list[float]: 
model=self.embeddings_model, input=[text], dimensions=self.embeddings_dim, - api_key=self.embeddings_model_api_key, ) self.observe_create_embedding(response) return response.data[0]["embedding"] @@ -194,7 +182,6 @@ def create_embeddings_sync(self, texts: list[str]) -> list[list[float]]: model=self.embeddings_model, input=batch, dimensions=self.embeddings_dim, - api_key=self.embeddings_model_api_key, ) self.observe_create_embedding(response) results.extend(d["embedding"] for d in response.data) @@ -210,7 +197,6 @@ async def chat_completion_structured[T: BaseModel]( model=self.model, messages=list(messages), response_format=response_format, - api_key=self.model_api_key, ) assert isinstance(completion, litellm.ModelResponse) @@ -234,7 +220,6 @@ async def chat_completion_plain( model=self.model, messages=list(messages), caching=use_cache, - api_key=self.model_api_key, ) assert isinstance(completion, litellm.ModelResponse) @@ -258,7 +243,6 @@ async def chat_completion_with_tools( model=self.model, messages=list(messages), tools=tools, - api_key=self.model_api_key, ) assert isinstance(completion, litellm.ModelResponse) diff --git a/libs/jims-core/src/jims_core/thread/thread_context.py b/libs/jims-core/src/jims_core/thread/thread_context.py index 244bb17b..3cf589d5 100644 --- a/libs/jims-core/src/jims_core/thread/thread_context.py +++ b/libs/jims-core/src/jims_core/thread/thread_context.py @@ -111,7 +111,7 @@ def context(self, conversation_length: int = 20) -> list[CommunicationEvent]: result.append(CommunicationEvent(**event.event_data)) comm_counter += 1 if event.event_type.startswith("context."): - result.append(CommunicationEvent(**event.event_data)) + result.append(CommunicationEvent(**event.event_data)) if comm_counter > conversation_length: break diff --git a/libs/vedana-backoffice/pyproject.toml b/libs/vedana-backoffice/pyproject.toml index 5a409efd..77c60297 100644 --- a/libs/vedana-backoffice/pyproject.toml +++ b/libs/vedana-backoffice/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ - "reflex>=0.8.26,<0.9.0", + "reflex>=0.8.27,<0.9.0", "orjson>=3.11.3", "vedana-core", "vedana-etl", diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py index 20f771ee..94aff6c7 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py @@ -2,7 +2,7 @@ from vedana_backoffice.components.ui_chat import render_message_bubble from vedana_backoffice.states.chat import ChatState -from vedana_backoffice.states.common import AppVersionState +from vedana_backoffice.states.common import AppVersionState, DebugState from vedana_backoffice.ui import app_header @@ -111,7 +111,7 @@ def page() -> rx.Component: rx.cond( ChatState.enable_dm_filtering & AppVersionState.debug_mode, rx.select( - items=ChatState.dm_filter_available_models, + items=DebugState.available_models, value=ChatState.dm_filter_model, on_change=ChatState.set_dm_filter_model, width="16em", @@ -120,6 +120,23 @@ def page() -> rx.Component: rx.text(ChatState.dm_filter_model, size="1", color="gray"), ), rx.spacer(), + rx.cond( + AppVersionState.debug_mode, + rx.cond( + DebugState.embeddings_model_available, + rx.text( + f"Embeddings: {DebugState.embeddings_model}", + size="1", + color="gray", + ), + rx.text( + f"Embeddings: {DebugState.default_embeddings_model} (unavailable for provider)", + size="1", + color="red", + ), + ), + rx.fragment(), + ), rx.hstack( rx.cond( 
ChatState.chat_thread_id != "", @@ -157,21 +174,12 @@ def page() -> rx.Component: ), rx.cond( ChatState.model_selection_allowed, - rx.hstack( - rx.select( - items=["openai", "openrouter"], - value=ChatState.provider, - on_change=ChatState.set_provider, - width="10em", - placeholder="Provider", - ), - rx.select( - items=ChatState.available_models, - value=ChatState.model, - on_change=ChatState.set_model, - width="16em", - placeholder="Select model", - ), + rx.select( + items=DebugState.available_models, + value=ChatState.model, + on_change=ChatState.set_model, + width="20em", + placeholder="Select model", ), rx.badge(ChatState.model, variant="surface", color_scheme="gray", size="3"), ), diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py index d45b2c3c..fb8f6fa6 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py @@ -1,8 +1,8 @@ import reflex as rx -from vedana_backoffice.states.common import AppVersionState -from vedana_backoffice.states.eval import EvalState, RunSummary from vedana_backoffice.states.chat import ChatState +from vedana_backoffice.states.common import AppVersionState, DebugState +from vedana_backoffice.states.eval import EvalState, RunSummary from vedana_backoffice.ui import app_header @@ -21,7 +21,11 @@ def _selection_and_actions() -> rx.Component: color_scheme="blue", on_click=EvalState.run_selected_tests, loading=EvalState.is_running, - disabled=rx.cond(EvalState.can_run, False, True), # type: ignore[arg-type] + disabled=rx.cond( + DebugState.debug_mode & ~DebugState.embeddings_model_available, + True, + rx.cond((EvalState.selected_count > 0) & ~EvalState.is_running, False, True), # type: ignore[arg-type] + ), ), rx.spacer(), rx.button( @@ -175,7 +179,7 @@ def _judge_card() -> rx.Component: rx.cond( AppVersionState.debug_mode, rx.select( - items=EvalState.judge_available_models, + items=DebugState.available_models, value=EvalState.judge_model, on_change=EvalState.set_judge_model, width="100%", @@ -235,25 +239,12 @@ def _pipeline_card() -> rx.Component: rx.text("Pipeline model", weight="medium", width="100%"), rx.cond( ChatState.model_selection_allowed, - rx.hstack( - rx.select( - items=["openai", "openrouter"], - value=EvalState.provider, - on_change=EvalState.set_provider, - width="100%", - placeholder="Provider", - ), - rx.select( - items=EvalState.available_models_view, - value=EvalState.pipeline_model, - on_change=EvalState.set_pipeline_model, - width="100%", - placeholder="Select model", - ), - spacing="2", - align="center", - wrap="wrap", + rx.select( + items=DebugState.available_models, + value=EvalState.pipeline_model, + on_change=EvalState.set_pipeline_model, width="100%", + placeholder="Select model", ), rx.text(EvalState.pipeline_model, size="3"), ), @@ -270,7 +261,7 @@ def _pipeline_card() -> rx.Component: rx.cond( AppVersionState.debug_mode, rx.select( - items=EvalState.dm_filter_available_models, + items=DebugState.available_models, value=EvalState.dm_filter_model, on_change=EvalState.set_dm_filter_model, width="100%", @@ -291,9 +282,18 @@ def _pipeline_card() -> rx.Component: ), rx.box( rx.text("Embeddings", weight="medium"), - rx.text( - rx.cond(EvalState.embeddings_model != "", EvalState.embeddings_model, "—"), - size="3", + rx.cond( + AppVersionState.debug_mode, + rx.cond( + DebugState.embeddings_model_available, + rx.text(DebugState.embeddings_model, size="3"), + rx.text( + 
f"{EvalState.default_embeddings_model} (unavailable for provider)", + size="3", + color="red", + ), + ), + rx.text(EvalState.default_embeddings_model, size="3"), ), rx.text( EvalState.embeddings_dim_label, diff --git a/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py b/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py index 965a11a5..e38a467f 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/pages/jims_thread_list_page.py @@ -687,7 +687,13 @@ def priority_badge(value: str) -> rx.Component: # type: ignore[valid-type] rx.text(ThreadListState.rows_display, size="2", color="gray"), rx.spacer(), rx.hstack( - rx.button("⏮", variant="soft", size="1", on_click=ThreadListState.first_page, disabled=~ThreadListState.has_prev_page), # type: ignore[operator] + rx.button( + "⏮", + variant="soft", + size="1", + on_click=ThreadListState.first_page, + disabled=~ThreadListState.has_prev_page, + ), # type: ignore[operator] rx.button( "← Prev", variant="soft", @@ -703,7 +709,13 @@ def priority_badge(value: str) -> rx.Component: # type: ignore[valid-type] on_click=ThreadListState.next_page, disabled=~ThreadListState.has_next_page, ), # type: ignore[operator] - rx.button("⏭", variant="soft", size="1", on_click=ThreadListState.last_page, disabled=~ThreadListState.has_next_page), # type: ignore[operator] + rx.button( + "⏭", + variant="soft", + size="1", + on_click=ThreadListState.last_page, + disabled=~ThreadListState.has_next_page, + ), # type: ignore[operator] spacing="2", align="center", ), diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py index a9702e2e..20454839 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/chat.py @@ -1,6 +1,5 @@ import asyncio import logging -import os import traceback from datetime import datetime from typing import Any, Dict, Tuple @@ -9,14 +8,20 @@ import orjson as json import reflex as rx from datapipe.compute import Catalog, run_pipeline -from jims_core.thread.thread_controller import ThreadController from jims_core.llms.llm_provider import LLMSettings +from jims_core.thread.thread_controller import ThreadController from jims_core.util import uuid7 from vedana_core.settings import settings as core_settings from vedana_etl.app import app as etl_app from vedana_etl.pipeline import get_data_model_pipeline -from vedana_backoffice.states.common import MemLogger, get_vedana_app, load_openrouter_models, DEBUG_MODE, datapipe_log_capture, DebugState +from vedana_backoffice.states.common import ( + DEBUG_MODE, + DebugState, + MemLogger, + datapipe_log_capture, + get_vedana_app, +) from vedana_backoffice.states.jims import ThreadViewState @@ -29,106 +34,62 @@ class ChatState(rx.State): chat_thread_id: str = "" data_model_text: str = "" is_refreshing_dm: bool = False - provider: str = "openai" # default llm provider model: str = core_settings.model - _default_models: tuple[str, ...] 
= ( - "gpt-5.1-chat-latest", - "gpt-5.1", - "gpt-5-chat-latest", - "gpt-5", - "gpt-5-mini", - "gpt-5-nano", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4o", - "gpt-4o-mini", - "o4-mini", - ) - openai_models: list[str] = list( - set(list(_default_models) + [core_settings.model, core_settings.filter_model]) - ) - openrouter_models: list[str] = [] - openrouter_models_loaded: bool = False + default_models: list[str] = list({core_settings.model, core_settings.filter_model}) + available_models: list[str] = default_models + model_selection_allowed: bool = DEBUG_MODE enable_dm_filtering: bool = core_settings.enable_dm_filtering dm_filter_model: str = core_settings.filter_model - def _models_for_provider(self, provider: str) -> list[str]: - """Return the list of model names for the given provider (openai or openrouter).""" - if provider == "openrouter": - if self.openrouter_models: - return list(self.openrouter_models) - if self.openrouter_models_loaded: - return list(self.openai_models) - return list(self.openai_models) - return list(self.openai_models) - - @rx.var - def available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - - @rx.var - def dm_filter_available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - async def mount(self): - """Load OpenRouter models (fetches on first call, cached thereafter).""" - self.openrouter_models = await load_openrouter_models() - self.openrouter_models_loaded = True - self._sync_available_models() - self._sync_dm_filter_model() + if DEBUG_MODE: + yield DebugState.load_available_models() + + @rx.event(background=True) # type: ignore[operator] + async def refresh_model_list(self) -> None: + async with self: + self.available_models = await self.get_var_value(DebugState.available_models) # type: ignore[arg-type] + self._sync_model() + self._sync_dm_filter_model() def set_input(self, value: str) -> None: self.input_text = value def set_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) - if value in models: - self.model = value + self.model = value def set_enable_dm_filtering(self, value: bool) -> None: self.enable_dm_filtering = value - def set_provider(self, value: str) -> None: - self.provider = value - if self.provider == "openai": # reset defaults when changing back - self.model = core_settings.model - self.dm_filter_model = core_settings.filter_model - self._sync_available_models() - self._sync_dm_filter_model() - def set_dm_filter_model(self, value: str) -> None: - if value in self.dm_filter_available_models: - self.dm_filter_model = value - - def _sync_available_models(self) -> None: - """ - Recompute available_models based on selected provider, and realign - the selected model if it is no longer valid. 
- """ - - if self.provider == "openrouter": - models = self.openrouter_models - if not models: - if self.openrouter_models_loaded: - self.provider = "openai" - models = self.openai_models - else: - models = self.available_models or self.openai_models - else: - models = self.openai_models - - self.available_models = list(models) + self.dm_filter_model = value + def _sync_model(self) -> None: + """Realign selected model when model list changes.""" if self.model not in self.available_models and self.available_models: - self.model = self.available_models[0] + for model in self.available_models: + if model.endswith(core_settings.model): # or model.rsplit("/", 1)[-1] == core_settings.model + self.model = model + break + else: + if "openrouter/openrouter/free" in self.available_models: + self.model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default + else: + self.model = self.available_models[0] def _sync_dm_filter_model(self) -> None: - """Realign selected filter model when provider or model list changes.""" - models = self._models_for_provider(self.provider) - if self.dm_filter_model not in models and models: - self.dm_filter_model = models[0] + """Realign selected filter model when model list changes.""" + if self.dm_filter_model not in self.available_models and self.available_models: + for model in self.available_models: + if model.endswith(core_settings.filter_model): + self.dm_filter_model = model + break + else: + if "openrouter/openrouter/free" in self.available_models: # openrouter provider + self.dm_filter_model = "openrouter/openrouter/free" + else: + self.dm_filter_model = self.available_models[0] def toggle_details_by_id(self, message_id: str) -> None: for idx, m in enumerate(self.messages): @@ -260,13 +221,17 @@ async def _run_message(self, thread_id: str, user_text: str) -> Tuple[str, Dict[ pipeline = vedana_app.pipeline pipeline.logger = mem_logger - pipeline.model = f"{self.provider}/{self.model}" + pipeline.model = self.model pipeline.enable_filtering = self.enable_dm_filtering - pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}" - api_key = os.environ.get("OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY") + pipeline.filter_model = self.dm_filter_model + + ctx = await ctl.make_context( + llm_settings=LLMSettings( + model=self.model, + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) # type: ignore[arg-type] + ) + ) - ctx = await ctl.make_context(llm_settings=LLMSettings(model=self.model, model_api_key=api_key)) - events = await ctl.run_pipeline_with_context(pipeline, ctx) answer: str = "" @@ -306,11 +271,6 @@ def send(self): if not user_text: return - env_key = "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY" - if not os.environ.get(env_key): - yield DebugState.open_dialog() - return - self._append_message("user", user_text) self.input_text = "" self.is_running = True @@ -350,9 +310,11 @@ def reload_data_model(self): @rx.event(background=True) # type: ignore[operator] async def reload_data_model_background(self): try: + def _run_dm_pipeline(): with datapipe_log_capture(): run_pipeline(etl_app.ds, Catalog({}), get_data_model_pipeline()) + await asyncio.to_thread(_run_dm_pipeline) async with self: va = await get_vedana_app() diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py index 99b63c23..d948c834 
100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/common.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/common.py @@ -1,16 +1,14 @@ import asyncio -from async_lru import alru_cache -from contextlib import contextmanager import io import logging import os -from typing import Iterable +from contextlib import contextmanager -import httpx +import litellm import reflex as rx import requests -from jims_core.llms.llm_provider import env_settings as llm_settings from vedana_core.app import VedanaApp, make_vedana_app +from vedana_core.settings import settings as core_settings vedana_app: VedanaApp | None = None @@ -20,48 +18,27 @@ EVAL_ENABLED = bool(os.environ.get("GRIST_TEST_SET_DOC_ID")) DEBUG_MODE = (os.environ.get("VEDANA_BACKOFFICE_DEBUG", "").lower() in ("true", "1") or os.environ.get("DEBUG", "").lower() in ("true", "1")) -HAS_OPENAI_KEY = bool(os.environ.get("OPENAI_API_KEY")) -HAS_OPENROUTER_KEY = bool(os.environ.get("OPENROUTER_API_KEY")) - - -def _filter_chat_capable_models(models: Iterable[dict]) -> list[str]: - """Filter models that support text chat with tool calls.""" - result: list[str] = [] - for m in models: - model_id = str(m.get("id", "")).strip() - if not model_id: - continue - - architecture = m.get("architecture", {}) - has_chat = bool( - architecture - and "text" in architecture.get("input_modalities", []) - and "text" in architecture.get("output_modalities", []) - ) - has_tools = "tools" in m.get("supported_parameters", []) - if has_chat and has_tools: - result.append(model_id) - return result +async def load_litellm_models( + *, + provider: str | None = None, + check_provider_endpoint: bool = False, +) -> list[str]: + def _fetch() -> list[str]: + raw = litellm.get_valid_models( + custom_llm_provider=provider, + check_provider_endpoint=check_provider_endpoint, + ) + result: list[str] = [ + model if (provider is None or model.startswith(provider)) else f"{provider}/{model}" for model in raw + ] + return sorted(set(result)) + return await asyncio.to_thread(_fetch) # type: ignore[return-value] -@alru_cache -async def load_openrouter_models() -> list[str]: - if not DEBUG_MODE: - return [] - try: - async with httpx.AsyncClient(timeout=15) as client: - resp = await client.get(f"{llm_settings.openrouter_api_base_url}/models") - resp.raise_for_status() - models = resp.json().get("data", []) - return sorted(_filter_chat_capable_models(models)) - except Exception as exc: - logging.warning(f"Failed to fetch OpenRouter models: {exc}") - return [] - -async def get_vedana_app(): +async def get_vedana_app() -> VedanaApp: global vedana_app if vedana_app is None: vedana_app = await make_vedana_app() @@ -70,6 +47,7 @@ async def get_vedana_app(): class DatapipeStepError(RuntimeError): """Raised when a datapipe step fails without propagating the exception.""" + pass @@ -134,40 +112,88 @@ class DebugState(rx.State): debug_mode: bool = DEBUG_MODE show_api_key_dialog: bool = False - default_openai_api_key: str = os.environ.get("OPENAI_API_KEY", "") - default_openrouter_api_key: str = os.environ.get("OPENROUTER_API_KEY", "") - openai_api_key: str = "" - openrouter_api_key: str = "" + runtime_model_api_key: str = "" + runtime_model_provider: str | None = None + default_embeddings_model: str = core_settings.embeddings_model api_key_saved: bool = False + available_models: list[str] = [] @rx.var - def openai_key_empty(self) -> bool: - return not self.openai_api_key and not os.environ.get("OPENAI_API_KEY") + def provider_options(self) -> list[str]: + return ["openai", 
"openrouter", "anthropic", "cohere", "xai"] @rx.var - def openrouter_key_empty(self) -> bool: - return not self.openrouter_api_key and not os.environ.get("OPENROUTER_API_KEY") + def embeddings_model(self) -> str | None: + """embeddings model is fixed so its availability/correct name can be resolved here.""" + models = self.available_models + if not models: + return core_settings.embeddings_model + for m in models: + if m.rsplit("/", 1)[-1] == core_settings.embeddings_model: + return m + else: + _model, embeddings_model_provider, _1, _2 = litellm.get_llm_provider(core_settings.embeddings_model) + + if self.runtime_model_provider == "openrouter": # openrouter does not list embeddings models in its model list + if embeddings_model_provider == "openrouter": + return core_settings.embeddings_model + elif core_settings.embeddings_model.startswith(embeddings_model_provider): + return f"openrouter/{core_settings.embeddings_model}" + elif embeddings_model_provider: + return f"openrouter/{embeddings_model_provider}/{core_settings.embeddings_model}" + return None - def set_openai_api_key(self, value: str) -> None: - self.openai_api_key = value + @rx.var + def embeddings_model_available(self) -> bool: + return self.embeddings_model is not None - def set_openrouter_api_key(self, value: str) -> None: - self.openrouter_api_key = value + @rx.event(background=True) # type: ignore[operator] + async def load_available_models(self): + if not self.debug_mode: + return + models = await load_litellm_models( + provider=self.runtime_model_provider, + # check_provider is not necessary in most cases + # check_provider_endpoint=True if self.runtime_model_provider and self.runtime_model_provider == "openrouter" else False, + ) + async with self: + self.available_models = models + if not models and not self.api_key_saved: + self.show_api_key_dialog = True + + from vedana_backoffice.states.chat import ChatState + from vedana_backoffice.states.eval import EvalState - def save_api_keys(self) -> None: + yield ChatState.refresh_model_list() + yield EvalState.refresh_model_list() + + def set_model_api_key(self, value: str) -> None: + self.runtime_model_api_key = value + + def set_model_provider(self, value: str) -> None: + self.runtime_model_provider = value + + def save_api_key(self): if not self.debug_mode: return - os.environ["OPENAI_API_KEY"] = self.openai_api_key - os.environ["OPENROUTER_API_KEY"] = self.openrouter_api_key - self.api_key_saved = bool(self.openai_api_key or self.openrouter_api_key) - self.show_api_key_dialog = False + key = self.runtime_model_api_key.strip() + if key: + litellm.api_key = key + self.api_key_saved = True + self.show_api_key_dialog = False + else: + litellm.api_key = None + self.api_key_saved = False + self.show_api_key_dialog = False + self.available_models = [] + self.runtime_model_provider = None + # Background refresh will repopulate available_models and notify Chat/Eval. 
+ yield DebugState.load_available_models() def close_dialog(self) -> None: self.show_api_key_dialog = False def open_dialog(self) -> None: - self.openai_api_key = self.default_openai_api_key - self.openrouter_api_key = self.default_openrouter_api_key self.show_api_key_dialog = True diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py b/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py index 4e0262ea..2728c608 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/etl.py @@ -1273,7 +1273,7 @@ def _load_preview_page(self) -> None: @staticmethod def _preview_select_exprs(columns: list[str], table_alias: str | None = None) -> str: """Build SELECT expressions replacing heavy embedding vectors with a placeholder.""" - prefix = f'{table_alias}.' if table_alias else "" + prefix = f"{table_alias}." if table_alias else "" exprs: list[str] = [] for col in columns: if str(col) == "embedding": diff --git a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py index e4dab886..e4804a48 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/states/eval.py @@ -3,7 +3,6 @@ import hashlib import json import logging -import os import statistics import traceback from dataclasses import asdict, dataclass @@ -22,7 +21,13 @@ from vedana_core.settings import settings as core_settings from vedana_etl.app import app as etl_app -from vedana_backoffice.states.common import get_vedana_app, load_openrouter_models, datapipe_log_capture, DebugState +from vedana_backoffice.states.common import ( + DebugState, + DEBUG_MODE, + EVAL_ENABLED, + datapipe_log_capture, + get_vedana_app, +) from vedana_backoffice.util import safe_render_value @@ -168,6 +173,18 @@ class CompareRow: } +eval_judge_prompt_template = """\ +You are a strict evaluation judge. Compare the model's answer with the golden answer and the expected retrieval context. +Consider whether the model's answer is factually aligned and sufficiently complete. +Use the provided technical info (retrieval queries) only as hints for whether the context seems adequate. +Return a JSON object with fields: test_status in {'pass','fail'}, comment, errors. + +In the comment, include an answer score from 1 to 10, where: +1 – totally wrong answer +10 – totally correct answer +""" + + class EvalState(rx.State): """State holder for evaluation workflow.""" @@ -179,52 +196,16 @@ class EvalState(rx.State): selected_question_ids: list[str] = [] test_run_name: str = "" selected_scenario: str = "all" # Filter by scenario - judge_model: str = core_settings.judge_model judge_prompt_id: str = "" judge_prompt: str = "" - provider: str = "openai" pipeline_model: str = core_settings.model - embeddings_model: str = core_settings.embeddings_model + default_embeddings_model: str = core_settings.embeddings_model embeddings_dim: int = core_settings.embeddings_dim enable_dm_filtering: bool = core_settings.enable_dm_filtering - _default_models: tuple[str, ...]
= ( - "gpt-5.1-chat-latest", - "gpt-5.1", - "gpt-5-chat-latest", - "gpt-5", - "gpt-5-mini", - "gpt-5-nano", - "gpt-4.1", - "gpt-4.1-mini", - "gpt-4.1-nano", - "gpt-4o", - "gpt-4o-mini", - "o4-mini", - ) - openai_models: list[str] = list( - set([core_settings.model, core_settings.filter_model, core_settings.judge_model] + list(_default_models)) - ) - openrouter_models: list[str] = [] + available_models: list[str] = list({core_settings.model, core_settings.filter_model, core_settings.judge_model}) dm_filter_model: str = core_settings.filter_model dm_id: str = "" - - def _models_for_provider(self, provider: str) -> list[str]: - """Return the list of model names for the given provider (openai or openrouter).""" - if provider == "openrouter": - return list(self.openrouter_models) - return list(self.openai_models) - - @rx.var - def available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - - @rx.var - def dm_filter_available_models(self) -> list[str]: - return self._models_for_provider(self.provider) - - @rx.var - def judge_available_models(self) -> list[str]: - return self._models_for_provider(self.provider) + judge_model: str = core_settings.judge_model tests_rows: list[dict[str, Any]] = [] tests_cost_total: float = 0.0 @@ -326,10 +307,6 @@ def all_selected(self) -> bool: rows = len(self.eval_gds_rows_with_selection) # Use filtered count return 0 < rows == self.selected_count - @rx.var - def can_run(self) -> bool: - return (self.selected_count > 0) and (not self.is_running) - @rx.var def cost_label(self) -> str: if self.tests_cost_total > 0: @@ -385,17 +362,13 @@ def can_compare_runs(self) -> bool: and not self.compare_loading ) - @rx.var - def available_models_view(self) -> list[str]: - return self.available_models - @rx.var def dm_filter_model_display(self) -> str: - return f"{self.provider}/{self.dm_filter_model}" + return self.dm_filter_model @rx.var def judge_model_display(self) -> str: - return f"{self.provider}/{self.judge_model}" + return self.judge_model def toggle_question_selection(self, question: str, checked: bool) -> None: question = str(question or "").strip() @@ -459,36 +432,20 @@ def set_test_run_name(self, value: str) -> None: self.test_run_name = str(value or "").strip() def set_pipeline_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) - if value in models: + if value in self.available_models: self.pipeline_model = value def set_enable_dm_filtering(self, value: bool) -> None: self.enable_dm_filtering = value def set_dm_filter_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) - if value in models: + if value in self.available_models: self.dm_filter_model = value def set_judge_model(self, value: str) -> None: - models = self._models_for_provider(self.provider) - if value in models: + if value in self.available_models: self.judge_model = value - async def set_provider(self, value: str) -> None: - self.provider = str(value or "openai") - if self.provider == "openrouter" and not self.openrouter_models: - self.openrouter_models = await load_openrouter_models() - else: - # When switching back to OpenAI, reset models to settings defaults - self.pipeline_model = core_settings.model - self.dm_filter_model = core_settings.filter_model - self.judge_model = core_settings.judge_model - self._sync_available_models() - self._sync_dm_filter_model() - self._sync_judge_model() - def set_compare_run_a(self, value: str) -> None: self.compare_run_a = str(value or "").strip() @@ -504,28 +461,54 @@ 
def _prune_selection(self) -> None: self.selected_question_ids = [q for q in (self.selected_question_ids or []) if q in valid] def _sync_available_models(self) -> None: - """Realign selected pipeline model when provider or model list changes.""" - if self.provider == "openrouter" and not self.openrouter_models: - self.provider = "openai" - models = self._models_for_provider(self.provider) - if self.pipeline_model not in models and models: - self.pipeline_model = models[0] + """Realign selected pipeline model when model list changes.""" + if self.pipeline_model not in self.available_models and self.available_models: + for model in self.available_models: + if model.endswith(core_settings.model): + self.pipeline_model = model + break + else: + if "openrouter/openrouter/free" in self.available_models: # openrouter provider + self.pipeline_model = "openrouter/openrouter/free" # Openrouter has an endpoint with all free models, set it as default + else: + self.pipeline_model = self.available_models[0] def _sync_dm_filter_model(self) -> None: - """Realign selected filter model when provider or model list changes.""" - models = self._models_for_provider(self.provider) - if self.dm_filter_model not in models and models: - self.dm_filter_model = models[0] + """Realign selected filter model when model list changes.""" + if self.dm_filter_model not in self.available_models and self.available_models: + for model in self.available_models: + if model.endswith(core_settings.filter_model): + self.dm_filter_model = model + break + else: + if "openrouter/openrouter/free" in self.available_models: + self.dm_filter_model = "openrouter/openrouter/free" + else: + self.dm_filter_model = self.available_models[0] def _sync_judge_model(self) -> None: - """Realign selected judge model when provider or model list changes.""" - models = self._models_for_provider(self.provider) - if self.judge_model not in models and models: - self.judge_model = models[0] + """Realign selected judge model when model list changes.""" + if self.judge_model not in self.available_models and self.available_models: + for model in self.available_models: + if model.endswith(core_settings.judge_model): + self.judge_model = model + break + else: + if "openrouter/openrouter/free" in self.available_models: + self.judge_model = "openrouter/openrouter/free" + else: + self.judge_model = self.available_models[0] + + @rx.event(background=True) # type: ignore[operator] + async def refresh_model_list(self) -> None: + async with self: + self.available_models = await self.get_var_value(DebugState.available_models) # type: ignore[arg-type] + self._sync_available_models() + self._sync_dm_filter_model() + self._sync_judge_model() def _resolved_pipeline_model(self) -> str: - provider = self.provider or "openai" - return f"{provider}/{self.pipeline_model}" + return self.pipeline_model def get_eval_gds_from_grist(self): step = next((s for s in etl_app.steps if s._name == "get_eval_gds_from_grist"), None) @@ -566,13 +549,13 @@ async def _load_eval_questions(self) -> None: self._prune_selection() async def _load_judge_config(self) -> None: - self.judge_model = core_settings.judge_model + self.judge_model = core_settings.judge_model if not self.judge_model else self.judge_model self.judge_prompt_id = "" self.judge_prompt = "" vedana_app = await get_vedana_app() dm_pt = await vedana_app.data_model.prompt_templates() - judge_prompt = dm_pt.get("eval_judge_prompt") + judge_prompt = dm_pt.get("eval_judge_prompt", eval_judge_prompt_template) if judge_prompt: text_b = 
bytearray(judge_prompt, "utf-8") @@ -1195,7 +1178,7 @@ async def _build_eval_meta_payload(self, vedana_app, test_run_id: str, test_run_ ) run_config = RunConfig( pipeline_model=self._resolved_pipeline_model(), - embeddings_model=self.embeddings_model, + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model), # type: ignore[arg-type] embeddings_dim=self.embeddings_dim, ) return { @@ -1657,8 +1640,7 @@ def _build_thread_config( "judge_model": self.judge_model, "judge_prompt_id": self.judge_prompt_id, "pipeline_model": resolved_model, - "pipeline_provider": self.provider, - "embeddings_model": self.embeddings_model, + "embeddings_model": core_settings.embeddings_model, "embeddings_dim": self.embeddings_dim, "dm_id": self.dm_id, } @@ -1703,13 +1685,17 @@ async def _run_question_thread( resolved_model = self._resolved_pipeline_model() pipeline.model = resolved_model pipeline.enable_filtering = self.enable_dm_filtering - pipeline.filter_model = f"{self.provider}/{self.dm_filter_model}" + pipeline.filter_model = self.dm_filter_model - api_key = (os.environ.get( - "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY" - ) or "").strip() or None + async with self: + pipeline_embeddings_model = core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) # type: ignore[arg-type] - ctx = await ctl.make_context(llm_settings=LLMSettings(model=resolved_model, model_api_key=api_key)) + ctx = await ctl.make_context( + llm_settings=LLMSettings( + model=resolved_model, + embeddings_model=pipeline_embeddings_model + ) + ) events = await ctl.run_pipeline_with_context(pipeline, ctx) answer: str = "" @@ -1722,7 +1708,9 @@ async def _run_question_thread( return str(thread_id), answer, technical_info - async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_calls: str) -> tuple[str, str, int, float]: + async def _judge_answer( + self, question_row: dict[str, Any], answer: str, tool_calls: str + ) -> tuple[str, str, int, float]: """Judge model answer with current judge prompt/model and rating. Returns (status, comment, rating, judge_cost). """ @@ -1730,18 +1718,12 @@ async def _judge_answer(self, question_row: dict[str, Any], answer: str, tool_ca if not judge_prompt: return "fail", "Judge prompt not loaded", 0, 0.0 - provider = LLMProvider() - resolved_judge_model = f"{self.provider}/{self.judge_model}" - try: - provider.set_model(resolved_judge_model) - except Exception: - logging.warning(f"Failed to set judge model {resolved_judge_model}") - - api_key = (os.environ.get( - "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY" - ) or "").strip() - if api_key: - provider.model_api_key = api_key + provider = LLMProvider( + settings=LLMSettings( + model=self.judge_model, + embeddings_model=core_settings.embeddings_model if not DEBUG_MODE else await self.get_var_value(DebugState.embeddings_model) # type: ignore[arg-type] + ) + ) class JudgeResult(BaseModel): test_status: str = Field(description="pass / fail") @@ -1808,12 +1790,7 @@ def run_selected_tests(self): self.error_message = "Select at least one question to run tests." return if not self.judge_prompt: - self.error_message = "Judge prompt not loaded. Refresh judge config first." 
- return - - env_key = "OPENROUTER_API_KEY" if self.provider == "openrouter" else "OPENAI_API_KEY" - if not os.environ.get(env_key): - yield DebugState.open_dialog() + self.error_message = "Judge prompt not loaded. Refresh data model first." return test_run_name = self.test_run_name.strip() or "" @@ -1878,7 +1855,7 @@ async def _run_one(question: str) -> dict[str, Any]: "judge_prompt_id": self.judge_prompt_id, "dm_id": self.dm_id, "pipeline_model": resolved_pipeline_model, - "embeddings_model": self.embeddings_model, + "embeddings_model": core_settings.embeddings_model, "embeddings_dim": self.embeddings_dim, "test_run_id": test_run_id, "test_run_name": test_run_name, @@ -1976,12 +1953,19 @@ def load_eval_data(self): self.tests_page = 0 # Reset to first page yield yield EvalState.load_eval_data_background() + yield EvalState.refresh_golden_dataset_background() + + async def mount(self): + if EVAL_ENABLED: + # yield EvalState.load_eval_data_background() + if DEBUG_MODE: + yield DebugState.load_available_models() @rx.event(background=True) # type: ignore[operator] async def load_eval_data_background(self): try: async with self: - self.openrouter_models = await load_openrouter_models() + self.available_models = await self.get_var_value(DebugState.available_models) self._sync_available_models() self._sync_dm_filter_model() self._sync_judge_model() diff --git a/libs/vedana-backoffice/src/vedana_backoffice/ui.py b/libs/vedana-backoffice/src/vedana_backoffice/ui.py index 24c08cb1..f068833f 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/ui.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/ui.py @@ -53,14 +53,14 @@ def debug_badge() -> rx.Component: }, on_click=DebugState.open_dialog, ), - content="Click to enter or reset your OpenAI / OpenRouter API key (required for chat and eval model selection)", + content="Click to set your LiteLLM API key for debug mode model selection", ), rx.fragment(), ) def api_key_setup_dialog() -> rx.Component: - """Dialog to prompt for OpenAI and OpenRouter API keys; highlights missing fields.""" + """Dialog to prompt for a runtime API key used by LiteLLM in debug mode.""" _missing_style = { "border": "2px solid #d97706", "border_radius": "6px", @@ -71,39 +71,33 @@ def api_key_setup_dialog() -> rx.Component: rx.dialog.content( rx.dialog.title("API Key Setup"), rx.dialog.description( - "Add API keys for the providers you use.", + "Paste your LiteLLM-compatible API key here and click 'Save' to use compatible models. 
Remove the key to revert to the default key and model list.", margin_bottom="1em", ), rx.vstack( rx.vstack( - rx.text("OpenAI API Key", font_size="2", font_weight="500"), - rx.input( - placeholder="sk-...", - on_change=DebugState.set_openai_api_key, - type="password", + rx.text("Provider", font_size="2", font_weight="500"), + rx.select( + items=DebugState.provider_options, + value=DebugState.runtime_model_provider, + on_change=DebugState.set_model_provider, width="100%", - style=rx.cond( - DebugState.openai_key_empty, - {**_normal_style, **_missing_style}, - _normal_style, - ), + placeholder="Select provider", + style=_normal_style, ), spacing="2", width="100%", align="start", ), rx.vstack( - rx.text("OpenRouter API Key", font_size="2", font_weight="500"), + rx.text("Runtime API Key", font_size="2", font_weight="500"), rx.input( - placeholder="sk-or-...", - on_change=DebugState.set_openrouter_api_key, + placeholder="sk-...", + value=DebugState.runtime_model_api_key, + on_change=DebugState.set_model_api_key, type="password", width="100%", - style=rx.cond( - DebugState.openrouter_key_empty, - {**_normal_style, **_missing_style}, - _normal_style, - ), + style=_normal_style, ), spacing="2", width="100%", @@ -121,7 +115,7 @@ def api_key_setup_dialog() -> rx.Component: rx.button( "Save", color_scheme="blue", - on_click=DebugState.save_api_keys, + on_click=DebugState.save_api_key, ), justify="end", spacing="3", diff --git a/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py b/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py index 717f688a..368ecdc2 100644 --- a/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py +++ b/libs/vedana-backoffice/src/vedana_backoffice/vedana_backoffice.py @@ -15,4 +15,4 @@ app.add_page(etl_page, route="/etl", title="ETL", on_load=EtlState.load_pipeline_metadata) app.add_page(chat_page, route="/chat", title="Chat", on_load=ChatState.reset_session) app.add_page(jims_thread_list_page, route="/jims", title="JIMS", on_load=ThreadListState.get_data) -app.add_page(eval_page, route="/eval", title="Evaluation", on_load=EvalState.load_eval_data) +app.add_page(eval_page, route="/eval", title="Evaluation", on_load=EvalState.mount)
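
Reviewer note: with the api_key fields removed from LLMSettings and LLMProvider, credentials are now resolved by LiteLLM itself, either from the provider's usual env vars (OPENAI_API_KEY, OPENROUTER_API_KEY, ...) or from the process-wide litellm.api_key that DebugState.save_api_key sets in debug mode. A minimal sketch of the new construction path, assuming the field names shown in this patch (the model names and key are illustrative, not repo defaults):

    import litellm

    from jims_core.llms.llm_provider import LLMProvider, LLMSettings

    # Optional debug-mode override, mirroring DebugState.save_api_key
    # (hypothetical key; normally credentials come from env vars).
    litellm.api_key = "sk-..."

    # LLMSettings now carries only model/embeddings configuration; no *_api_key fields.
    provider = LLMProvider(
        settings=LLMSettings(
            model="openrouter/openai/gpt-4.1-mini",  # any LiteLLM model route
            embeddings_model="text-embedding-3-large",
            embeddings_dim=1024,
        )
    )
    embedding = provider.create_embedding_sync("hello")  # no api_key= argument anymore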
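
The debug-mode model list is likewise delegated to LiteLLM: load_litellm_models wraps litellm.get_valid_models, which infers valid models from whichever API keys are available, and provider-prefixes the result. A usage sketch under the same assumptions ("openrouter" is just one example argument):

    import asyncio

    from vedana_backoffice.states.common import load_litellm_models

    async def main() -> None:
        # provider=None returns every model LiteLLM considers valid for the
        # keys in the environment; a provider name filters the list and
        # ensures entries are prefixed, e.g. "openrouter/openai/gpt-4.1-mini".
        models = await load_litellm_models(provider="openrouter")
        print(models[:5])

    asyncio.run(main())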