5 changes: 4 additions & 1 deletion apps/vedana/.env.example
@@ -24,8 +24,11 @@ JIMS_DB_CONN_URI="postgresql://postgres:postgres@db:5432"
 
 # Main LLM model
 MODEL="gpt-4.1-mini"
-# Embeddings are provided via LiteLLM as well. OpenRouter's embedding models are not supported there at the moment
+# Embeddings are provided via LiteLLM as well.
+# You can use OpenAI directly:
 EMBEDDINGS_MODEL="text-embedding-3-large"
+# Or route embeddings through OpenRouter (requires OPENROUTER_API_KEY):
+# EMBEDDINGS_MODEL="openrouter/openai/text-embedding-3-large"
 # Note: changing dimensions of embeddings requires an SQL migration.
 EMBEDDINGS_DIM=1024
 EMBEDDINGS_MAX_BATCH_SIZE=2048
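For reference, the two routing options in the updated comments differ only in the model-string prefix. A minimal sketch of the LiteLLM call these settings end up driving (assumes the matching API key env var is exported; the values mirror the example file):

```python
import litellm

# EMBEDDINGS_MODEL="text-embedding-3-large"                   -> OpenAI directly
# EMBEDDINGS_MODEL="openrouter/openai/text-embedding-3-large" -> via OpenRouter
response = litellm.embedding(
    model="openrouter/openai/text-embedding-3-large",
    input=["hello"],
    dimensions=1024,  # EMBEDDINGS_DIM; changing it requires an SQL migration
)
print(len(response.data[0]["embedding"]))  # 1024
```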
20 changes: 2 additions & 18 deletions libs/jims-core/src/jims_core/llms/llm_provider.py
@@ -24,13 +24,6 @@ class LLMSettings(BaseSettings):
     embeddings_max_batch_size: int = 2048
     embeddings_max_tokens_per_batch: int = 200000
 
-    # passable api_keys; if None, defaults to env vars
-    model_api_key: str | None = None
-    embeddings_model_api_key: str | None = None
-
-    # openrouter_api_key: str | None = None
-    openrouter_api_base_url: str = "https://openrouter.ai/api/v1"
-
 
 env_settings = LLMSettings()  # type: ignore
 
@@ -77,9 +70,7 @@ class LLMProvider:
     def __init__(self, settings: LLMSettings | None = None) -> None:
         self._settings = settings or env_settings
         self.model = self._settings.model
-        self.model_api_key = self._settings.model_api_key
         self.embeddings_model = self._settings.embeddings_model
-        self.embeddings_model_api_key = self._settings.embeddings_model_api_key
         self.embeddings_dim = self._settings.embeddings_dim
         self.max_batch_size = self._settings.embeddings_max_batch_size
         self.max_tokens_per_batch = self._settings.embeddings_max_tokens_per_batch
@@ -109,7 +100,7 @@ def observe_completion(self, completion: litellm.ModelResponse) -> None:
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
             cached_tokens=cached_tokens,
-            request_cost=completion._hidden_params.get("response_cost", 0),  # or litellm.completion_cost(completion)
+            request_cost=completion._hidden_params.get("response_cost") or 0,  # or litellm.completion_cost(completion)
         )
 
         llm_calls_total.labels(completion.model).inc()
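The `, 0` → `or 0` change is a behavior fix, not a style tweak: `_hidden_params` can hold an explicit `None` under `response_cost` (which is what this fix guards against), and `dict.get(key, default)` only falls back when the key is missing, so the old code could pass `None` as `request_cost`. A quick illustration of the difference:

```python
params = {"response_cost": None}

params.get("response_cost", 0)    # -> None: key exists, so the default is ignored
params.get("response_cost") or 0  # -> 0: also coerces an explicit None to 0
```

The same guard is applied to `observe_create_embedding` in the next hunk.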
@@ -125,7 +116,7 @@ def observe_create_embedding(self, res: litellm.EmbeddingResponse) -> None:
         if usage is not None:
             self.usage[model].observe(
                 prompt_tokens=usage.prompt_tokens,
-                request_cost=res._hidden_params.get("response_cost", 0),  # or litellm.completion_cost(res)
+                request_cost=res._hidden_params.get("response_cost") or 0,  # or litellm.completion_cost(res)
             )
         llm_usage_prompt_tokens_total.labels(model).inc(usage.prompt_tokens)
 
@@ -135,7 +126,6 @@ async def create_embedding(self, text: str) -> list[float]:
             model=self.embeddings_model,
             input=[text],
             dimensions=self.embeddings_dim,
-            api_key=self.embeddings_model_api_key,
         )
         self.observe_create_embedding(response)
         return response.data[0]["embedding"]
@@ -170,7 +160,6 @@ async def create_embeddings(self, texts: list[str]) -> list[list[float]]:
                 model=self.embeddings_model,
                 input=batch,
                 dimensions=self.embeddings_dim,
-                api_key=self.embeddings_model_api_key,
             )
             self.observe_create_embedding(response)
             results.extend(d["embedding"] for d in response.data)
@@ -181,7 +170,6 @@ def create_embedding_sync(self, text: str) -> list[float]:
             model=self.embeddings_model,
             input=[text],
             dimensions=self.embeddings_dim,
-            api_key=self.embeddings_model_api_key,
         )
         self.observe_create_embedding(response)
         return response.data[0]["embedding"]
@@ -194,7 +182,6 @@ def create_embeddings_sync(self, texts: list[str]) -> list[list[float]]:
                 model=self.embeddings_model,
                 input=batch,
                 dimensions=self.embeddings_dim,
-                api_key=self.embeddings_model_api_key,
             )
             self.observe_create_embedding(response)
             results.extend(d["embedding"] for d in response.data)
@@ -210,7 +197,6 @@ async def chat_completion_structured[T: BaseModel](
             model=self.model,
             messages=list(messages),
             response_format=response_format,
-            api_key=self.model_api_key,
         )
         assert isinstance(completion, litellm.ModelResponse)
 
@@ -234,7 +220,6 @@ async def chat_completion_plain(
             model=self.model,
             messages=list(messages),
             caching=use_cache,
-            api_key=self.model_api_key,
         )
         assert isinstance(completion, litellm.ModelResponse)
 
@@ -258,7 +243,6 @@ async def chat_completion_with_tools(
             model=self.model,
             messages=list(messages),
             tools=tools,
-            api_key=self.model_api_key,
         )
         assert isinstance(completion, litellm.ModelResponse)
 
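With the per-call `api_key` arguments gone, credential resolution is left entirely to LiteLLM. A sketch of the setup the provider now assumes (the env var names are LiteLLM's standard ones, not something this PR defines):

```python
import os

from jims_core.llms.llm_provider import LLMProvider

# LiteLLM picks keys from the environment based on the model prefix:
os.environ["OPENAI_API_KEY"] = "..."      # plain OpenAI model names
os.environ["OPENROUTER_API_KEY"] = "..."  # "openrouter/..." model names

provider = LLMProvider()  # reads MODEL / EMBEDDINGS_MODEL via LLMSettings
vector = provider.create_embedding_sync("hello")
```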
2 changes: 1 addition & 1 deletion libs/jims-core/src/jims_core/thread/thread_context.py
@@ -111,7 +111,7 @@ def context(self, conversation_length: int = 20) -> list[CommunicationEvent]:
                 result.append(CommunicationEvent(**event.event_data))
                 comm_counter += 1
             if event.event_type.startswith("context."):
-                result.append(CommunicationEvent(**event.event_data))
+                result.append(CommunicationEvent(**event.event_data))
             if comm_counter > conversation_length:
                 break
 
2 changes: 1 addition & 1 deletion libs/vedana-backoffice/pyproject.toml
@@ -10,7 +10,7 @@ authors = [
 ]
 
 dependencies = [
-    "reflex>=0.8.26,<0.9.0",
+    "reflex>=0.8.27,<0.9.0",
     "orjson>=3.11.3",
     "vedana-core",
    "vedana-etl",
42 changes: 25 additions & 17 deletions libs/vedana-backoffice/src/vedana_backoffice/pages/chat.py
@@ -2,7 +2,7 @@
 
 from vedana_backoffice.components.ui_chat import render_message_bubble
 from vedana_backoffice.states.chat import ChatState
-from vedana_backoffice.states.common import AppVersionState
+from vedana_backoffice.states.common import AppVersionState, DebugState
 from vedana_backoffice.ui import app_header
 
 
@@ -111,7 +111,7 @@ def page() -> rx.Component:
         rx.cond(
             ChatState.enable_dm_filtering & AppVersionState.debug_mode,
             rx.select(
-                items=ChatState.dm_filter_available_models,
+                items=DebugState.available_models,
                 value=ChatState.dm_filter_model,
                 on_change=ChatState.set_dm_filter_model,
                 width="16em",
@@ -120,6 +120,23 @@
             rx.text(ChatState.dm_filter_model, size="1", color="gray"),
         ),
         rx.spacer(),
+        rx.cond(
+            AppVersionState.debug_mode,
+            rx.cond(
+                DebugState.embeddings_model_available,
+                rx.text(
+                    f"Embeddings: {DebugState.embeddings_model}",
+                    size="1",
+                    color="gray",
+                ),
+                rx.text(
+                    f"Embeddings: {DebugState.default_embeddings_model} (unavailable for provider)",
+                    size="1",
+                    color="red",
+                ),
+            ),
+            rx.fragment(),
+        ),
         rx.hstack(
             rx.cond(
                 ChatState.chat_thread_id != "",
@@ -157,21 +174,12 @@
             ),
             rx.cond(
                 ChatState.model_selection_allowed,
-                rx.hstack(
-                    rx.select(
-                        items=["openai", "openrouter"],
-                        value=ChatState.provider,
-                        on_change=ChatState.set_provider,
-                        width="10em",
-                        placeholder="Provider",
-                    ),
-                    rx.select(
-                        items=ChatState.available_models,
-                        value=ChatState.model,
-                        on_change=ChatState.set_model,
-                        width="16em",
-                        placeholder="Select model",
-                    ),
+                rx.select(
+                    items=DebugState.available_models,
+                    value=ChatState.model,
+                    on_change=ChatState.set_model,
+                    width="20em",
+                    placeholder="Select model",
                 ),
                 rx.badge(ChatState.model, variant="surface", color_scheme="gray", size="3"),
             ),
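Both pages now read model metadata from `DebugState` (imported from `vedana_backoffice.states.common`), which this diff uses but does not define. Inferred purely from the call sites in chat.py and eval.py, its shape would be roughly the following; the types and defaults are guesses:

```python
import reflex as rx


class DebugState(rx.State):
    # Var names match usage in chat.py / eval.py; everything else is assumed.
    debug_mode: bool = False                  # gates debug-only UI
    available_models: list[str] = []          # feeds every model rx.select
    embeddings_model: str = ""                # effective embeddings model
    default_embeddings_model: str = ""        # configured default
    embeddings_model_available: bool = False  # False renders the red warning
```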
52 changes: 26 additions & 26 deletions libs/vedana-backoffice/src/vedana_backoffice/pages/eval.py
@@ -1,8 +1,8 @@
 import reflex as rx
 
-from vedana_backoffice.states.common import AppVersionState
-from vedana_backoffice.states.eval import EvalState, RunSummary
+from vedana_backoffice.states.chat import ChatState
+from vedana_backoffice.states.common import AppVersionState, DebugState
+from vedana_backoffice.states.eval import EvalState, RunSummary
 from vedana_backoffice.ui import app_header
 
 
@@ -21,7 +21,11 @@ def _selection_and_actions() -> rx.Component:
             color_scheme="blue",
             on_click=EvalState.run_selected_tests,
             loading=EvalState.is_running,
-            disabled=rx.cond(EvalState.can_run, False, True),  # type: ignore[arg-type]
+            disabled=rx.cond(
+                DebugState.debug_mode & ~DebugState.embeddings_model_available,
+                True,
+                rx.cond((EvalState.selected_count > 0) & ~EvalState.is_running, False, True),  # type: ignore[arg-type]
+            ),
         ),
         rx.spacer(),
         rx.button(
@@ -175,7 +179,7 @@ def _judge_card() -> rx.Component:
         rx.cond(
             AppVersionState.debug_mode,
             rx.select(
-                items=EvalState.judge_available_models,
+                items=DebugState.available_models,
                 value=EvalState.judge_model,
                 on_change=EvalState.set_judge_model,
                 width="100%",
@@ -235,25 +239,12 @@ def _pipeline_card() -> rx.Component:
         rx.text("Pipeline model", weight="medium", width="100%"),
         rx.cond(
             ChatState.model_selection_allowed,
-            rx.hstack(
-                rx.select(
-                    items=["openai", "openrouter"],
-                    value=EvalState.provider,
-                    on_change=EvalState.set_provider,
-                    width="100%",
-                    placeholder="Provider",
-                ),
-                rx.select(
-                    items=EvalState.available_models_view,
-                    value=EvalState.pipeline_model,
-                    on_change=EvalState.set_pipeline_model,
-                    width="100%",
-                    placeholder="Select model",
-                ),
-                spacing="2",
-                align="center",
-                wrap="wrap",
+            rx.select(
+                items=DebugState.available_models,
+                value=EvalState.pipeline_model,
+                on_change=EvalState.set_pipeline_model,
+                width="100%",
+                placeholder="Select model",
             ),
             rx.text(EvalState.pipeline_model, size="3"),
         ),
@@ -270,7 +261,7 @@
         rx.cond(
             AppVersionState.debug_mode,
             rx.select(
-                items=EvalState.dm_filter_available_models,
+                items=DebugState.available_models,
                 value=EvalState.dm_filter_model,
                 on_change=EvalState.set_dm_filter_model,
                 width="100%",
@@ -291,9 +282,18 @@
         ),
         rx.box(
             rx.text("Embeddings", weight="medium"),
-            rx.text(
-                rx.cond(EvalState.embeddings_model != "", EvalState.embeddings_model, "—"),
-                size="3",
+            rx.cond(
+                AppVersionState.debug_mode,
+                rx.cond(
+                    DebugState.embeddings_model_available,
+                    rx.text(DebugState.embeddings_model, size="3"),
+                    rx.text(
+                        f"{EvalState.default_embeddings_model} (unavailable for provider)",
+                        size="3",
+                        color="red",
+                    ),
+                ),
+                rx.text(EvalState.default_embeddings_model, size="3"),
             ),
             rx.text(
                 EvalState.embeddings_dim_label,
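One note on the new `disabled=` gate in `_selection_and_actions`: both branches of the outer `rx.cond` are plain booleans, so the nested conds collapse to a single var expression. An equivalent formulation (not what the PR uses):

```python
import reflex as rx

from vedana_backoffice.states.common import DebugState
from vedana_backoffice.states.eval import EvalState

rx.button(
    "Run",
    # Disabled when embeddings are unavailable in debug mode, or when
    # nothing is selected, or while a run is already in progress.
    disabled=(
        (DebugState.debug_mode & ~DebugState.embeddings_model_available)
        | ~((EvalState.selected_count > 0) & ~EvalState.is_running)
    ),
)
```

Either spelling yields the same reactive value in Reflex.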
@@ -687,7 +687,13 @@ def priority_badge(value: str) -> rx.Component:  # type: ignore[valid-type]
         rx.text(ThreadListState.rows_display, size="2", color="gray"),
         rx.spacer(),
         rx.hstack(
-            rx.button("⏮", variant="soft", size="1", on_click=ThreadListState.first_page, disabled=~ThreadListState.has_prev_page),  # type: ignore[operator]
+            rx.button(
+                "⏮",
+                variant="soft",
+                size="1",
+                on_click=ThreadListState.first_page,
+                disabled=~ThreadListState.has_prev_page,
+            ),  # type: ignore[operator]
             rx.button(
                 "← Prev",
                 variant="soft",
@@ -703,7 +709,13 @@ def priority_badge(value: str) -> rx.Component:  # type: ignore[valid-type]
                 on_click=ThreadListState.next_page,
                 disabled=~ThreadListState.has_next_page,
             ),  # type: ignore[operator]
-            rx.button("⏭", variant="soft", size="1", on_click=ThreadListState.last_page, disabled=~ThreadListState.has_next_page),  # type: ignore[operator]
+            rx.button(
+                "⏭",
+                variant="soft",
+                size="1",
+                on_click=ThreadListState.last_page,
+                disabled=~ThreadListState.has_next_page,
+            ),  # type: ignore[operator]
             spacing="2",
             align="center",
         ),