ProjectTech4DevAI · Prajna1999 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 21, 2026
diff --git a/backend/app/alembic/versions/045_add_llm_call_table.py b/backend/app/alembic/versions/045_add_llm_call_table.py
@@ -0,0 +1,193 @@
+"""add_llm_call_table
+
+Revision ID: 045
+Revises: 044
+Create Date: 2026-01-26 15:20:23.873332
+
+"""
+from alembic import op
+import sqlalchemy as sa
+import sqlmodel.sql.sqltypes
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = "045"
+down_revision = "044"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    """Create the llm_call table and its two partial indexes.
+
+    Also updates the column comment on collection.llm_service_name.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table(
+        "llm_call",
+        sa.Column(
+            "id",
+            sa.Uuid(),
+            nullable=False,
+            comment="Unique identifier for the LLM call record",
+        ),
+        sa.Column(
+            "job_id",
+            sa.Uuid(),
+            nullable=False,
+            comment="Reference to the parent job (status tracked in job table)",
+        ),
+        sa.Column(
+            "project_id",
+            sa.Integer(),
+            nullable=False,
+            comment="Reference to the project this LLM call belongs to",
+        ),
+        sa.Column(
+            "organization_id",
+            sa.Integer(),
+            nullable=False,
+            comment="Reference to the organization this LLM call belongs to",
+        ),
+        sa.Column(
+            "input",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=False,
+            comment="User input - text string, binary data, or file path for multimodal",
+        ),
+        sa.Column(
+            "input_type",
+            sa.String(),
+            nullable=False,
+            comment="Input type: text, audio, image",
+        ),
+        sa.Column(
+            "output_type",
+            sa.String(),
+            nullable=True,
+            comment="Expected output type: text, audio, image",
+        ),
+        sa.Column(
+            "provider",
+            sa.String(),
+            nullable=False,
+            comment="AI provider as sent by user (e.g openai, -native, google)",
+        ),
+        sa.Column(
+            "model",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=False,
+            comment="Specific model used e.g. 'gpt-4o', 'gemini-2.5-pro'",
+        ),
+        sa.Column(
+            "provider_response_id",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=True,
+            comment="Original response ID from the provider (e.g., OpenAI's response ID)",
+        ),
+        sa.Column(
+            "content",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+            comment="Response content: {text: '...'}, {audio_bytes: '...'}, or {image: '...'}",
+        ),
+        sa.Column(
+            "usage",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+            comment="Token usage: {input_tokens, output_tokens, reasoning_tokens}",
+        ),
+        sa.Column(
+            "conversation_id",
+            sqlmodel.sql.sqltypes.AutoString(),
+            nullable=True,
+            comment="Identifier linking this response to its conversation thread",
+        ),
+        sa.Column(
+            "auto_create",
+            sa.Boolean(),
+            nullable=True,
+            comment="Whether to auto-create conversation if conversation_id doesn't exist (OpenAI specific)",
+        ),
+        sa.Column(
+            "config",
+            postgresql.JSONB(astext_type=sa.Text()),
+            nullable=True,
+            comment="Configuration: {config_id, config_version} for stored config OR {config_blob} for ad-hoc config",
+        ),
+        sa.Column(
+            "created_at",
+            sa.DateTime(),
+            nullable=False,
+            comment="Timestamp when the LLM call was created",
+        ),
+        sa.Column(
+            "updated_at",
+            sa.DateTime(),
+            nullable=False,
+            comment="Timestamp when the LLM call was last updated",
+        ),
+        sa.Column(
+            "deleted_at",
+            sa.DateTime(),
+            nullable=True,
+            comment="Timestamp when the record was soft-deleted",
+        ),
+        sa.ForeignKeyConstraint(["job_id"], ["job.id"], ondelete="CASCADE"),
+        sa.ForeignKeyConstraint(
+            ["organization_id"], ["organization.id"], ondelete="CASCADE"
+        ),
+        sa.ForeignKeyConstraint(["project_id"], ["project.id"], ondelete="CASCADE"),
+        sa.PrimaryKeyConstraint("id"),
+    )
+    op.create_index(
+        "idx_llm_call_conversation_id",
+        "llm_call",
+        ["conversation_id"],
+        unique=False,
+        postgresql_where=sa.text("conversation_id IS NOT NULL AND deleted_at IS NULL"),
+    )
+    op.create_index(
+        "idx_llm_call_job_id",
+        "llm_call",
+        ["job_id"],
+        unique=False,
+        postgresql_where=sa.text("deleted_at IS NULL"),
+    )
+    op.alter_column(
+        "collection",
+        "llm_service_name",
+        existing_type=sa.VARCHAR(),
+        comment="Name of the LLM service",
+        existing_comment="Name of the LLM service provider",
+        existing_nullable=False,
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    """Restore the collection.llm_service_name comment and drop llm_call.
+
+    The partial indexes are dropped before the table itself.
+    """
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.alter_column(
+        "collection",
+        "llm_service_name",
+        existing_type=sa.VARCHAR(),
+        comment="Name of the LLM service provider",
+        existing_comment="Name of the LLM service",
+        existing_nullable=False,
+    )
+    op.drop_index(
+        "idx_llm_call_job_id",
+        table_name="llm_call",
+        postgresql_where=sa.text("deleted_at IS NULL"),
+    )
+    op.drop_index(
+        "idx_llm_call_conversation_id",
+        table_name="llm_call",
+        postgresql_where=sa.text("conversation_id IS NOT NULL AND deleted_at IS NULL"),
+    )
+    op.drop_table("llm_call")
+    # ### end Alembic commands ###
diff --git a/backend/app/api/routes/config/version.py b/backend/app/api/routes/config/version.py
@@ -4,7 +4,7 @@
 from app.api.deps import SessionDep, AuthContextDep
 from app.crud.config import ConfigCrud, ConfigVersionCrud
 from app.models import (
-    ConfigVersionCreate,
+    ConfigVersionUpdatePartial,
     ConfigVersionPublic,
     Message,
     ConfigVersionItems,
@@ -24,18 +24,21 @@
 )
 def create_version(
     config_id: UUID,
-    version_create: ConfigVersionCreate,
+    version_create: ConfigVersionUpdatePartial,
     current_user: AuthContextDep,
     session: SessionDep,
 ):
     """
     Create a new version for an existing configuration.
-    The version number is automatically incremented.
+
+    Only include the fields you want to update in config_blob.
+    Provider, model, and params can be changed.
+    Type is inherited from existing config and cannot be changed.
     """
     version_crud = ConfigVersionCrud(
         session=session, project_id=current_user.project_.id, config_id=config_id
     )
-    version = version_crud.create_or_raise(version_create=version_create)
+    version = version_crud.create_from_partial_or_raise(version_create=version_create)
 
     return APIResponse.success_response(
         data=ConfigVersionPublic(**version.model_dump()),

diff --git a/backend/app/core/audio_utils.py b/backend/app/core/audio_utils.py
@@ -0,0 +1,60 @@
+"""
+Audio processing utilities for format conversion.
+
+This module provides utilities for converting audio between different formats,
+particularly for TTS output post-processing.
+"""
+
+import logging
+from pydub import AudioSegment
+import io
+
+logger = logging.getLogger(__name__)
+
+
+def convert_pcm_to_mp3(
+    pcm_bytes: bytes, sample_rate: int = 24000
+) -> tuple[bytes | None, str | None]:
+    """Convert raw PCM to MP3 at a 192k bitrate.
+
+    The input is read as mono audio with a 2-byte sample width.
+
+    Returns:
+        (mp3_bytes, None) on success, or (None, error_message) on failure.
+    """
+    try:
+        audio = AudioSegment(
+            data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1
+        )
+
+        output_buffer = io.BytesIO()
+        audio.export(output_buffer, format="mp3", bitrate="192k")
+        return output_buffer.getvalue(), None
+    except Exception as e:
+        logger.error(f"[convert_pcm_to_mp3] Failed to convert PCM to MP3: {e}")
+        return None, str(e)
+
+
+def convert_pcm_to_ogg(
+    pcm_bytes: bytes, sample_rate: int = 24000
+) -> tuple[bytes | None, str | None]:
+    """Convert raw PCM to OGG with Opus codec at a 64k bitrate.
+
+    The input is read as mono audio with a 2-byte sample width.
+
+    Returns:
+        (ogg_bytes, None) on success, or (None, error_message) on failure.
+    """
+    try:
+        audio = AudioSegment(
+            data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1
+        )
+
+        output_buffer = io.BytesIO()
+        audio.export(
+            output_buffer, format="ogg", codec="libopus", parameters=["-b:a", "64k"]
+        )
+        return output_buffer.getvalue(), None
+    except Exception as e:
+        logger.error(f"[convert_pcm_to_ogg] Failed to convert PCM to OGG: {e}")
+        return None, str(e)
-def convert_pcm_to_ogg(
-    pcm_bytes: bytes, sample_rate: int = 24000
-) -> tuple[bytes | None, str | None]:
-    """Convert raw PCM to OGG with Opus codec."""
-    try:
-        audio = AudioSegment(
-            data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1
-        )
-
-        output_buffer = io.BytesIO()
-        audio.export(
-            output_buffer, format="ogg", codec="libopus", parameters=["-b:a", "64k"]
-        )
-        return output_buffer.getvalue(), None
-    except Exception as e:
-        return None, str(e)
+def convert_pcm_to_ogg(
+    pcm_bytes: bytes, sample_rate: int = 24000
+) -> tuple[bytes | None, str | None]:
+    """Encode raw PCM as OGG using the Opus codec."""
+    try:
+        ogg_buffer = io.BytesIO()
+        segment = AudioSegment(
+            data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1
+        )
+        segment.export(
+            ogg_buffer, format="ogg", codec="libopus", parameters=["-b:a", "64k"]
+        )
+        encoded = ogg_buffer.getvalue()
+    except Exception as e:
+        logger.error(f"[convert_pcm_to_ogg] Failed to convert PCM to OGG: {e}")
+        return None, str(e)
+    return encoded, None
-def convert_pcm_to_ogg(
-    pcm_bytes: bytes, sample_rate: int = 24000
-) -> tuple[bytes | None, str | None]:
-    """Convert raw PCM to OGG with Opus codec."""
-    try:
-        audio = AudioSegment(
-            data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1
-        )
-
-        output_buffer = io.BytesIO()
-        audio.export(
-            output_buffer, format="ogg", codec="libopus", parameters=["-b:a", "64k"]
-        )
-        return output_buffer.getvalue(), None
-    except Exception as e:
-        return None, str(e)
+def convert_pcm_to_ogg(
+    pcm_bytes: bytes, sample_rate: int = 24000
+) -> tuple[bytes | None, str | None]:
+    """Encode raw PCM as OGG using the Opus codec."""
+    try:
+        ogg_buffer = io.BytesIO()
+        segment = AudioSegment(
+            data=pcm_bytes, sample_width=2, frame_rate=sample_rate, channels=1
+        )
+        segment.export(
+            ogg_buffer, format="ogg", codec="libopus", parameters=["-b:a", "64k"]
+        )
+        encoded = ogg_buffer.getvalue()
+    except Exception as e:
+        logger.error(f"[convert_pcm_to_ogg] Failed to convert PCM to OGG: {e}")
+        return None, str(e)
+    return encoded, None
diff --git a/backend/app/core/langfuse/langfuse.py b/backend/app/core/langfuse/langfuse.py
@@ -6,11 +6,45 @@
 from asgi_correlation_id import correlation_id
 from langfuse import Langfuse
 from langfuse.client import StatefulGenerationClient, StatefulTraceClient
-from app.models.llm import NativeCompletionConfig, QueryParams, LLMCallResponse
+from app.models.llm import (
+    NativeCompletionConfig,
+    QueryParams,
+    LLMCallResponse,
+    TextOutput,
+    AudioOutput,
+)
 
 logger = logging.getLogger(__name__)
 
 
+def extract_output_value(
+    llm_output: TextOutput | AudioOutput | None,
+) -> str | dict[str, Any]:
+    """Summarise an LLM output for logging/tracing.
+
+    Returns "" for a falsy output, the text value for a TextOutput, a
+    metadata dict (type, format, mime_type, length) for an AudioOutput,
+    and str(llm_output) for any other value.
+    """
+    if not llm_output:
+        return ""
+
+    if isinstance(llm_output, TextOutput):
+        return llm_output.content.value
+
+    if not isinstance(llm_output, AudioOutput):
+        return str(llm_output)
+
+    # Report audio metadata only, not the full audio data.
+    audio = llm_output.content
+    return {
+        "type": "audio",
+        "format": audio.format,
+        "mime_type": audio.mime_type,
+        "length": len(audio.value),
+    }
+
+
 class LangfuseTracer:
     def __init__(
         self,
@@ -228,7 +262,7 @@ def langfuse_call(fn, *args, **kwargs):
                         generation.end,
                         output={
                             "status": "success",
-                            "output": response.response.output.text,
+                            "output": extract_output_value(response.response.output),
                         },
                         usage_details={
                             "input": response.usage.input_tokens,
@@ -241,7 +275,7 @@ def langfuse_call(fn, *args, **kwargs):
                         trace.update,
                         output={
                             "status": "success",
-                            "output": response.response.output.text,
+                            "output": extract_output_value(response.response.output),
                         },
                         session_id=session_id or response.response.conversation_id,
                     )

diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py
@@ -12,6 +12,7 @@ class Provider(str, Enum):
     OPENAI = "openai"
     AWS = "aws"
     LANGFUSE = "langfuse"
+    GOOGLE = "google"
 
 
 @dataclass
@@ -30,6 +31,7 @@ class ProviderConfig:
     Provider.LANGFUSE: ProviderConfig(
         required_fields=["secret_key", "public_key", "host"]
     ),
+    Provider.GOOGLE: ProviderConfig(required_fields=["api_key"]),
 }