diff --git a/.gitignore b/.gitignore index a7bda6e..52e67d6 100644 --- a/.gitignore +++ b/.gitignore @@ -58,4 +58,4 @@ tests/performance/results/ .coverage* # Local test script -tools/local_test_script.py +tools/tmp/* diff --git a/alembic/versions/1876c1c4bc96_add_price_table.py b/alembic/versions/1876c1c4bc96_add_price_table.py new file mode 100644 index 0000000..27874d7 --- /dev/null +++ b/alembic/versions/1876c1c4bc96_add_price_table.py @@ -0,0 +1,182 @@ +"""add price table + +Revision ID: 1876c1c4bc96 +Revises: 831fc2cf16ee +Create Date: 2025-08-11 17:57:04.438535 + +""" +from alembic import op +import sqlalchemy as sa +from csv import DictReader +from datetime import datetime, UTC, timedelta +import os +import decimal + +# revision identifiers, used by Alembic. +revision = '1876c1c4bc96' +down_revision = '831fc2cf16ee' +branch_labels = None +depends_on = None + +def upgrade() -> None: + # Create model_pricing table + op.create_table( + 'model_pricing', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), + sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), + sa.Column('deleted_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('provider_name', sa.String(), nullable=False), + sa.Column('model_name', sa.String(), nullable=False), + sa.Column('effective_date', sa.DateTime(timezone=True), nullable=False), + sa.Column('end_date', sa.DateTime(timezone=True), nullable=True), + sa.Column('input_token_price', sa.DECIMAL(precision=12, scale=8), nullable=False), + sa.Column('output_token_price', sa.DECIMAL(precision=12, scale=8), nullable=False), + sa.Column('cached_token_price', sa.DECIMAL(precision=12, scale=8), nullable=False, server_default=sa.text('0')), + sa.Column('currency', sa.String(length=3), nullable=False, server_default='USD'), + sa.Column('price_source', 
sa.String(length=50), nullable=False, server_default='manual'), + sa.PrimaryKeyConstraint('id') + ) + + # Create indexes for model_pricing + op.create_index('ix_model_pricing_active', 'model_pricing', + ['provider_name', 'model_name', 'effective_date', 'end_date']) + op.create_index('ix_model_pricing_temporal', 'model_pricing', + ['effective_date', 'end_date']) + op.create_index('ix_model_pricing_unique_period', 'model_pricing', + ['provider_name', 'model_name', 'effective_date'], unique=True) + op.create_index(op.f('ix_model_pricing_provider_name'), 'model_pricing', ['provider_name']) + op.create_index(op.f('ix_model_pricing_model_name'), 'model_pricing', ['model_name']) + op.create_index(op.f('ix_model_pricing_id'), 'model_pricing', ['id']) + + # Create fallback_pricing table + op.create_table( + 'fallback_pricing', + sa.Column('id', sa.Integer(), nullable=False), + sa.Column('created_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), + sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False, server_default=sa.text('CURRENT_TIMESTAMP')), + sa.Column('deleted_at', sa.DateTime(timezone=True), nullable=True), + sa.Column('provider_name', sa.String(), nullable=True), + sa.Column('fallback_type', sa.String(length=20), nullable=False), + sa.Column('effective_date', sa.DateTime(timezone=True), nullable=False), + sa.Column('end_date', sa.DateTime(timezone=True), nullable=True), + sa.Column('input_token_price', sa.DECIMAL(precision=12, scale=8), nullable=False), + sa.Column('output_token_price', sa.DECIMAL(precision=12, scale=8), nullable=False), + sa.Column('cached_token_price', sa.DECIMAL(precision=12, scale=8), nullable=False, server_default=sa.text('0')), + sa.Column('currency', sa.String(length=3), nullable=False, server_default='USD'), + sa.Column('description', sa.String(length=255), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + + # Create indexes for fallback_pricing + 
op.create_index('ix_fallback_pricing_active', 'fallback_pricing', + ['provider_name', 'fallback_type', 'effective_date', 'end_date']) + op.create_index('ix_fallback_pricing_type', 'fallback_pricing', + ['fallback_type', 'effective_date']) + op.create_index(op.f('ix_fallback_pricing_provider_name'), 'fallback_pricing', ['provider_name']) + op.create_index(op.f('ix_fallback_pricing_fallback_type'), 'fallback_pricing', ['fallback_type']) + op.create_index(op.f('ix_fallback_pricing_id'), 'fallback_pricing', ['id']) + + # Insert model pricing data + effective_date = datetime.now(UTC) - timedelta(days=30) + csv_path = os.path.join(os.path.dirname(__file__), "..", "..", "tools", "data", "model_pricing_init.csv") + with open(csv_path, "r") as f: + reader = DictReader(f) + rows_to_insert = [] + for row in reader: + rows_to_insert.append({ + "provider_name": row["provider_name"], + "model_name": row["model_name"], + "effective_date": effective_date, + "input_token_price": decimal.Decimal(str(row["input_token_price"])).normalize(), # CSV values are already per 1K tokens, the unit the pricing service divides by + "output_token_price": decimal.Decimal(str(row["output_token_price"])).normalize(), + "price_source": "manual" + }) + if rows_to_insert: + connection = op.get_bind() + connection.execute( + sa.text(""" + INSERT INTO model_pricing (provider_name, model_name, effective_date, input_token_price, output_token_price, price_source) + VALUES (:provider_name, :model_name, :effective_date, :input_token_price, :output_token_price, 'manual') + """), + rows_to_insert, + ) + + # Insert some initial fallback pricing data + # For all the providers in the model_pricing table, insert a fallback pricing record with the provider_default fallback_type, set the price to be the average of the input_token_price and output_token_price + # For global fallback, set the provider_name to NULL, and the fallback_type to global_default, and the price to be the average of the input_token_price and output_token_price of all the providers in the model_pricing
table + # The effective_date should be the same as the effective_date of the model_pricing table + + # Get all unique providers from model_pricing table + providers_result = connection.execute( + sa.text("SELECT DISTINCT provider_name FROM model_pricing") + ).fetchall() + + fallback_rows = [] + + # Insert provider-specific fallback pricing + for provider_row in providers_result: + provider_name = provider_row[0] + + # Calculate average prices for this provider + avg_prices_result = connection.execute( + sa.text(""" + SELECT + AVG(input_token_price) as avg_input_price, + AVG(output_token_price) as avg_output_price + FROM model_pricing + WHERE provider_name = :provider_name + """), + {"provider_name": provider_name} + ).fetchone() + + avg_input_price = avg_prices_result[0] + avg_output_price = avg_prices_result[1] + + fallback_rows.append({ + "provider_name": provider_name, + "fallback_type": "provider_default", + "effective_date": effective_date, + "input_token_price": avg_input_price, + "output_token_price": avg_output_price, + "description": f"Default pricing for {provider_name} provider" + }) + + # Calculate global average prices + global_avg_result = connection.execute( + sa.text(""" + SELECT + AVG(input_token_price) as avg_input_price, + AVG(output_token_price) as avg_output_price + FROM model_pricing + """) + ).fetchone() + + global_avg_input_price = global_avg_result[0] + global_avg_output_price = global_avg_result[1] + + # Insert global fallback pricing + fallback_rows.append({ + "provider_name": None, + "fallback_type": "global_default", + "effective_date": effective_date, + "input_token_price": global_avg_input_price, + "output_token_price": global_avg_output_price, + "description": "Global default pricing for all providers" + }) + + # Insert fallback pricing data + if fallback_rows: + connection.execute( + sa.text(""" + INSERT INTO fallback_pricing (provider_name, fallback_type, effective_date, input_token_price, output_token_price, description) + VALUES 
(:provider_name, :fallback_type, :effective_date, :input_token_price, :output_token_price, :description) + """), + fallback_rows, + ) + + +def downgrade() -> None: + # Drop tables in reverse order + op.drop_table('fallback_pricing') + op.drop_table('model_pricing') diff --git a/alembic/versions/b206e9a941e3_add_cost_tracking_to_usage_tracker_table.py b/alembic/versions/b206e9a941e3_add_cost_tracking_to_usage_tracker_table.py new file mode 100644 index 0000000..3f4c860 --- /dev/null +++ b/alembic/versions/b206e9a941e3_add_cost_tracking_to_usage_tracker_table.py @@ -0,0 +1,28 @@ +"""add cost tracking to usage_tracker table + +Revision ID: b206e9a941e3 +Revises: 1876c1c4bc96 +Create Date: 2025-08-11 18:19:08.581296 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'b206e9a941e3' +down_revision = '1876c1c4bc96' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column('usage_tracker', sa.Column('cost', sa.DECIMAL(precision=12, scale=8), nullable=True)) + op.add_column('usage_tracker', sa.Column('currency', sa.String(length=3), nullable=True)) + op.add_column('usage_tracker', sa.Column('pricing_source', sa.String(length=255), nullable=True)) + + +def downgrade() -> None: + op.drop_column('usage_tracker', 'cost') + op.drop_column('usage_tracker', 'currency') + op.drop_column('usage_tracker', 'pricing_source') diff --git a/app/models/pricing.py b/app/models/pricing.py new file mode 100644 index 0000000..83b2158 --- /dev/null +++ b/app/models/pricing.py @@ -0,0 +1,67 @@ +# app/models/pricing.py +import datetime +from datetime import UTC +from sqlalchemy import Column, DateTime, String, DECIMAL, Index + +from .base import BaseModel + + +class ModelPricing(BaseModel): + """ + Store pricing information for specific models with temporal support + """ + __tablename__ = "model_pricing" + + provider_name = Column(String, nullable=False, index=True) + model_name = Column(String, 
nullable=False, index=True) + + # Temporal fields for price changes over time + effective_date = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.datetime.now(UTC)) # callable: timestamp is taken per INSERT, not once at import + end_date = Column(DateTime(timezone=True), nullable=True) # NULL means currently active + + # Pricing per 1K tokens (using DECIMAL for precision) + input_token_price = Column(DECIMAL(12, 8), nullable=False) # Price per 1K input tokens + output_token_price = Column(DECIMAL(12, 8), nullable=False) # Price per 1K output tokens + cached_token_price = Column(DECIMAL(12, 8), nullable=False, default=0) # Price per 1K cached tokens + + # Metadata + currency = Column(String(3), nullable=False, default='USD') + + # Indexes for efficient querying + __table_args__ = ( + # Index for finding active pricing for a model + Index('ix_model_pricing_active', 'provider_name', 'model_name', 'effective_date', 'end_date'), + # Index for temporal queries + Index('ix_model_pricing_temporal', 'effective_date', 'end_date'), + # At most one row per (provider, model, effective_date); this does not by itself reject overlapping periods + Index('ix_model_pricing_unique_period', 'provider_name', 'model_name', 'effective_date', unique=True), + ) + + +class FallbackPricing(BaseModel): + """ + Store fallback pricing for providers and global defaults + """ + __tablename__ = "fallback_pricing" + + provider_name = Column(String, nullable=True, index=True) # NULL for global fallback + fallback_type = Column(String(20), nullable=False, index=True) # 'provider_default', 'global_default' + + # Temporal fields + effective_date = Column(DateTime(timezone=True), nullable=False, default=lambda: datetime.datetime.now(UTC)) # callable: timestamp is taken per INSERT, not once at import + end_date = Column(DateTime(timezone=True), nullable=True) + + # Pricing per 1K tokens + input_token_price = Column(DECIMAL(12, 8), nullable=False) + output_token_price = Column(DECIMAL(12, 8), nullable=False) + cached_token_price = Column(DECIMAL(12, 8), nullable=False, default=0) + + # Metadata + currency = Column(String(3), nullable=False, default='USD')
+ description = Column(String(255), nullable=True) # Optional description + + # Indexes + __table_args__ = ( + Index('ix_fallback_pricing_active', 'provider_name', 'fallback_type', 'effective_date', 'end_date'), + Index('ix_fallback_pricing_type', 'fallback_type', 'effective_date'), + ) diff --git a/app/models/provider_key.py b/app/models/provider_key.py index eb191e3..5300190 100644 --- a/app/models/provider_key.py +++ b/app/models/provider_key.py @@ -27,3 +27,4 @@ class ProviderKey(BaseModel): back_populates="allowed_provider_keys", lazy="selectin", ) + usage_tracker = relationship("UsageTracker", back_populates="provider_key") diff --git a/app/models/usage_tracker.py b/app/models/usage_tracker.py index 161fec0..c1798a3 100644 --- a/app/models/usage_tracker.py +++ b/app/models/usage_tracker.py @@ -2,7 +2,8 @@ from datetime import UTC import uuid -from sqlalchemy import Column, DateTime, ForeignKey, Integer, String +from sqlalchemy import Column, DateTime, ForeignKey, Integer, String, DECIMAL +from sqlalchemy.orm import relationship from sqlalchemy.dialects.postgresql import UUID from .base import Base @@ -21,3 +22,8 @@ class UsageTracker(Base): output_tokens = Column(Integer, nullable=True) cached_tokens = Column(Integer, nullable=True) reasoning_tokens = Column(Integer, nullable=True) + cost = Column(DECIMAL(12, 8), nullable=True) + currency = Column(String(3), nullable=True) + pricing_source = Column(String(255), nullable=True) + + provider_key = relationship("ProviderKey", back_populates="usage_tracker") diff --git a/app/services/pricing_service.py b/app/services/pricing_service.py new file mode 100644 index 0000000..e06d7ed --- /dev/null +++ b/app/services/pricing_service.py @@ -0,0 +1,497 @@ +import hashlib +import os +from datetime import datetime, UTC +from decimal import Decimal +from typing import Dict, Optional, Any +from sqlalchemy import select, and_, or_, func, text +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.logger import get_logger +from
app.core.async_cache import async_provider_service_cache +from app.models.pricing import ModelPricing, FallbackPricing + +logger = get_logger(name="pricing_service") + +class PricingService: + """ + Cache-optimized pricing service that minimizes database hits + + Cache Strategy: + 1. Exact match cache: Active model pricing (TTL: 1 day) + 2. Fallback cache: Provider fallback pricing (TTL: 12 hours) + 3. Emergency fallback: Hard-coded prices (TTL: 6 hours) + """ + + # Cache TTL constants + EXACT_CACHE_TTL = 86400 # 1 day - active model pricing + FALLBACK_CACHE_TTL = 43200 # 12 hours - provider fallbacks + EMERGENCY_CACHE_TTL = 21600 # 6 hours - emergency fallback + + # Emergency fallback prices (per 1K tokens) + EMERGENCY_PRICING = { + 'input_price': Decimal('0.01'), + 'output_price': Decimal('0.03'), + 'cached_price': Decimal('0.001'), + 'currency': 'USD' + } + + @staticmethod + async def calculate_usage_cost( + db: AsyncSession, + provider_name: str, + model_name: str, + prompt_tokens: int = 0, + completion_tokens: int = 0, + cached_tokens: int = 0, + calculation_date: Optional[datetime] = None + ) -> Dict[str, Any]: + """ + Calculate token costs with aggressive caching to minimize DB hits + """ + if calculation_date is None: + calculation_date = datetime.now(UTC) + + # Generate cache key for this exact pricing lookup + cache_key = PricingService._generate_pricing_cache_key( + provider_name, model_name, calculation_date + ) + + # Try to get complete pricing info from cache + pricing_info = await async_provider_service_cache.get(cache_key) + + if pricing_info is None: + # Cache miss - fetch pricing with smart fallback logic + pricing_info = await PricingService._fetch_pricing_with_smart_caching( + db, provider_name, model_name, calculation_date + ) + + # Cache the result with appropriate TTL based on data source + ttl = PricingService._get_cache_ttl(pricing_info['source']) + await async_provider_service_cache.set(cache_key, pricing_info, ttl=ttl) + + 
logger.debug(f"Cached pricing for {provider_name}/{model_name} with TTL {ttl}s") + + # Calculate costs using cached pricing + input_tokens = prompt_tokens - cached_tokens + output_tokens = completion_tokens + return PricingService._calculate_costs_from_pricing( + pricing_info, input_tokens, output_tokens, cached_tokens + ) + + @staticmethod + async def _fetch_pricing_with_smart_caching( + db: AsyncSession, + provider_name: str, + model_name: str, + calculation_date: datetime + ) -> Dict[str, Any]: + """ + Fetch pricing with smart caching at multiple levels + """ + + # Level 1: Try exact model cache (hot cache) + exact_cache_key = f"pricing:exact:{provider_name}:{model_name}" + exact_pricing = await async_provider_service_cache.get(exact_cache_key) + + if exact_pricing and PricingService._is_pricing_valid_for_date(exact_pricing, calculation_date): + logger.debug(f"Hot cache hit for {provider_name}/{model_name}") + return {**exact_pricing, 'source': 'exact_match'} + + # Level 2: Try provider fallback cache (warm cache) + provider_cache_key = f"pricing:provider_fallback:{provider_name}" + provider_fallback = await async_provider_service_cache.get(provider_cache_key) + + if provider_fallback and PricingService._is_pricing_valid_for_date(provider_fallback, calculation_date): + logger.debug(f"Warm cache hit for provider {provider_name}") + return {**provider_fallback, 'source': 'fallback_provider'} + + # Level 3: Try global fallback cache (warm cache) + global_cache_key = f"pricing:global_fallback" + global_fallback = await async_provider_service_cache.get(global_cache_key) + + if global_fallback and PricingService._is_pricing_valid_for_date(global_fallback, calculation_date): + logger.debug("Warm cache hit for global fallback") + return {**global_fallback, 'source': 'fallback_global'} + + # Cache miss - hit database (this should be rare) + logger.info(f"Cache miss - fetching from DB for {provider_name}/{model_name}") + return await 
PricingService._fetch_from_database_with_caching( + db, provider_name, model_name, calculation_date + ) + + @staticmethod + async def _fetch_from_database_with_caching( + db: AsyncSession, + provider_name: str, + model_name: str, + calculation_date: datetime + ) -> Dict[str, Any]: + """ + Fetch from database and populate multiple cache levels + """ + + # Try exact model match + exact_pricing = await PricingService._get_exact_model_pricing_db( + db, provider_name, model_name, calculation_date + ) + + if exact_pricing: + # Cache exact model pricing (hot cache) + cache_key = f"pricing:exact:{provider_name}:{model_name}" + await async_provider_service_cache.set( + cache_key, exact_pricing, ttl=PricingService.EXACT_CACHE_TTL + ) + return {**exact_pricing, 'source': 'exact_match'} + + # Try provider fallback + provider_fallback = await PricingService._get_provider_fallback_pricing_db( + db, provider_name, calculation_date + ) + + if provider_fallback: + # Cache provider fallback (warm cache) + cache_key = f"pricing:provider_fallback:{provider_name}" + await async_provider_service_cache.set( + cache_key, provider_fallback, ttl=PricingService.FALLBACK_CACHE_TTL + ) + logger.warning(f"Using provider fallback pricing for {provider_name}/{model_name}") + return {**provider_fallback, 'source': 'fallback_provider'} + + # Try global fallback + global_fallback = await PricingService._get_global_fallback_pricing_db( + db, calculation_date + ) + + if global_fallback: + # Cache global fallback (warm cache) + cache_key = f"pricing:global_fallback" + await async_provider_service_cache.set( + cache_key, global_fallback, ttl=PricingService.FALLBACK_CACHE_TTL + ) + logger.warning(f"Using global fallback pricing for {provider_name}/{model_name}") + return {**global_fallback, 'source': 'fallback_global'} + + # Emergency fallback (never cache this - it's always available) + logger.error(f"No pricing found for {provider_name}/{model_name}, using emergency fallback") + return 
{**PricingService.EMERGENCY_PRICING, 'source': 'emergency_fallback'} + + @staticmethod + async def warm_pricing_cache(db: AsyncSession, provider_names: Optional[list[str]] = None) -> Dict[str, int]: + """ + Pre-warm the pricing cache with frequently accessed models and fallbacks + This should be called periodically (e.g., via cron job) + """ + stats = {'exact_models': 0, 'provider_fallbacks': 0, 'global_fallback': 0} + current_time = datetime.now(UTC) + + # Get active pricing for all providers or specific ones + query = select(ModelPricing).where( + and_( + ModelPricing.effective_date <= current_time, + or_(ModelPricing.end_date.is_(None), ModelPricing.end_date > current_time) + ) + ) + + if provider_names: + query = query.where(ModelPricing.provider_name.in_(provider_names)) + + result = await db.execute(query) + active_pricing = result.scalars().all() + + # Cache exact model pricing + for pricing in active_pricing: + cache_key = f"pricing:exact:{pricing.provider_name}:{pricing.model_name}" + pricing_data = { + 'input_price': pricing.input_token_price, + 'output_price': pricing.output_token_price, + 'cached_price': pricing.cached_token_price, + 'currency': pricing.currency, + 'effective_date': pricing.effective_date.isoformat(), + 'end_date': pricing.end_date.isoformat() if pricing.end_date else None, + } + await async_provider_service_cache.set( + cache_key, pricing_data, ttl=PricingService.EXACT_CACHE_TTL + ) + stats['exact_models'] += 1 + + # Cache provider fallbacks + fallback_query = select(FallbackPricing).where( + and_( + FallbackPricing.effective_date <= current_time, + or_(FallbackPricing.end_date.is_(None), FallbackPricing.end_date > current_time), + FallbackPricing.fallback_type == 'provider_default' + ) + ) + + if provider_names: + fallback_query = fallback_query.where(FallbackPricing.provider_name.in_(provider_names)) + + result = await db.execute(fallback_query) + provider_fallbacks = result.scalars().all() + + for fallback in provider_fallbacks: + 
cache_key = f"pricing:provider_fallback:{fallback.provider_name}" + fallback_data = { + 'input_price': fallback.input_token_price, + 'output_price': fallback.output_token_price, + 'cached_price': fallback.cached_token_price, + 'currency': fallback.currency, # stored per row; the column defaults to USD + 'effective_date': fallback.effective_date.isoformat(), + 'end_date': fallback.end_date.isoformat() if fallback.end_date else None, + } + await async_provider_service_cache.set( + cache_key, fallback_data, ttl=PricingService.FALLBACK_CACHE_TTL + ) + stats['provider_fallbacks'] += 1 + + # Cache global fallback + global_fallback_query = select(FallbackPricing).where( + and_( + FallbackPricing.effective_date <= current_time, + or_(FallbackPricing.end_date.is_(None), FallbackPricing.end_date > current_time), + FallbackPricing.fallback_type == 'global_default' + ) + ).order_by(FallbackPricing.effective_date.desc()).limit(1) + + result = await db.execute(global_fallback_query) + global_fallback = result.scalar_one_or_none() + + if global_fallback: + cache_key = f"pricing:global_fallback" + global_data = { + 'input_price': global_fallback.input_token_price, + 'output_price': global_fallback.output_token_price, + 'cached_price': global_fallback.cached_token_price, + 'currency': global_fallback.currency, + 'effective_date': global_fallback.effective_date.isoformat(), + 'end_date': global_fallback.end_date.isoformat() if global_fallback.end_date else None, + } + await async_provider_service_cache.set( + cache_key, global_data, ttl=PricingService.FALLBACK_CACHE_TTL + ) + stats['global_fallback'] = 1 + + logger.info(f"Pricing cache warmed: {stats}") + return stats + + @staticmethod + async def invalidate_pricing_cache( + provider_name: Optional[str] = None, + model_name: Optional[str] = None + ) -> None: + """ + Invalidate pricing cache entries + """ + if provider_name and model_name: + # Invalidate specific model + cache_key = f"pricing:exact:{provider_name}:{model_name}" + await
async_provider_service_cache.delete(cache_key) + logger.info(f"Invalidated pricing cache for {provider_name}/{model_name}") + + elif provider_name: + # Invalidate entire provider + await PricingService._invalidate_provider_pricing_cache(provider_name) + logger.info(f"Invalidated pricing cache for provider {provider_name}") + + else: + # Invalidate all pricing cache + await PricingService._invalidate_all_pricing_cache() + logger.info("Invalidated all pricing cache") + + @staticmethod + async def _invalidate_provider_pricing_cache(provider_name: str) -> None: + """Invalidate all cache entries for a specific provider""" + prefixes = [ + f"pricing:exact:{provider_name}:", + f"pricing:provider_fallback:{provider_name}", + f"pricing:lookup:{provider_name}:" + ] + + # Handle in-memory cache + if hasattr(async_provider_service_cache, "cache"): + async with async_provider_service_cache.lock: + keys_to_delete = [] + for key in async_provider_service_cache.cache.keys(): + if any(key.startswith(prefix) for prefix in prefixes): + keys_to_delete.append(key) + + for key in keys_to_delete: + await async_provider_service_cache.delete(key) + + # Handle Redis cache + if hasattr(async_provider_service_cache, "client"): + try: + for prefix in prefixes: + redis_pattern = f"{os.getenv('REDIS_PREFIX', 'forge')}:{prefix}*" + async for redis_key in async_provider_service_cache.client.scan_iter(match=redis_pattern): + key_str = redis_key.decode() if isinstance(redis_key, bytes) else redis_key + internal_key = key_str.split(":", 1)[-1] + await async_provider_service_cache.delete(internal_key) + except Exception as exc: + logger.warning(f"Failed to invalidate Redis pricing cache: {exc}") + + @staticmethod + async def _invalidate_all_pricing_cache() -> None: + """Invalidate all pricing-related cache entries""" + # Handle in-memory cache + if hasattr(async_provider_service_cache, "cache"): + async with async_provider_service_cache.lock: + keys_to_delete = [] + for key in 
async_provider_service_cache.cache.keys(): + if key.startswith("pricing:"): + keys_to_delete.append(key) + + for key in keys_to_delete: + await async_provider_service_cache.delete(key) + + # Handle Redis cache + if hasattr(async_provider_service_cache, "client"): + try: + redis_pattern = f"{os.getenv('REDIS_PREFIX', 'forge')}:pricing:*" + async for redis_key in async_provider_service_cache.client.scan_iter(match=redis_pattern): + key_str = redis_key.decode() if isinstance(redis_key, bytes) else redis_key + internal_key = key_str.split(":", 1)[-1] + await async_provider_service_cache.delete(internal_key) + except Exception as exc: + logger.warning(f"Failed to invalidate all Redis pricing cache: {exc}") + + # Helper methods + @staticmethod + def _generate_pricing_cache_key(provider_name: str, model_name: str, calculation_date: datetime) -> str: + """Generate a cache key for pricing lookups""" + date_str = calculation_date.strftime("%Y-%m-%d") + key_data = f"{provider_name}:{model_name}:{date_str}" + key_hash = hashlib.md5(key_data.encode()).hexdigest()[:12] + return f"pricing:lookup:{provider_name}:{key_hash}" + + @staticmethod + def _get_cache_ttl(source: str) -> int: + """Get appropriate TTL based on pricing data source""" + ttl_map = { + 'exact_match': PricingService.EXACT_CACHE_TTL, + 'fallback_provider': PricingService.FALLBACK_CACHE_TTL, + 'fallback_global': PricingService.FALLBACK_CACHE_TTL, + 'emergency_fallback': PricingService.EMERGENCY_CACHE_TTL, + } + return ttl_map.get(source, PricingService.EMERGENCY_CACHE_TTL) + + @staticmethod + def _is_pricing_valid_for_date(pricing_info: Dict[str, Any], calculation_date: datetime) -> bool: + """Check if cached pricing is valid for the given date""" + if 'effective_date' not in pricing_info: + return True # Emergency fallback is always valid + + effective_date = datetime.fromisoformat(pricing_info['effective_date']) + end_date = None + if pricing_info.get('end_date'): + end_date = 
datetime.fromisoformat(pricing_info['end_date']) + + return (calculation_date >= effective_date and + (end_date is None or calculation_date < end_date)) + + @staticmethod + def _calculate_costs_from_pricing( + pricing_info: Dict[str, Any], + input_tokens: int, + output_tokens: int, + cached_tokens: int, + ) -> Dict[str, Any]: + """Calculate costs using pricing information""" + input_cost = Decimal(str(input_tokens)) * pricing_info['input_price'] / 1000 + output_cost = Decimal(str(output_tokens)) * pricing_info['output_price'] / 1000 + cached_cost = Decimal(str(cached_tokens)) * pricing_info['cached_price'] / 1000 + + total_cost = input_cost + output_cost + cached_cost + + return { + 'total_cost': total_cost, + 'breakdown': { + 'input_cost': input_cost, + 'output_cost': output_cost, + 'cached_cost': cached_cost, + }, + 'pricing_source': pricing_info['source'], + 'currency': pricing_info['currency'] + } + + # Database query methods (only called on cache misses) + @staticmethod + async def _get_exact_model_pricing_db(db: AsyncSession, provider_name: str, model_name: str, calculation_date: datetime) -> Optional[Dict[str, Any]]: + """Get model pricing from database using longest prefix matching with pure SQL""" + + query = select(ModelPricing).where( + ModelPricing.provider_name == provider_name, + ModelPricing.effective_date <= calculation_date, + or_(ModelPricing.end_date.is_(None), ModelPricing.end_date > calculation_date), + # The input model starts with the stored model name (prefix match); the name is bound as a parameter so it can never alter the SQL + text(":requested_model ilike concat(model_name, '%%')").bindparams(requested_model=model_name) + ).order_by( + # Longest prefix first + func.length(ModelPricing.model_name).desc(), + ModelPricing.effective_date.desc() + ).limit(1) + + result = await db.execute(query) + pricing = result.scalar_one_or_none() + + if pricing: + if pricing.model_name != model_name: + logger.debug(f"Prefix match: '{model_name}' matched with '{pricing.model_name}'") + + return { + 'input_price': pricing.input_token_price, + 'output_price':
pricing.output_token_price, + 'cached_price': pricing.cached_token_price, + 'currency': pricing.currency, + 'effective_date': pricing.effective_date.isoformat(), + 'end_date': pricing.end_date.isoformat() if pricing.end_date else None, + } + + return None + + @staticmethod + async def _get_provider_fallback_pricing_db(db: AsyncSession, provider_name: str, calculation_date: datetime) -> Optional[Dict[str, Any]]: + """Get provider fallback pricing from database""" + query = select(FallbackPricing).where( + FallbackPricing.provider_name == provider_name, + FallbackPricing.fallback_type == 'provider_default', + FallbackPricing.effective_date <= calculation_date, + or_(FallbackPricing.end_date.is_(None), FallbackPricing.end_date > calculation_date) + ).order_by(FallbackPricing.effective_date.desc()).limit(1) + + result = await db.execute(query) + fallback = result.scalar_one_or_none() + + if fallback: + return { + 'input_price': fallback.input_token_price, + 'output_price': fallback.output_token_price, + 'cached_price': fallback.cached_token_price, + 'currency': fallback.currency, + 'effective_date': fallback.effective_date.isoformat(), + 'end_date': fallback.end_date.isoformat() if fallback.end_date else None, + } + return None + + @staticmethod + async def _get_global_fallback_pricing_db(db: AsyncSession, calculation_date: datetime) -> Optional[Dict[str, Any]]: + """Get global fallback pricing from database""" + query = select(FallbackPricing).where( + FallbackPricing.fallback_type == 'global_default', + FallbackPricing.effective_date <= calculation_date, + or_(FallbackPricing.end_date.is_(None), FallbackPricing.end_date > calculation_date) + ).order_by(FallbackPricing.effective_date.desc()).limit(1) + + result = await db.execute(query) + fallback = result.scalar_one_or_none() + + if fallback: + return { + 'input_price': fallback.input_token_price, + 'output_price': fallback.output_token_price, + 'cached_price': fallback.cached_token_price, + 'currency': fallback.currency,
'effective_date': fallback.effective_date.isoformat(), + 'end_date': fallback.end_date.isoformat() if fallback.end_date else None, + } + return None diff --git a/app/services/providers/usage_tracker_service.py b/app/services/providers/usage_tracker_service.py index 3ad2496..40bde88 100644 --- a/app/services/providers/usage_tracker_service.py +++ b/app/services/providers/usage_tracker_service.py @@ -3,10 +3,13 @@ from sqlalchemy import delete from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.exc import NoResultFound +from sqlalchemy.orm import selectinload +from sqlalchemy import select import uuid from app.core.logger import get_logger from app.models.usage_tracker import UsageTracker +from app.services.pricing_service import PricingService logger = get_logger(name="usage_tracker") @@ -52,19 +55,38 @@ async def update_usage_tracker( return try: - usage_tracker = await db.get_one(UsageTracker, usage_tracker_id) + result = await db.execute( + select(UsageTracker) + .options(selectinload(UsageTracker.provider_key)) + .filter(UsageTracker.id == usage_tracker_id) + ) + usage_tracker = result.scalar_one() # raises NoResultFound for a missing row, handled below + now = datetime.now(UTC) + logger.info(f"provider_key: {usage_tracker.provider_key.provider_name}") + price_info = await PricingService.calculate_usage_cost( + db, + usage_tracker.provider_key.provider_name.lower(), + usage_tracker.model.lower(), + input_tokens, + output_tokens, + cached_tokens, + now, + ) usage_tracker.input_tokens = input_tokens usage_tracker.output_tokens = output_tokens usage_tracker.cached_tokens = cached_tokens usage_tracker.reasoning_tokens = reasoning_tokens - usage_tracker.updated_at = datetime.now(UTC) + usage_tracker.updated_at = now + usage_tracker.cost = price_info['total_cost'] + usage_tracker.currency = price_info['currency'] + usage_tracker.pricing_source = price_info['pricing_source'] await db.commit() logger.debug(f"Updated usage tracker {usage_tracker_id} with input_tokens {input_tokens}, output_tokens
{output_tokens}, cached_tokens {cached_tokens}, reasoning_tokens {reasoning_tokens}") except NoResultFound: logger.error(f"Usage tracker not found: {usage_tracker_id}") except Exception as e: await db.rollback() - logger.error(f"Failed to update usage tracker: {e}") + logger.exception(f"Failed to update usage tracker: {e}") @staticmethod async def delete_usage_tracker_record( diff --git a/tools/data/model_pricing_init.csv b/tools/data/model_pricing_init.csv new file mode 100644 index 0000000..4095a56 --- /dev/null +++ b/tools/data/model_pricing_init.csv @@ -0,0 +1,527 @@ +provider_name,model_name,input_token_price,output_token_price +openai,gpt-4,0.03,0.06 +openai,gpt-4.1,0.002,0.008 +openai,gpt-4.1-2025-04-14,0.002,0.008 +openai,gpt-4.1-mini,0.0004,0.0016 +openai,gpt-4.1-mini-2025-04-14,0.0004,0.0016 +openai,gpt-4.1-nano,0.0001,0.0004 +openai,gpt-4.1-nano-2025-04-14,0.0001,0.0004 +openai,gpt-4o,0.0025,0.01 +openai,gpt-4o-search-preview-2025-03-11,0.0025,0.01 +openai,gpt-4o-search-preview,0.0025,0.01 +openai,gpt-4.5-preview,0.075,0.15 +openai,gpt-4.5-preview-2025-02-27,0.075,0.15 +openai,gpt-4o-audio-preview,0.0025,0.01 +openai,gpt-4o-audio-preview-2024-12-17,0.0025,0.01 +openai,gpt-4o-audio-preview-2024-10-01,0.0025,0.01 +openai,gpt-4o-audio-preview-2025-06-03,0.0025,0.01 +openai,gpt-4o-mini-audio-preview,0.00015,0.0006 +openai,gpt-4o-mini-audio-preview-2024-12-17,0.00015,0.0006 +openai,gpt-4o-mini,0.00015,0.0006 +openai,gpt-4o-mini-search-preview-2025-03-11,0.00015,0.0006 +openai,gpt-4o-mini-search-preview,0.00015,0.0006 +openai,gpt-4o-mini-2024-07-18,0.00015,0.0006 +openai,gpt-5,0.00125,0.01 +openai,gpt-5-mini,0.00025,0.002 +openai,gpt-5-nano,0.00005,0.0004 +openai,gpt-5-chat,0.00125,0.01 +openai,gpt-5-chat-latest,0.00125,0.01 +openai,gpt-5-2025-08-07,0.00125,0.01 +openai,gpt-5-mini-2025-08-07,0.00025,0.002 +openai,gpt-5-nano-2025-08-07,0.00005,0.0004 +openai,codex-mini-latest,0.0015,0.006 +openai,o1-pro,0.15,0.6 +openai,o1-pro-2025-03-19,0.15,0.6 
+openai,o1,0.015,0.06 +openai,o1-mini,0.0011,0.0044 +azure,computer-use-preview,0.003,0.012 +openai,o3-deep-research,0.01,0.04 +openai,o3-deep-research-2025-06-26,0.01,0.04 +openai,o3-pro,0.02,0.08 +openai,o3-pro-2025-06-10,0.02,0.08 +openai,o3,0.002,0.008 +openai,o3-2025-04-16,0.002,0.008 +openai,o3-mini,0.0011,0.0044 +openai,o3-mini-2025-01-31,0.0011,0.0044 +openai,o4-mini,0.0011,0.0044 +openai,o4-mini-deep-research,0.002,0.008 +openai,o4-mini-deep-research-2025-06-26,0.002,0.008 +openai,o4-mini-2025-04-16,0.0011,0.0044 +openai,o1-mini-2024-09-12,0.003,0.012 +openai,o1-preview,0.015,0.06 +openai,o1-preview-2024-09-12,0.015,0.06 +openai,o1-2024-12-17,0.015,0.06 +openai,chatgpt-4o-latest,0.005,0.015 +openai,gpt-4o-2024-05-13,0.005,0.015 +openai,gpt-4o-2024-08-06,0.0025,0.01 +openai,gpt-4o-2024-11-20,0.0025,0.01 +openai,gpt-4o-realtime-preview-2024-10-01,0.005,0.02 +openai,gpt-4o-realtime-preview,0.005,0.02 +openai,gpt-4o-realtime-preview-2024-12-17,0.005,0.02 +openai,gpt-4o-realtime-preview-2025-06-03,0.005,0.02 +openai,gpt-4o-mini-realtime-preview,0.0006,0.0024 +openai,gpt-4o-mini-realtime-preview-2024-12-17,0.0006,0.0024 +openai,gpt-4-turbo-preview,0.01,0.03 +openai,gpt-4-0314,0.03,0.06 +openai,gpt-4-0613,0.03,0.06 +openai,gpt-4-32k,0.06,0.12 +openai,gpt-4-32k-0314,0.06,0.12 +openai,gpt-4-32k-0613,0.06,0.12 +openai,gpt-4-turbo,0.01,0.03 +openai,gpt-4-turbo-2024-04-09,0.01,0.03 +openai,gpt-4-1106-preview,0.01,0.03 +openai,gpt-4-0125-preview,0.01,0.03 +openai,gpt-4-vision-preview,0.01,0.03 +openai,gpt-4-1106-vision-preview,0.01,0.03 +openai,gpt-3.5-turbo,0.0015,0.002 +openai,gpt-3.5-turbo-0301,0.0015,0.002 +openai,gpt-3.5-turbo-0613,0.0015,0.002 +openai,gpt-3.5-turbo-1106,0.001,0.002 +openai,gpt-3.5-turbo-0125,0.0005,0.0015 +openai,gpt-3.5-turbo-16k,0.003,0.004 +openai,gpt-3.5-turbo-16k-0613,0.003,0.004 +openai,ft:gpt-3.5-turbo,0.003,0.006 +openai,ft:gpt-3.5-turbo-0125,0.003,0.006 +openai,ft:gpt-3.5-turbo-1106,0.003,0.006 +openai,ft:gpt-3.5-turbo-0613,0.003,0.006 
+openai,ft:gpt-4-0613,0.03,0.06 +openai,ft:gpt-4o-2024-08-06,0.00375,0.015 +openai,ft:gpt-4o-2024-11-20,0.00375,0.015 +openai,ft:gpt-4o-mini-2024-07-18,0.0003,0.0012 +openai,gpt-4o-transcribe,0.0025,0.01 +openai,gpt-4o-mini-transcribe,0.00125,0.005 +openai,gpt-4o-mini-tts,0.0025,0.01 +azure,gpt-5,0.00125,0.01 +azure,gpt-5-2025-08-07,0.00125,0.01 +azure,gpt-5-mini,0.00025,0.002 +azure,gpt-5-mini-2025-08-07,0.00025,0.002 +azure,gpt-5-nano-2025-08-07,0.00005,0.0004 +azure,gpt-5-nano,0.00005,0.0004 +azure,gpt-5-chat,0.00125,0.01 +azure,gpt-5-chat-latest,0.00125,0.01 +azure,gpt-4o-mini-tts,0.0025,0.01 +azure,gpt-4o-audio-preview-2024-12-17,0.0025,0.01 +azure,gpt-4o-mini-audio-preview-2024-12-17,0.0025,0.01 +azure,gpt-4.1,0.002,0.008 +azure,gpt-4.1-2025-04-14,0.002,0.008 +azure,gpt-4.1-mini,0.0004,0.0016 +azure,gpt-4.1-mini-2025-04-14,0.0004,0.0016 +azure,gpt-4.1-nano,0.0001,0.0004 +azure,gpt-4.1-nano-2025-04-14,0.0001,0.0004 +azure,o3-pro,0.02,0.08 +azure,o3-pro-2025-06-10,0.02,0.08 +azure,o3,0.002,0.008 +azure,o3-2025-04-16,0.01,0.04 +azure,o3-deep-research,0.01,0.04 +azure,o4-mini,0.0011,0.0044 +azure,gpt-4o-mini-realtime-preview-2024-12-17,0.0006,0.0024 +azure,gpt-4o-realtime-preview-2024-12-17,0.005,0.02 +azure,gpt-4o-realtime-preview-2024-10-01,0.005,0.02 +azure,o4-mini-2025-04-16,0.0011,0.0044 +azure,o3-mini-2025-01-31,0.0011,0.0044 +azure,gpt-4o-transcribe,0.0025,0.01 +azure,gpt-4o-mini-transcribe,0.00125,0.005 +azure,o3-mini,0.0011,0.0044 +azure,o1-mini,0.00121,0.00484 +azure,o1-mini-2024-09-12,0.0011,0.0044 +azure,o1,0.015,0.06 +azure,o1-2024-12-17,0.015,0.06 +azure,codex-mini,0.0015,0.006 +azure,o1-preview,0.015,0.06 +azure,o1-preview-2024-09-12,0.015,0.06 +azure,gpt-4.5-preview,0.075,0.15 +azure,gpt-4o,0.0025,0.01 +azure,gpt-4o-2024-08-06,0.0025,0.01 +azure,gpt-4o-2024-11-20,0.00275,0.011 +azure,gpt-4o-2024-05-13,0.005,0.015 +azure,gpt-4o-mini,0.000165,0.00066 +azure,gpt-4o-mini-2024-07-18,0.000165,0.00066 +azure,gpt-4-turbo-2024-04-09,0.01,0.03 
+azure,gpt-4-0125-preview,0.01,0.03 +azure,gpt-4-1106-preview,0.01,0.03 +azure,gpt-4-0613,0.03,0.06 +azure,gpt-4-32k-0613,0.06,0.12 +azure,gpt-4-32k,0.06,0.12 +azure,gpt-4,0.03,0.06 +azure,gpt-4-turbo,0.01,0.03 +azure,gpt-4-turbo-vision-preview,0.01,0.03 +azure,gpt-35-turbo-16k-0613,0.003,0.004 +azure,gpt-35-turbo-1106,0.001,0.002 +azure,gpt-35-turbo-0613,0.0015,0.002 +azure,gpt-35-turbo-0301,0.0002,0.002 +azure,gpt-35-turbo-0125,0.0005,0.0015 +azure,gpt-3.5-turbo-0125,0.0005,0.0015 +azure,gpt-35-turbo-16k,0.003,0.004 +azure,gpt-35-turbo,0.0005,0.0015 +azure,gpt-3.5-turbo,0.0005,0.0015 +azure,mistral-large-latest,0.008,0.024 +azure,mistral-large-2402,0.008,0.024 +azure,command-r-plus,0.003,0.015 +azure,grok-3,0.0033,0.0165 +azure,grok-3-mini,0.000275,0.00138 +azure,deepseek-r1,0.00135,0.0054 +azure,deepseek-v3,0.00114,0.00456 +azure,deepseek-v3-0324,0.00114,0.00456 +azure,jamba-instruct,0.0005,0.0007 +azure,jais-30b-chat,3.2,9.71 +azure,mistral-nemo,0.00015,0.00015 +azure,mistral-medium-2505,0.0004,0.002 +azure,mistral-large,0.004,0.012 +azure,mistral-small,0.001,0.003 +azure,mistral-small-2503,0.001,0.003 +azure,mistral-large-2407,0.002,0.006 +azure,ministral-3b,0.00004,0.00004 +azure,llama-3.2-11b-vision-instruct,0.00037,0.00037 +azure,llama-3.3-70b-instruct,0.00071,0.00071 +azure,llama-4-scout-17b-16e-instruct,0.0002,0.00078 +azure,llama-4-maverick-17b-128e-instruct-fp8,0.00141,0.00035 +azure,llama-3.2-90b-vision-instruct,0.00204,0.00204 +azure,meta-llama-3-70b-instruct,0.0011,0.00037 +azure,meta-llama-3.1-8b-instruct,0.0003,0.00061 +azure,meta-llama-3.1-70b-instruct,0.00268,0.00354 +azure,meta-llama-3.1-405b-instruct,0.00533,0.016 +azure,phi-4-mini-instruct,0.000075,0.0003 +azure,phi-4-multimodal-instruct,0.00008,0.00032 +azure,phi-4,0.000125,0.0005 +azure,phi-3.5-mini-instruct,0.00013,0.00052 +azure,phi-3.5-vision-instruct,0.00013,0.00052 +azure,phi-3.5-moe-instruct,0.00016,0.00064 +azure,phi-3-mini-4k-instruct,0.00013,0.00052 
+azure,phi-3-mini-128k-instruct,0.00013,0.00052 +azure,phi-3-small-8k-instruct,0.00015,0.0006 +azure,phi-3-small-128k-instruct,0.00015,0.0006 +azure,phi-3-medium-4k-instruct,0.00017,0.00068 +azure,phi-3-medium-128k-instruct,0.00017,0.00068 +mistral,mistral-tiny,0.00025,0.00025 +mistral,mistral-small,0.0001,0.0003 +mistral,mistral-small-latest,0.0001,0.0003 +mistral,mistral-medium,0.0027,0.0081 +mistral,mistral-medium-latest,0.0004,0.002 +mistral,mistral-medium-2505,0.0004,0.002 +mistral,mistral-medium-2312,0.0027,0.0081 +mistral,mistral-large-latest,0.002,0.006 +mistral,mistral-large-2411,0.002,0.006 +mistral,mistral-large-2402,0.004,0.012 +mistral,mistral-large-2407,0.003,0.009 +mistral,pixtral-large-latest,0.002,0.006 +mistral,pixtral-large-2411,0.002,0.006 +mistral,pixtral-12b-2409,0.00015,0.00015 +mistral,open-mistral-7b,0.00025,0.00025 +mistral,open-mixtral-8x7b,0.0007,0.0007 +mistral,open-mixtral-8x22b,0.002,0.006 +mistral,codestral-latest,0.001,0.003 +mistral,codestral-2405,0.001,0.003 +mistral,open-mistral-nemo,0.0003,0.0003 +mistral,open-mistral-nemo-2407,0.0003,0.0003 +mistral,open-codestral-mamba,0.00025,0.00025 +mistral,codestral-mamba-latest,0.00025,0.00025 +mistral,devstral-small-2505,0.0001,0.0003 +mistral,devstral-small-2507,0.0001,0.0003 +mistral,devstral-medium-2507,0.0004,0.002 +mistral,magistral-medium-latest,0.002,0.005 +mistral,magistral-medium-2506,0.002,0.005 +mistral,magistral-small-latest,0.0005,0.0015 +mistral,magistral-small-2506,0.0005,0.0015 +deepseek,deepseek-reasoner,0.00055,0.00219 +deepseek,deepseek-chat,0.00027,0.0011 +deepseek,deepseek-r1,0.00055,0.00219 +deepseek,deepseek-v3,0.00027,0.0011 +xai,grok-beta,0.005,0.015 +xai,grok-2-vision-1212,0.002,0.01 +xai,grok-2-vision-latest,0.002,0.01 +xai,grok-2-vision,0.002,0.01 +xai,grok-3,0.003,0.015 +xai,grok-3-latest,0.003,0.015 +xai,grok-3-beta,0.003,0.015 +xai,grok-3-fast-beta,0.005,0.025 +xai,grok-3-fast-latest,0.005,0.025 +xai,grok-3-mini,0.0003,0.0005 
+xai,grok-3-mini-latest,0.0003,0.0005 +xai,grok-3-mini-fast,0.0006,0.004 +xai,grok-3-mini-fast-latest,0.0006,0.004 +xai,grok-3-mini-beta,0.0003,0.0005 +xai,grok-3-mini-fast-beta,0.0006,0.004 +xai,grok-vision-beta,0.005,0.015 +xai,grok-2-1212,0.002,0.01 +xai,grok-2,0.002,0.01 +xai,grok-2-latest,0.002,0.01 +xai,grok-4,0.003,0.015 +xai,grok-4-0709,0.003,0.015 +xai,grok-4-latest,0.003,0.015 +deepseek,deepseek-coder,0.00014,0.00028 +groq,deepseek-r1-distill-llama-70b,0.00075,0.00099 +groq,llama-3.3-70b-versatile,0.00059,0.00079 +groq,llama-3.3-70b-specdec,0.00059,0.00099 +groq,llama-guard-3-8b,0.0002,0.0002 +groq,llama2-70b-4096,0.0007,0.0008 +groq,llama3-8b-8192,0.00005,0.00008 +groq,llama-3.2-1b-preview,0.00004,0.00004 +groq,llama-3.2-3b-preview,0.00006,0.00006 +groq,llama-3.2-11b-text-preview,0.00018,0.00018 +groq,llama-3.2-11b-vision-preview,0.00018,0.00018 +groq,llama-3.2-90b-text-preview,0.0009,0.0009 +groq,llama-3.2-90b-vision-preview,0.0009,0.0009 +groq,llama3-70b-8192,0.00059,0.00079 +groq,llama-3.1-8b-instant,0.00005,0.00008 +groq,llama-3.1-70b-versatile,0.00059,0.00079 +groq,llama-3.1-405b-reasoning,0.00059,0.00079 +groq,mistral-saba-24b,0.00079,0.00079 +groq,mixtral-8x7b-32768,0.00024,0.00024 +groq,gemma-7b-it,0.00007,0.00007 +groq,gemma2-9b-it,0.0002,0.0002 +groq,llama3-groq-70b-8192-tool-use-preview,0.00089,0.00089 +groq,llama3-groq-8b-8192-tool-use-preview,0.00019,0.00019 +cerebras,llama3.1-8b,0.0001,0.0001 +cerebras,llama3.1-70b,0.0006,0.0006 +cerebras,llama-3.3-70b,0.00085,0.0012 +cerebras,qwen-3-32b,0.0004,0.0008 +anthropic,claude-3-haiku-20240307,0.00025,0.00125 +anthropic,claude-3-5-haiku-20241022,0.0008,0.004 +anthropic,claude-3-5-haiku-latest,0.001,0.005 +anthropic,claude-3-opus-latest,0.015,0.075 +anthropic,claude-3-opus-20240229,0.015,0.075 +anthropic,claude-3-5-sonnet-latest,0.003,0.015 +anthropic,claude-3-5-sonnet-20240620,0.003,0.015 +anthropic,claude-opus-4-20250514,0.015,0.075 +anthropic,claude-opus-4-1,0.015,0.075 
+anthropic,claude-opus-4-1-20250805,0.015,0.075 +anthropic,claude-sonnet-4-20250514,0.003,0.015 +anthropic,claude-4-opus-20250514,0.015,0.075 +anthropic,claude-4-sonnet-20250514,0.003,0.015 +anthropic,claude-3-7-sonnet-latest,0.003,0.015 +anthropic,claude-3-7-sonnet-20250219,0.003,0.015 +anthropic,claude-3-5-sonnet-20241022,0.003,0.015 +vertex,text-bison32k,0.000125,0.000125 +vertex,text-bison32k@002,0.000125,0.000125 +vertex,text-unicorn,0.01,0.028 +vertex,text-unicorn@001,0.01,0.028 +vertex,chat-bison,0.000125,0.000125 +vertex,chat-bison@001,0.000125,0.000125 +vertex,chat-bison@002,0.000125,0.000125 +vertex,chat-bison-32k,0.000125,0.000125 +vertex,chat-bison-32k@002,0.000125,0.000125 +vertex,code-bison,0.000125,0.000125 +vertex,code-bison@001,0.000125,0.000125 +vertex,code-bison@002,0.000125,0.000125 +vertex,code-bison32k,0.000125,0.000125 +vertex,code-bison-32k@002,0.000125,0.000125 +vertex,code-gecko@001,0.000125,0.000125 +vertex,code-gecko@002,0.000125,0.000125 +vertex,code-gecko,0.000125,0.000125 +vertex,code-gecko-latest,0.000125,0.000125 +vertex,codechat-bison@latest,0.000125,0.000125 +vertex,codechat-bison,0.000125,0.000125 +vertex,codechat-bison@001,0.000125,0.000125 +vertex,codechat-bison@002,0.000125,0.000125 +vertex,codechat-bison-32k,0.000125,0.000125 +vertex,codechat-bison-32k@002,0.000125,0.000125 +vertex,gemini-pro,0.0005,0.0015 +vertex,gemini-1.0-pro,0.0005,0.0015 +vertex,gemini-1.0-pro-001,0.0005,0.0015 +vertex,gemini-1.0-ultra,0.0005,0.0015 +vertex,gemini-1.0-ultra-001,0.0005,0.0015 +vertex,gemini-1.0-pro-002,0.0005,0.0015 +vertex,gemini-1.5-pro,0.00125,0.005 +vertex,gemini-1.5-pro-002,0.00125,0.005 +vertex,gemini-1.5-pro-001,0.00125,0.005 +vertex,gemini-1.5-pro-preview-0514,0.000078125,0.0003125 +vertex,gemini-1.5-pro-preview-0215,0.000078125,0.0003125 +vertex,gemini-1.5-pro-preview-0409,0.000078125,0.0003125 +vertex,gemini-1.5-flash,0.000075,0.0003 +vertex,gemini-1.5-flash-exp-0827,0.000004688,0.0000046875 
+vertex,gemini-1.5-flash-002,0.000075,0.0003 +vertex,gemini-1.5-flash-001,0.000075,0.0003 +vertex,gemini-1.5-flash-preview-0514,0.000075,0.0000046875 +vertex,gemini-pro-vision,0.0005,0.0015 +vertex,gemini-1.0-pro-vision,0.0005,0.0015 +vertex,gemini-1.0-pro-vision-001,0.0005,0.0015 +vertex,gemini-2.5-pro-exp-03-25,0.00125,0.01 +vertex,gemini-2.0-pro-exp-02-05,0.00125,0.01 +vertex,gemini-2.0-flash-exp,0.00015,0.0006 +vertex,gemini-2.0-flash-001,0.00015,0.0006 +vertex,gemini-2.5-pro,0.00125,0.01 +gemini,models/gemini-2.5-pro,0.00125,0.01 +gemini,models/gemini-2.5-flash,0.0003,0.0025 +vertex,gemini-2.5-flash,0.0003,0.0025 +gemini,models/gemini-2.0-flash-live-001,0.00035,0.0015 +gemini,models/gemini-2.5-flash-preview-tts,0.00015,0.0006 +gemini,models/gemini-2.5-flash-preview-05-20,0.0003,0.0025 +gemini,models/gemini-2.5-flash-preview-04-17,0.00015,0.0006 +gemini,models/gemini-2.5-flash-lite-preview-06-17,0.0001,0.0004 +gemini,models/gemini-2.5-flash-lite,0.0001,0.0004 +vertex,gemini-2.5-flash-preview-05-20,0.0003,0.0025 +vertex,gemini-2.5-flash-preview-04-17,0.00015,0.0006 +vertex,gemini-2.5-flash-lite-preview-06-17,0.0001,0.0004 +vertex,gemini-2.5-flash-lite,0.0001,0.0004 +vertex,gemini-2.0-flash,0.0001,0.0004 +vertex,gemini-2.0-flash-lite,0.000075,0.0003 +vertex,gemini-2.0-flash-lite-001,0.000075,0.0003 +vertex,gemini-2.5-pro-preview-06-05,0.00125,0.01 +vertex,gemini-2.5-pro-preview-05-06,0.00125,0.01 +vertex,gemini-2.5-pro-preview-03-25,0.00125,0.01 +vertex,gemini-2.0-flash-preview-image-generation,0.0001,0.0004 +vertex,gemini-2.5-pro-preview-tts,0.00125,0.01 +gemini,models/gemini-2.0-flash-preview-image-generation,0.0001,0.0004 +gemini,models/gemini-2.0-flash,0.0001,0.0004 +gemini,models/gemini-2.0-flash-lite,0.000075,0.0003 +gemini,models/gemini-2.0-flash-001,0.0001,0.0004 +gemini,models/gemini-2.5-pro-preview-tts,0.00125,0.01 +gemini,models/gemini-2.5-pro-preview-06-05,0.00125,0.01 +gemini,models/gemini-2.5-pro-preview-05-06,0.00125,0.01 
+gemini,models/gemini-2.5-pro-preview-03-25,0.00125,0.01 +gemini,models/gemini-2.0-flash-lite-preview-02-05,0.000075,0.0003 +vertex,gemini-2.0-flash-live-preview-04-09,0.0005,0.002 +gemini,models/gemini-1.5-flash-002,0.000075,0.0003 +gemini,models/gemini-1.5-flash-001,0.000075,0.0003 +gemini,models/gemini-1.5-flash,0.000075,0.0003 +gemini,models/gemini-1.5-flash-latest,0.000075,0.0003 +gemini,models/gemini-pro,0.00035,0.00105 +gemini,models/gemini-1.5-pro,0.0035,0.0105 +gemini,models/gemini-1.5-pro-002,0.0035,0.0105 +gemini,models/gemini-1.5-pro-001,0.0035,0.0105 +gemini,models/gemini-1.5-pro-exp-0801,0.0035,0.0105 +gemini,models/gemini-1.5-pro-latest,0.0035,0.00105 +gemini,models/gemini-pro-vision,0.00035,0.00105 +gemini,models/gemini-gemma-2-27b-it,0.00035,0.00105 +gemini,models/gemini-gemma-2-9b-it,0.00035,0.00105 +cohere,command-a-03-2025,0.0025,0.01 +cohere,command-r,0.00015,0.0006 +cohere,command-r-08-2024,0.00015,0.0006 +cohere,command-r7b-12-2024,0.00015,0.0000375 +cohere,command-light,0.0003,0.0006 +cohere,command-r-plus,0.0025,0.01 +cohere,command-r-plus-08-2024,0.0025,0.01 +cohere,command-nightly,0.001,0.002 +cohere,command,0.001,0.002 +bedrock,ai21.j2-mid-v1,0.0125,0.0125 +bedrock,ai21.j2-ultra-v1,0.0188,0.0188 +bedrock,ai21.jamba-instruct-v1:0,0.0005,0.0007 +bedrock,ai21.jamba-1-5-large-v1:0,0.002,0.008 +bedrock,ai21.jamba-1-5-mini-v1:0,0.0002,0.0004 +bedrock,amazon.titan-text-lite-v1,0.0003,0.0004 +bedrock,amazon.titan-text-express-v1,0.0013,0.0017 +bedrock,amazon.titan-text-premier-v1:0,0.0005,0.0015 +bedrock,mistral.mistral-7b-instruct-v0:2,0.00015,0.0002 +bedrock,mistral.mixtral-8x7b-instruct-v0:1,0.00045,0.0007 +bedrock,mistral.mistral-large-2402-v1:0,0.008,0.024 +bedrock,mistral.mistral-large-2407-v1:0,0.003,0.009 +bedrock,mistral.mistral-small-2402-v1:0,0.001,0.003 +bedrock,eu.mistral.pixtral-large-2502-v1:0,0.002,0.006 +bedrock,us.mistral.pixtral-large-2502-v1:0,0.002,0.006 +bedrock,amazon.nova-micro-v1:0,0.000035,0.00014 
+bedrock,us.amazon.nova-micro-v1:0,0.000035,0.00014 +bedrock,eu.amazon.nova-micro-v1:0,0.000046,0.000184 +bedrock,amazon.nova-lite-v1:0,0.00006,0.00024 +bedrock,us.amazon.nova-lite-v1:0,0.00006,0.00024 +bedrock,eu.amazon.nova-lite-v1:0,0.000078,0.000312 +bedrock,amazon.nova-pro-v1:0,0.0008,0.0032 +bedrock,us.amazon.nova-pro-v1:0,0.0008,0.0032 +bedrock,eu.amazon.nova-pro-v1:0,0.00105,0.0042 +bedrock,apac.amazon.nova-micro-v1:0,0.000037,0.000148 +bedrock,apac.amazon.nova-lite-v1:0,0.000063,0.000252 +bedrock,apac.amazon.nova-pro-v1:0,0.00084,0.00336 +bedrock,us.amazon.nova-premier-v1:0,0.0025,0.0125 +bedrock,anthropic.claude-3-sonnet-20240229-v1:0,0.003,0.015 +bedrock,anthropic.claude-3-5-sonnet-20240620-v1:0,0.003,0.015 +bedrock,openai.gpt-oss-20b-1:0,0.00007,0.0003 +bedrock,openai.gpt-oss-120b-1:0,0.00015,0.0006 +bedrock,anthropic.claude-opus-4-1-20250805-v1:0,0.015,0.075 +bedrock,anthropic.claude-opus-4-20250514-v1:0,0.015,0.075 +bedrock,anthropic.claude-sonnet-4-20250514-v1:0,0.003,0.015 +bedrock,anthropic.claude-3-7-sonnet-20250219-v1:0,0.003,0.015 +bedrock,anthropic.claude-3-5-sonnet-20241022-v2:0,0.003,0.015 +bedrock,anthropic.claude-3-haiku-20240307-v1:0,0.00025,0.00125 +bedrock,anthropic.claude-3-5-haiku-20241022-v1:0,0.0008,0.004 +bedrock,anthropic.claude-3-opus-20240229-v1:0,0.015,0.075 +bedrock,us.anthropic.claude-3-sonnet-20240229-v1:0,0.003,0.015 +bedrock,us.anthropic.claude-3-5-sonnet-20240620-v1:0,0.003,0.015 +bedrock,us.anthropic.claude-3-5-sonnet-20241022-v2:0,0.003,0.015 +bedrock,us.anthropic.claude-3-7-sonnet-20250219-v1:0,0.003,0.015 +bedrock,us.anthropic.claude-opus-4-1-20250805-v1:0,0.015,0.075 +bedrock,us.anthropic.claude-opus-4-20250514-v1:0,0.015,0.075 +bedrock,us.anthropic.claude-sonnet-4-20250514-v1:0,0.003,0.015 +bedrock,us.anthropic.claude-3-haiku-20240307-v1:0,0.00025,0.00125 +bedrock,us.anthropic.claude-3-5-haiku-20241022-v1:0,0.0008,0.004 +bedrock,us.anthropic.claude-3-opus-20240229-v1:0,0.015,0.075 
+bedrock,eu.anthropic.claude-3-sonnet-20240229-v1:0,0.003,0.015 +bedrock,eu.anthropic.claude-3-5-sonnet-20240620-v1:0,0.003,0.015 +bedrock,eu.anthropic.claude-3-5-sonnet-20241022-v2:0,0.003,0.015 +bedrock,eu.anthropic.claude-3-7-sonnet-20250219-v1:0,0.003,0.015 +bedrock,eu.anthropic.claude-3-haiku-20240307-v1:0,0.00025,0.00125 +bedrock,eu.anthropic.claude-opus-4-1-20250805-v1:0,0.015,0.075 +bedrock,eu.anthropic.claude-opus-4-20250514-v1:0,0.015,0.075 +bedrock,eu.anthropic.claude-sonnet-4-20250514-v1:0,0.003,0.015 +bedrock,apac.anthropic.claude-3-haiku-20240307-v1:0,0.00025,0.00125 +bedrock,apac.anthropic.claude-3-sonnet-20240229-v1:0,0.003,0.015 +bedrock,apac.anthropic.claude-3-5-sonnet-20240620-v1:0,0.003,0.015 +bedrock,apac.anthropic.claude-3-5-sonnet-20241022-v2:0,0.003,0.015 +bedrock,apac.anthropic.claude-sonnet-4-20250514-v1:0,0.003,0.015 +bedrock,eu.anthropic.claude-3-5-haiku-20241022-v1:0,0.00025,0.00125 +bedrock,eu.anthropic.claude-3-opus-20240229-v1:0,0.015,0.075 +bedrock,anthropic.claude-v1,0.008,0.024 +bedrock,anthropic.claude-v2,0.008,0.024 +bedrock,anthropic.claude-v2:1,0.008,0.024 +bedrock,anthropic.claude-instant-v1,0.0008,0.0024 +bedrock,cohere.command-text-v14,0.0015,0.002 +bedrock,cohere.command-light-text-v14,0.0003,0.0006 +bedrock,cohere.command-r-plus-v1:0,0.003,0.015 +bedrock,cohere.command-r-v1:0,0.0005,0.0015 +bedrock,us.deepseek.r1-v1:0,0.00135,0.0054 +bedrock,meta.llama3-3-70b-instruct-v1:0,0.00072,0.00072 +bedrock,meta.llama2-13b-chat-v1,0.00075,0.001 +bedrock,meta.llama2-70b-chat-v1,0.00195,0.00256 +bedrock,meta.llama3-8b-instruct-v1:0,0.0003,0.0006 +bedrock,meta.llama3-70b-instruct-v1:0,0.00265,0.0035 +bedrock,meta.llama3-1-8b-instruct-v1:0,0.00022,0.00022 +bedrock,us.meta.llama3-1-8b-instruct-v1:0,0.00022,0.00022 +bedrock,meta.llama3-1-70b-instruct-v1:0,0.00099,0.00099 +bedrock,us.meta.llama3-1-70b-instruct-v1:0,0.00099,0.00099 +bedrock,meta.llama3-1-405b-instruct-v1:0,0.00532,0.016 
+bedrock,us.meta.llama3-1-405b-instruct-v1:0,0.00532,0.016 +bedrock,meta.llama3-2-1b-instruct-v1:0,0.0001,0.0001 +bedrock,us.meta.llama3-2-1b-instruct-v1:0,0.0001,0.0001 +bedrock,eu.meta.llama3-2-1b-instruct-v1:0,0.00013,0.00013 +bedrock,meta.llama3-2-3b-instruct-v1:0,0.00015,0.00015 +bedrock,us.meta.llama3-2-3b-instruct-v1:0,0.00015,0.00015 +bedrock,eu.meta.llama3-2-3b-instruct-v1:0,0.00019,0.00019 +bedrock,meta.llama3-2-11b-instruct-v1:0,0.00035,0.00035 +bedrock,us.meta.llama3-2-11b-instruct-v1:0,0.00035,0.00035 +bedrock,meta.llama3-2-90b-instruct-v1:0,0.002,0.002 +bedrock,us.meta.llama3-2-90b-instruct-v1:0,0.002,0.002 +bedrock,us.meta.llama3-3-70b-instruct-v1:0,0.00072,0.00072 +bedrock,meta.llama4-maverick-17b-instruct-v1:0,0.00024,0.00097 +bedrock,us.meta.llama4-maverick-17b-instruct-v1:0,0.00024,0.00097 +bedrock,meta.llama4-scout-17b-instruct-v1:0,0.00017,0.00066 +bedrock,us.meta.llama4-scout-17b-instruct-v1:0,0.00017,0.00066 +together,together-ai-up-to-4b,0.0001,0.0001 +together,together-ai-4.1b-8b,0.0002,0.0002 +together,together-ai-8.1b-21b,0.0003,0.0003 +together,together-ai-21.1b-41b,0.0008,0.0008 +together,together-ai-41.1b-80b,0.0009,0.0009 +together,together-ai-81.1b-110b,0.0018,0.0018 +perplexity,codellama-34b-instruct,0.00035,0.0014 +perplexity,codellama-70b-instruct,0.0007,0.0028 +perplexity,llama-3.1-70b-instruct,0.001,0.001 +perplexity,llama-3.1-8b-instruct,0.0002,0.0002 +perplexity,llama-3.1-sonar-huge-128k-online,0.005,0.005 +perplexity,llama-3.1-sonar-large-128k-online,0.001,0.001 +perplexity,llama-3.1-sonar-large-128k-chat,0.001,0.001 +perplexity,llama-3.1-sonar-small-128k-chat,0.0002,0.0002 +perplexity,llama-3.1-sonar-small-128k-online,0.0002,0.0002 +perplexity,pplx-7b-chat,0.00007,0.00028 +perplexity,pplx-70b-chat,0.0007,0.0028 +perplexity,llama-2-70b-chat,0.0007,0.0028 +perplexity,mistral-7b-instruct,0.00007,0.00028 +perplexity,mixtral-8x7b-instruct,0.00007,0.00028 +perplexity,sonar-small-chat,0.00007,0.00028 
+perplexity,sonar-medium-chat,0.0006,0.0018 +perplexity,sonar,0.001,0.001 +perplexity,sonar-pro,0.003,0.015 +perplexity,sonar-reasoning,0.001,0.005 +perplexity,sonar-reasoning-pro,0.002,0.008 +perplexity,sonar-deep-research,0.002,0.008 +fireworks,fireworks-ai-up-to-4b,0.0002,0.0002 +fireworks,fireworks-ai-4.1b-to-16b,0.0002,0.0002 +fireworks,fireworks-ai-above-16b,0.0009,0.0009 +fireworks,fireworks-ai-moe-up-to-56b,0.0005,0.0005 +fireworks,fireworks-ai-56b-to-176b,0.0012,0.0012