diff --git a/app/api/routes/claude_code.py b/app/api/routes/claude_code.py
new file mode 100644
index 0000000..6503544
--- /dev/null
+++ b/app/api/routes/claude_code.py
@@ -0,0 +1,384 @@
+"""
+Claude Code compatible API endpoints.
+Handles Anthropic format requests and converts them to/from OpenAI format via Forge's infrastructure.
+"""
+
+import inspect
+import json
+import time
+import uuid
+from typing import Optional, Union
+
+from fastapi import APIRouter, Depends, HTTPException, Request
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import ValidationError
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from app.api.dependencies import get_user_by_api_key
+from app.api.routes.proxy import _get_allowed_provider_names
+from app.api.schemas.anthropic import (
+    AnthropicErrorResponse,
+    AnthropicErrorType,
+    AnthropicMessagesRequest,
+    AnthropicMessagesResponse,
+    TokenCountRequest,
+    TokenCountResponse,
+)
+from app.core.database import get_async_db
+from app.core.logger import get_logger
+from app.models.user import User
+from app.services.provider_service import ProviderService
+from app.utils.anthropic_converter import (
+    convert_anthropic_to_openai_messages,
+    convert_anthropic_tools_to_openai,
+    convert_anthropic_tool_choice_to_openai,
+    convert_openai_to_anthropic_response,
+    count_tokens_for_anthropic_request,
+)
+from app.utils.anthropic_streaming import handle_anthropic_streaming_response_from_openai_stream
+
+router = APIRouter()
+logger = get_logger(name="claude_code")
+
+
+def _build_anthropic_error_response(
+    error_type: str,
+    message: str,
+    status_code: int,
+) -> JSONResponse:
+    """Creates a JSONResponse with an Anthropic-formatted error."""
+    error_resp_model = AnthropicErrorResponse(
+        error={
+            "type": error_type,
+            "message": message,
+        }
+    )
+    return JSONResponse(
+        status_code=status_code,
+        content=error_resp_model.model_dump(exclude_unset=True)
+    )
+
+
+async def _log_and_return_error_response(
+    request: Request,
+    status_code: int,
+    anthropic_error_type: str,
+    error_message: str,
+    caught_exception: Optional[Exception] = None,
+) -> JSONResponse:
+    """Log the error and return an Anthropic-formatted error response."""
+    request_id = getattr(request.state, "request_id", "unknown")
+    start_time_mono = getattr(request.state, "start_time_monotonic", time.monotonic())
+    duration_ms = (time.monotonic() - start_time_mono) * 1000
+
+    log_data = {
+        "status_code": status_code,
+        "duration_ms": duration_ms,
+        "error_type": anthropic_error_type,
+        "client_ip": request.client.host if request.client else "unknown",
+    }
+
+    if caught_exception:
+        logger.error(
+            f"Claude Code request failed: {error_message}",
+            extra={"request_id": request_id, "data": log_data},
+            exc_info=caught_exception,
+        )
+    else:
+        logger.error(
+            f"Claude Code request failed: {error_message}",
+            extra={"request_id": request_id, "data": log_data},
+        )
+
+    return _build_anthropic_error_response(
+        anthropic_error_type, error_message, status_code
+    )
+
+
+@router.post("/messages", response_model=None, tags=["Claude Code"], status_code=200)
+async def create_message_proxy(
+    request: Request,
+    user: User = Depends(get_user_by_api_key),
+    db: AsyncSession = Depends(get_async_db),
+) -> Union[JSONResponse, StreamingResponse]:
+    """
+    Main endpoint for Claude Code message completions, proxied through Forge to providers.
+    Handles request/response conversions, streaming, and dynamic model selection.
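+
+    Flow: parse and validate the Anthropic request, convert it to an
+    OpenAI-style payload, route it through Forge's ProviderService, then
+    convert the provider's response (or SSE stream) back to Anthropic format.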
+ """ + request_id = str(uuid.uuid4()) + request.state.request_id = request_id + request.state.start_time_monotonic = time.monotonic() + + try: + # Parse request body + raw_body = await request.json() + logger.debug( + "Received Claude Code request body", + extra={ + "request_id": request_id, + "data": {"body": raw_body}, + }, + ) + + anthropic_request = AnthropicMessagesRequest.model_validate(raw_body) + except json.JSONDecodeError as e: + return await _log_and_return_error_response( + request, + 400, + AnthropicErrorType.INVALID_REQUEST, + "Invalid JSON body.", + e, + ) + except ValidationError as e: + return await _log_and_return_error_response( + request, + 422, + AnthropicErrorType.INVALID_REQUEST, + f"Invalid request body: {e.errors()}", + e, + ) + + is_stream = anthropic_request.stream or False + + # Count tokens for logging + estimated_input_tokens = count_tokens_for_anthropic_request( + messages=anthropic_request.messages, + system=anthropic_request.system, + model_name=anthropic_request.model, + tools=anthropic_request.tools, + request_id=request_id, + ) + + logger.info( + "Processing new Claude Code message request", + extra={ + "request_id": request_id, + "data": { + "model": anthropic_request.model, + "stream": is_stream, + "estimated_input_tokens": estimated_input_tokens, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + }, + }, + ) + + try: + # Convert Anthropic format to OpenAI format + openai_messages = convert_anthropic_to_openai_messages( + anthropic_request.messages, + anthropic_request.system, + request_id=request_id + ) + openai_tools = convert_anthropic_tools_to_openai(anthropic_request.tools) + openai_tool_choice = convert_anthropic_tool_choice_to_openai( + anthropic_request.tool_choice, request_id + ) + except Exception as e: + return await _log_and_return_error_response( + request, + 500, + AnthropicErrorType.API_ERROR, + "Error during request conversion.", + e, + ) + + # Build OpenAI-compatible request for Forge + # Cap max_tokens to reasonable limits to avoid provider errors + max_tokens = anthropic_request.max_tokens + if max_tokens > 16384: # GPT-4o and most models limit + max_tokens = 16384 + logger.warning( + f"max_tokens capped from {anthropic_request.max_tokens} to {max_tokens} to comply with model limits", + extra={"request_id": request_id} + ) + + openai_payload = { + "model": anthropic_request.model, + "messages": openai_messages, + "max_tokens": max_tokens, + "stream": is_stream, + } + + # Add optional parameters if present + if anthropic_request.temperature is not None: + openai_payload["temperature"] = anthropic_request.temperature + if anthropic_request.top_p is not None: + openai_payload["top_p"] = anthropic_request.top_p + if anthropic_request.stop_sequences: + openai_payload["stop"] = anthropic_request.stop_sequences + if openai_tools: + openai_payload["tools"] = openai_tools + if openai_tool_choice: + openai_payload["tool_choice"] = openai_tool_choice + if anthropic_request.metadata and anthropic_request.metadata.get("user_id"): + openai_payload["user"] = str(anthropic_request.metadata.get("user_id")) + + logger.debug( + "Prepared OpenAI request parameters for Forge", + extra={ + "request_id": request_id, + "data": {"params": openai_payload}, + }, + ) + + try: + # Use Forge's provider service to process the request + provider_service = await ProviderService.async_get_instance(user, db) + allowed_provider_names = await _get_allowed_provider_names(request, db) + + # 
Process request through Forge + response = await provider_service.process_request( + "chat/completions", + openai_payload, + allowed_provider_names=allowed_provider_names + ) + + # Handle streaming response + if inspect.isasyncgen(response): + logger.debug( + "Initiating streaming request to provider via Forge", + extra={"request_id": request_id}, + ) + + return StreamingResponse( + handle_anthropic_streaming_response_from_openai_stream( + response, + anthropic_request.model, + estimated_input_tokens, + request_id, + request.state.start_time_monotonic, + ), + media_type="text/event-stream", + headers={ + "Cache-Control": "no-cache", + "Connection": "keep-alive", + "X-Accel-Buffering": "no", + } + ) + + # Handle non-streaming response + else: + logger.debug( + "Received provider response via Forge", + extra={ + "request_id": request_id, + "data": {"response": response}, + }, + ) + + # Convert OpenAI response back to Anthropic format + anthropic_response = convert_openai_to_anthropic_response( + response, anthropic_request.model, request_id=request_id + ) + + duration_ms = (time.monotonic() - request.state.start_time_monotonic) * 1000 + logger.info( + "Claude Code non-streaming request completed successfully", + extra={ + "request_id": request_id, + "data": { + "status_code": 200, + "duration_ms": duration_ms, + "input_tokens": anthropic_response.usage.input_tokens, + "output_tokens": anthropic_response.usage.output_tokens, + "stop_reason": anthropic_response.stop_reason, + }, + }, + ) + + logger.debug( + "Prepared Claude Code response", + extra={ + "request_id": request_id, + "data": {"response": anthropic_response.model_dump(exclude_unset=True)}, + }, + ) + + return JSONResponse( + content=anthropic_response.model_dump(exclude_unset=True) + ) + + except ValueError as e: + return await _log_and_return_error_response( + request, + 400, + AnthropicErrorType.INVALID_REQUEST, + str(e), + e, + ) + except Exception as e: + # Handle provider API errors specifically + from app.exceptions.exceptions import ProviderAPIException + if isinstance(e, ProviderAPIException): + return await _log_and_return_error_response( + request, + e.error_code, + AnthropicErrorType.API_ERROR, + f"Provider error: {e.error_message}", + e, + ) + + # Log the actual exception details for debugging + error_msg = str(e).replace("{", "{{").replace("}", "}}") # Escape braces for logging + logger.error( + f"Detailed error in Claude Code processing: {type(e).__name__}: {error_msg}", + extra={"request_id": request_id}, + exc_info=e, + ) + return await _log_and_return_error_response( + request, + 500, + AnthropicErrorType.API_ERROR, + f"An unexpected error occurred while processing the request: {str(e)}", + e, + ) + + +@router.post( + "/messages/count_tokens", + response_model=TokenCountResponse, + tags=["Claude Code Utility"] +) +async def count_tokens_endpoint( + request: Request, + user: User = Depends(get_user_by_api_key), + db: AsyncSession = Depends(get_async_db), +) -> TokenCountResponse: + """Estimates token count for given Anthropic messages and system prompt.""" + request_id = str(uuid.uuid4()) + request.state.request_id = request_id + start_time_mono = time.monotonic() + + try: + body = await request.json() + count_request = TokenCountRequest.model_validate(body) + except json.JSONDecodeError as e: + raise HTTPException(status_code=400, detail="Invalid JSON body.") from e + except ValidationError as e: + raise HTTPException( + status_code=422, detail=f"Invalid request body: {e.errors()}" + ) from e + + token_count = 
count_tokens_for_anthropic_request( + messages=count_request.messages, + system=count_request.system, + model_name=count_request.model, + tools=count_request.tools, + request_id=request_id, + ) + + duration_ms = (time.monotonic() - start_time_mono) * 1000 + logger.info( + f"Counted {token_count} tokens", + extra={ + "request_id": request_id, + "data": { + "duration_ms": duration_ms, + "token_count": token_count, + "model": count_request.model, + }, + }, + ) + + return TokenCountResponse(input_tokens=token_count) \ No newline at end of file diff --git a/app/api/schemas/anthropic.py b/app/api/schemas/anthropic.py new file mode 100644 index 0000000..0aa6d11 --- /dev/null +++ b/app/api/schemas/anthropic.py @@ -0,0 +1,146 @@ +from typing import Any, Dict, List, Literal, Optional, Union +from pydantic import BaseModel, Field, field_validator + +from app.core.logger import get_logger + +logger = get_logger(name="anthropic_schemas") + +# Content Block Models +class ContentBlockText(BaseModel): + type: Literal["text"] + text: str + + +class ContentBlockImageSource(BaseModel): + type: str + media_type: str + data: str + + +class ContentBlockImage(BaseModel): + type: Literal["image"] + source: ContentBlockImageSource + + +class ContentBlockToolUse(BaseModel): + type: Literal["tool_use"] + id: str + name: str + input: Dict[str, Any] + + +class ContentBlockToolResult(BaseModel): + type: Literal["tool_result"] + tool_use_id: str + content: Union[str, List[Dict[str, Any]], List[Any]] + is_error: Optional[bool] = None + + +ContentBlock = Union[ + ContentBlockText, ContentBlockImage, ContentBlockToolUse, ContentBlockToolResult +] + + +# System Content +class SystemContent(BaseModel): + type: Literal["text"] + text: str + + +# Message Model +class AnthropicMessage(BaseModel): + role: Literal["user", "assistant"] + content: Union[str, List[ContentBlock]] + + +# Tool Models +class Tool(BaseModel): + name: str + description: Optional[str] = None + input_schema: Dict[str, Any] = Field(..., alias="input_schema") + + +class ToolChoice(BaseModel): + type: Literal["auto", "any", "tool"] + name: Optional[str] = None + + +# Main Request Model +class AnthropicMessagesRequest(BaseModel): + model: str + max_tokens: int + messages: List[AnthropicMessage] + system: Optional[Union[str, List[SystemContent]]] = None + stop_sequences: Optional[List[str]] = None + stream: Optional[bool] = False + temperature: Optional[float] = None + top_p: Optional[float] = None + top_k: Optional[int] = None + metadata: Optional[Dict[str, Any]] = None + tools: Optional[List[Tool]] = None + tool_choice: Optional[ToolChoice] = None + + @field_validator("top_k") + def check_top_k(cls, v: Optional[int]) -> Optional[int]: + if v is not None: + logger.warning( + f"Parameter 'top_k' provided by client but is not directly supported by the OpenAI Chat Completions API and will be ignored. 
Value: {v}" + ) + return v + + +# Token Count Request/Response +class TokenCountRequest(BaseModel): + model: str + messages: List[AnthropicMessage] + system: Optional[Union[str, List[SystemContent]]] = None + tools: Optional[List[Tool]] = None + + +class TokenCountResponse(BaseModel): + input_tokens: int + + +# Usage Model +class Usage(BaseModel): + input_tokens: int + output_tokens: int + + +# Error Models +class AnthropicErrorType: + INVALID_REQUEST = "invalid_request_error" + AUTHENTICATION = "authentication_error" + PERMISSION = "permission_error" + NOT_FOUND = "not_found_error" + RATE_LIMIT = "rate_limit_error" + API_ERROR = "api_error" + OVERLOADED = "overloaded_error" + REQUEST_TOO_LARGE = "request_too_large_error" + + +class AnthropicErrorDetail(BaseModel): + type: str + message: str + provider: Optional[str] = None + provider_message: Optional[str] = None + provider_code: Optional[Union[str, int]] = None + + +class AnthropicErrorResponse(BaseModel): + type: Literal["error"] = "error" + error: AnthropicErrorDetail + + +# Response Model +class AnthropicMessagesResponse(BaseModel): + id: str + type: Literal["message"] = "message" + role: Literal["assistant"] = "assistant" + model: str + content: List[ContentBlock] + stop_reason: Optional[ + Literal["end_turn", "max_tokens", "stop_sequence", "tool_use", "error"] + ] = None + stop_sequence: Optional[str] = None + usage: Usage \ No newline at end of file diff --git a/app/main.py b/app/main.py index 1bb2ea2..12a9f8c 100644 --- a/app/main.py +++ b/app/main.py @@ -12,6 +12,7 @@ api_keys, auth, health, + claude_code, provider_keys, proxy, stats, @@ -166,6 +167,8 @@ def create_app() -> FastAPI: v1_router.include_router(proxy.router, tags=["proxy"]) v1_router.include_router(stats.router, prefix="/stats", tags=["stats"]) v1_router.include_router(webhooks.router, prefix="/webhooks", tags=["webhooks"]) + # Claude Code compatible API endpoints + v1_router.include_router(claude_code.router, tags=["Claude Code API"]) # Health check routes (not versioned) app.include_router(health.router, tags=["health"]) diff --git a/app/utils/anthropic_converter.py b/app/utils/anthropic_converter.py new file mode 100644 index 0000000..ba55e60 --- /dev/null +++ b/app/utils/anthropic_converter.py @@ -0,0 +1,441 @@ +""" +Conversion utilities between Anthropic and OpenAI formats for Claude Code support. +Based on the conversion logic from convert.py but adapted for Forge's architecture. 
+""" + +import json +import time +import uuid +from typing import Any, Dict, List, Optional, Union + +import tiktoken + +from app.api.schemas.anthropic import ( + AnthropicMessage, + AnthropicMessagesResponse, + ContentBlock, + ContentBlockText, + ContentBlockImage, + ContentBlockToolUse, + ContentBlockToolResult, + SystemContent, + Tool, + ToolChoice, + Usage, +) +from app.api.schemas.openai import ChatMessage, OpenAIContentModel +from app.core.logger import get_logger + +logger = get_logger(name="anthropic_converter") + +# Token encoder cache +_token_encoder_cache: Dict[str, tiktoken.Encoding] = {} + + +def get_token_encoder(model_name: str = "gpt-4", request_id: Optional[str] = None) -> tiktoken.Encoding: + """Gets a tiktoken encoder, caching it for performance.""" + cache_key = "gpt-4" + if cache_key not in _token_encoder_cache: + try: + _token_encoder_cache[cache_key] = tiktoken.encoding_for_model(cache_key) + except Exception: + try: + _token_encoder_cache[cache_key] = tiktoken.get_encoding("cl100k_base") + logger.warning( + f"Could not load tiktoken encoder for '{cache_key}', using 'cl100k_base'. Token counts may be approximate." + ) + except Exception as e: + logger.error(f"Failed to load any tiktoken encoder. Token counting will be inaccurate: {e}") + + class DummyEncoder: + def encode(self, text: str) -> List[int]: + return list(range(len(text))) + + _token_encoder_cache[cache_key] = DummyEncoder() + return _token_encoder_cache[cache_key] + + +def count_tokens_for_anthropic_request( + messages: List[AnthropicMessage], + system: Optional[Union[str, List[SystemContent]]], + model_name: str, + tools: Optional[List[Tool]] = None, + request_id: Optional[str] = None, +) -> int: + """Count tokens for an Anthropic request.""" + enc = get_token_encoder(model_name, request_id) + total_tokens = 0 + + # Count system message tokens + if isinstance(system, str): + total_tokens += len(enc.encode(system)) + elif isinstance(system, list): + for block in system: + if isinstance(block, SystemContent) and block.type == "text": + total_tokens += len(enc.encode(block.text)) + + # Count message tokens + for msg in messages: + total_tokens += 4 # Base tokens per message + if msg.role: + total_tokens += len(enc.encode(msg.role)) + + if isinstance(msg.content, str): + total_tokens += len(enc.encode(msg.content)) + elif isinstance(msg.content, list): + for block in msg.content: + if isinstance(block, ContentBlockText): + total_tokens += len(enc.encode(block.text)) + elif isinstance(block, ContentBlockImage): + total_tokens += 768 # Estimated tokens for image + elif isinstance(block, ContentBlockToolUse): + total_tokens += len(enc.encode(block.name)) + try: + input_str = json.dumps(block.input) + total_tokens += len(enc.encode(input_str)) + except Exception: + logger.warning(f"Failed to serialize tool input for token counting: {block.name}") + elif isinstance(block, ContentBlockToolResult): + try: + content_str = "" + if isinstance(block.content, str): + content_str = block.content + elif isinstance(block.content, list): + for item in block.content: + if isinstance(item, dict) and item.get("type") == "text": + content_str += item.get("text", "") + else: + content_str += json.dumps(item) + else: + content_str = json.dumps(block.content) + total_tokens += len(enc.encode(content_str)) + except Exception: + logger.warning("Failed to serialize tool result for token counting") + + # Count tool tokens + if tools: + total_tokens += 2 + for tool in tools: + total_tokens += len(enc.encode(tool.name)) + if 
tool.description:
+                total_tokens += len(enc.encode(tool.description))
+            try:
+                schema_str = json.dumps(tool.input_schema)
+                total_tokens += len(enc.encode(schema_str))
+            except Exception:
+                logger.warning(f"Failed to serialize tool schema for token counting: {tool.name}")
+
+    logger.debug(f"Estimated {total_tokens} input tokens for model {model_name}")
+    return total_tokens
+
+
+def _serialize_tool_result_content_for_openai(
+    anthropic_tool_result_content: Union[str, List[Dict[str, Any]], List[Any]],
+    request_id: Optional[str],
+) -> str:
+    """Serializes Anthropic tool result content into a single string for OpenAI."""
+    if isinstance(anthropic_tool_result_content, str):
+        return anthropic_tool_result_content
+
+    if isinstance(anthropic_tool_result_content, list):
+        processed_parts = []
+        contains_non_text_block = False
+        for item in anthropic_tool_result_content:
+            if isinstance(item, dict) and item.get("type") == "text" and "text" in item:
+                processed_parts.append(str(item["text"]))
+            else:
+                try:
+                    processed_parts.append(json.dumps(item))
+                    contains_non_text_block = True
+                except TypeError:
+                    # Fall back to a placeholder for items json.dumps cannot handle
+                    processed_parts.append(f"<unserializable_item type='{type(item).__name__}'>")
+                    contains_non_text_block = True
+
+        result_str = "\n".join(processed_parts)
+        if contains_non_text_block:
+            logger.warning(
+                f"Tool result content list contained non-text or complex items; parts were JSON stringified. Preview: {result_str[:100]}"
+            )
+        return result_str
+
+    try:
+        return json.dumps(anthropic_tool_result_content)
+    except TypeError as e:
+        logger.warning(f"Failed to serialize tool result content to JSON: {e}")
+        return json.dumps({
+            "error": "Serialization failed",
+            "original_type": str(type(anthropic_tool_result_content)),
+        })
+
+
+def convert_anthropic_to_openai_messages(
+    anthropic_messages: List[AnthropicMessage],
+    anthropic_system: Optional[Union[str, List[SystemContent]]] = None,
+    request_id: Optional[str] = None,
+) -> List[Dict[str, Any]]:
+    """Convert Anthropic messages to OpenAI format."""
+    openai_messages: List[Dict[str, Any]] = []
+
+    # Handle system message
+    system_text_content = ""
+    if isinstance(anthropic_system, str):
+        system_text_content = anthropic_system
+    elif isinstance(anthropic_system, list):
+        system_texts = [
+            block.text
+            for block in anthropic_system
+            if isinstance(block, SystemContent) and block.type == "text"
+        ]
+        if len(system_texts) < len(anthropic_system):
+            logger.warning("Non-text content blocks in Anthropic system prompt were ignored")
+        system_text_content = "\n".join(system_texts)
+
+    if system_text_content:
+        openai_messages.append({"role": "system", "content": system_text_content})
+
+    # Convert messages
+    for i, msg in enumerate(anthropic_messages):
+        role = msg.role
+        content = msg.content
+
+        if isinstance(content, str):
+            openai_messages.append({"role": role, "content": content})
+            continue
+
+        if isinstance(content, list):
+            openai_parts_for_user_message = []
+            assistant_tool_calls = []
+            text_content_for_assistant = []
+
+            if not content and role == "user":
+                openai_messages.append({"role": "user", "content": ""})
+                continue
+            if not content and role == "assistant":
+                openai_messages.append({"role": "assistant", "content": ""})
+                continue
+
+            for block_idx, block in enumerate(content):
+                if isinstance(block, ContentBlockText):
+                    if role == "user":
+                        openai_parts_for_user_message.append({"type": "text", "text": block.text})
+                    elif role == "assistant":
+                        text_content_for_assistant.append(block.text)
+
+                elif isinstance(block, ContentBlockImage) and role == "user":
+                    if block.source.type == 
"base64": + openai_parts_for_user_message.append({ + "type": "image_url", + "image_url": { + "url": f"data:{block.source.media_type};base64,{block.source.data}" + }, + }) + else: + logger.warning( + f"Image block with source type '{block.source.type}' (expected 'base64') ignored in user message {i}" + ) + + elif isinstance(block, ContentBlockToolUse) and role == "assistant": + try: + args_str = json.dumps(block.input) + except Exception as e: + logger.error(f"Failed to serialize tool input for tool '{block.name}': {e}") + args_str = "{}" + + assistant_tool_calls.append({ + "id": block.id, + "type": "function", + "function": {"name": block.name, "arguments": args_str}, + }) + + elif isinstance(block, ContentBlockToolResult) and role == "user": + serialized_content = _serialize_tool_result_content_for_openai( + block.content, request_id + ) + openai_messages.append({ + "role": "tool", + "tool_call_id": block.tool_use_id, + "content": serialized_content, + }) + + # Handle user message parts + if role == "user" and openai_parts_for_user_message: + is_multimodal = any( + part["type"] == "image_url" for part in openai_parts_for_user_message + ) + if is_multimodal or len(openai_parts_for_user_message) > 1: + openai_messages.append({"role": "user", "content": openai_parts_for_user_message}) + elif ( + len(openai_parts_for_user_message) == 1 + and openai_parts_for_user_message[0]["type"] == "text" + ): + openai_messages.append({ + "role": "user", + "content": openai_parts_for_user_message[0]["text"], + }) + elif not openai_parts_for_user_message: + openai_messages.append({"role": "user", "content": ""}) + + # Handle assistant message + if role == "assistant": + assistant_text = "\n".join(filter(None, text_content_for_assistant)) + if assistant_text: + openai_messages.append({"role": "assistant", "content": assistant_text}) + + if assistant_tool_calls: + if ( + openai_messages + and openai_messages[-1]["role"] == "assistant" + and openai_messages[-1].get("content") + ): + openai_messages.append({ + "role": "assistant", + "content": None, + "tool_calls": assistant_tool_calls, + }) + elif ( + openai_messages + and openai_messages[-1]["role"] == "assistant" + and not openai_messages[-1].get("tool_calls") + ): + openai_messages[-1]["tool_calls"] = assistant_tool_calls + openai_messages[-1]["content"] = None + else: + openai_messages.append({ + "role": "assistant", + "content": None, + "tool_calls": assistant_tool_calls, + }) + + # Clean up messages + final_openai_messages = [] + for msg_dict in openai_messages: + if ( + msg_dict.get("role") == "assistant" + and msg_dict.get("tool_calls") + and msg_dict.get("content") is not None + ): + logger.warning("Corrected assistant message with tool_calls to have content: None") + msg_dict["content"] = None + final_openai_messages.append(msg_dict) + + return final_openai_messages + + +def convert_anthropic_tools_to_openai( + anthropic_tools: Optional[List[Tool]], +) -> Optional[List[Dict[str, Any]]]: + """Convert Anthropic tools to OpenAI format.""" + if not anthropic_tools: + return None + return [ + { + "type": "function", + "function": { + "name": t.name, + "description": t.description or "", + "parameters": t.input_schema, + }, + } + for t in anthropic_tools + ] + + +def convert_anthropic_tool_choice_to_openai( + anthropic_choice: Optional[ToolChoice], + request_id: Optional[str] = None, +) -> Optional[Union[str, Dict[str, Any]]]: + """Convert Anthropic tool choice to OpenAI format.""" + if not anthropic_choice: + return None + if anthropic_choice.type == 
"auto": + return "auto" + if anthropic_choice.type == "any": + logger.warning( + "Anthropic tool_choice type 'any' mapped to OpenAI 'auto'. Exact behavior might differ" + ) + return "auto" + if anthropic_choice.type == "tool" and anthropic_choice.name: + return {"type": "function", "function": {"name": anthropic_choice.name}} + + logger.warning(f"Unsupported Anthropic tool_choice: {anthropic_choice}. Defaulting to 'auto'") + return "auto" + + +def convert_openai_to_anthropic_response( + openai_response: Dict[str, Any], + original_anthropic_model_name: str, + request_id: Optional[str] = None, +) -> AnthropicMessagesResponse: + """Convert OpenAI response to Anthropic format.""" + anthropic_content: List[ContentBlock] = [] + anthropic_stop_reason = None + + stop_reason_map = { + "stop": "end_turn", + "length": "max_tokens", + "tool_calls": "tool_use", + "function_call": "tool_use", + "content_filter": "stop_sequence", + None: "end_turn", + } + + if openai_response.get("choices"): + choice = openai_response["choices"][0] + message = choice.get("message", {}) + finish_reason = choice.get("finish_reason") + + anthropic_stop_reason = stop_reason_map.get(finish_reason, "end_turn") + + # Handle text content + if message.get("content"): + anthropic_content.append(ContentBlockText(type="text", text=message["content"])) + + # Handle tool calls + if message.get("tool_calls"): + for call in message["tool_calls"]: + if call.get("type") == "function": + tool_input_dict: Dict[str, Any] = {} + try: + parsed_input = json.loads(call["function"]["arguments"]) + if isinstance(parsed_input, dict): + tool_input_dict = parsed_input + else: + tool_input_dict = {"value": parsed_input} + logger.warning( + f"OpenAI tool arguments for '{call['function']['name']}' parsed to non-dict type. Wrapped in 'value'" + ) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON arguments for tool '{call['function']['name']}': {e}") + tool_input_dict = {"error_parsing_arguments": call["function"]["arguments"]} + + anthropic_content.append(ContentBlockToolUse( + type="tool_use", + id=call["id"], + name=call["function"]["name"], + input=tool_input_dict, + )) + if finish_reason == "tool_calls": + anthropic_stop_reason = "tool_use" + + if not anthropic_content: + anthropic_content.append(ContentBlockText(type="text", text="")) + + usage = openai_response.get("usage", {}) + anthropic_usage = Usage( + input_tokens=usage.get("prompt_tokens", 0), + output_tokens=usage.get("completion_tokens", 0), + ) + + response_id = openai_response.get("id", f"msg_{request_id}_completed") + if not response_id.startswith("msg_"): + response_id = f"msg_{response_id}" + + return AnthropicMessagesResponse( + id=response_id, + type="message", + role="assistant", + model=original_anthropic_model_name, + content=anthropic_content, + stop_reason=anthropic_stop_reason, + usage=anthropic_usage, + ) \ No newline at end of file diff --git a/app/utils/anthropic_streaming.py b/app/utils/anthropic_streaming.py new file mode 100644 index 0000000..0fe7ebe --- /dev/null +++ b/app/utils/anthropic_streaming.py @@ -0,0 +1,263 @@ +""" +Streaming utilities for converting OpenAI streaming responses to Anthropic SSE format. +Based on the streaming logic from convert.py but adapted for Forge's architecture. 
+""" + +import json +import time +import uuid +from typing import Any, AsyncGenerator, Dict, List, Optional, Union + +import tiktoken + +from app.api.schemas.anthropic import ContentBlockText, ContentBlockToolUse +from app.core.logger import get_logger + +logger = get_logger(name="anthropic_streaming") + + +async def handle_anthropic_streaming_response_from_openai_stream( + openai_stream: AsyncGenerator[bytes, None], + original_anthropic_model_name: str, + estimated_input_tokens: int, + request_id: str, + start_time_mono: float, +) -> AsyncGenerator[str, None]: + """ + Consumes an OpenAI stream and yields Anthropic-compatible SSE events. + Handles content block indexing for mixed text/tool_use correctly. + """ + + anthropic_message_id = f"msg_stream_{request_id}_{uuid.uuid4().hex[:8]}" + + next_anthropic_block_idx = 0 + text_block_anthropic_idx: Optional[int] = None + + openai_tool_idx_to_anthropic_block_idx: Dict[int, int] = {} + tool_states: Dict[int, Dict[str, Any]] = {} + sent_tool_block_starts: set[int] = set() + + output_token_count = 0 + final_anthropic_stop_reason = None + + enc = tiktoken.encoding_for_model("gpt-4") # Use gpt-4 encoding as approximation + + openai_to_anthropic_stop_reason_map = { + "stop": "end_turn", + "length": "max_tokens", + "tool_calls": "tool_use", + "function_call": "tool_use", + "content_filter": "stop_sequence", + None: None, + } + + stream_status_code = 200 + stream_final_message = "Streaming request completed successfully." + + try: + # Send initial message_start event + message_start_event_data = { + "type": "message_start", + "message": { + "id": anthropic_message_id, + "type": "message", + "role": "assistant", + "model": original_anthropic_model_name, + "content": [], + "stop_reason": None, + "stop_sequence": None, + "usage": {"input_tokens": estimated_input_tokens, "output_tokens": 0}, + }, + } + yield f"event: message_start\ndata: {json.dumps(message_start_event_data)}\n\n" + yield f"event: ping\ndata: {json.dumps({'type': 'ping'})}\n\n" + + # Process the OpenAI stream + async for chunk_bytes in openai_stream: + try: + chunk_str = chunk_bytes.decode('utf-8') + if chunk_str.strip() == "data: [DONE]": + break + + if not chunk_str.startswith("data: "): + continue + + data_str = chunk_str[6:].strip() # Remove "data: " prefix + if not data_str: + continue + + chunk_data = json.loads(data_str) + + if not chunk_data.get("choices"): + continue + + delta = chunk_data["choices"][0].get("delta", {}) + openai_finish_reason = chunk_data["choices"][0].get("finish_reason") + + # Handle content delta + if delta.get("content"): + content = delta["content"] + output_token_count += len(enc.encode(content)) + + if text_block_anthropic_idx is None: + text_block_anthropic_idx = next_anthropic_block_idx + next_anthropic_block_idx += 1 + + start_text_event = { + "type": "content_block_start", + "index": text_block_anthropic_idx, + "content_block": {"type": "text", "text": ""}, + } + yield f"event: content_block_start\ndata: {json.dumps(start_text_event)}\n\n" + + text_delta_event = { + "type": "content_block_delta", + "index": text_block_anthropic_idx, + "delta": {"type": "text_delta", "text": content}, + } + yield f"event: content_block_delta\ndata: {json.dumps(text_delta_event)}\n\n" + + # Handle tool calls delta + if delta.get("tool_calls"): + for tool_delta in delta["tool_calls"]: + openai_tc_idx = tool_delta.get("index", 0) + + if openai_tc_idx not in openai_tool_idx_to_anthropic_block_idx: + current_anthropic_tool_block_idx = next_anthropic_block_idx + 
next_anthropic_block_idx += 1 + openai_tool_idx_to_anthropic_block_idx[openai_tc_idx] = current_anthropic_tool_block_idx + + tool_states[current_anthropic_tool_block_idx] = { + "id": tool_delta.get("id") or f"tool_ph_{request_id}_{current_anthropic_tool_block_idx}", + "name": "", + "arguments_buffer": "", + } + else: + current_anthropic_tool_block_idx = openai_tool_idx_to_anthropic_block_idx[openai_tc_idx] + + tool_state = tool_states[current_anthropic_tool_block_idx] + + # Update tool ID if provided + if tool_delta.get("id") and tool_state["id"].startswith("tool_ph_"): + tool_state["id"] = tool_delta["id"] + + # Update function details + if tool_delta.get("function"): + if tool_delta["function"].get("name"): + tool_state["name"] = tool_delta["function"]["name"] + if tool_delta["function"].get("arguments"): + args_chunk = tool_delta["function"]["arguments"] + tool_state["arguments_buffer"] += args_chunk + output_token_count += len(enc.encode(args_chunk)) + + # Send content_block_start for tools when we have enough info + if ( + current_anthropic_tool_block_idx not in sent_tool_block_starts + and tool_state["id"] + and not tool_state["id"].startswith("tool_ph_") + and tool_state["name"] + ): + start_tool_event = { + "type": "content_block_start", + "index": current_anthropic_tool_block_idx, + "content_block": { + "type": "tool_use", + "id": tool_state["id"], + "name": tool_state["name"], + "input": {}, + }, + } + yield f"event: content_block_start\ndata: {json.dumps(start_tool_event)}\n\n" + sent_tool_block_starts.add(current_anthropic_tool_block_idx) + + # Send delta for tool arguments + if ( + tool_delta.get("function", {}).get("arguments") + and current_anthropic_tool_block_idx in sent_tool_block_starts + ): + args_delta_event = { + "type": "content_block_delta", + "index": current_anthropic_tool_block_idx, + "delta": { + "type": "input_json_delta", + "partial_json": tool_delta["function"]["arguments"], + }, + } + yield f"event: content_block_delta\ndata: {json.dumps(args_delta_event)}\n\n" + + # Handle finish reason + if openai_finish_reason: + final_anthropic_stop_reason = openai_to_anthropic_stop_reason_map.get( + openai_finish_reason, "end_turn" + ) + if openai_finish_reason == "tool_calls": + final_anthropic_stop_reason = "tool_use" + break + + except json.JSONDecodeError: + logger.warning(f"Failed to parse chunk: {chunk_str}") + continue + except Exception as e: + logger.error(f"Error processing stream chunk: {e}") + continue + + # Send content_block_stop events + if text_block_anthropic_idx is not None: + yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': text_block_anthropic_idx})}\n\n" + + for anthropic_tool_idx in sent_tool_block_starts: + tool_state_to_finalize = tool_states.get(anthropic_tool_idx) + if tool_state_to_finalize: + try: + json.loads(tool_state_to_finalize["arguments_buffer"]) + except json.JSONDecodeError: + logger.warning( + f"Buffered arguments for tool '{tool_state_to_finalize.get('name')}' did not form valid JSON" + ) + yield f"event: content_block_stop\ndata: {json.dumps({'type': 'content_block_stop', 'index': anthropic_tool_idx})}\n\n" + + if final_anthropic_stop_reason is None: + final_anthropic_stop_reason = "end_turn" + + # Send final events + message_delta_event = { + "type": "message_delta", + "delta": { + "stop_reason": final_anthropic_stop_reason, + "stop_sequence": None, + }, + "usage": {"output_tokens": output_token_count}, + } + yield f"event: message_delta\ndata: {json.dumps(message_delta_event)}\n\n" + 
yield f"event: message_stop\ndata: {json.dumps({'type': 'message_stop'})}\n\n" + + except Exception as e: + stream_status_code = 500 + final_anthropic_stop_reason = "error" + + logger.error(f"Error during OpenAI stream conversion: {e}") + + # Send error event + error_event = { + "type": "error", + "error": { + "type": "api_error", + "message": f"Stream processing error: {str(e)}", + } + } + yield f"event: error\ndata: {json.dumps(error_event)}\n\n" + + finally: + duration_ms = (time.monotonic() - start_time_mono) * 1000 + log_data = { + "status_code": stream_status_code, + "duration_ms": duration_ms, + "input_tokens": estimated_input_tokens, + "output_tokens": output_token_count, + "stop_reason": final_anthropic_stop_reason, + } + + if stream_status_code == 200: + logger.info(f"Streaming request completed successfully: {log_data}") + else: + logger.error(f"Streaming request failed: {log_data}") \ No newline at end of file diff --git a/docs/claude_code_support.md b/docs/claude_code_support.md new file mode 100644 index 0000000..2d7b6a8 --- /dev/null +++ b/docs/claude_code_support.md @@ -0,0 +1,293 @@ +# Claude Code Support in Forge + +Forge now supports Claude Code compatible API endpoints, allowing you to use Anthropic's message format while leveraging Forge's provider management and routing capabilities. + +## Overview + +The Claude Code support enables: +- **Anthropic Format**: Send requests in Anthropic's native message format +- **Provider Agnostic**: Route to any provider supported by Forge (OpenAI, Anthropic, etc.) +- **Seamless Conversion**: Automatic conversion between Anthropic and OpenAI formats +- **Full Feature Support**: Streaming, tools, token counting, and all Anthropic features +- **Forge Integration**: Leverage Forge's API key management, provider routing, and caching + +## Endpoints + +### POST `/v1/messages` + +Main endpoint for Claude Code message completions. + +**Request Format (Anthropic Compatible):** +```json +{ + "model": "claude-3-haiku-20240307", + "max_tokens": 1000, + "messages": [ + { + "role": "user", + "content": "Hello, how are you?" + } + ], + "system": "You are a helpful assistant.", + "temperature": 0.7, + "stream": false +} +``` + +**Response Format (Anthropic Compatible):** +```json +{ + "id": "msg_01ABC123DEF456", + "type": "message", + "role": "assistant", + "model": "claude-3-haiku-20240307", + "content": [ + { + "type": "text", + "text": "Hello! I'm doing well, thank you for asking." + } + ], + "stop_reason": "end_turn", + "usage": { + "input_tokens": 10, + "output_tokens": 12 + } +} +``` + +### POST `/v1/messages/count_tokens` + +Estimates token count for Anthropic messages. + +**Request:** +```json +{ + "model": "claude-3-haiku-20240307", + "messages": [ + { + "role": "user", + "content": "Count tokens for this message." + } + ], + "system": "You are a helpful assistant." +} +``` + +**Response:** +```json +{ + "input_tokens": 15 +} +``` + +## Features + +### 1. Non-Streaming Requests + +Standard request-response pattern with complete message returned at once. + +```python +import requests + +response = requests.post( + "http://localhost:8000/v1/messages", + headers={"Authorization": "Bearer forge-your-api-key"}, + json={ + "model": "claude-3-haiku-20240307", + "max_tokens": 100, + "messages": [{"role": "user", "content": "Hello!"}] + } +) +``` + +### 2. Streaming Requests + +Server-Sent Events (SSE) for real-time response streaming. 
+
+```python
+response = requests.post(
+    "http://localhost:8000/v1/messages",
+    headers={"Authorization": "Bearer forge-your-api-key"},
+    json={
+        "model": "claude-3-haiku-20240307",
+        "max_tokens": 100,
+        "messages": [{"role": "user", "content": "Tell me a story"}],
+        "stream": True
+    },
+    stream=True
+)
+
+for line in response.iter_lines():
+    if line.startswith(b'data: '):
+        # Process SSE events
+        pass
+```
+
+### 3. Tool Usage
+
+Support for function calling with tools.
+
+```json
+{
+  "model": "claude-3-haiku-20240307",
+  "max_tokens": 200,
+  "messages": [
+    {
+      "role": "user",
+      "content": "What's the weather in NYC?"
+    }
+  ],
+  "tools": [
+    {
+      "name": "get_weather",
+      "description": "Get weather for a location",
+      "input_schema": {
+        "type": "object",
+        "properties": {
+          "location": {"type": "string"}
+        },
+        "required": ["location"]
+      }
+    }
+  ],
+  "tool_choice": {"type": "auto"}
+}
+```
+
+### 4. Multimodal Support
+
+Support for images in user messages.
+
+```json
+{
+  "model": "claude-3-haiku-20240307",
+  "max_tokens": 100,
+  "messages": [
+    {
+      "role": "user",
+      "content": [
+        {
+          "type": "text",
+          "text": "What's in this image?"
+        },
+        {
+          "type": "image",
+          "source": {
+            "type": "base64",
+            "media_type": "image/jpeg",
+            "data": "/9j/4AAQSkZJRgABAQEA..."
+          }
+        }
+      ]
+    }
+  ]
+}
+```
+
+## How It Works
+
+1. **Request Reception**: Claude Code endpoint receives Anthropic format request
+2. **Format Conversion**: Request is converted to OpenAI format
+3. **Forge Routing**: Converted request is routed through Forge's provider system
+4. **Provider Processing**: Request is sent to the appropriate provider (OpenAI, Anthropic, etc.)
+5. **Response Conversion**: Provider response is converted back to Anthropic format
+6. **Client Response**: Anthropic-formatted response is returned to client
+
+## Configuration
+
+### API Keys
+
+Use your existing Forge API key for authentication:
+
+```bash
+curl -X POST "http://localhost:8000/v1/messages" \
+  -H "Authorization: Bearer forge-your-api-key" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-3-haiku-20240307",
+    "max_tokens": 100,
+    "messages": [{"role": "user", "content": "Hello!"}]
+  }'
+```
+
+### Model Routing
+
+Models are routed through Forge's provider configuration:
+- `claude-3-haiku-20240307` → Routed to configured Claude provider
+- `gpt-4` → Routed to configured OpenAI provider
+- Custom model mappings work as configured in Forge
+
+### Provider Scope
+
+Claude Code endpoints respect Forge API key provider scopes:
+- If your Forge key is scoped to specific providers, only those providers will be accessible
+- Unrestricted keys can access all configured providers
+
+## Error Handling
+
+Errors are returned in Anthropic format:
+
+```json
+{
+  "type": "error",
+  "error": {
+    "type": "invalid_request_error",
+    "message": "Missing required field: max_tokens"
+  }
+}
+```
+
+Error types match Anthropic's error taxonomy:
+- `invalid_request_error`: Invalid request format
+- `authentication_error`: Invalid API key
+- `rate_limit_error`: Rate limit exceeded
+- `api_error`: Provider API error
+
+## Supported Parameters
+
+### Required
+- `model`: Model name (routed through Forge)
+- `max_tokens`: Maximum tokens to generate
+- `messages`: Array of message objects
+
+### Optional
+- `system`: System prompt (string or array)
+- `temperature`: Sampling temperature (0.0 to 1.0)
+- `top_p`: Nucleus sampling parameter
+- `top_k`: Top-k sampling (logged but ignored - not supported by OpenAI)
+- `stop_sequences`: Array of stop sequences
+- `stream`: Enable streaming (default: false)
+- `tools`: Array of tool definitions
+- `tool_choice`: Tool choice strategy
+- `metadata`: Request metadata
+
+## Examples
+
+See `examples/claude_code_example.py` for comprehensive usage examples including:
+- Non-streaming requests
+- Streaming requests
+- Token counting
+- Tool usage
+- Error handling
+
+## Migration from Direct Anthropic API
+
+To migrate from direct Anthropic API usage:
+
+1. **Change Base URL**: Update from `https://api.anthropic.com/v1` to your Forge instance
+2. **Update Authentication**: Use your Forge API key instead of Anthropic API key
+3. **Keep Request Format**: No changes needed to request/response format
+4. **Benefit from Forge**: Gain provider flexibility, key management, and routing
+
+## Performance Considerations
+
+- **Conversion Overhead**: Minimal latency added for format conversion
+- **Streaming Efficiency**: SSE events are converted in real-time
+- **Token Counting**: Uses tiktoken for approximate token estimation
+- **Caching**: Leverages Forge's existing caching infrastructure
+
+## Limitations
+
+- `top_k` parameter is accepted but ignored (OpenAI doesn't support it)
+- Some advanced Anthropic features may not be available depending on target provider
+- Provider-specific model capabilities apply (e.g., vision support)
\ No newline at end of file
diff --git a/examples/claude_code_example.py b/examples/claude_code_example.py
new file mode 100644
index 0000000..9ae6a11
--- /dev/null
+++ b/examples/claude_code_example.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+"""
+Example usage of the Claude Code compatible API endpoints in Forge.
+
+This example demonstrates how to make requests to the new Claude Code endpoints
+that accept Anthropic format and route through Forge's provider infrastructure.
+"""
+
+import json
+import requests
+
+# Configuration
+FORGE_BASE_URL = "http://localhost:8000/v1"
+FORGE_API_KEY = "forge-your-api-key-here"  # Replace with your actual Forge API key
+
+headers = {
+    "Authorization": f"Bearer {FORGE_API_KEY}",
+    "Content-Type": "application/json",
+}
+
+
+def test_non_streaming_request():
+    """Test a non-streaming Claude Code request."""
+    print("🧪 Testing non-streaming Claude Code request...")
+
+    request_data = {
+        "model": "claude-3-haiku-20240307",  # This will be routed through Forge
+        "max_tokens": 100,
+        "messages": [
+            {
+                "role": "user",
+                "content": "Hello! Can you help me understand how Forge works?"
+            }
+        ],
+        "system": "You are a helpful assistant explaining the Forge AI middleware service.",
+        "temperature": 0.7
+    }
+
+    response = requests.post(
+        f"{FORGE_BASE_URL}/messages",
+        headers=headers,
+        json=request_data
+    )
+
+    if response.status_code == 200:
+        result = response.json()
+        print("✅ Success!")
+        print(f"Model: {result['model']}")
+        print(f"Content: {result['content'][0]['text']}")
+        print(f"Usage: {result['usage']}")
+    else:
+        print(f"❌ Error: {response.status_code}")
+        print(response.text)
+
+
+def test_streaming_request():
+    """Test a streaming Claude Code request."""
+    print("\n🧪 Testing streaming Claude Code request...")
+
+    request_data = {
+        "model": "claude-3-haiku-20240307",
+        "max_tokens": 100,
+        "messages": [
+            {
+                "role": "user",
+                "content": "Tell me a short story about AI."
+            }
+        ],
+        "stream": True,
+        "temperature": 0.8
+    }
+
+    response = requests.post(
+        f"{FORGE_BASE_URL}/messages",
+        headers=headers,
+        json=request_data,
+        stream=True
+    )
+
+    if response.status_code == 200:
+        print("✅ Streaming response:")
+        for line in response.iter_lines():
+            if line:
+                line_str = line.decode('utf-8')
+                if line_str.startswith('data: '):
+                    data_str = line_str[6:]  # Remove 'data: ' prefix
+                    if data_str != '[DONE]':
+                        try:
+                            event_data = json.loads(data_str)
+                            if event_data.get('type') == 'content_block_delta':
+                                delta = event_data.get('delta', {})
+                                if delta.get('type') == 'text_delta':
+                                    print(delta.get('text', ''), end='', flush=True)
+                        except json.JSONDecodeError:
+                            continue
+        print("\n✅ Streaming completed!")
+    else:
+        print(f"❌ Error: {response.status_code}")
+        print(response.text)
+
+
+def test_token_counting():
+    """Test the token counting endpoint."""
+    print("\n🧪 Testing token counting...")
+
+    request_data = {
+        "model": "claude-3-haiku-20240307",
+        "messages": [
+            {
+                "role": "user",
+                "content": "This is a test message to count tokens."
+            },
+            {
+                "role": "assistant",
+                "content": "I understand. I'll help you count the tokens in this conversation."
+            }
+        ],
+        "system": "You are a helpful assistant."
+    }
+
+    response = requests.post(
+        f"{FORGE_BASE_URL}/messages/count_tokens",
+        headers=headers,
+        json=request_data
+    )
+
+    if response.status_code == 200:
+        result = response.json()
+        print(f"✅ Token count: {result['input_tokens']}")
+    else:
+        print(f"❌ Error: {response.status_code}")
+        print(response.text)
+
+
+def test_tool_usage():
+    """Test Claude Code with tool usage."""
+    print("\n🧪 Testing tool usage...")
+
+    request_data = {
+        "model": "claude-3-haiku-20240307",
+        "max_tokens": 200,
+        "messages": [
+            {
+                "role": "user",
+                "content": "What's the weather like in San Francisco?"
+            }
+        ],
+        "tools": [
+            {
+                "name": "get_weather",
+                "description": "Get the current weather for a location",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The location to get weather for"
+                        }
+                    },
+                    "required": ["location"]
+                }
+            }
+        ],
+        "tool_choice": {"type": "auto"}
+    }
+
+    response = requests.post(
+        f"{FORGE_BASE_URL}/messages",
+        headers=headers,
+        json=request_data
+    )
+
+    if response.status_code == 200:
+        result = response.json()
+        print("✅ Tool usage response:")
+        for content in result['content']:
+            if content['type'] == 'text':
+                print(f"Text: {content['text']}")
+            elif content['type'] == 'tool_use':
+                print(f"Tool: {content['name']}")
+                print(f"Input: {content['input']}")
+    else:
+        print(f"❌ Error: {response.status_code}")
+        print(response.text)
+
+
+if __name__ == "__main__":
+    print("🚀 Claude Code API Examples for Forge")
+    print("=" * 50)
+
+    # Test various scenarios
+    test_non_streaming_request()
+    test_streaming_request()
+    test_token_counting()
+    test_tool_usage()
+
+    print("\n✨ All tests completed!")
+    print("\nNote: Make sure to:")
+    print("1. Replace FORGE_API_KEY with your actual Forge API key")
+    print("2. Have appropriate provider API keys configured in Forge")
+    print("3. 
Start the Forge server before running these examples")
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 5075668..c588f65 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,6 +35,7 @@ dependencies = [
     "redis>=4.6.0",  # sync & async clients used by shared cache
     "loguru>=0.7.0",
     "aiobotocore~=2.0",
+    "tiktoken>=0.5.0",  # for token counting in Claude Code support
 ]

 [project.optional-dependencies]
diff --git a/uv.lock b/uv.lock
index 32eea87..da652f5 100644
--- a/uv.lock
+++ b/uv.lock
@@ -534,6 +534,7 @@ dependencies = [
     { name = "requests" },
     { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "svix" },
+    { name = "tiktoken" },
     { name = "uvicorn" },
 ]

@@ -582,6 +583,7 @@ requires-dist = [
     { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.2.0" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.0" },
     { name = "svix", specifier = ">=1.13.0" },
+    { name = "tiktoken", specifier = ">=0.5.0" },
     { name = "uvicorn", specifier = ">=0.22.0" },
 ]
 provides-extras = ["dev"]
@@ -1283,6 +1285,44 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/13/67/e60968d3b0e077495a8fee89cf3f2373db98e528288a48f1ee44967f6e8c/redis-6.2.0-py3-none-any.whl", hash = "sha256:c8ddf316ee0aab65f04a11229e94a64b2618451dab7a67cb2f77eb799d872d5e", size = 278659 },
 ]

+[[package]]
+name = "regex"
+version = "2024.11.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781 },
+    { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455 },
+    { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759 },
+    { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976 },
+    { url = "https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077 },
+    { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160 },
+    { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896 },
+    { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997 },
+    { url = "https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725 },
+    { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", size = 789481 },
+    { url = "https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896 },
+    { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138 },
+    { url = "https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692 },
+    { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135 },
+    { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567 },
+    { url = "https://files.pythonhosted.org/packages/90/73/bcb0e36614601016552fa9344544a3a2ae1809dc1401b100eab02e772e1f/regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84", size = 483525 },
+    { url = "https://files.pythonhosted.org/packages/0f/3f/f1a082a46b31e25291d830b369b6b0c5576a6f7fb89d3053a354c24b8a83/regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4", size = 288324 },
+    { url = "https://files.pythonhosted.org/packages/09/c9/4e68181a4a652fb3ef5099e077faf4fd2a694ea6e0f806a7737aff9e758a/regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0", size = 284617 },
+    { url = "https://files.pythonhosted.org/packages/fc/fd/37868b75eaf63843165f1d2122ca6cb94bfc0271e4428cf58c0616786dce/regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0", size = 795023 },
+    { url = "https://files.pythonhosted.org/packages/c4/7c/d4cd9c528502a3dedb5c13c146e7a7a539a3853dc20209c8e75d9ba9d1b2/regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7", size = 833072 },
+    { url = "https://files.pythonhosted.org/packages/4f/db/46f563a08f969159c5a0f0e722260568425363bea43bb7ae370becb66a67/regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7", size = 823130 },
+    { url = "https://files.pythonhosted.org/packages/db/60/1eeca2074f5b87df394fccaa432ae3fc06c9c9bfa97c5051aed70e6e00c2/regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c", size = 796857 },
+    { url = "https://files.pythonhosted.org/packages/10/db/ac718a08fcee981554d2f7bb8402f1faa7e868c1345c16ab1ebec54b0d7b/regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3", size = 784006 },
+    { url = "https://files.pythonhosted.org/packages/c2/41/7da3fe70216cea93144bf12da2b87367590bcf07db97604edeea55dac9ad/regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07", size = 781650 },
+    { url = "https://files.pythonhosted.org/packages/a7/d5/880921ee4eec393a4752e6ab9f0fe28009435417c3102fc413f3fe81c4e5/regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e", size = 789545 },
+    { url = "https://files.pythonhosted.org/packages/dc/96/53770115e507081122beca8899ab7f5ae28ae790bfcc82b5e38976df6a77/regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6", size = 853045 },
+    { url = "https://files.pythonhosted.org/packages/31/d3/1372add5251cc2d44b451bd94f43b2ec78e15a6e82bff6a290ef9fd8f00a/regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4", size = 860182 },
+    { url = "https://files.pythonhosted.org/packages/ed/e3/c446a64984ea9f69982ba1a69d4658d5014bc7a0ea468a07e1a1265db6e2/regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d", size = 787733 },
+    { url = "https://files.pythonhosted.org/packages/2b/f1/e40c8373e3480e4f29f2692bd21b3e05f296d3afebc7e5dcf21b9756ca1c/regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff", size = 262122 },
+    { url = "https://files.pythonhosted.org/packages/45/94/bc295babb3062a731f52621cdc992d123111282e291abaf23faa413443ea/regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a", size = 273545 },
+]
+
 [[package]]
 name = "requests"
 version = "2.32.3"
@@ -1429,6 +1469,30 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/56/f3/5633e45bc01825c4464b6b1e98e05052e532139e827c4ea8c54f5eafb022/svix-1.67.0-py3-none-any.whl", hash = "sha256:4f195bea0ac7c33c54f29bb486e3814e9c50123be303bfba5064d1e607274668", size = 95009 },
 ]

+[[package]]
+name = "tiktoken"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "regex" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073 },
+    { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075 },
+    { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754 },
+    { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678 },
+    { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283 },
+    { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897 },
+    { url = "https://files.pythonhosted.org/packages/7a/11/09d936d37f49f4f494ffe660af44acd2d99eb2429d60a57c71318af214e0/tiktoken-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b0e8e05a26eda1249e824156d537015480af7ae222ccb798e5234ae0285dbdb", size = 1064919 },
+    { url = "https://files.pythonhosted.org/packages/80/0e/f38ba35713edb8d4197ae602e80837d574244ced7fb1b6070b31c29816e0/tiktoken-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:27d457f096f87685195eea0165a1807fae87b97b2161fe8c9b1df5bd74ca6f63", size = 1007877 },
+    { url = "https://files.pythonhosted.org/packages/fe/82/9197f77421e2a01373e27a79dd36efdd99e6b4115746ecc553318ecafbf0/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cf8ded49cddf825390e36dd1ad35cd49589e8161fdcb52aa25f0583e90a3e01", size = 1140095 },
+    { url = "https://files.pythonhosted.org/packages/f2/bb/4513da71cac187383541facd0291c4572b03ec23c561de5811781bbd988f/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc156cb314119a8bb9748257a2eaebd5cc0753b6cb491d26694ed42fc7cb3139", size = 1195649 },
+    { url = "https://files.pythonhosted.org/packages/fa/5c/74e4c137530dd8504e97e3a41729b1103a4ac29036cbfd3250b11fd29451/tiktoken-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cd69372e8c9dd761f0ab873112aba55a0e3e506332dd9f7522ca466e817b1b7a", size = 1258465 },
+    { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669 },
+]
+
 [[package]]
 name = "types-deprecated"
 version = "1.2.15.20250304"