
Implement Multi-provider LLM Abstractions #17

Overview

Implement a unified interface for multiple LLM providers beyond OpenAI, including Anthropic, Cohere, Google, and local models.

Description

Currently Desiru primarily supports OpenAI through RAIX. We need a unified abstraction layer that supports multiple providers while maintaining consistent interfaces and behavior.

Key Features to Implement

  • Unified LLM interface
  • Provider-specific adapters
  • Automatic fallback mechanisms
  • Token counting across providers
  • Cost tracking
  • Model capability detection
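
Regardless of provider, callers should see a single surface. A rough sketch of the end state (class and method names mirror the interface proposed below):

openai = Desiru::LLM::OpenAI.new(model: "gpt-4")
claude = Desiru::LLM::Anthropic.new(model: "claude-3-sonnet")

[openai, claude].each do |llm|
  puts llm.generate(prompt: "Summarize Ruby in one sentence.")
end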

Implementation Requirements

1. Base LLM Interface

module Desiru
  module LLM
    class Base
      attr_reader :model, :provider, :config
      
      def initialize(model:, **config)
        @model = model
        @provider = self.class.name.split('::').last.downcase
        @config = config
        validate_configuration!
      end
      
      # Core interface
      def generate(prompt:, **options)
        raise NotImplementedError
      end
      
      def generate_streaming(prompt:, **options, &block)
        raise NotImplementedError
      end
      
      def count_tokens(text)
        raise NotImplementedError
      end
      
      def embed(text)
        raise NotImplementedError
      end
      
      # Capability detection
      def supports_streaming?
        false
      end
      
      def supports_functions?
        false
      end
      
      def supports_vision?
        false
      end
      
      def max_tokens
        raise NotImplementedError
      end
      
      # Cost calculation; `pricing` is a class-level, per-model price table
      # (USD per 1K tokens) that each provider subclass defines
      def calculate_cost(input_tokens:, output_tokens:)
        pricing = self.class.pricing[@model]
        return 0 unless pricing
        
        (input_tokens * pricing[:input] + output_tokens * pricing[:output]) / 1000.0
      end
      
      private
      
      # Default hook; subclasses override to check for required API keys, etc.
      def validate_configuration!
        true
      end
    end
  end
end
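
The `calculate_cost` helper assumes each adapter publishes a class-level `pricing` table. A minimal sketch for the OpenAI adapter (the rates shown are placeholders, not current prices):

module Desiru::LLM
  class OpenAI < Base
    # Placeholder USD-per-1K-token rates; real values would be maintained
    # alongside the MODELS table below
    def self.pricing
      {
        "gpt-4" => { input: 0.03, output: 0.06 },
        "gpt-3.5-turbo" => { input: 0.0005, output: 0.0015 }
      }
    end
  end
end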

2. Provider Implementations

# OpenAI Adapter
module Desiru::LLM
  class OpenAI < Base
    MODELS = {
      "gpt-4" => { max_tokens: 8192, supports_functions: true },
      "gpt-4-turbo" => { max_tokens: 128000, supports_functions: true },
      "gpt-3.5-turbo" => { max_tokens: 4096, supports_functions: true }
    }
    
    def initialize(model: "gpt-3.5-turbo", api_key: nil, **config)
      super
      @client = ::OpenAI::Client.new(access_token: api_key || ENV['OPENAI_API_KEY'])
    end
    
    # format_messages / format_response: shared helpers (not shown) that
    # normalize prompts and responses to a provider-agnostic shape
    def generate(prompt:, temperature: 0.7, max_tokens: nil, **options)
      response = @client.chat(
        parameters: {
          model: @model,
          messages: format_messages(prompt),
          temperature: temperature,
          max_tokens: max_tokens,
          **options
        }
      )
      
      format_response(response)
    end
    
    def supports_streaming?
      true
    end
    
    def supports_functions?
      MODELS.dig(@model, :supports_functions) || false
    end
    
    def max_tokens
      MODELS.dig(@model, :max_tokens)
    end
  end
  
  # Anthropic Adapter
  class Anthropic < Base
    MODELS = {
      "claude-3-opus" => { max_tokens: 200000 },
      "claude-3-sonnet" => { max_tokens: 200000 },
      "claude-3-haiku" => { max_tokens: 200000 }
    }
    
    def initialize(model: "claude-3-haiku", api_key: nil, **config)
      super
      require 'anthropic'
      @client = ::Anthropic::Client.new(access_token: api_key || ENV['ANTHROPIC_API_KEY'])
    end
    
    def generate(prompt:, **options)
      # format_messages_anthropic adapts the prompt to Anthropic's message
      # schema (helper not shown); Anthropic requires max_tokens explicitly
      response = @client.messages(
        model: @model,
        messages: format_messages_anthropic(prompt),
        max_tokens: options[:max_tokens] || 1000
      )
      
      format_response(response)
    end
    
    def max_tokens
      MODELS.dig(@model, :max_tokens)
    end
  end
  
  # Local Model Adapter (Ollama)
  class Ollama < Base
    def initialize(model: "llama2", base_url: "http://localhost:11434", **config)
      super
      require 'net/http'
      require 'json'
      @base_url = base_url
    end
    
    def generate(prompt:, **options)
      # POST to Ollama's /api/generate endpoint (non-streaming)
      uri = URI("#{@base_url}/api/generate")
      body = { model: @model, prompt: prompt, stream: false }.merge(options)
      response = Net::HTTP.post(uri, body.to_json, 'Content-Type' => 'application/json')
      JSON.parse(response.body)['response']
    end
    
    def supports_streaming?
      true
    end
  end
end
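
The router registration in section 4 also lists a Cohere adapter that is not specced above. A minimal placeholder (the client wiring here is illustrative, not a confirmed gem API):

module Desiru::LLM
  class Cohere < Base
    MODELS = {
      "command-r" => { max_tokens: 128000 },
      "command-light" => { max_tokens: 4096 }
    }
    
    def initialize(model: "command-r", api_key: nil, **config)
      super
      @api_key = api_key || ENV['COHERE_API_KEY']
      # Client construction depends on the gem chosen; see Dependencies
    end
    
    def generate(prompt:, **options)
      # POST to Cohere's chat endpoint via the chosen client
    end
    
    def max_tokens
      MODELS.dig(@model, :max_tokens)
    end
  end
end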

3. Model Router

module Desiru::LLM
  class Router
    def initialize(providers: {}, fallback_order: [])
      @providers = providers
      @fallback_order = fallback_order
    end
    
    def route(model_spec)
      if model_spec.include?('/')
        provider_name, model = model_spec.split('/', 2)
        provider = @providers[provider_name]
        raise "Unknown provider: #{provider_name}" unless provider
        
        provider.new(model: model)
      else
        # Try to infer provider from model name
        infer_provider(model_spec)
      end
    end
    
    def with_fallback(primary_model, &block)
      begin
        yield route(primary_model)
      rescue => e
        @fallback_order.each do |fallback_model|
          begin
            return yield route(fallback_model)
          rescue
            # Try next fallback
          end
        end
        raise e
      end
    end
  end
end
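
`infer_provider` is referenced above but left unspecified; one possible sketch keys off well-known model-name prefixes (the prefix list is illustrative):

module Desiru::LLM
  class Router
    private
    
    def infer_provider(model_spec)
      provider = case model_spec
                 when /\Agpt-/ then @providers['openai']
                 when /\Aclaude-/ then @providers['anthropic']
                 when /\Acommand/ then @providers['cohere']
                 else @providers['ollama'] # assume local models otherwise
                 end
      raise "Cannot infer provider for: #{model_spec}" unless provider
      
      provider.new(model: model_spec)
    end
  end
end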

4. Module Integration

module Desiru
  class Module
    def initialize(signature:, model: nil, **options)
      @signature = parse_signature(signature)
      @llm = setup_llm(model)
      @options = options
    end
    
    private
    
    def setup_llm(model_spec)
      model_spec ||= Desiru.configuration.default_model
      
      router = LLM::Router.new(
        providers: {
          'openai' => LLM::OpenAI,
          'anthropic' => LLM::Anthropic,
          'ollama' => LLM::Ollama,
          'cohere' => LLM::Cohere
        }
      )
      
      router.route(model_spec)
    end
    
    def generate_completion(prompt)
      @llm.generate(
        prompt: prompt,
        temperature: @options[:temperature] || 0.7,
        max_tokens: @options[:max_tokens]
      )
    end
  end
end

5. Token Counting

module Desiru::LLM
  module TokenCounter
    # Exact counts for OpenAI models via the tiktoken_ruby gem
    class OpenAICounter
      def initialize(model)
        require 'tiktoken_ruby'
        @encoding = Tiktoken.encoding_for_model(model)
      end
      
      def count(text)
        @encoding.encode(text).length
      end
    end
    
    # Approximate for others
    class ApproximateCounter
      def count(text)
        # Rough approximation: 1 token ≈ 4 characters
        (text.length / 4.0).ceil
      end
    end
  end
end
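
A small factory (illustrative, not part of the spec above) could pick the right counter per provider:

module Desiru::LLM
  module TokenCounter
    # Fall back to the character approximation for providers without an
    # exact tokenizer
    def self.for(provider, model)
      provider == 'openai' ? OpenAICounter.new(model) : ApproximateCounter.new
    end
  end
end

TokenCounter.for('anthropic', 'claude-3-haiku').count("Hello, world") # => 3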

Example Usage

# Direct provider usage
llm = Desiru::LLM::Anthropic.new(model: "claude-3-sonnet")
response = llm.generate(prompt: "What is Ruby?")

# Using model router
module_config = {
  model: "anthropic/claude-3-haiku", # provider/model format
  temperature: 0.5
}

# Automatic routing in modules
cot = Desiru::Modules::ChainOfThought.new(
  signature: "question -> answer",
  model: "openai/gpt-4"  # Uses OpenAI
)

# With fallback
router = Desiru::LLM::Router.new(
  fallback_order: ["openai/gpt-4", "anthropic/claude-3-sonnet", "ollama/llama2"]
)

router.with_fallback("openai/gpt-4") do |llm|
  llm.generate(prompt: "Complex question")
end

Configuration

Desiru.configure do |config|
  # Default model for all modules
  config.default_model = "anthropic/claude-3-haiku"
  
  # Provider API keys
  config.llm_providers = {
    openai: { api_key: ENV['OPENAI_API_KEY'] },
    anthropic: { api_key: ENV['ANTHROPIC_API_KEY'] },
    cohere: { api_key: ENV['COHERE_API_KEY'] }
  }
  
  # Fallback configuration
  config.llm_fallback_enabled = true
  config.llm_fallback_order = ["anthropic/claude-3-haiku", "openai/gpt-3.5-turbo"]
end
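
How the fallback settings would tie into module calls (a sketch; @router and @model_spec are assumed instance state set up alongside @llm in section 4):

def generate_completion(prompt)
  if Desiru.configuration.llm_fallback_enabled
    # Any provider error falls through the configured fallback order
    @router.with_fallback(@model_spec) do |llm|
      llm.generate(prompt: prompt, temperature: @options[:temperature] || 0.7)
    end
  else
    @llm.generate(prompt: prompt, temperature: @options[:temperature] || 0.7)
  end
end

The router itself would be constructed with fallback_order: Desiru.configuration.llm_fallback_order.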

Testing Requirements

  • Mock providers for testing (see the sketch after this list)
  • Integration tests with real APIs (optional)
  • Token counting accuracy tests
  • Fallback mechanism tests
  • Cost calculation tests
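
A mock provider only needs to satisfy the Base interface; a minimal sketch:

module Desiru::LLM
  class MockProvider < Base
    def initialize(model: "mock", responses: ["stub answer"], **config)
      super(model: model, **config)
      @responses = responses.cycle
    end
    
    def generate(prompt:, **options)
      @responses.next # deterministic canned output for specs
    end
    
    def count_tokens(text)
      (text.length / 4.0).ceil
    end
    
    def max_tokens
      8192
    end
  end
end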

Dependencies

  • Consider using existing gems where available:
    • ruby-openai
    • anthropic (if available)
    • tiktoken_ruby for token counting

Priority

High - Critical for flexibility and avoiding vendor lock-in
