Skip to content

Implement Program Serialization and Versioning #21

@obie

Description

@obie

Overview

Implement comprehensive serialization support for saving and loading compiled DSPy programs, including version management and cross-version compatibility.

Description

Serialization allows trained/optimized programs to be saved and deployed without re-optimization. This includes saving module states, demonstrations, optimized parameters, and metadata.

Key Features to Implement

  • Save/load complete program states
  • Version tracking and compatibility
  • Compression for large programs
  • Partial serialization (specific modules)
  • Migration between versions
  • Human-readable and binary formats

Implementation Requirements

1. Serialization Framework

module Desiru
  module Serialization
    # Converts a program to and from a versioned envelope.
    #
    # The envelope built by #serialize has four top-level keys: +version+,
    # +metadata+, +program+ and +timestamp+. Several helpers used here
    # (+build_metadata+, +serialize_signature+, +serialize_parameters+,
    # +check_version_compatibility+, +deserialize_program+) are not defined in
    # this class body and are expected to be provided elsewhere.
    class Serializer
      CURRENT_VERSION = "1.0.0"

      # Encodes +program+ in the requested format.
      #
      # @param program [Object] must respond to +modules+ and +configuration+
      # @param format [Symbol] +:json+, +:msgpack+ or +:yaml+
      # @return [String] the encoded envelope
      # @raise [ArgumentError] if +format+ is not one of the supported formats
      def serialize(program, format: :json)
        data = {
          version: CURRENT_VERSION,
          metadata: build_metadata(program),
          program: serialize_program(program),
          timestamp: Time.now.iso8601
        }

        case format
        when :json
          JSON.pretty_generate(data)
        when :msgpack
          MessagePack.pack(data)
        when :yaml
          YAML.dump(data)
        else
          raise ArgumentError, "Unknown format: #{format}"
        end
      end

      # Decodes an envelope, checks its version and rebuilds the program.
      #
      # @param data [String] payload previously produced by #serialize
      # @param format [Symbol] +:json+, +:msgpack+ or +:yaml+
      # @return [Object] whatever +deserialize_program+ returns
      # @raise [ArgumentError] if +format+ is not one of the supported formats
      def deserialize(data, format: :json)
        parsed = parse_payload(data, format)

        check_version_compatibility(parsed[:version])
        deserialize_program(parsed[:program], parsed[:metadata])
      end

      private

      # Decodes +data+ into a Hash with symbol keys at the top level.
      #
      # Raises for unsupported formats, mirroring #serialize, instead of
      # returning nil and failing later on +nil[:version]+.
      def parse_payload(data, format)
        case format
        when :json
          JSON.parse(data, symbolize_names: true)
        when :msgpack
          # MessagePack has no symbol type, so keys come back as strings unless
          # symbol keys are requested; the lookups in #deserialize use symbols.
          MessagePack.unpack(data, symbolize_keys: true)
        when :yaml
          # NOTE(review): on Psych versions where YAML.load is not the safe
          # loader, this can instantiate arbitrary objects from the payload.
          # Only use it on trusted files, or move to YAML.safe_load with an
          # explicit permitted_classes list once the payload types are known.
          YAML.load(data)
        else
          raise ArgumentError, "Unknown format: #{format}"
        end
      end

      # Builds the +program+ section of the envelope.
      #
      # +custom_data+ is nil unless the program responds to
      # +custom_serialization+.
      def serialize_program(program)
        custom_data = (program.custom_serialization if program.respond_to?(:custom_serialization))

        {
          class: program.class.name,
          modules: serialize_modules(program),
          configuration: program.configuration,
          custom_data: custom_data
        }
      end

      # Returns one Hash per entry yielded by +program.modules+.
      def serialize_modules(program)
        program.modules.map do |name, mod|
          {
            name: name,
            class: mod.class.name,
            signature: serialize_signature(mod.signature),
            demonstrations: mod.demonstrations,
            parameters: serialize_parameters(mod),
            metadata: mod.metadata
          }
        end
      end
    end
  end
end

2. Version Management

module Desiru::Serialization
  # A dotted "major.minor.patch" version that can be ordered and checked for
  # compatibility with another version.
  class Version
    include Comparable

    attr_reader :major, :minor, :patch

    # @param version_string [#to_s] dotted version such as "1.2.3". Missing
    #   components default to 0, so "1.2" is treated as "1.2.0". Components
    #   are converted with String#to_i; anything past the third is ignored.
    # @raise [ArgumentError] if the value is nil or blank
    def initialize(version_string)
      text = version_string.to_s.strip
      raise ArgumentError, "Version string must not be empty" if text.empty?

      major, minor, patch = text.split('.').map(&:to_i)
      @major = major || 0
      @minor = minor || 0
      @patch = patch || 0
    end

    # Orders versions by major, then minor, then patch.
    #
    # @return [Integer, nil] nil when +other+ is not a Version, which is what
    #   Comparable expects for values that cannot be compared
    def <=>(other)
      return nil unless other.is_a?(Version)

      [major, minor, patch] <=> [other.major, other.minor, other.patch]
    end

    # @return [Boolean] true when both versions share the same major number
    def compatible_with?(other)
      # Same major version = compatible
      major == other.major
    end

    # @return [String] the version as "major.minor.patch"
    def to_s
      "#{major}.#{minor}.#{patch}"
    end
  end
  
  # Checks a saved payload's version against the running version and, when
  # the payload is supplied, runs the migrations needed to bring it up to date.
  class VersionManager
    # @param saved_version [String] version recorded in the saved payload
    # @param current_version [String] version to compare against. The default
    #   is qualified with Serializer because a bare CURRENT_VERSION is not
    #   reachable from this class's lexical scope.
    # @param data [Hash, nil] the saved payload; when nil, only the
    #   compatibility check runs and nothing is migrated
    # @return [Hash, nil] +data+, migrated when the saved version is older
    # @raise [IncompatibleVersionError] when Version#compatible_with? is false
    def self.check_compatibility(saved_version, current_version = Serializer::CURRENT_VERSION, data = nil)
      saved = Version.new(saved_version)
      current = Version.new(current_version)

      unless saved.compatible_with?(current)
        raise IncompatibleVersionError,
          "Saved version #{saved} incompatible with current #{current}"
      end

      # Nothing to migrate without a payload, or when it is already current.
      return data if data.nil? || saved >= current

      migrate(saved, current, data)
    end

    # Applies, in sequence, every migration returned by +find_migrations+.
    #
    # @param from_version [Version] version the payload was saved with
    # @param to_version [Version] version to migrate to
    # @param data [Hash] payload to migrate; each migration receives the
    #   previous migration's return value
    # @return [Hash] the payload after the last migration
    # @raise [ArgumentError] when no payload is given
    def self.migrate(from_version, to_version, data = nil)
      raise ArgumentError, "data is required to run migrations" if data.nil?

      find_migrations(from_version, to_version).reduce(data) do |current_data, migration|
        migration.apply(current_data)
      end
    end
  end
end

3. Program State Management

module Desiru
  class Program
    # First two bytes of every gzip stream; used to tell real gzip files from
    # the raw zlib streams that earlier saves wrote under the same extension.
    GZIP_MAGIC = "\x1F\x8B".b.freeze
    private_constant :GZIP_MAGIC

    # Serializes this program and writes it to disk.
    #
    # @param filepath [String] destination path; ".gz" is appended when
    #   +compress+ is true
    # @param format [Symbol] passed through to Serialization::Serializer#serialize
    # @param compress [Boolean] gzip the payload before writing
    # @return [SaveResult] final path, size in bytes and SHA-256 checksum
    def save(filepath, format: :json, compress: false)
      payload = Serialization::Serializer.new.serialize(self, format: format)

      if compress
        # Write an actual gzip stream so the ".gz" suffix is truthful and the
        # file can be read by standard gzip tools.
        payload = Zlib.gzip(payload)
        filepath += '.gz'
      end

      # Binary-safe write: msgpack and compressed payloads are not text.
      File.binwrite(filepath, payload)

      SaveResult.new(
        filepath: filepath,
        size: File.size(filepath),
        checksum: Digest::SHA256.file(filepath).hexdigest
      )
    end

    # Reads a file written by #save and rebuilds the program.
    #
    # @param filepath [String] path to read; a ".gz" suffix triggers decompression
    # @param format [Symbol] payload format, or +:auto+ to ask +detect_format+
    #   (not defined in this class body)
    # @return [Object] whatever Serialization::Serializer#deserialize returns
    def self.load(filepath, format: :auto)
      raw = File.binread(filepath)

      raw = if filepath.end_with?('.gz')
        inflate_payload(raw)
      else
        # Same encoding tag File.read would have applied, without text-mode
        # translation of the bytes.
        raw.force_encoding(Encoding.default_external)
      end

      format = detect_format(raw) if format == :auto

      Serialization::Serializer.new.deserialize(raw, format: format)
    end

    # Decompresses +bytes+, accepting both gzip streams and the raw zlib
    # streams produced by Zlib::Deflate.deflate.
    def self.inflate_payload(bytes)
      bytes.start_with?(GZIP_MAGIC) ? Zlib.gunzip(bytes) : Zlib::Inflate.inflate(bytes)
    end
    private_class_method :inflate_payload

    # Writes a single module of this program to a JSON file.
    #
    # @param module_name [Object] key into +@modules+
    # @param filepath [String] destination path
    # @raise [RuntimeError] when no module is stored under +module_name+
    def export_module(module_name, filepath)
      mod = @modules[module_name]
      raise "Module not found: #{module_name}" unless mod

      # NOTE(review): Serializer#serialize_module is not among the Serializer
      # methods shown alongside this class — confirm it exists and is public.
      data = {
        module: Serialization::Serializer.new.serialize_module(mod),
        parent_program: self.class.name,
        exported_at: Time.now
      }

      File.write(filepath, JSON.pretty_generate(data))
    end

    # Reads a module file written by #export_module and stores the module.
    #
    # @param filepath [String] path to the exported JSON
    # @param as [Object, nil] key to store the module under; defaults to the
    #   +name+ recorded in the file
    # @return [Object] the reconstructed module
    def import_module(filepath, as: nil)
      data = JSON.parse(File.read(filepath), symbolize_names: true)

      module_data = data[:module]
      # NOTE(review): a name read back from JSON is a String, while callers
      # may key @modules by Symbol — confirm the intended key type.
      module_name = as || module_data[:name]

      # Reconstruct module
      mod = Serialization::Serializer.new.deserialize_module(module_data)
      @modules[module_name] = mod
    end
  end
end

4. Module Serialization

module Desiru
  class Module
    # Snapshot of this module as a plain Hash.
    #
    # @return [Hash] with +:signature+ (stringified), +:demonstrations+ (each
    #   converted with +to_h+), +:parameters+ and a +:metadata+ sub-hash
    def to_h
      signature_text = @signature.to_s
      demos = @demonstrations.map { |demo| demo.to_h }
      params = serializable_parameters
      meta = {
        created_at: @created_at,
        optimization_score: @optimization_score,
        usage_count: @usage_count
      }

      { signature: signature_text, demonstrations: demos, parameters: params, metadata: meta }
    end

    # Rebuilds a module from a Hash shaped like the one #to_h returns.
    #
    # @param data [Hash] symbol-keyed hash; each demonstration is splatted
    #   into Example.new as keyword arguments
    # @return [Desiru::Module] the populated instance
    def self.from_h(data)
      new(signature: data[:signature]).tap do |mod|
        mod.demonstrations = data[:demonstrations].map { |attrs| Example.new(**attrs) }
        mod.load_parameters(data[:parameters])
        mod.metadata = data[:metadata]
      end
    end

    private

    # Parameters included in the serialized form.
    def serializable_parameters
      {
        temperature: @temperature,
        max_tokens: @max_tokens,
        instruction_template: @instruction_template,
        custom: @custom_parameters
      }
    end
  end
end

5. Advanced Features

module Desiru::Serialization
  # Incremental saves
  # Saves a sequence of checkpoints: the first as a complete program file and
  # every later one through +save_diff+. The +save_diff+, +apply_diff+ and
  # +find_checkpoint+ helpers are not defined in this class body.
  class IncrementalSerializer
    # @param base_filepath [String] prefix for every checkpoint file
    def initialize(base_filepath)
      @base_filepath = base_filepath
      @versions = []
    end

    # Records a new checkpoint named "<base>.v<N>" with an optional ".<tag>".
    #
    # @param program [Object] program to save; must respond to +save+
    # @param tag [String, nil] optional label appended to the file name
    # @return [Array<Hash>] the list of recorded checkpoints
    def save_checkpoint(program, tag: nil)
      version = @versions.size + 1
      filepath = "#{@base_filepath}.v#{version}"
      filepath += ".#{tag}" if tag

      if @versions.any?
        # Save only differences
        save_diff(program, filepath)
      else
        # Save complete state
        program.save(filepath)
      end

      @versions << { version: version, tag: tag, filepath: filepath }
    end

    # Rebuilds the program as of a checkpoint by loading the first (complete)
    # checkpoint and applying each later diff up to the requested one.
    #
    # @param version [Integer, nil] checkpoint number to look up
    # @param tag [String, nil] checkpoint tag to look up
    # @return [Object] the program returned by Desiru::Program.load
    # @raise [ArgumentError] when +find_checkpoint+ returns nothing
    def load_checkpoint(version: nil, tag: nil)
      checkpoint = find_checkpoint(version, tag)
      unless checkpoint
        raise ArgumentError,
          "No checkpoint found for version=#{version.inspect}, tag=#{tag.inspect}"
      end

      # Fully qualified: a bare Program is not reachable from this class when
      # the enclosing namespace is opened as a single compact path.
      program = Desiru::Program.load(@versions.first[:filepath])

      # Checkpoint N lives at index N - 1; index 0 is the base loaded above.
      @versions.take(checkpoint[:version]).drop(1).each do |entry|
        apply_diff(program, entry[:filepath])
      end

      program
    end
  end
  
  # Encryption for sensitive data
  # Serializer variant that passes the encoded payload through +encrypt+ on
  # the way out and +decrypt+ on the way in. Neither helper is defined in
  # this class body.
  class EncryptedSerializer < Serializer
    # @param key [Object] stored as-is for the encrypt/decrypt helpers
    def initialize(key)
      @key = key
      @cipher = OpenSSL::Cipher.new('AES-256-GCM')
    end

    # @param program [Object] program to serialize
    # @param format [Symbol] forwarded unchanged to Serializer#serialize
    # @return [Object] the result of +encrypt+ applied to the encoded payload
    def serialize(program, format: :json)
      # Bare super forwards the same arguments this method received.
      encrypt(super)
    end

    # @param encrypted_data [Object] value previously returned by #serialize
    # @param format [Symbol] forwarded to Serializer#deserialize
    # @return [Object] whatever Serializer#deserialize returns
    def deserialize(encrypted_data, format: :json)
      plaintext = decrypt(encrypted_data)
      super(plaintext, format: format)
    end
  end
end

Example Usage

# Save optimized program
program = MyQAProgram.new
optimizer = MIPROv2.new(program: program)
optimized = optimizer.compile(dataset, metric)

# Save in different formats
optimized.save("qa_model.json")
optimized.save("qa_model.msgpack", format: :msgpack)
# With compress: true, ".gz" is appended to the given path
optimized.save("qa_model_compressed.json", compress: true)

# Load in production
production_program = Desiru::Program.load("qa_model.json")
result = production_program.forward(question: "What is DSPy?")

# Incremental saves during training
checkpointer = Desiru::Serialization::IncrementalSerializer.new("training_run")

epochs.times do |epoch|
  optimizer.step(program, batch)
  checkpointer.save_checkpoint(program, tag: "epoch_#{epoch}")
end

# Load specific checkpoint
best_program = checkpointer.load_checkpoint(tag: "epoch_5")

# Export/import specific modules
program.export_module(:chain_of_thought, "cot_module.json")

another_program = AnotherProgram.new
another_program.import_module("cot_module.json", as: :reasoning)

# Encrypted saves for sensitive models
# ENV.fetch raises KeyError when the variable is unset, instead of silently
# constructing the serializer with a nil key.
secure_serializer = Desiru::Serialization::EncryptedSerializer.new(
  ENV.fetch('MODEL_ENCRYPTION_KEY')
)
encrypted = secure_serializer.serialize(program)
# Ciphertext is binary, so write it without text-mode translation.
File.binwrite("secure_model.enc", encrypted)

# Version migration
# Files from an older, compatible version are migrated while loading.
# IncompatibleVersionError is raised when the saved major version differs
# from the current one, so reaching the rescue means the file was rejected.
# NOTE(review): the error class may need its namespace here — confirm where
# it is defined.
begin
  old_program = Desiru::Program.load("old_model_v0.9.json")
rescue IncompatibleVersionError => e
  puts "Cannot load saved program: #{e.message}"
  # Re-optimize, or convert the file with a dedicated migration tool
end

Migration Example

module Desiru::Serialization::Migrations
  # Migration from payload version 1.0 to 1.1: ensures every serialized
  # module carries a +streaming_enabled+ parameter.
  class V1_0_to_V1_1
    # Updates +data+ in place and returns it.
    #
    # Modules are looked up under +data[:program][:modules]+ first, which is
    # where the serializer's envelope nests them, and then under
    # +data[:modules]+ for callers that pass the program section directly.
    # A payload with no modules is returned unchanged.
    #
    # @param data [Hash] symbol-keyed payload
    # @return [Hash] the same Hash, with +streaming_enabled+ defaulted to
    #   false wherever it was missing
    def self.apply(data)
      modules = data.dig(:program, :modules) || data[:modules] || []

      # Add new required fields
      modules.each do |mod|
        parameters = (mod[:parameters] ||= {})
        parameters[:streaming_enabled] = false if parameters[:streaming_enabled].nil?
      end

      data
    end
  end
end

Testing Requirements

  • Round-trip serialization tests
  • Cross-version compatibility tests
  • Large program serialization performance
  • Compression effectiveness tests
  • Encryption/decryption tests
  • Migration testing

Priority

Medium — important for production deployment, but the project can function without it initially.

Metadata

Metadata

Assignees

No one assigned

    Labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions