diff --git a/pages/docs/concepts/_meta.tsx b/pages/docs/concepts/_meta.tsx index 2952ed1..57d4fee 100644 --- a/pages/docs/concepts/_meta.tsx +++ b/pages/docs/concepts/_meta.tsx @@ -2,11 +2,14 @@ const meta = { provider_clients: "Model Providers", agent: "Agents", completion: "Completions", + streaming: "Streaming", extractors: "Extractors", tools: "Tools", embeddings: "Embeddings", + media_generation: "Image, Audio & Transcription", loaders: "Loaders", chains: "Chains", + evals: "Evals", observability: "Observability", }; diff --git a/pages/docs/concepts/completion.mdx b/pages/docs/concepts/completion.mdx index 8e346e3..324b76d 100644 --- a/pages/docs/concepts/completion.mdx +++ b/pages/docs/concepts/completion.mdx @@ -33,7 +33,38 @@ async fn prompt(&self, prompt: &str) -> Result; async fn chat(&self, prompt: &str, history: Vec) -> Result; ``` -### 2. Low-Level Control +#### `TypedPrompt` Trait + +- Structured output interface for typed completions +- Returns deserialized structured data instead of raw strings +- The target type must implement `serde::Deserialize` and `schemars::JsonSchema` + +```rust +pub trait TypedPrompt: WasmCompatSend + WasmCompatSync { + type TypedRequest<'a, T>: IntoFuture> + where Self: 'a, + T: JsonSchema + DeserializeOwned + WasmCompatSend + 'a; + + // Required method + fn prompt_typed( + &self, + prompt: impl Into + WasmCompatSend, + ) -> Self::TypedRequest<'_, T> + where T: JsonSchema + DeserializeOwned + WasmCompatSend; +} +``` + +This is useful when you need the LLM to return structured data (e.g., JSON conforming to a specific schema) rather than free-form text. See the [Structured Output](#structured-output) section below for more details. + +### 2. Streaming Interfaces + +Rig provides streaming counterparts for all high-level traits. See [Streaming](./streaming.mdx) for full details. 
+ +- `StreamingPrompt`: Streaming one-shot prompts +- `StreamingChat`: Streaming chat with history +- `StreamingCompletion`: Low-level streaming completion interface + +### 3. Low-Level Control #### `Completion` Trait @@ -41,81 +72,52 @@ async fn chat(&self, prompt: &str, history: Vec) -> Result, - ) -> impl std::future::Future> + Send; -} - -/// Trait defininig a low-level LLM completion interface +```rust pub trait Completion { /// Generates a completion request builder for the given `prompt` and `chat_history`. - /// This function is meant to be called by the user to further customize the - /// request at prompt time before sending it. - /// - /// ❗IMPORTANT: The type that implements this trait might have already - /// populated fields in the builder (the exact fields depend on the type). - /// For fields that have already been set by the model, calling the corresponding - /// method on the builder will overwrite the value set by the model. - /// - /// For example, the request builder returned by [`Agent::completion`](crate::agent::Agent::completion) will already - /// contain the `preamble` provided when creating the agent. + /// Fields pre-populated by the implementing type (e.g., Agent preamble) can be + /// overwritten by calling the corresponding method on the builder. fn completion( &self, prompt: &str, chat_history: Vec, - ) -> impl std::future::Future, CompletionError>> + Send; -} - -/// General completion response struct that contains the high-level completion choice -/// and the raw response. -#[derive(Debug)] -pub struct CompletionResponse { - /// The completion choice returned by the completion model provider - pub choice: ModelChoice, - /// The raw response returned by the completion model provider - pub raw_response: T, + ) -> impl Future, CompletionError>> + Send; } +``` -/// Enum representing the high-level completion choice returned by the completion model provider. 
-#[derive(Debug)] -pub enum ModelChoice { - /// Represents a completion response as a message - Message(String), - /// Represents a completion response as a tool call of the form - /// `ToolCall(function_name, function_params)`. - ToolCall(String, serde_json::Value), -} +#### `CompletionModel` Trait -/// Trait defining a completion model that can be used to generate completion responses. -/// This trait is meant to be implemented by the user to define a custom completion model, -/// either from a third party provider (e.g.: OpenAI) or a local model. -pub trait CompletionModel: Clone + Send + Sync { - /// The raw response type returned by the underlying completion model. - type Response: Send + Sync; +The provider interface that must be implemented for each LLM backend. In v0.31.0, this trait lives at `rig::completion::request::CompletionModel` (re-exported via `rig::completion`). - /// Generates a completion response for the given completion request. +```rust +pub trait CompletionModel: + Clone + + WasmCompatSend + + WasmCompatSync { + type Response: WasmCompatSend + WasmCompatSync + Serialize + DeserializeOwned; + type StreamingResponse: Clone + Unpin + WasmCompatSend + WasmCompatSync + Serialize + DeserializeOwned + GetTokenUsage; + type Client; + + // Required methods + fn make(client: &Self::Client, model: impl Into) -> Self; fn completion( &self, request: CompletionRequest, - ) -> impl std::future::Future, CompletionError>> - + Send; + ) -> impl Future, CompletionError>> + WasmCompatSend; - /// Generates a completion request builder for the given `prompt`. - fn completion_request(&self, prompt: &str) -> CompletionRequestBuilder { - CompletionRequestBuilder::new(self.clone(), prompt.to_string()) - } + fn stream( + &self, + request: CompletionRequest, + ) -> impl Future, CompletionError>> + WasmCompatSend; + + // Provided method + fn completion_request( + &self, + prompt: impl Into, + ) -> CompletionRequestBuilder { ... 
} } ``` -#### `CompletionModel` Trait - -- Provider interface implementation -- Raw request handling -- Response parsing and error management - ## Request Building ### CompletionRequestBuilder @@ -132,51 +134,94 @@ let request = model.completion_request("prompt") .build(); ``` -### Request Components +## Response Handling + +### CompletionResponse + +The `CompletionResponse` struct wraps the model's response along with the raw provider-specific data: + +```rust +pub struct CompletionResponse { + /// One or more assistant content items (text, tool calls, reasoning, etc.) + pub choice: OneOrMany, + /// The raw response from the provider + pub raw_response: T, +} +``` + +### AssistantContent -1. **Core Elements** +In v0.31.0, the old `ModelChoice` enum has been replaced by a richer `AssistantContent` enum (in `rig::completion::message`) that supports multimodal responses: - - Prompt text - - System preamble - - Chat history - - Temperature - - Max tokens +```rust +pub enum AssistantContent { + /// Plain text response + Text(Text), + /// A tool call requested by the model + ToolCall(ToolCall), + /// Reasoning/chain-of-thought content (for models that support it) + Reasoning(Reasoning), +} +``` -2. **Context Management** +The `Text` struct wraps a string, while `ToolCall` contains the tool call ID, function name, and arguments: + +```rust +pub struct ToolCall { + pub id: String, + pub function: ToolFunction, +} - - Document attachments - - Metadata handling - - Formatting controls +pub struct ToolFunction { + pub name: String, + pub arguments: serde_json::Value, +} +``` -3. 
**Tool Integration** - - Tool definitions - - Parameter validation - - Response parsing +### Message Types -## Response Handling +The `Message` enum represents conversation messages with rich content support: -### CompletionResponse +```rust +pub enum Message { + User { content: OneOrMany }, + Assistant { content: OneOrMany }, +} +``` -Structured response type with: +`UserContent` supports text, images, audio, documents, video, and tool results: ```rust -enum ModelChoice { - Message(String), - ToolCall(String, Value) +pub enum UserContent { + Text(Text), + ToolResult(ToolResult), + Image(Image), + Audio(Audio), + Document(Document), + Video(Video), } +``` + +### Token Usage -struct CompletionResponse { - choice: ModelChoice, - raw_response: T, +v0.31.0 adds a `Usage` struct and the `GetTokenUsage` trait for tracking token consumption: + +```rust +pub struct Usage { + pub prompt_tokens: u64, + pub completion_tokens: u64, + pub total_tokens: u64, } ``` +Implement the `GetTokenUsage` trait on your provider's raw response type to expose token metrics. + ### Error Handling Comprehensive error types: ```rust -enum CompletionError { +pub enum CompletionError { HttpError(reqwest::Error), JsonError(serde_json::Error), RequestError(Box), @@ -185,13 +230,23 @@ enum CompletionError { } ``` +For structured output, there is an additional error type: + +```rust +pub enum StructuredOutputError { + CompletionError(CompletionError), + JsonError(serde_json::Error), + // ... 
+} +``` + ## Usage Patterns ### Basic Completion ```rust -let openai = Client::new(api_key); -let model = openai.completion_model("gpt-4"); +let openai = openai::Client::from_env(); +let model = openai.completion_model("gpt-4o"); let response = model .prompt("Explain quantum computing") @@ -201,7 +256,9 @@ let response = model ### Contextual Chat ```rust -let chat_response = model +use rig::completion::Message; + +let chat_response = agent .chat( "Continue the discussion", vec![Message::user("Previous context")] @@ -212,7 +269,7 @@ let chat_response = model ### Advanced Request Configuration ```rust -let request = model +let response = model .completion_request("Complex query") .preamble("Expert system") .temperature(0.8) @@ -222,6 +279,27 @@ let request = model .await?; ``` +### Structured Output + +Using the `TypedPrompt` trait (implemented by `Agent`), you can get structured responses: + +```rust +use schemars::JsonSchema; +use serde::Deserialize; + +#[derive(Deserialize, JsonSchema)] +struct SentimentAnalysis { + /// The sentiment score from -1.0 to 1.0 + score: f64, + /// The sentiment label + label: String, +} + +let result: SentimentAnalysis = agent + .prompt_typed("Analyze the sentiment of: 'I love this product!'") + .await?; +``` + ## Provider Integration ### Implementing New Providers @@ -245,7 +323,9 @@ impl CompletionModel for CustomProvider { - Use `Prompt` for simple interactions - Use `Chat` for conversational flows + - Use `TypedPrompt` for structured data extraction - Use `Completion` for fine-grained control + - Use `StreamingPrompt`/`StreamingChat` when you need incremental output 2. **Error Handling** @@ -256,18 +336,19 @@ impl CompletionModel for CustomProvider { 3. 
**Resource Management** - Reuse model instances - Batch similar requests - - Monitor token usage + - Monitor token usage via the `GetTokenUsage` trait ## See Also - [Agent System](./agent.mdx) - [Tool Integration](./tools.mdx) +- [Streaming](./streaming.mdx) - [Provider Implementation](../integrations/model_providers.mdx)
diff --git a/pages/docs/concepts/evals.mdx b/pages/docs/concepts/evals.mdx new file mode 100644 index 0000000..28fe487 --- /dev/null +++ b/pages/docs/concepts/evals.mdx @@ -0,0 +1,212 @@ +--- +title: Evals +description: Evaluating LLM outputs with Rig's evals framework. +--- + +import { Cards } from "nextra/components"; + +# Evals Framework + +> Requires the `experimental` feature flag: `cargo add rig-core -F experimental` + +As of v0.31.0, Rig includes an experimental evaluation framework (`rig::evals`) for testing and measuring the quality of LLM outputs. Evals provide a structured way to assess whether your agents, prompts, and RAG systems produce correct, relevant, and high-quality responses. + +## Overview + +The evals module is inspired by [OpenAI's evals framework](https://github.com/openai/evals) and provides: + +- A core `Eval` trait for defining custom evaluators +- Built-in metrics: LLM-as-a-judge, LLM scoring, and semantic similarity +- Structured outcomes: pass, fail, or invalid + +## Core Trait: `Eval` + +The `Eval` trait is the foundation of the framework: + +```rust +pub trait Eval { + type Input; + type Output; + + async fn eval( + &self, + input: Self::Input, + output: Self::Output, + ) -> Result; +} +``` + +Every evaluator takes some input (what was sent to the LLM) and output (what the LLM produced), then returns an `EvalOutcome`: + +```rust +pub enum EvalOutcome { + /// The output passed the evaluation criteria + Pass, + /// The output failed the evaluation criteria + Fail, + /// The evaluation could not be completed (e.g., parse error) + Invalid(String), +} +``` + +## Built-in Metrics + +### LLM Judge (`LlmJudgeMetric`) + +Uses an LLM to judge whether an output meets certain criteria. 
You provide a schema type that implements the `Judgment` trait: + +```rust +use rig::evals::{LlmJudgeMetric, Judgment}; +use schemars::JsonSchema; +use serde::Deserialize; + +#[derive(Deserialize, JsonSchema)] +struct FactualityJudgment { + /// Whether the response is factually accurate + is_factual: bool, + /// Explanation for the judgment + reasoning: String, +} + +impl Judgment for FactualityJudgment { + fn passed(&self) -> bool { + self.is_factual + } +} + +let judge = LlmJudgeMetric::::builder(model) + .preamble("You are a factuality judge. Evaluate whether the response is factually accurate.") + .build(); + +let outcome = judge.eval( + "What is the capital of France?", + "The capital of France is Paris." +).await?; + +assert!(matches!(outcome, EvalOutcome::Pass)); +``` + +### LLM Judge with Custom Function (`LlmJudgeMetricWithFn`) + +Instead of implementing the `Judgment` trait, you can provide a function pointer that determines pass/fail: + +```rust +let judge = LlmJudgeMetric::::builder(model) + .preamble("Evaluate the response.") + .with_judge_fn(|schema: &MySchema| schema.score > 0.5) + .build(); +``` + +### LLM Score (`LlmScoreMetric`) + +Uses an LLM to assign a numerical score to an output: + +```rust +use rig::evals::{LlmScoreMetric, LlmScoreMetricScore}; + +let scorer = LlmScoreMetric::builder(model) + .preamble("Score the response quality from 0 to 10.") + .threshold(7.0) // Scores >= 7.0 pass + .build(); + +let outcome = scorer.eval( + "Explain quantum entanglement", + "Quantum entanglement is when two particles become linked..." +).await?; +``` + +The LLM is asked to return a `LlmScoreMetricScore`: + +```rust +pub struct LlmScoreMetricScore { + /// The numerical score + pub score: f64, + /// Explanation for the score + pub reasoning: String, +} +``` + +### Semantic Similarity (`SemanticSimilarityMetric`) + +Measures cosine similarity between embeddings of the expected and actual output. 
This is a non-LLM metric -- it uses embedding models only: + +```rust +use rig::evals::SemanticSimilarityMetric; + +let metric = SemanticSimilarityMetric::builder(embedding_model) + .threshold(0.85) // Cosine similarity >= 0.85 passes + .build(); + +let outcome = metric.eval( + "The cat sat on the mat", // expected + "A cat was sitting on a mat" // actual +).await?; +``` + +The resulting score is available as a `SemanticSimilarityMetricScore`: + +```rust +pub struct SemanticSimilarityMetricScore { + pub similarity: f64, +} +``` + +## Writing Custom Evals + +Implement the `Eval` trait for any custom evaluation logic: + +```rust +use rig::evals::{Eval, EvalOutcome, EvalError}; + +struct LengthCheck { + min_length: usize, + max_length: usize, +} + +impl Eval for LengthCheck { + type Input = String; + type Output = String; + + async fn eval( + &self, + _input: Self::Input, + output: Self::Output, + ) -> Result { + let len = output.len(); + if len >= self.min_length && len <= self.max_length { + Ok(EvalOutcome::Pass) + } else { + Ok(EvalOutcome::Fail) + } + } +} +``` + +## Best Practices + +1. **Combine Metrics**: Use multiple eval metrics together. For example, combine an LLM judge for factuality with semantic similarity for relevance. + +2. **Determinism**: LLM-based evals are inherently non-deterministic. Run them multiple times and look at aggregate results for reliable assessments. + +3. **Thresholds**: Start with permissive thresholds and tighten them as you understand your system's behavior. + +4. **Cost**: LLM-as-a-judge evals incur additional API costs. Consider using cheaper models for judging when possible, and use non-LLM metrics (like semantic similarity) where appropriate. + +5. **Invalid Outcomes**: Always handle `EvalOutcome::Invalid` -- it indicates the eval itself failed (e.g., the judge LLM returned unparseable output), not that the tested output was bad. + +## Experimental Status + +The evals module is behind the `experimental` feature flag. 
The API may change in future versions as the framework matures. Feedback is welcome -- see the [contributing guide](../how_to_contribute.mdx). + +## See Also + +- [Extractors](./extractors.mdx) -- Structured data extraction (used internally by LLM judge metrics) +- [Embeddings](./embeddings.mdx) -- Embedding models (used by semantic similarity metric) + +
+ + diff --git a/pages/docs/concepts/media_generation.mdx b/pages/docs/concepts/media_generation.mdx new file mode 100644 index 0000000..eb2cda3 --- /dev/null +++ b/pages/docs/concepts/media_generation.mdx @@ -0,0 +1,249 @@ +--- +title: Image, Audio & Transcription +description: Image generation, audio generation (TTS), and audio transcription in Rig. +--- + +import { Cards } from "nextra/components"; + +# Image, Audio & Transcription + +As of v0.31.0, Rig provides unified abstractions for image generation, audio generation (text-to-speech), and audio transcription (speech-to-text) alongside its core text completion and embedding capabilities. + +## Image Generation + +> Requires the `image` feature flag: `cargo add rig-core -F image` + +The `rig::image_generation` module provides the `ImageGenerationModel` trait for generating images from text prompts. + +### Core Types + +```rust +pub trait ImageGenerationModel: Clone + Send + Sync { + type Response: Send + Sync; + + async fn image_generation( + &self, + request: ImageGenerationRequest, + ) -> Result; +} +``` + +### Request Building + +Use `ImageGenerationRequestBuilder` to construct requests: + +```rust +use rig::image_generation::{ImageGeneration, ImageGenerationRequest}; + +let response = model + .image_generation_request("A futuristic city at sunset") + .size("1024x1024") + .send() + .await?; + +// Access the generated image +let image_data = response.image; +``` + +### ImageGenerationResponse + +```rust +pub struct ImageGenerationResponse { + /// The generated image data + pub image: Vec, + /// The raw provider response + pub raw_response: serde_json::Value, +} +``` + +### Using with Agents + +Image generation models can be accessed through providers that support them: + +```rust +let openai = openai::Client::from_env(); +let dalle = openai.image_generation_model("dall-e-3"); + +let response = dalle + .image_generation_request("A robot painting a landscape") + .send() + .await?; +``` + +## Audio Generation 
(Text-to-Speech) + +> Requires the `audio` feature flag: `cargo add rig-core -F audio` + +The `rig::audio_generation` module provides the `AudioGenerationModel` trait for converting text to speech. + +### Core Types + +```rust +pub trait AudioGenerationModel: + Sized + + Clone + + WasmCompatSend + + WasmCompatSync { + type Response: Send + Sync; + type Client; + + // Required methods + fn make(client: &Self::Client, model: impl Into) -> Self; + fn audio_generation( + &self, + request: AudioGenerationRequest, + ) -> impl Future, AudioGenerationError>> + Send; + + // Provided method + fn audio_generation_request(&self) -> AudioGenerationRequestBuilder { ... } +} +``` + +### Request Building + +```rust +use rig::audio_generation::AudioGeneration; + +let response = model + .audio_generation_request("Hello, how can I help you today?") + .voice("alloy") + .send() + .await?; + +// Access the generated audio +let audio_bytes = response.audio; +``` + +### AudioGenerationResponse + +```rust +pub struct AudioGenerationResponse { + /// The generated audio data + pub audio: Vec, + /// The raw provider response + pub raw_response: serde_json::Value, +} +``` + +## Audio Transcription (Speech-to-Text) + +The `rig::transcription` module provides the `TranscriptionModel` trait for transcribing audio to text. + +### Core Trait + +```rust +pub trait TranscriptionModel: + Clone + + WasmCompatSend + + WasmCompatSync { + type Response: WasmCompatSend + WasmCompatSync; + type Client; + + // Required methods + fn make(client: &Self::Client, model: impl Into) -> Self; + fn transcription( + &self, + request: TranscriptionRequest, + ) -> impl Future, TranscriptionError>> + WasmCompatSend; + + // Provided method + fn transcription_request(&self) -> TranscriptionRequestBuilder { ... 
} +} +``` + +### Request Building + +```rust +use rig::transcription::Transcription; + +let audio_data: Vec = std::fs::read("audio.mp3")?; + +let response = model + .transcription_request(audio_data) + .language("en") + .send() + .await?; + +println!("Transcription: {}", response.text); +``` + +### TranscriptionResponse + +```rust +pub struct TranscriptionResponse { + /// The transcribed text + pub text: String, + /// The raw provider response + pub raw_response: serde_json::Value, +} +``` + +## Provider Support + +Not all providers support all media types. Here is a summary of current support: + +| Provider | Image Generation | Audio Generation | Transcription | +| --------------- | ---------------- | ---------------- | ------------- | +| OpenAI | Yes (DALL-E) | Yes (TTS) | Yes (Whisper) | +| Other providers | Varies | Varies | Varies | + +Check the individual [provider documentation](../integrations/model_providers.mdx) for specific model support. + +## Client Trait Integration + +These capabilities integrate with the provider client system. Use the corresponding client traits to create models: + +```rust +use rig::client::CompletionClient; +// Image generation, audio generation, and transcription are accessed +// through provider-specific methods on the client. + +let openai = openai::Client::from_env(); + +// Completion model +let gpt4 = openai.completion_model("gpt-4o"); + +// Embedding model +let embed = openai.embedding_model("text-embedding-3-small"); + +// Image generation model (requires `image` feature) +let dalle = openai.image_generation_model("dall-e-3"); + +// Audio generation model (requires `audio` feature) +let tts = openai.audio_generation_model("tts-1"); + +// Transcription model +let whisper = openai.transcription_model("whisper-1"); +``` + +## Best Practices + +1. **Feature Flags**: Only enable the feature flags you need (`image`, `audio`) to minimize compile times and binary size. + +2. 
**Error Handling**: Each media type has its own error type (`ImageGenerationError`, `AudioGenerationError`, `TranscriptionError`). Handle them appropriately. + +3. **Large Payloads**: Audio and image data can be large. Consider streaming where possible and be mindful of memory usage. + +4. **Model Selection**: Different models within the same provider may have different capabilities, pricing, and quality. Refer to provider documentation for guidance. + +## See Also + +- [Completion](./completion.mdx) -- Text completion +- [Provider Clients](./provider_clients.mdx) -- Provider client capabilities + +
+ + + + + + diff --git a/pages/docs/concepts/provider_clients.mdx b/pages/docs/concepts/provider_clients.mdx index 1b97f8d..102f8cf 100644 --- a/pages/docs/concepts/provider_clients.mdx +++ b/pages/docs/concepts/provider_clients.mdx @@ -3,62 +3,163 @@ title: Clients description: This section contains the concepts for Rig. --- -# Model provider clients in Rig +import { Cards } from "nextra/components"; + +# Model Provider Clients in Rig A provider client (or a model provider client) is an entity represented as a struct that can create clients for different types of LLMs or models, depending on the features supported by a given model provider. -Since the introduction of `rig::client::DynClientBuilder` (`rig` v0.13), clients now must implement the `rig::client::ProviderClient` trait. The trait signature can be found below: +## Client Architecture (v0.31.0) + +In v0.31.0, the provider client system has been significantly redesigned around a generic `Client` struct and a capability-based type system. + +### The `Client` Struct + +At the core is `rig::client::Client`, a generic struct parameterized by: + +- `Ext`: A provider extension (implementing the `Provider` trait) that defines provider-specific behavior +- `H`: An API key type (implementing the `ApiKey` trait) ```rust -pub trait ProviderClient: - AsCompletion + AsTranscription + AsEmbeddings + AsImageGeneration + AsAudioGeneration + Debug -{ - /// Create a client from the process's environment. - /// Panics if an environment is improperly configured. - fn from_env() -> Self - where - Self: Sized; +use rig::providers::openai; + +// Create a client from environment variables +let client = openai::Client::from_env(); + +// Or with an explicit API key +let client = openai::Client::new("your-api-key"); +``` + +### Provider Trait + +The `Provider` trait abstracts over provider-specific extensions and specifically handles building out the client with the provider extension. 
URL construction and custom instantiation are added as provided methods, should you want to implement your own version of this. + +```rust +pub trait Provider: Sized { + type Builder: ProviderBuilder; + + const VERIFY_PATH: &'static str; + + // Required method + fn build( + builder: &ClientBuilder::ApiKey, H>, + ) -> Result; +} +``` + +Each provider module (e.g., `rig::providers::openai`, `rig::providers::anthropic`) implements this trait. + +### ProviderBuilder Trait + +The `ProviderBuilder` trait provides a builder pattern for configuring providers: + +```rust +pub trait ProviderBuilder { + type Output: Provider; + type ApiKey; + + const BASE_URL: &'static str; - // .. there are other methods here, but as they already have a default implementation - // for the most part we don't need to concern ourselves with them + // Provided method + fn finish( + &self, + builder: ClientBuilder, + ) -> Result> { ... } } ``` -`ProviderClient` currently requires the following traits: +### Capabilities System -- `Debug` -- `AsCompletion` (the trait for creating a completion model client; additionally offers methods for agents and JSON extractors) -- `AsTranscription` (the trait for creating a transcription model client) -- `AsEmbedding` (the trait for creating a embedding model client) -- `AsImageGeneration` (the trait for creating an image generation model client) -- `AsAudioGeneration` (the trait for creating an audio generation model client) +Rig introduces a type-level capabilities system via the `Capabilities` and `Capability` traits. This allows compile-time checking of what a provider supports: -Each of the above traits (besides `Debug` which is a standard trait) is relatively easy to implement. 
+```rust +pub trait Capabilities { + type Completion: Capability; + type Embeddings: Capability; + type Transcription: Capability; + type ModelListing: Capability; + type ImageGeneration: Capability; + type AudioGeneration: Capability; +} + +pub trait Capability { + const CAPABLE: bool; +} +``` -While this trait is slightly confusing, your provider does not have to actually have to support everything listed. Fortunately, there is a macro called `impl_conversion_traits` which allows us to auto-implement whichever traits a given provider doesn't support by simply returning None. +### ProviderClient Trait -An example of this can be found below: +The `ProviderClient` trait provides a unified interface for creating clients: ```rust -// note: when adding a model to the rig-core codebase, `rig` should be `crate` -rig::impl_conversion_traits!( - AsEmbeddings, - AsTranscription, - AsImageGeneration, - AsAudioGeneration for Client -); +pub trait ProviderClient { + type Input; + + /// Create a client from the process's environment. + fn from_env() -> Self where Self: Sized; + + fn from_val(input: Self::Input) -> Self; +} ``` -Because this returns None, if you attempt to create an embedding model client (for example) from this particular Client using `rig::client::DynClientBuilder`, it will simply return None rather than a model. 
+### Supported Capabilities + +Provider clients can support any combination of the following: + +| Capability | Trait | Description | +| ---------------- | ----------------------- | ------------------------------------------------------------- | +| Text Completion | `CompletionClient` | Create completion models for text generation | +| Embeddings | `EmbeddingsClient` | Create embedding models for vector representations | +| Transcription | `TranscriptionClient` | Create transcription models for speech-to-text | +| Image Generation | `ImageGenerationClient` | Create models for image generation (requires `image` feature) | +| Audio Generation | `AudioGenerationClient` | Create models for text-to-speech (requires `audio` feature) | +| Model Listing | `ModelListingClient` | List available models from the provider | +| Verification | `VerifyClient` | Verify client credentials and connectivity | + +### Auth Types + +v0.31.0 provides typed authentication: + +- `BearerAuth`: API key inserted as a bearer token in request headers +- `NeedsApiKey`: Marker indicating the provider requires an API key +- `Nothing`: Marker for providers that don't need an API key (e.g., local Ollama) + +## Creating Models + +Once you have a client, create models using the corresponding client trait methods: + +```rust +use rig::client::CompletionClient; + +let openai = openai::Client::from_env(); + +// Create a completion model +let model = openai.completion_model("gpt-5.2"); + +// Create an embedding model +let embedding_model = openai.embedding_model("text-embedding-3-small"); + +// Create an agent (shorthand for completion_model + AgentBuilder) +let agent = openai.agent("gpt-5.2") + .preamble("You are a helpful assistant.") + .build(); +``` -## How to write your client implementation +## How to Write a Custom Provider -To implement a given trait, you simply need to implement the corresponding client trait. 
For example - the trait for `AsCompletion` is `rig::client::completion::CompletionClient`. Because `AsCompletion` is already implemented for `T: Clone` (meaning every single type), as long as you're able to derive Clone for your client, `AsCompletion` will automatically be implemented - which automatically satisfies the relevant requirement for `ProviderClient`. This is the same across all the other traits. +To implement a custom provider, you need to: -As Rig is currently an unstable crate (SemVer dictates that non-stable crates must stay at v0.x), this page may be updated in accordance with the state of the crate. +1. Implement the `Provider` trait for your extension type +2. Implement `CompletionModel` for your completion model type +3. Optionally implement `EmbeddingModel`, `TranscriptionModel`, etc. +4. Implement the relevant client traits (`CompletionClient`, `EmbeddingsClient`, etc.) -## Using the dynamic client builder +For a detailed walkthrough, see the [Write Your Own Provider](../../guides/extension/write_your_own_provider.mdx) guide. -If you're running a multi-agent service, you may find yourself needing to pull from more than one provider at any given time. For this type of use case, the `rig::client::DynClientBuilder` may prove very useful. +
-Interested in code snippets? Check out [our GitHub repo example](https://github.com/0xPlaygrounds/rig/blob/main/rig-core/examples/dyn_client.rs) where we create both an OpenAI and Anthropic client, then try to prompt them both and print the result. + diff --git a/pages/docs/concepts/streaming.mdx b/pages/docs/concepts/streaming.mdx new file mode 100644 index 0000000..498e5b5 --- /dev/null +++ b/pages/docs/concepts/streaming.mdx @@ -0,0 +1,215 @@ +--- +title: Streaming +description: Streaming completions and chat responses in Rig. +--- + +import { Cards } from "nextra/components"; + +# Streaming in Rig + +Rig provides full support for streaming completions, allowing you to process LLM responses incrementally as they are generated rather than waiting for the entire response. This is essential for building responsive user interfaces and handling long-form content. + +## Core Traits + +The streaming system mirrors the non-streaming completion traits: + +| Non-Streaming | Streaming | Description | +| ------------- | --------------------- | ---------------------------------------- | +| `Prompt` | `StreamingPrompt` | One-shot streaming prompt | +| `Chat` | `StreamingChat` | Streaming chat with history | +| `Completion` | `StreamingCompletion` | Low-level streaming completion interface | + +All of these traits live in the `rig::streaming` module. + +### StreamingPrompt + +The simplest streaming interface -- sends a prompt and returns a stream of chunks: + +```rust +use rig::streaming::StreamingPrompt; +use futures::StreamExt; + +let agent = openai.agent("gpt-4o") + .preamble("You are a helpful assistant.") + .build(); + +let mut stream = agent.stream_prompt("Tell me a story").await?; + +while let Some(chunk) = stream.next().await { + match chunk? 
{ + StreamedAssistantContent::Text(text) => print!("{text}"), + StreamedAssistantContent::ToolCallDelta(delta) => { /* handle tool call deltas */ } + StreamedAssistantContent::FinalUsage(usage) => { /* handle final usage stats */ } + } +} +``` + +### StreamingChat + +Streaming with conversation history: + +```rust +use rig::streaming::StreamingChat; + +let mut stream = agent.stream_chat("Continue the story", chat_history).await?; +``` + +### StreamingCompletion + +Low-level streaming interface with full request customization: + +```rust +use rig::streaming::StreamingCompletion; + +let builder = agent.stream_completion("prompt", chat_history).await?; +let response = builder + .temperature(0.9) + .send() + .await?; +``` + +## Response Types + +### StreamedAssistantContent + +Each chunk from the stream is a `StreamedAssistantContent`: + +```rust +pub enum StreamedAssistantContent { + /// A text delta (partial text content) + Text(String), + /// A tool call delta (partial tool call data) + ToolCallDelta(ToolCallDeltaContent), + /// Final usage statistics sent at the end of the stream + FinalUsage(Usage), +} +``` + +### ToolCallDeltaContent + +Tool calls are streamed as deltas: + +```rust +pub enum ToolCallDeltaContent { + /// The name of the tool being called + Name(String), + /// Partial argument data + Arguments(String), +} +``` + +### StreamingCompletionResponse + +The full streaming response wraps the inner stream and provides access to the final message: + +```rust +pub struct StreamingCompletionResponse { + /// The inner stream of chunks + pub inner: Pin> + Send>>, + /// Populated after the stream completes: the full message + pub message: Option, + /// Populated after the stream completes: the raw response + pub response: Option, +} +``` + +## Multi-Turn Streaming + +When using agents with tools, multi-turn streaming produces `MultiTurnStreamItem` events: + +```rust +pub enum MultiTurnStreamItem { + /// Streamed user content (e.g., tool results sent back to the 
model) + UserContent(StreamedUserContent), + /// Streamed assistant content (text, tool call deltas, etc.) + AssistantContent(StreamedAssistantContent), +} +``` + +This allows you to observe the full agent loop in real-time, including tool calls and their results. + +## Streaming to stdout + +Rig provides a convenience function for the common case of printing a stream to the terminal: + +```rust +use rig::streaming::stream_to_stdout; + +let stream = agent.stream_prompt("Hello!").await?; +stream_to_stdout(stream).await?; +``` + +The `stream_to_stdout` function prints text chunks as they arrive and ignores tool call deltas (since those are typically not meaningful to display directly). + +## Pause Control + +The `PauseControl` struct allows you to pause and resume a streaming response: + +```rust +use rig::streaming::PauseControl; + +let pause = PauseControl::new(); +let pause_clone = pause.clone(); + +// In another task: +pause_clone.pause(); +// ... +pause_clone.resume(); +``` + +This is useful for implementing user-controlled streaming in interactive applications. + +## Example: Streaming Agent + +```rust +use rig::providers::openai; +use rig::streaming::StreamingPrompt; +use futures::StreamExt; + +#[tokio::main] +async fn main() -> Result<(), anyhow::Error> { + let openai = openai::Client::from_env(); + + let agent = openai.agent("gpt-5.2") + .preamble("You are a storyteller.") + .temperature(0.9) + .build(); + + let mut stream = agent.stream_prompt("Tell me a short story about a robot.").await?; + + while let Some(chunk) = stream.next().await { + match chunk? { + StreamedAssistantContent::Text(text) => { + print!("{text}"); + } + _ => {} // Handle other content types as needed + } + } + println!(); + + Ok(()) +} +``` + +## Best Practices + +1. **Error Handling**: Each chunk in the stream can fail independently. Always handle errors per-chunk rather than expecting the entire stream to succeed or fail atomically. + +2. 
**Buffering**: For tool calls, buffer the deltas until the complete tool call is received before executing the tool.
+
+3. **Backpressure**: Use `PauseControl` or standard stream backpressure mechanisms when the consumer cannot keep up with the producer.
+
+4. **Token Usage**: The `FinalUsage` event (when available) is emitted at the end of the stream and provides token counts for the entire completion, not per chunk.
+
+## See Also
+
+- [Completion](./completion.mdx) -- Non-streaming completion traits
+- [Agents](./agent.mdx) -- Agent system with streaming support
+
+ + diff --git a/pages/docs/concepts/tools.mdx b/pages/docs/concepts/tools.mdx index 3c39671..8310e85 100644 --- a/pages/docs/concepts/tools.mdx +++ b/pages/docs/concepts/tools.mdx @@ -134,12 +134,18 @@ While implementing your own tools, you may find that using the raw `serde_json:: Fortunately, the `schemars` crate has a great way to solve this: with a derive macro (and helpers)! By deriving the `schemars::JsonSchema` macro, we can use description helpers to write our JSON schema instead of having to write it all manually. +> **Note:** As of `rig-core` v0.31.0, `schemars` **v1.0** is required. If you were previously using `schemars` v0.8, note the following migration changes: +> +> - The `#[schemars(description = "...")]` attribute is replaced by `#[doc]` comments (i.e. standard `///` doc comments) for field descriptions. +> - The `schema_for!` macro is replaced by `schemars::schema_for!` or `T::json_schema()`. +> - Add `schemars = "1"` to your `Cargo.toml` dependencies. + ```rust #[derive(Deserialize, Serialize, schemars::JsonSchema)] struct OperationArgs { - #[schemars(description = "The first number to add.")] + /// The first number to add. x: i32, - #[schemars(description = "The first number to add.")] + /// The second number to add. y: i32, } ``` @@ -154,20 +160,18 @@ impl Tool for Adder { // .. other trait impl parts here async fn definition(&self, _prompt: String) -> ToolDefinition { // this should technically never error out as it's generated from set codegen - let parameters = serde_json::to_value(schema_for!(OperationArgs)).unwrap(); + let parameters = schemars::schema_for!(OperationArgs); ToolDefinition { name: "add".to_string(), description: "Add x and y together".to_string(), - parameters, + parameters: serde_json::to_value(parameters).unwrap(), } } // .. other trait impl parts here } ``` -Rig currently uses `schemars` v0.8.16. If you're encountering strange errors, you may want to double check the version of your `schemars` crate. 
- ### Tool Macros Often times if you just want to create simple tools and don't need any special particular implementation details, writing out the entire `impl Tool` block can be quite cumbersome. @@ -229,7 +233,7 @@ Tools can be added to agents in two ways: ```rust let agent = client - .agent("gpt-4") + .agent("gpt-5.2") .preamble("You are a calculator.") .tool(Adder) .tool(Subtract) @@ -240,7 +244,7 @@ let agent = client ```rust let agent = client - .agent("gpt-4") + .agent("gpt-5.2") .preamble("You are a calculator.") .dynamic_tools(2, vector_store_index, toolset) .build(); @@ -278,6 +282,6 @@ For more information on integrating tools with specific LLM providers, see the p diff --git a/pages/docs/integrations/vector_stores/in_memory.mdx b/pages/docs/integrations/vector_stores/in_memory.mdx index 49b0c08..deaa2e2 100644 --- a/pages/docs/integrations/vector_stores/in_memory.mdx +++ b/pages/docs/integrations/vector_stores/in_memory.mdx @@ -17,7 +17,9 @@ The in-memory vector store is Rig's default vector store implementation, include - Automatic or custom document ID generation - Multiple embedding support per document - Cosine similarity search +- Configurable index strategies (brute-force or LSH) - Flexible document schema support +- Automatic `Tool` implementation for agent integration ## Implementation Details @@ -80,6 +82,57 @@ Memory layout example: - Maintains scores using ordered floating-point comparisons - Supports multiple embeddings per document with best-match selection +### Core Traits + +As of v0.31.0, the vector store system is built around these traits: + +- **`VectorStoreIndex`**: The primary trait for querying a vector store by similarity. Types implementing this trait automatically implement the `Tool` trait, meaning any vector store index can be used as an agent tool. +- **`InsertDocuments`**: Trait for inserting documents and their embeddings into a vector store (replaces the old `VectorStore` trait). 
+- **`VectorStoreIndexDyn`**: Type-erased version for dynamic dispatch scenarios.
+
+```rust
+/// Trait for querying a vector store by similarity.
+pub trait VectorStoreIndex: Send + Sync {
+    async fn top_n<T: DeserializeOwned + Send>(
+        &self,
+        request: VectorSearchRequest,
+    ) -> Result<Vec<(f64, String, T)>, VectorStoreError>;
+
+    async fn top_n_ids(
+        &self,
+        request: VectorSearchRequest,
+    ) -> Result<Vec<(f64, String)>, VectorStoreError>;
+}
+
+/// Trait for inserting documents and embeddings into a vector store.
+pub trait InsertDocuments: Send + Sync {
+    async fn add_documents(
+        &mut self,
+        documents: Vec<(String, OneOrMany<Embedding>)>,
+    ) -> Result<(), VectorStoreError>;
+}
+```
+
+### Index Strategies
+
+v0.31.0 introduces configurable index strategies via the `IndexStrategy` enum:
+
+- **Brute-force** (default): Linear scan with cosine similarity. Best for small datasets.
+- **LSH** (Locality-Sensitive Hashing): Available via the `rig::vector_store::lsh` module for approximate nearest-neighbor search on larger datasets.
+
+### Vector Store as a Tool
+
+Types implementing `VectorStoreIndex` automatically implement the `Tool` trait. This means you can add a vector store index directly as a tool to an agent:
+
+```rust
+let agent = openai.agent("gpt-4o")
+    .preamble("You can search a knowledge base.")
+    .tool(vector_store_index)
+    .build();
+```
+
+The tool will accept a search query and return matching documents as `VectorStoreOutput`.
+ ### Document Management Three ways to add documents: @@ -111,6 +164,50 @@ let store = InMemoryVectorStore::from_documents_with_id_f( ); ``` +## Querying with VectorSearchRequest + +As of v0.31.0, vector store queries are built using `VectorSearchRequest`, which supports filtering: + +```rust +use rig::vector_store::{VectorStoreIndex, VectorSearchRequest}; +use rig::vector_store::request::Filter; + +// Simple query +let results = index.top_n( + VectorSearchRequest::builder("search query", 5).build() +).await?; + +// Query with a score threshold +let results = index.top_n( + VectorSearchRequest::builder("search query", 5) + .threshold(0.7) + .build() +).await?; + +// Query with filters (for backends that support filtering) +let results = index.top_n( + VectorSearchRequest::builder("search query", 5) + .filter(Filter::eq("category", "science")) + .build() +).await?; +``` + +The `Filter` enum provides a backend-agnostic way to express filter conditions: + +```rust +pub enum Filter { + Eq(String, serde_json::Value), + Ne(String, serde_json::Value), + Gt(String, serde_json::Value), + Lt(String, serde_json::Value), + And(Vec), + Or(Vec), + // ... other variants +} +``` + +Backends can implement the `SearchFilter` trait to translate these canonical filters into their native query language. + ## Special Considerations ### 1. Memory Usage @@ -123,8 +220,9 @@ let store = InMemoryVectorStore::from_documents_with_id_f( - Fast lookups using HashMap for document retrieval - Efficient top-N selection using BinaryHeap -- O(n) complexity for vector similarity search +- O(n) complexity for vector similarity search (brute-force strategy) - Best for small to medium-sized datasets +- Consider LSH indexing for larger datasets ### 3. 
Document Storage @@ -138,29 +236,33 @@ let store = InMemoryVectorStore::from_documents_with_id_f( use rig::providers::openai; use rig::embeddings::EmbeddingsBuilder; use rig::vector_store::in_memory_store::InMemoryVectorStore; +use rig::vector_store::{VectorStoreIndex, VectorSearchRequest, InsertDocuments}; #[tokio::main] async fn main() -> Result<(), anyhow::Error> { + let openai = openai::Client::from_env(); + let model = openai.embedding_model(openai::TEXT_EMBEDDING_ADA_002); + // Initialize store let mut store = InMemoryVectorStore::default(); // Create embeddings - let embeddings = EmbeddingsBuilder::new(model) + let embeddings = EmbeddingsBuilder::new(model.clone()) .simple_document("doc1", "First document content") .simple_document("doc2", "Second document content") .build() .await?; - // Add documents to store - store.add_documents(embeddings); + // Add documents to store (uses InsertDocuments trait) + store.add_documents(embeddings).await?; // Create vector store index let index = store.index(model); - // Search similar documents - let results = store - .top_n::("search query", 5) - .await?; + // Search similar documents using VectorSearchRequest + let results = index.top_n( + VectorSearchRequest::builder("search query", 5).build() + ).await?; Ok(()) } @@ -172,50 +274,42 @@ async fn main() -> Result<(), anyhow::Error> { The core search implementation: -```rust filename=rig-core/src/vector_store/in_memory_store.rs [67:103] - - /// Implement vector search on [InMemoryVectorStore]. - /// To be used by implementations of [VectorStoreIndex::top_n] and [VectorStoreIndex::top_n_ids] methods. 
- fn vector_search(&self, prompt_embedding: &Embedding, n: usize) -> EmbeddingRanking { - // Sort documents by best embedding distance - let mut docs = BinaryHeap::new(); - - for (id, (doc, embeddings)) in self.embeddings.iter() { - // Get the best context for the document given the prompt - if let Some((distance, embed_doc)) = embeddings - .iter() - .map(|embedding| { - ( - OrderedFloat(embedding.cosine_similarity(prompt_embedding, false)), - &embedding.document, - ) - }) - .max_by(|a, b| a.0.cmp(&b.0)) - { - docs.push(Reverse(RankingItem(distance, id, doc, embed_doc))); - }; - +```rust filename=rig-core/src/vector_store/in_memory_store.rs +/// Implement vector search on [InMemoryVectorStore]. +fn vector_search(&self, prompt_embedding: &Embedding, n: usize) -> EmbeddingRanking { + // Sort documents by best embedding distance + let mut docs = BinaryHeap::new(); + + for (id, (doc, embeddings)) in self.embeddings.iter() { + // Get the best context for the document given the prompt + if let Some((distance, embed_doc)) = embeddings + .iter() + .map(|embedding| { + ( + OrderedFloat(embedding.cosine_similarity(prompt_embedding, false)), + &embedding.document, + ) + }) + .max_by(|a, b| a.0.cmp(&b.0)) + { + docs.push(Reverse(RankingItem(distance, id, doc, embed_doc))); + }; + } + + // Return top-n results + // ... 
+} ``` ### Error Handling -The vector store operations can produce several error types: +Vector store operations can produce errors via the `VectorStoreError` enum: - `EmbeddingError`: Issues with embedding generation - `JsonError`: Document serialization/deserialization errors - `DatastoreError`: General storage operations errors - `MissingIdError`: When a requested document ID doesn't exist - -Example error handling: - -```rust -match store.get_document::("doc1") { - Ok(Some(doc)) => println!("Found document: {:?}", doc), - Ok(None) => println!("Document not found"), - Err(VectorStoreError::JsonError(e)) => println!("Failed to deserialize: {}", e), - Err(e) => println!("Other error: {}", e), -} -``` +- `FilterError`: When constructing or converting filter expressions ## Best Practices @@ -235,6 +329,7 @@ match store.get_document::("doc1") { - Pre-allocate store capacity when possible - Batch document additions - Use appropriate embedding dimensions + - Consider LSH indexing for datasets exceeding a few thousand documents ## Limitations @@ -246,8 +341,7 @@ match store.get_document::("doc1") { 2. **Features**: - - No built-in indexing optimizations - - No metadata filtering + - Filtering support is basic compared to dedicated vector databases - No automatic persistence 3. **Production Use**: @@ -255,7 +349,7 @@ match store.get_document::("doc1") { - Consider cloud-based alternatives for production - No built-in backup/recovery mechanisms -For production deployments, consider using one of Rig's other vector store integrations (MongoDB, LanceDB, Neo4j, or Qdrant) which offer persistence and better scalability. +For production deployments, consider using one of Rig's other vector store integrations (MongoDB, LanceDB, Neo4j, Qdrant, SQLite, SurrealDB, Milvus, ScyllaDB, or AWS S3Vectors) which offer persistence and better scalability. 
## Thread Safety @@ -265,18 +359,19 @@ For concurrent write access, consider wrapping the store in a synchronization pr ## Comparison with Other Vector Stores -| Feature | In-Memory | MongoDB | Qdrant | LanceDB | -| ------------------ | --------- | ------- | ------ | ------- | -| Persistence | ❌ | ✅ | ✅ | ✅ | -| Horizontal Scaling | ❌ | ✅ | ✅ | ❌ | -| Setup Complexity | Low | Medium | Medium | Low | -| Memory Usage | High | Low | Medium | Low | -| Query Speed | Fast | Medium | Fast | Fast | +| Feature | In-Memory | MongoDB | Qdrant | LanceDB | SQLite | +| ------------------ | --------- | ------- | ------ | ------- | ------ | +| Persistence | No | Yes | Yes | Yes | Yes | +| Horizontal Scaling | No | Yes | Yes | No | No | +| Setup Complexity | Low | Medium | Medium | Low | Low | +| Memory Usage | High | Low | Medium | Low | Low | +| Query Speed | Fast | Medium | Fast | Fast | Medium | +| Filtering | Basic | Rich | Rich | Rich | SQL |