Skip to content

Commit 7cd2d7e

Browse files
committed
AI model selection based on preferences
1 parent d8454b5 commit 7cd2d7e

2 files changed

Lines changed: 112 additions & 24 deletions

File tree

.changeset/fiery-times-create.md

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@plotday/sdk": minor
3+
---
4+
5+
Changed: BREAKING: Use ModelPreferences instead of an explicit AIModel in AI.prompt(). This supports BYOK and user preferences.

sdk/src/tools/ai.ts

Lines changed: 107 additions & 24 deletions
Original file line number · Diff line number · Diff line change
@@ -1,6 +1,6 @@
11
import type { Static, TSchema } from "typebox";
22

3-
import { ITool, type Tools } from "..";
3+
import { ITool } from "..";
44

55
/**
66
* Built-in tool for prompting Large Language Models (LLMs).
@@ -43,7 +43,7 @@ import { ITool, type Tools } from "..";
4343
* });
4444
*
4545
* const response = await this.ai.prompt({
46-
* model: AIModel.GPT_4O_MINI,
46+
* model: { speed: "fast", cost: "medium" },
4747
* system: "Classify emails into categories: work, personal, spam, or promotional.",
4848
* prompt: `Categorize this email: ${emailContent}`,
4949
* outputSchema: schema
@@ -54,7 +54,7 @@ import { ITool, type Tools } from "..";
5454
*
5555
* async generateResponse(emailContent: string) {
5656
* const response = await this.ai.prompt({
57-
* model: AIModel.GPT_4O_MINI,
57+
* model: AIModel.GPT_5_MINI,
5858
* system: "Generate professional email responses that are helpful and concise.",
5959
* prompt: `Write a response to: ${emailContent}`
6060
* });
@@ -76,24 +76,31 @@ export abstract class AI extends ITool {
7676
*
7777
* @example
7878
* ```typescript
79-
* // Simple text generation
79+
* // Simple text generation with specific model
8080
* const response = await ai.prompt({
81-
* model: AIModel.GPT_4O_MINI,
81+
* model: AIModel.GPT_5_MINI,
8282
* prompt: "Explain quantum computing in simple terms"
8383
* });
8484
* console.log(response.text);
8585
*
86+
* // Using model preferences for automatic selection
87+
* const response = await ai.prompt({
88+
* model: { speed: "fast", cost: "low" },
89+
* prompt: "Summarize this text..."
90+
* });
91+
* console.log(response.text);
92+
*
8693
* // With system instructions
8794
* const response = await ai.prompt({
88-
* model: AIModel.CLAUDE_35_SONNET,
95+
* model: { speed: "capable", cost: "high" },
8996
* system: "You are a helpful physics tutor.",
9097
* prompt: "Explain quantum entanglement"
9198
* });
9299
* console.log(response.text);
93100
*
94101
* // Multi-turn conversation
95102
* const response = await ai.prompt({
96-
* model: AIModel.CLAUDE_35_SONNET,
103+
* model: AIModel.CLAUDE_SONNET_45,
97104
* messages: [
98105
* { role: "user", content: "What is 2+2?" },
99106
* { role: "assistant", content: "2+2 equals 4." },
@@ -134,51 +141,127 @@ export abstract class AI extends ITool {
134141
* ```
135142
*/
136143
abstract prompt<TOOLS extends AIToolSet, SCHEMA extends TSchema = never>(
137-
_request: AIRequest<TOOLS, SCHEMA>,
144+
_request: AIRequest<TOOLS, SCHEMA>
138145
): Promise<AIResponse<TOOLS, SCHEMA>>;
139146
}
140147

148+
/**
149+
* Model preferences for selecting an AI model based on performance and cost requirements.
150+
* This allows Plot to match those preferences with user preferences (such as preferred or
151+
* disallowed providers), as well as availability of newer and better models.
152+
*
153+
* @example
154+
* ```typescript
155+
* // Fast and cheap - uses Workers AI models like Llama 3.2 1B
156+
* const response = await ai.prompt({
157+
* model: { speed: "fast", cost: "low" },
158+
* prompt: "Summarize this in one sentence: ..."
159+
* });
160+
*
161+
* // Balanced performance - uses GPT-5 Mini or Gemini 2.5 Flash
162+
* const response = await ai.prompt({
163+
* model: { speed: "balanced", cost: "medium" },
164+
* prompt: "Analyze this data..."
165+
* });
166+
*
167+
* // Most capable - uses Claude Sonnet 4.5 or Opus 4.1
168+
* const response = await ai.prompt({
169+
* model: { speed: "capable", cost: "high" },
170+
* prompt: "Solve this complex reasoning problem..."
171+
* });
172+
*
173+
* // Request a specific model with a hint
174+
* const response = await ai.prompt({
175+
* model: { speed: "balanced", cost: "medium", hint: AIModel.CLAUDE_SONNET_45 },
176+
* prompt: "..."
177+
* });
178+
* ```
179+
*/
180+
export type ModelPreferences = {
181+
/**
182+
* Desired speed tier:
183+
* - "fast": Optimized for low latency and quick responses
184+
* - "balanced": Good balance of speed and capability
185+
* - "capable": Maximum reasoning and problem-solving ability
186+
*/
187+
speed: "fast" | "balanced" | "capable";
188+
189+
/**
190+
* Desired cost tier:
191+
* - "low": Minimal cost, often using Workers AI models (free/very cheap)
192+
* - "medium": Moderate pricing for good performance
193+
* - "high": Premium pricing for best-in-class models
194+
*/
195+
cost: "low" | "medium" | "high";
196+
197+
/**
198+
* Optional hint to suggest a specific model. The system will use this
199+
* model if possible, but may override it based on user preferences.
200+
*/
201+
hint?: AIModel;
202+
};
203+
141204
/**
142205
* Supported AI models available through Cloudflare AI Gateway and Workers AI.
143206
*
144207
* Models are organized by provider:
145208
* - **OpenAI**: Latest GPT models via AI Gateway
146209
* - **Anthropic**: Claude models via AI Gateway (prefix with "anthropic/")
147210
* - **Google**: Gemini models via AI Gateway (prefix with "google-ai-studio/")
148-
* - **Workers AI**: Models running on Cloudflare's network
211+
* - **Workers AI**: Models running on Cloudflare's network (free/low cost)
149212
*/
150213
export enum AIModel {
151-
// OpenAI models
214+
// OpenAI models - Latest GPT and reasoning models
215+
GPT_5 = "openai/gpt-5",
216+
GPT_5_PRO = "openai/gpt-5-pro",
217+
GPT_5_MINI = "openai/gpt-5-mini",
218+
GPT_5_NANO = "openai/gpt-5-nano",
152219
GPT_4O = "openai/gpt-4o",
153220
GPT_4O_MINI = "openai/gpt-4o-mini",
154-
GPT_4_TURBO = "openai/gpt-4-turbo",
155-
GPT_35_TURBO = "openai/gpt-3.5-turbo",
221+
O3 = "openai/o3",
222+
O3_MINI = "openai/o3-mini",
156223

157-
// Anthropic models
158-
CLAUDE_SONNET_4_5 = "anthropic/claude-sonnet-4-5",
159-
CLAUDE_35_SONNET = "anthropic/claude-3-5-sonnet",
160-
CLAUDE_3_OPUS = "anthropic/claude-3-opus",
224+
// Anthropic models - Claude 4.x and 3.7 series
225+
CLAUDE_SONNET_45 = "anthropic/claude-sonnet-4-5",
226+
CLAUDE_HAIKU_45 = "anthropic/claude-haiku-4-5",
227+
CLAUDE_OPUS_41 = "anthropic/claude-opus-4-1",
228+
CLAUDE_37_SONNET = "anthropic/claude-3-7-sonnet-latest",
161229

162-
// Google models
230+
// Google models - Gemini 2.x series
231+
GEMINI_25_PRO = "google/gemini-2.5-pro",
163232
GEMINI_25_FLASH = "google/gemini-2.5-flash",
233+
GEMINI_25_FLASH_LITE = "google/gemini-2.5-flash-lite",
234+
GEMINI_20_FLASH = "google/gemini-2.0-flash",
235+
GEMINI_20_FLASH_LITE = "google/gemini-2.0-flash-lite",
164236

165-
// Cloudflare Workers AI models
237+
// Cloudflare Workers AI models - Free/low-cost models running on Cloudflare's network
238+
LLAMA_4_SCOUT_17B = "meta/llama-4-scout-17b-16e-instruct",
166239
LLAMA_33_70B = "meta/llama-3.3-70b-instruct-fp8-fast",
167-
LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fast",
168-
MISTRAL_7B = "meta/mistral-7b-instruct-v0.2",
240+
LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fp8",
241+
LLAMA_32_1B = "meta/llama-3.2-1b-instruct",
242+
DEEPSEEK_R1_32B = "deepseek-ai/deepseek-r1-distill-qwen-32b",
169243
}
170244

171245
/**
172246
* Request parameters for AI text generation, matching Vercel AI SDK's generateText() function.
173247
*/
174248
export interface AIRequest<
175249
TOOLS extends AIToolSet,
176-
SCHEMA extends TSchema = never,
250+
SCHEMA extends TSchema = never
177251
> {
178252
/**
179253
* The AI model to use for generation.
254+
* Can be either a specific model from the AIModel enum or preferences (speed/cost tiers).
255+
*
256+
* @example
257+
* // Using a specific model
258+
* model: AIModel.GPT_5_MINI
259+
*
260+
* @example
261+
* // Using preferences
262+
* model: { speed: "fast", cost: "low" }
180263
*/
181-
model: AIModel;
264+
model: AIModel | ModelPreferences;
182265

183266
/**
184267
* System instructions to guide the model's behavior.
@@ -240,7 +323,7 @@ export interface AIRequest<
240323
*/
241324
export interface AIResponse<
242325
TOOLS extends AIToolSet,
243-
SCHEMA extends TSchema = never,
326+
SCHEMA extends TSchema = never
244327
> {
245328
/**
246329
* The generated text.
@@ -656,7 +739,7 @@ export type AITool<PARAMETERS extends ToolParameters = any, RESULT = any> = {
656739
*/
657740
execute?: (
658741
args: inferParameters<PARAMETERS>,
659-
options: ToolExecutionOptions,
742+
options: ToolExecutionOptions
660743
) => PromiseLike<RESULT>;
661744
} & (
662745
| {

0 commit comments

Comments (0)