11import type { Static , TSchema } from "typebox" ;
22
3- import { ITool , type Tools } from ".." ;
3+ import { ITool } from ".." ;
44
55/**
66 * Built-in tool for prompting Large Language Models (LLMs).
@@ -43,7 +43,7 @@ import { ITool, type Tools } from "..";
4343 * });
4444 *
4545 * const response = await this.ai.prompt({
46- * model: AIModel.GPT_4O_MINI ,
46+ * model: { speed: "fast", cost: "medium" } ,
4747 * system: "Classify emails into categories: work, personal, spam, or promotional.",
4848 * prompt: `Categorize this email: ${emailContent}`,
4949 * outputSchema: schema
@@ -54,7 +54,7 @@ import { ITool, type Tools } from "..";
5454 *
5555 * async generateResponse(emailContent: string) {
5656 * const response = await this.ai.prompt({
57- * model: AIModel.GPT_4O_MINI ,
57+ * model: AIModel.GPT_5_MINI ,
5858 * system: "Generate professional email responses that are helpful and concise.",
5959 * prompt: `Write a response to: ${emailContent}`
6060 * });
@@ -76,24 +76,31 @@ export abstract class AI extends ITool {
7676 *
7777 * @example
7878 * ```typescript
79- * // Simple text generation
79+ * // Simple text generation with specific model
8080 * const response = await ai.prompt({
81- * model: AIModel.GPT_4O_MINI ,
81+ * model: AIModel.GPT_5_MINI ,
8282 * prompt: "Explain quantum computing in simple terms"
8383 * });
8484 * console.log(response.text);
8585 *
86+ * // Using model preferences for automatic selection
87+ * const response = await ai.prompt({
88+ * model: { speed: "fast", cost: "low" },
89+ * prompt: "Summarize this text..."
90+ * });
91+ * console.log(response.text);
92+ *
8693 * // With system instructions
8794 * const response = await ai.prompt({
88- * model: AIModel.CLAUDE_35_SONNET ,
95+ * model: { speed: "capable", cost: "high" } ,
8996 * system: "You are a helpful physics tutor.",
9097 * prompt: "Explain quantum entanglement"
9198 * });
9299 * console.log(response.text);
93100 *
94101 * // Multi-turn conversation
95102 * const response = await ai.prompt({
96- * model: AIModel.CLAUDE_35_SONNET ,
103+ * model: AIModel.CLAUDE_SONNET_45 ,
97104 * messages: [
98105 * { role: "user", content: "What is 2+2?" },
99106 * { role: "assistant", content: "2+2 equals 4." },
@@ -134,51 +141,127 @@ export abstract class AI extends ITool {
134141 * ```
135142 */
136143 abstract prompt < TOOLS extends AIToolSet , SCHEMA extends TSchema = never > (
137- _request : AIRequest < TOOLS , SCHEMA > ,
144+ _request : AIRequest < TOOLS , SCHEMA >
138145 ) : Promise < AIResponse < TOOLS , SCHEMA > > ;
139146}
140147
/**
 * Model preferences for selecting an AI model based on performance and cost requirements.
 * Instead of pinning a specific model, callers describe what they need and Plot matches
 * those preferences against user preferences (such as preferred or disallowed providers)
 * as well as the availability of newer and better models.
 *
 * @example
 * ```typescript
 * // Fast and cheap - uses Workers AI models like Llama 3.2 1B
 * const response = await ai.prompt({
 *   model: { speed: "fast", cost: "low" },
 *   prompt: "Summarize this in one sentence: ..."
 * });
 *
 * // Balanced performance - uses GPT-5 Mini or Gemini 2.5 Flash
 * const response = await ai.prompt({
 *   model: { speed: "balanced", cost: "medium" },
 *   prompt: "Analyze this data..."
 * });
 *
 * // Most capable - uses Claude Sonnet 4.5 or Opus 4.1
 * const response = await ai.prompt({
 *   model: { speed: "capable", cost: "high" },
 *   prompt: "Solve this complex reasoning problem..."
 * });
 *
 * // Request a specific model with a hint
 * const response = await ai.prompt({
 *   model: { speed: "balanced", cost: "medium", hint: AIModel.CLAUDE_SONNET_45 },
 *   prompt: "..."
 * });
 * ```
 */
export type ModelPreferences = {
  /**
   * Desired speed tier:
   * - "fast": Optimized for low latency and quick responses
   * - "balanced": Good balance of speed and capability
   * - "capable": Maximum reasoning and problem-solving ability (may be slower)
   */
  speed: "fast" | "balanced" | "capable";

  /**
   * Desired cost tier:
   * - "low": Minimal cost, often using Workers AI models (free/very cheap)
   * - "medium": Moderate pricing for good performance
   * - "high": Premium pricing for best-in-class models
   */
  cost: "low" | "medium" | "high";

  /**
   * Optional hint to suggest a specific model. The system will use this
   * model if possible, but may override it based on user preferences
   * (e.g. a disallowed provider).
   */
  hint?: AIModel;
};
203+
/**
 * Supported AI models available through Cloudflare AI Gateway and Workers AI.
 *
 * Models are organized by provider:
 * - **OpenAI**: Latest GPT models via AI Gateway (prefix with "openai/")
 * - **Anthropic**: Claude models via AI Gateway (prefix with "anthropic/")
 * - **Google**: Gemini models via AI Gateway (prefix with "google/")
 * - **Workers AI**: Models running on Cloudflare's network (free/low cost)
 *
 * NOTE(review): values are gateway routing strings; verify each id against the
 * AI Gateway / Workers AI catalogs before adding new entries.
 */
export enum AIModel {
  // OpenAI models - Latest GPT and reasoning models
  GPT_5 = "openai/gpt-5",
  GPT_5_PRO = "openai/gpt-5-pro",
  GPT_5_MINI = "openai/gpt-5-mini",
  GPT_5_NANO = "openai/gpt-5-nano",
  GPT_4O = "openai/gpt-4o",
  GPT_4O_MINI = "openai/gpt-4o-mini",
  O3 = "openai/o3",
  O3_MINI = "openai/o3-mini",

  // Anthropic models - Claude 4.x and 3.7 series
  CLAUDE_SONNET_45 = "anthropic/claude-sonnet-4-5",
  CLAUDE_HAIKU_45 = "anthropic/claude-haiku-4-5",
  CLAUDE_OPUS_41 = "anthropic/claude-opus-4-1",
  CLAUDE_37_SONNET = "anthropic/claude-3-7-sonnet-latest",

  // Google models - Gemini 2.x series
  GEMINI_25_PRO = "google/gemini-2.5-pro",
  GEMINI_25_FLASH = "google/gemini-2.5-flash",
  GEMINI_25_FLASH_LITE = "google/gemini-2.5-flash-lite",
  GEMINI_20_FLASH = "google/gemini-2.0-flash",
  GEMINI_20_FLASH_LITE = "google/gemini-2.0-flash-lite",

  // Cloudflare Workers AI models - Free/low-cost models running on Cloudflare's network
  LLAMA_4_SCOUT_17B = "meta/llama-4-scout-17b-16e-instruct",
  LLAMA_33_70B = "meta/llama-3.3-70b-instruct-fp8-fast",
  LLAMA_31_8B = "meta/llama-3.1-8b-instruct-fp8",
  LLAMA_32_1B = "meta/llama-3.2-1b-instruct",
  DEEPSEEK_R1_32B = "deepseek-ai/deepseek-r1-distill-qwen-32b",
}
170244
171245/**
172246 * Request parameters for AI text generation, matching Vercel AI SDK's generateText() function.
173247 */
174248export interface AIRequest <
175249 TOOLS extends AIToolSet ,
176- SCHEMA extends TSchema = never ,
250+ SCHEMA extends TSchema = never
177251> {
178252 /**
179253 * The AI model to use for generation.
254+ * Can be either a specific model from the AIModel enum or preferences (speed/cost tiers).
255+ *
256+ * @example
257+ * // Using a specific model
258+ * model: AIModel.GPT_5_MINI
259+ *
260+ * @example
261+ * // Using preferences
262+ * model: { speed: "fast", cost: "low" }
180263 */
181- model : AIModel ;
264+ model : AIModel | ModelPreferences ;
182265
183266 /**
184267 * System instructions to guide the model's behavior.
@@ -240,7 +323,7 @@ export interface AIRequest<
240323 */
241324export interface AIResponse <
242325 TOOLS extends AIToolSet ,
243- SCHEMA extends TSchema = never ,
326+ SCHEMA extends TSchema = never
244327> {
245328 /**
246329 * The generated text.
@@ -656,7 +739,7 @@ export type AITool<PARAMETERS extends ToolParameters = any, RESULT = any> = {
656739 */
657740 execute ?: (
658741 args : inferParameters < PARAMETERS > ,
659- options : ToolExecutionOptions ,
742+ options : ToolExecutionOptions
660743 ) => PromiseLike < RESULT > ;
661744} & (
662745 | {
0 commit comments