Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/reference/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -566,6 +566,7 @@ Auto-switches to backup models on API errors.
"runtime_fallback": {
"enabled": true,
"retry_on_errors": [400, 429, 503, 529],
"retry_on_message_patterns": ["no\\s+available\\s+accounts?"],
"max_fallback_attempts": 3,
"cooldown_seconds": 60,
"timeout_seconds": 30,
Expand All @@ -578,6 +579,7 @@ Auto-switches to backup models on API errors.
| ----------------------- | ------------------- | ------------------------------------------------------------------------------------------------------------------------------ |
| `enabled` | `false` | Enable runtime fallback |
| `retry_on_errors` | `[400,429,503,529]` | HTTP codes that trigger fallback. Also handles classified provider key errors. |
| `retry_on_message_patterns` | `[]` | Regex pattern strings matched case-insensitively against provider messages. Use this for provider-specific retry text that does not include status codes. Patterns that fail to compile as a regex are ignored. |
| `max_fallback_attempts` | `3` | Max fallback attempts per session (1–20) |
| `cooldown_seconds` | `60` | Seconds before retrying a failed model |
| `timeout_seconds` | `30` | Seconds before forcing next fallback. **Set to `0` to disable timeout-based escalation and provider retry message detection.** |
Expand Down
1 change: 1 addition & 0 deletions src/config/schema/runtime-fallback.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export const RuntimeFallbackConfigSchema = z.object({
enabled: z.boolean().optional(),
/** HTTP status codes that trigger fallback (default: [400, 429, 503, 529]) */
retry_on_errors: z.array(z.number()).optional(),
retry_on_message_patterns: z.array(z.string()).optional(),
/** Maximum fallback attempts per session (default: 3) */
max_fallback_attempts: z.number().min(1).max(20).optional(),
/** Cooldown in seconds before retrying a failed model (default: 60) */
Expand Down
13 changes: 13 additions & 0 deletions src/hooks/runtime-fallback/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,25 @@ import type { RuntimeFallbackConfig } from "../../config"
// Default value for every runtime-fallback option. createRuntimeFallbackHook
// falls back to these field by field when the user config leaves one unset.
export const DEFAULT_CONFIG: Required<RuntimeFallbackConfig> = {
// Runtime fallback is off unless explicitly enabled.
enabled: false,
// HTTP status codes that trigger fallback.
// NOTE(review): docs/reference/configuration.md and the schema comment list the
// default as [400, 429, 503, 529], which differs from this list. Confirm which
// one is intended and align the other.
retry_on_errors: [429, 500, 502, 503, 504],
// No extra message patterns by default; only built-in patterns apply.
retry_on_message_patterns: [],
// Maximum fallback attempts per session (the schema allows 1-20).
max_fallback_attempts: 3,
// Seconds before a failed model is retried.
cooldown_seconds: 60,
// Seconds before forcing the next fallback.
timeout_seconds: 30,
notify_on_fallback: true,
}

/**
 * Regex source strings for the keyword half of auto-retry signal detection.
 *
 * They are stored as strings (hence the doubled backslashes) and compiled with
 * the case-insensitive flag by the error classifier, which appends any
 * configured `retry_on_message_patterns` to this list. A message only counts as
 * an auto-retry signal when it matches one of these keywords (or a configured
 * pattern) and also contains "retrying in".
 */
export const AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS = [
"too\\s+many\\s+requests",
"quota\\s*exceeded",
"quota\\s+will\\s+reset\\s+after",
"usage\\s+limit",
"rate\\s+limit",
"limit\\s+reached",
"all\\s+credentials\\s+for\\s+model",
"cool(?:ing)?\\s*down",
"exhausted\\s+your\\s+capacity",
]

/**
* Error patterns that indicate rate limiting or temporary failures
* These are checked in addition to HTTP status codes
Expand Down
39 changes: 39 additions & 0 deletions src/hooks/runtime-fallback/error-classifier.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,43 @@ describe("runtime-fallback error classifier", () => {
//#then
expect(signal).toBeUndefined()
})

test("does not classify no-available-accounts without configured message pattern", () => {
  //#given
  // The message contains "retrying in" but none of the built-in keyword patterns.
  const retryStatus = "No available accounts: no available accounts [retrying in 25s attempt #5]"
  const retryInfo = { status: retryStatus }

  //#when
  const detected = extractAutoRetrySignal(retryInfo)

  //#then
  expect(detected).toBeUndefined()
})

test("classifies no-available-accounts when configured message pattern is provided", () => {
  //#given
  // Same message as the unconfigured case; only the extra pattern differs.
  const retryStatus = "No available accounts: no available accounts [retrying in 25s attempt #5]"
  const retryInfo = { status: retryStatus }
  const configuredPatterns = ["no\\s+available\\s+accounts?"]

  //#when
  const detected = extractAutoRetrySignal(retryInfo, configuredPatterns)

  //#then
  expect(detected).toBeDefined()
})

test("treats configured message pattern matches as retryable errors", () => {
  //#given
  // The error object carries only a message, no status code field.
  const providerError = { message: "No available accounts for provider anthropic" }
  const retryStatusCodes = [429, 503, 529]
  const configuredPatterns = ["no\\s+available\\s+accounts?"]

  //#when
  const outcome = isRetryableError(providerError, retryStatusCodes, configuredPatterns)

  //#then
  expect(outcome).toBe(true)
})
})
46 changes: 36 additions & 10 deletions src/hooks/runtime-fallback/error-classifier.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS } from "./constants"
import { AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS, DEFAULT_CONFIG, RETRYABLE_ERROR_PATTERNS } from "./constants"

export function getErrorMessage(error: unknown): string {
if (!error) return ""
Expand Down Expand Up @@ -99,13 +99,30 @@ export interface AutoRetrySignal {
signal: string
}

export const AUTO_RETRY_PATTERNS: Array<(combined: string) => boolean> = [
(combined) => /retrying\s+in/i.test(combined),
(combined) =>
/(?:too\s+many\s+requests|quota\s*exceeded|quota\s+will\s+reset\s+after|usage\s+limit|rate\s+limit|limit\s+reached|all\s+credentials\s+for\s+model|cool(?:ing)?\s*down|exhausted\s+your\s+capacity)/i.test(combined),
]
/**
 * Compiles regex source strings into case-insensitive `RegExp` objects.
 *
 * The inputs may come from user configuration, so compilation is best-effort:
 * - an empty pattern is skipped, because `new RegExp("")` matches every string
 *   and would make every message count as a match;
 * - a pattern with invalid regex syntax is skipped instead of throwing.
 *
 * @param patterns - Regex source strings to compile.
 * @returns The successfully compiled patterns, in input order.
 */
function compilePatterns(patterns: string[]): RegExp[] {
  const compiled: RegExp[] = []
  for (const pattern of patterns) {
    // Guard against a match-all regex produced by an empty config entry.
    if (pattern === "") continue
    try {
      compiled.push(new RegExp(pattern, "i"))
    } catch {
      // Invalid regex syntax: drop this pattern and keep the remaining ones usable.
    }
  }
  return compiled
}

/**
 * Builds the keyword regexes for auto-retry signal detection: the built-in
 * AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS first, followed by any caller-supplied
 * patterns, all compiled through compilePatterns.
 *
 * @param retryOnMessagePatterns - Extra regex source strings (defaults to none).
 * @returns The compiled built-in and extra patterns.
 */
function resolveAutoRetryKeywordPatterns(retryOnMessagePatterns: string[] = []): RegExp[] {
  const patternSources = AUTO_RETRY_SIGNAL_KEYWORD_PATTERNS.concat(retryOnMessagePatterns)
  return compilePatterns(patternSources)
}

export function extractAutoRetrySignal(info: Record<string, unknown> | undefined): AutoRetrySignal | undefined {
/**
 * Builds the regexes used to decide whether an error message is retryable:
 * the built-in RETRYABLE_ERROR_PATTERNS first, followed by the compiled
 * caller-supplied patterns.
 *
 * @param retryOnMessagePatterns - Extra regex source strings (defaults to none).
 * @returns Built-in patterns plus the compiled extra patterns.
 */
function resolveRetryableMessagePatterns(retryOnMessagePatterns: string[] = []): RegExp[] {
  const configuredPatterns = compilePatterns(retryOnMessagePatterns)
  return [...RETRYABLE_ERROR_PATTERNS, ...configuredPatterns]
}

export function extractAutoRetrySignal(
info: Record<string, unknown> | undefined,
retryOnMessagePatterns: string[] = []
): AutoRetrySignal | undefined {
if (!info) return undefined

const candidates: string[] = []
Expand All @@ -125,7 +142,12 @@ export function extractAutoRetrySignal(info: Record<string, unknown> | undefined
const combined = candidates.join("\n")
if (!combined) return undefined

const isAutoRetry = AUTO_RETRY_PATTERNS.every((test) => test(combined))
const autoRetryPatterns: Array<(combined: string) => boolean> = [
(text) => /retrying\s+in/i.test(text),
(text) => resolveAutoRetryKeywordPatterns(retryOnMessagePatterns).some((pattern) => pattern.test(text)),
]

const isAutoRetry = autoRetryPatterns.every((test) => test(combined))
if (isAutoRetry) {
return { signal: combined }
}
Expand All @@ -148,7 +170,11 @@ export function containsErrorContent(
return { hasError: false }
}

export function isRetryableError(error: unknown, retryOnErrors: number[]): boolean {
export function isRetryableError(
error: unknown,
retryOnErrors: number[],
retryOnMessagePatterns: string[] = []
): boolean {
const statusCode = extractStatusCode(error, retryOnErrors)
const message = getErrorMessage(error)
const errorType = classifyErrorType(error)
Expand All @@ -165,5 +191,5 @@ export function isRetryableError(error: unknown, retryOnErrors: number[]): boole
return true
}

return RETRYABLE_ERROR_PATTERNS.some((pattern) => pattern.test(message))
return resolveRetryableMessagePatterns(retryOnMessagePatterns).some((pattern) => pattern.test(message))
}
7 changes: 5 additions & 2 deletions src/hooks/runtime-fallback/event-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
errorType: classifyErrorType(error),
})

if (!isRetryableError(error, config.retry_on_errors)) {
if (!isRetryableError(error, config.retry_on_errors, config.retry_on_message_patterns)) {
log(`[${HOOK_NAME}] Error not retryable, skipping fallback`, {
sessionID,
retryable: false,
Expand Down Expand Up @@ -194,7 +194,10 @@ export function createEventHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
if (!sessionID || status?.type !== "retry") return

const retryMessage = typeof status.message === "string" ? status.message : ""
const retrySignal = extractAutoRetrySignal({ status: retryMessage, message: retryMessage })
const retrySignal = extractAutoRetrySignal(
{ status: retryMessage, message: retryMessage },
config.retry_on_message_patterns
)
if (!retrySignal) return

const retryKey = `${extractRetryAttempt(status.attempt, retryMessage)}:${normalizeRetryStatusMessage(retryMessage)}`
Expand Down
1 change: 1 addition & 0 deletions src/hooks/runtime-fallback/hook.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ export function createRuntimeFallbackHook(
const config = {
enabled: options?.config?.enabled ?? DEFAULT_CONFIG.enabled,
retry_on_errors: options?.config?.retry_on_errors ?? DEFAULT_CONFIG.retry_on_errors,
retry_on_message_patterns: options?.config?.retry_on_message_patterns ?? DEFAULT_CONFIG.retry_on_message_patterns,
max_fallback_attempts: options?.config?.max_fallback_attempts ?? DEFAULT_CONFIG.max_fallback_attempts,
cooldown_seconds: options?.config?.cooldown_seconds ?? DEFAULT_CONFIG.cooldown_seconds,
timeout_seconds: options?.config?.timeout_seconds ?? DEFAULT_CONFIG.timeout_seconds,
Expand Down
35 changes: 35 additions & 0 deletions src/hooks/runtime-fallback/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,41 @@ describe("runtime-fallback", () => {
expect(errorLog).toBeDefined()
})

// End-to-end check that a configured retry_on_message_patterns entry is enough
// to start a fallback when a session.error carries a matching message.
test("should trigger fallback when custom retry_on_message_patterns matches", async () => {
// Hook configured with one custom message pattern and a single category fallback model.
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig({
notify_on_fallback: false,
retry_on_message_patterns: ["no\\s+available\\s+accounts?"],
}),
pluginConfig: createMockPluginConfigWithCategoryFallback(["openai/gpt-5.4"]),
})
const sessionID = "test-session-custom-message-pattern"
SessionCategoryRegistry.register(sessionID, "test")

// Send session.created with the session id and its starting model.
await hook.event({
event: {
type: "session.created",
properties: { info: { id: sessionID, model: "anthropic/claude-opus-4-6" } },
},
})

// Send session.error whose message matches the configured pattern.
await hook.event({
event: {
type: "session.error",
properties: {
sessionID,
error: {
message: "No available accounts: no available accounts [retrying in 25s attempt #5]",
},
},
},
})

// A "Preparing fallback" log entry must exist, going from the starting model to the fallback model.
const fallbackLog = logCalls.find((c) => c.msg.includes("Preparing fallback"))
expect(fallbackLog).toBeDefined()
expect(fallbackLog?.data).toMatchObject({ from: "anthropic/claude-opus-4-6", to: "openai/gpt-5.4" })
})

test("should continue fallback chain when fallback model is not found", async () => {
const hook = createRuntimeFallbackHook(createMockPluginInput(), {
config: createMockConfig({ notify_on_fallback: false }),
Expand Down
13 changes: 9 additions & 4 deletions src/hooks/runtime-fallback/message-update-handler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ export function hasVisibleAssistantResponse(extractAutoRetrySignalFn: typeof ext

export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHelpers) {
const { ctx, config, pluginConfig, sessionStates, sessionLastAccess, sessionRetryInFlight, sessionAwaitingFallbackResult } = deps
const checkVisibleResponse = hasVisibleAssistantResponse(extractAutoRetrySignal)
const checkVisibleResponse = hasVisibleAssistantResponse((info) =>
extractAutoRetrySignal(info, config.retry_on_message_patterns)
)

return async (props: Record<string, unknown> | undefined) => {
const info = props?.info as Record<string, unknown> | undefined
Expand All @@ -61,14 +63,17 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel
const eventParts = props?.parts as Array<{ type?: string; text?: string }> | undefined
const infoParts = info?.parts as Array<{ type?: string; text?: string }> | undefined
const parts = eventParts && eventParts.length > 0 ? eventParts : infoParts
const retrySignalResult = extractAutoRetrySignal(info)
const retrySignalResult = extractAutoRetrySignal(info, config.retry_on_message_patterns)
const partsText = (parts ?? [])
.filter((p) => typeof p?.text === "string")
.map((p) => (p.text ?? "").trim())
.filter((text) => text.length > 0)
.join("\n")
const retrySignalFromParts = partsText
? extractAutoRetrySignal({ message: partsText, status: partsText, summary: partsText })?.signal
? extractAutoRetrySignal(
{ message: partsText, status: partsText, summary: partsText },
config.retry_on_message_patterns
)?.signal
: undefined
const retrySignal = retrySignalResult?.signal ?? retrySignalFromParts
const errorContentResult = containsErrorContent(parts)
Expand Down Expand Up @@ -134,7 +139,7 @@ export function createMessageUpdateHandler(deps: HookDeps, helpers: AutoRetryHel
errorType: classifyErrorType(error),
})

if (!isRetryableError(error, config.retry_on_errors)) {
if (!isRetryableError(error, config.retry_on_errors, config.retry_on_message_patterns)) {
log(`[${HOOK_NAME}] message.updated error not retryable, skipping fallback`, {
sessionID,
statusCode: extractStatusCode(error, config.retry_on_errors),
Expand Down