From 466af50bd8f364f488cb2394482379ed5e9f947e Mon Sep 17 00:00:00 2001
From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com>
Date: Mon, 23 Mar 2026 13:16:26 +0800
Subject: [PATCH 1/7] Add reasoningEffort and update related fields in project schema

Added optional reasoningEffort field to project schema and updated related fields.
---
 packages/core/src/models/project.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/packages/core/src/models/project.ts b/packages/core/src/models/project.ts
index fa6f4f60..4e4b5284 100644
--- a/packages/core/src/models/project.ts
+++ b/packages/core/src/models/project.ts
@@ -8,6 +8,7 @@ export const LLMConfigSchema = z.object({
   temperature: z.number().min(0).max(2).default(0.7),
   maxTokens: z.number().int().min(1).default(8192),
   thinkingBudget: z.number().int().min(0).default(0),
+  reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
   extra: z.record(z.unknown()).optional(),
   apiFormat: z.enum(["chat", "responses"]).default("chat"),
   stream: z.boolean().default(true),
@@ -65,6 +66,10 @@ export const AgentLLMOverrideSchema = z.object({
   baseUrl: z.string().url().optional(),
   apiKeyEnv: z.string().optional(),
   stream: z.boolean().optional(),
+  temperature: z.number().min(0).max(2).optional(),
+  maxTokens: z.number().int().min(1).optional(),
+  thinkingBudget: z.number().int().min(0).optional(),
+  reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
 });
 
 export type AgentLLMOverride = z.infer<typeof AgentLLMOverrideSchema>;

From dfc027070a763d2d46a73dd5b0752667c26adaad Mon Sep 17 00:00:00 2001
From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com>
Date: Mon, 23 Mar 2026 13:17:07 +0800
Subject: [PATCH 2/7] Add reasoningEffort to llm configuration

---
 packages/core/src/utils/config-loader.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/core/src/utils/config-loader.ts b/packages/core/src/utils/config-loader.ts
index 8ce42d54..97ac3110 100644
--- a/packages/core/src/utils/config-loader.ts
+++ b/packages/core/src/utils/config-loader.ts
@@ -44,6 +44,7 @@ export async function loadProjectConfig(root: string): Promise<ProjectConfig> {
   if (env.INKOS_LLM_TEMPERATURE) llm.temperature = parseFloat(env.INKOS_LLM_TEMPERATURE);
   if (env.INKOS_LLM_MAX_TOKENS) llm.maxTokens = parseInt(env.INKOS_LLM_MAX_TOKENS, 10);
   if (env.INKOS_LLM_THINKING_BUDGET) llm.thinkingBudget = parseInt(env.INKOS_LLM_THINKING_BUDGET, 10);
+  if (env.INKOS_LLM_REASONING_EFFORT) llm.reasoningEffort = env.INKOS_LLM_REASONING_EFFORT;
   // Extra params from env: INKOS_LLM_EXTRA_<KEY>=<value>
  const extraFromEnv: Record<string, string> = {};
   for (const [key, value] of Object.entries(env)) {
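Review note on PATCH 2/7: the loader assigns the raw env string straight into llm.reasoningEffort, so validation is deferred to the Zod enum from PATCH 1/7. A minimal sketch of narrowing the value at read time instead (parseReasoningEffort is a hypothetical helper, not part of this series):

    type ReasoningEffort = "low" | "medium" | "high";

    // Return the value only when it is one of the allowed enum members.
    function parseReasoningEffort(raw: string | undefined): ReasoningEffort | undefined {
      return raw === "low" || raw === "medium" || raw === "high" ? raw : undefined;
    }

    // Usage in the loader would then be:
    // if (env.INKOS_LLM_REASONING_EFFORT) llm.reasoningEffort = parseReasoningEffort(env.INKOS_LLM_REASONING_EFFORT);

This would keep a typo such as INKOS_LLM_REASONING_EFFORT=hgih from surfacing later as a schema parse error.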
From 16ba3479cb4546b750a7f5f15e324f518b7ffa93 Mon Sep 17 00:00:00 2001
From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com>
Date: Mon, 23 Mar 2026 13:18:11 +0800
Subject: [PATCH 3/7] Add reasoningEffort option to LLM configuration

---
 packages/core/src/llm/provider.ts | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/packages/core/src/llm/provider.ts b/packages/core/src/llm/provider.ts
index 30326909..0ef2f60f 100644
--- a/packages/core/src/llm/provider.ts
+++ b/packages/core/src/llm/provider.ts
@@ -79,6 +79,7 @@ export interface LLMClient {
     readonly temperature: number;
     readonly maxTokens: number;
     readonly thinkingBudget: number;
+    readonly reasoningEffort?: string;
     readonly extra: Record<string, unknown>;
   };
 }
@@ -115,6 +116,7 @@ export function createLLMClient(config: LLMConfig): LLMClient {
     temperature: config.temperature ?? 0.7,
     maxTokens: config.maxTokens ?? 8192,
     thinkingBudget: config.thinkingBudget ?? 0,
+    reasoningEffort: config.reasoningEffort,
     extra: config.extra ?? {},
   };
 
@@ -213,6 +215,7 @@
   options?: {
     readonly temperature?: number;
     readonly maxTokens?: number;
+    readonly reasoningEffort?: string;
     readonly webSearch?: boolean;
     readonly onStreamProgress?: OnStreamProgress;
   },
@@ -220,6 +223,7 @@
   const resolved = {
     temperature: options?.temperature ?? client.defaults.temperature,
     maxTokens: options?.maxTokens ?? client.defaults.maxTokens,
+    reasoningEffort: options?.reasoningEffort ?? client.defaults.reasoningEffort,
     extra: client.defaults.extra,
   };
   const onStreamProgress = options?.onStreamProgress;
@@ -304,6 +308,7 @@
   const resolved = {
     temperature: options?.temperature ?? client.defaults.temperature,
     maxTokens: options?.maxTokens ?? client.defaults.maxTokens,
+    reasoningEffort: options?.reasoningEffort ?? client.defaults.reasoningEffort,
   };
   // Tool-calling always uses streaming (only used by agent loop, not by writer/auditor)
   if (client.provider === "anthropic") {
@@ -324,7 +329,7 @@ async function chatCompletionOpenAIChat(
   client: OpenAI,
   model: string,
   messages: ReadonlyArray,
-  options: { readonly temperature: number; readonly maxTokens: number; readonly extra: Record<string, unknown> },
+  options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string; readonly extra: Record<string, unknown> },
   webSearch?: boolean,
   onStreamProgress?: OnStreamProgress,
 ): Promise<string> {
@@ -335,6 +340,7 @@ async function chatCompletionOpenAIChat(
     temperature: options.temperature,
     max_tokens: options.maxTokens,
     stream: true,
+    ...(options.reasoningEffort ? { reasoning_effort: options.reasoningEffort } : {}),
     ...(webSearch ? { web_search_options: { search_context_size: "medium" as const } } : {}),
     ...options.extra,
   };
@@ -386,7 +392,7 @@ async function chatCompletionOpenAIChatSync(
   client: OpenAI,
   model: string,
   messages: ReadonlyArray,
-  options: { readonly temperature: number; readonly maxTokens: number; readonly extra: Record<string, unknown> },
+  options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string; readonly extra: Record<string, unknown> },
   _webSearch?: boolean,
 ): Promise<string> {
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -396,6 +402,7 @@ async function chatCompletionOpenAIChatSync(
     temperature: options.temperature,
     max_tokens: options.maxTokens,
     stream: false,
+    ...(options.reasoningEffort ? { reasoning_effort: options.reasoningEffort } : {}),
     ...options.extra,
   };
   const response = await client.chat.completions.create(syncParams);
@@ -512,7 +519,7 @@ async function chatCompletionOpenAIResponses(
   client: OpenAI,
   model: string,
   messages: ReadonlyArray,
-  options: { readonly temperature: number; readonly maxTokens: number },
+  options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string },
   webSearch?: boolean,
   onStreamProgress?: OnStreamProgress,
 ): Promise<string> {
@@ -531,6 +538,7 @@ async function chatCompletionOpenAIResponses(
     temperature: options.temperature,
     max_output_tokens: options.maxTokens,
     stream: true,
+    ...(options.reasoningEffort ? { reasoning: { effort: options.reasoningEffort as any } } : {}),
     ...(tools ? { tools } : {}),
   });
 
@@ -578,7 +586,7 @@ async function chatCompletionOpenAIResponsesSync(
   client: OpenAI,
   model: string,
   messages: ReadonlyArray,
-  options: { readonly temperature: number; readonly maxTokens: number },
+  options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string },
   _webSearch?: boolean,
 ): Promise<string> {
   const input: OpenAI.Responses.ResponseInputItem[] = messages.map((m) => ({
@@ -592,6 +600,7 @@ async function chatCompletionOpenAIResponsesSync(
     temperature: options.temperature,
     max_output_tokens: options.maxTokens,
     stream: false,
+    ...(options.reasoningEffort ? { reasoning: { effort: options.reasoningEffort as any } } : {}),
   });
 
   const content = response.output
@@ -618,7 +627,7 @@ async function chatWithToolsOpenAIResponses(
   model: string,
   messages: ReadonlyArray,
   tools: ReadonlyArray,
-  options: { readonly temperature: number; readonly maxTokens: number },
+  options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string },
 ): Promise {
   const input = agentMessagesToResponsesInput(messages);
   const responsesTools: OpenAI.Responses.Tool[] = tools.map((t) => ({
@@ -636,6 +645,7 @@ async function chatWithToolsOpenAIResponses(
     temperature: options.temperature,
     max_output_tokens: options.maxTokens,
     stream: true,
+    ...(options.reasoningEffort ? { reasoning: { effort: options.reasoningEffort as any } } : {}),
   });
 
   let content = "";
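Review note on PATCH 3/7: the same setting is serialized differently per API surface. The Chat Completions path sends a top-level reasoning_effort string, while the Responses path nests it as reasoning.effort (hence the as any cast against the SDK's stricter effort type). Illustrative request fragments, assuming a model that accepts the parameter:

    // Chat Completions shape:
    const chatParams = {
      model: "o3-mini",
      reasoning_effort: "high",
    };

    // Responses API shape:
    const responsesParams = {
      model: "o3-mini",
      reasoning: { effort: "high" },
    };

Spreading ...(options.reasoningEffort ? { ... } : {}) keeps the field out of the payload entirely when unset, which matters for providers that reject unknown or null parameters.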
From 5c128a3282f0f42ad0b8911f008033751a773c6c Mon Sep 17 00:00:00 2001
From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com>
Date: Mon, 23 Mar 2026 13:18:47 +0800
Subject: [PATCH 4/7] Refactor client creation logic with additional checks

---
 packages/core/src/pipeline/runner.ts | 32 ++++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/packages/core/src/pipeline/runner.ts b/packages/core/src/pipeline/runner.ts
index 696c716c..ce79498b 100644
--- a/packages/core/src/pipeline/runner.ts
+++ b/packages/core/src/pipeline/runner.ts
@@ -132,37 +132,49 @@ export class PipelineRunner {
     if (typeof override === "string") {
       return { model: override, client: this.config.client };
     }
-    // Full override — needs its own client if baseUrl differs
-    if (!override.baseUrl) {
+    // Full override — needs its own client if baseUrl or other parameters differ
+    if (
+      !override.baseUrl &&
+      !override.temperature &&
+      !override.maxTokens &&
+      !override.thinkingBudget &&
+      !override.reasoningEffort
+    ) {
       return { model: override.model, client: this.config.client };
     }
+
     const base = this.config.defaultLLMConfig;
     const provider = override.provider ?? base?.provider ?? "custom";
+    const baseUrl = override.baseUrl ?? base?.baseUrl;
     const apiKeySource = override.apiKeyEnv
       ? `env:${override.apiKeyEnv}`
       : `base:${base?.apiKey ?? ""}`;
     const stream = override.stream ?? base?.stream ?? true;
     const apiFormat = base?.apiFormat ?? "chat";
+
     const cacheKey = [
       provider,
-      override.baseUrl,
+      baseUrl,
       apiKeySource,
       `stream:${stream}`,
       `format:${apiFormat}`,
+      `temp:${override.temperature ?? ""}`,
+      `max:${override.maxTokens ?? ""}`,
+      `re:${override.reasoningEffort ?? ""}`,
     ].join("|");
+
     let client = this.agentClients.get(cacheKey);
     if (!client) {
-      const apiKey = override.apiKeyEnv
-        ? process.env[override.apiKeyEnv] ?? ""
-        : base?.apiKey ?? "";
+      const apiKey = override.apiKeyEnv ? process.env[override.apiKeyEnv] ?? "" : base?.apiKey ?? "";
       client = createLLMClient({
         provider,
-        baseUrl: override.baseUrl,
+        baseUrl: baseUrl!,
         apiKey,
         model: override.model,
-        temperature: base?.temperature ?? 0.7,
-        maxTokens: base?.maxTokens ?? 8192,
-        thinkingBudget: base?.thinkingBudget ?? 0,
+        temperature: override.temperature ?? base?.temperature ?? 0.7,
+        maxTokens: override.maxTokens ?? base?.maxTokens ?? 8192,
+        thinkingBudget: override.thinkingBudget ?? base?.thinkingBudget ?? 0,
+        reasoningEffort: override.reasoningEffort ?? base?.reasoningEffort,
        apiFormat,
         stream,
       });
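Review note on PATCH 4/7: the new guard tests truthiness, so an override that explicitly sets temperature: 0 (the schema's minimum) or thinkingBudget: 0 is treated as absent and falls back to the shared client. If explicit zeros should count, a presence check is the safer shape. A sketch under that assumption (types abridged from PATCH 1/7):

    type AgentLLMOverride = {
      model: string;
      baseUrl?: string;
      temperature?: number;
      maxTokens?: number;
      thinkingBudget?: number;
      reasoningEffort?: "low" | "medium" | "high";
    };

    // True when any per-agent knob is set at all, even to a falsy value like 0.
    function needsOwnClient(o: AgentLLMOverride): boolean {
      return (
        o.baseUrl !== undefined ||
        o.temperature !== undefined ||
        o.maxTokens !== undefined ||
        o.thinkingBudget !== undefined ||
        o.reasoningEffort !== undefined
      );
    }

The cache key, by contrast, uses override.temperature ?? "" and therefore does distinguish 0 from unset, so the key is already stricter than the guard in front of it.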
""; client = createLLMClient({ provider, - baseUrl: override.baseUrl, + baseUrl: baseUrl!, apiKey, model: override.model, - temperature: base?.temperature ?? 0.7, - maxTokens: base?.maxTokens ?? 8192, - thinkingBudget: base?.thinkingBudget ?? 0, + temperature: override.temperature ?? base?.temperature ?? 0.7, + maxTokens: override.maxTokens ?? base?.maxTokens ?? 8192, + thinkingBudget: override.thinkingBudget ?? base?.thinkingBudget ?? 0, + reasoningEffort: override.reasoningEffort ?? base?.reasoningEffort, apiFormat, stream, }); From 2d316cdf245e72c7c765f896909ff2b6311101c4 Mon Sep 17 00:00:00 2001 From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:20:21 +0800 Subject: [PATCH 5/7] Update comments and temperature handling in writer.ts --- packages/core/src/agents/writer.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/agents/writer.ts b/packages/core/src/agents/writer.ts index 325aee6d..15b97932 100644 --- a/packages/core/src/agents/writer.ts +++ b/packages/core/src/agents/writer.ts @@ -105,7 +105,7 @@ export class WriterAgent extends BaseAgent { } : undefined; - // ── Phase 1: Creative writing (temperature 0.7) ── + // ── Phase 1: Creative writing (temperature override > agent default) ── const resolvedLanguage = book.language ?? genreProfile.language; const creativeSystemPrompt = buildWriterSystemPrompt( book, genreProfile, bookRules, bookRulesBody, genreBody, styleGuide, styleFingerprint, @@ -148,7 +148,7 @@ export class WriterAgent extends BaseAgent { language: book.language ?? genreProfile.language, }); - const creativeTemperature = input.temperatureOverride ?? 0.7; + const creativeTemperature = input.temperatureOverride ?? this.ctx.client.defaults.temperature; this.ctx.logger?.info(`Phase 1: creative writing for chapter ${chapterNumber}`); From 5f281afd5605d9d801764f724750c08fb5704291 Mon Sep 17 00:00:00 2001 From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com> Date: Mon, 23 Mar 2026 13:21:22 +0800 Subject: [PATCH 6/7] Add reasoning effort option to config command --- packages/cli/src/commands/config.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/cli/src/commands/config.ts b/packages/cli/src/commands/config.ts index 6773dc6f..43a04f56 100644 --- a/packages/cli/src/commands/config.ts +++ b/packages/cli/src/commands/config.ts @@ -23,7 +23,7 @@ configCommand const KNOWN_KEYS = new Set([ "llm.provider", "llm.baseUrl", "llm.model", "llm.temperature", - "llm.maxTokens", "llm.thinkingBudget", "llm.apiFormat", "llm.stream", + "llm.maxTokens", "llm.thinkingBudget", "llm.reasoningEffort", "llm.apiFormat", "llm.stream", "daemon.schedule.radarCron", "daemon.schedule.writeCron", "daemon.maxConcurrentBooks", "daemon.chaptersPerCycle", "daemon.retryDelayMs", "daemon.cooldownAfterChapterMs", @@ -94,6 +94,7 @@ configCommand .option("--max-tokens ", "Max output tokens") .option("--thinking-budget ", "Anthropic thinking budget") .option("--api-format ", "API format (chat / responses)") + .option("--reasoning-effort ", "Reasoning effort (low / medium / high)") .option("--lang ", "Default writing language: zh (Chinese) or en (English)") .action(async (opts) => { try { @@ -110,6 +111,7 @@ configCommand if (opts.maxTokens) lines.push(`INKOS_LLM_MAX_TOKENS=${opts.maxTokens}`); if (opts.thinkingBudget) lines.push(`INKOS_LLM_THINKING_BUDGET=${opts.thinkingBudget}`); if (opts.apiFormat) lines.push(`INKOS_LLM_API_FORMAT=${opts.apiFormat}`); + if (opts.reasoningEffort) 
From a8a76c040481cc651ad3dd6675bd04067eb44dc8 Mon Sep 17 00:00:00 2001
From: Misaka09982 <41320755+Misaka09982@users.noreply.github.com>
Date: Mon, 23 Mar 2026 13:21:49 +0800
Subject: [PATCH 7/7] Add reasoning effort parameter to init command

Add optional parameter for reasoning effort in init command.
---
 packages/cli/src/commands/init.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/cli/src/commands/init.ts b/packages/cli/src/commands/init.ts
index 0b360717..bf19178a 100644
--- a/packages/cli/src/commands/init.ts
+++ b/packages/cli/src/commands/init.ts
@@ -96,6 +96,7 @@ export const initCommand = new Command("init")
     "# INKOS_LLM_TEMPERATURE=0.7",
     "# INKOS_LLM_MAX_TOKENS=8192",
     "# INKOS_LLM_THINKING_BUDGET=0 # Anthropic extended thinking budget",
+    "# INKOS_LLM_REASONING_EFFORT= # Reasoning effort: low / medium / high",
     "# INKOS_LLM_API_FORMAT=chat # chat (default) or responses (OpenAI Responses API)",
     "",
     "# Web search (optional, for auditor era-research):",
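Review note on the series: taken together, the patches thread one setting through three layers: the project default (INKOS_LLM_REASONING_EFFORT, scaffolded by PATCH 7/7 and loaded by PATCH 2/7), the per-agent override (PATCHES 1/7 and 4/7), and the per-call option (PATCH 3/7). A toy illustration of the resulting precedence, highest first:

    type Effort = "low" | "medium" | "high";

    const projectDefault: Effort | undefined = "medium"; // from INKOS_LLM_REASONING_EFFORT=medium
    const agentOverride: Effort | undefined = "high";    // from an AgentLLMOverride entry
    const callOption: Effort | undefined = undefined;    // per-call options argument

    // provider.ts resolves options ?? client defaults, and runner.ts builds the
    // client from override ?? base, so the chain is: call > agent > project.
    const effective: Effort | undefined = callOption ?? agentOverride ?? projectDefault;
    console.log(effective); // "high"; when all three are unset, the field is omitted from requests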