4 changes: 3 additions & 1 deletion packages/cli/src/commands/config.ts
@@ -23,7 +23,7 @@ configCommand

const KNOWN_KEYS = new Set([
"llm.provider", "llm.baseUrl", "llm.model", "llm.temperature",
"llm.maxTokens", "llm.thinkingBudget", "llm.apiFormat", "llm.stream",
"llm.maxTokens", "llm.thinkingBudget", "llm.reasoningEffort", "llm.apiFormat", "llm.stream",
"daemon.schedule.radarCron", "daemon.schedule.writeCron",
"daemon.maxConcurrentBooks", "daemon.chaptersPerCycle",
"daemon.retryDelayMs", "daemon.cooldownAfterChapterMs",
@@ -94,6 +94,7 @@ configCommand
.option("--max-tokens <n>", "Max output tokens")
.option("--thinking-budget <n>", "Anthropic thinking budget")
.option("--api-format <format>", "API format (chat / responses)")
.option("--reasoning-effort <level>", "Reasoning effort (low / medium / high)")
.option("--lang <language>", "Default writing language: zh (Chinese) or en (English)")
.action(async (opts) => {
try {
@@ -110,6 +111,7 @@ configCommand
if (opts.maxTokens) lines.push(`INKOS_LLM_MAX_TOKENS=${opts.maxTokens}`);
if (opts.thinkingBudget) lines.push(`INKOS_LLM_THINKING_BUDGET=${opts.thinkingBudget}`);
if (opts.apiFormat) lines.push(`INKOS_LLM_API_FORMAT=${opts.apiFormat}`);
if (opts.reasoningEffort) lines.push(`INKOS_LLM_REASONING_EFFORT=${opts.reasoningEffort}`);
if (opts.lang) lines.push(`INKOS_DEFAULT_LANGUAGE=${opts.lang}`);

await writeFile(GLOBAL_ENV_PATH, lines.join("\n") + "\n", "utf-8");
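The `config` command change is mechanical: `--reasoning-effort` is added to the known keys and serialized as one more `INKOS_*` line in the global env file. A minimal sketch of that round trip, using the option and env names from the diff (the `opts` value itself is illustrative):

```ts
// Sketch: what `--reasoning-effort medium` ends up writing. The flag name
// and the INKOS_LLM_REASONING_EFFORT key match the diff; the rest is
// illustrative scaffolding.
const opts: { reasoningEffort?: string } = { reasoningEffort: "medium" };

const lines: string[] = [];
if (opts.reasoningEffort) {
  lines.push(`INKOS_LLM_REASONING_EFFORT=${opts.reasoningEffort}`);
}
// lines.join("\n") + "\n" is then written to the global env file:
// INKOS_LLM_REASONING_EFFORT=medium
```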
1 change: 1 addition & 0 deletions packages/cli/src/commands/init.ts
@@ -96,6 +96,7 @@ export const initCommand = new Command("init")
"# INKOS_LLM_TEMPERATURE=0.7",
"# INKOS_LLM_MAX_TOKENS=8192",
"# INKOS_LLM_THINKING_BUDGET=0 # Anthropic extended thinking budget",
"# INKOS_LLM_REASONING_EFFORT= # Reasoning effort: low / medium / high",
"# INKOS_LLM_API_FORMAT=chat # chat (default) or responses (OpenAI Responses API)",
"",
"# Web search (optional, for auditor era-research):",
4 changes: 2 additions & 2 deletions packages/core/src/agents/writer.ts
@@ -105,7 +105,7 @@ export class WriterAgent extends BaseAgent {
}
: undefined;

// ── Phase 1: Creative writing (temperature 0.7) ──
// ── Phase 1: Creative writing (temperature override > agent default) ──
const resolvedLanguage = book.language ?? genreProfile.language;
const creativeSystemPrompt = buildWriterSystemPrompt(
book, genreProfile, bookRules, bookRulesBody, genreBody, styleGuide, styleFingerprint,
@@ -148,7 +148,7 @@ export class WriterAgent extends BaseAgent {
language: book.language ?? genreProfile.language,
});

const creativeTemperature = input.temperatureOverride ?? 0.7;
const creativeTemperature = input.temperatureOverride ?? this.ctx.client.defaults.temperature;

this.ctx.logger?.info(`Phase 1: creative writing for chapter ${chapterNumber}`);

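The second hunk is the behavioral change: instead of pinning the creative phase to 0.7, the writer now falls back to whatever temperature the client was constructed with. A minimal sketch of the resolution order; the helper name and inline types are ours, not the PR's:

```ts
// Sketch: per-call override wins, then the client default set at
// createLLMClient() time (which itself falls back to 0.7 in the diff).
interface ClientDefaults {
  temperature: number;
  reasoningEffort?: string;
}

function resolveTemperature(
  override: number | undefined,
  defaults: ClientDefaults,
): number {
  // `??` rather than `||`, so an explicit override of 0 is respected.
  return override ?? defaults.temperature;
}

// resolveTemperature(undefined, { temperature: 0.7 }) === 0.7
// resolveTemperature(0, { temperature: 0.7 }) === 0 (zero survives)
```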
20 changes: 15 additions & 5 deletions packages/core/src/llm/provider.ts
@@ -79,6 +79,7 @@ export interface LLMClient {
readonly temperature: number;
readonly maxTokens: number;
readonly thinkingBudget: number;
readonly reasoningEffort?: string;
readonly extra: Record<string, unknown>;
};
}
@@ -115,6 +116,7 @@ export function createLLMClient(config: LLMConfig): LLMClient {
temperature: config.temperature ?? 0.7,
maxTokens: config.maxTokens ?? 8192,
thinkingBudget: config.thinkingBudget ?? 0,
reasoningEffort: config.reasoningEffort,
extra: config.extra ?? {},
};

@@ -213,13 +215,15 @@ export async function chatCompletion(
options?: {
readonly temperature?: number;
readonly maxTokens?: number;
readonly reasoningEffort?: string;
readonly webSearch?: boolean;
readonly onStreamProgress?: OnStreamProgress;
},
): Promise<LLMResponse> {
const resolved = {
temperature: options?.temperature ?? client.defaults.temperature,
maxTokens: options?.maxTokens ?? client.defaults.maxTokens,
reasoningEffort: options?.reasoningEffort ?? client.defaults.reasoningEffort,
extra: client.defaults.extra,
};
const onStreamProgress = options?.onStreamProgress;
@@ -304,6 +308,7 @@ export async function chatWithTools(
const resolved = {
temperature: options?.temperature ?? client.defaults.temperature,
maxTokens: options?.maxTokens ?? client.defaults.maxTokens,
reasoningEffort: options?.reasoningEffort ?? client.defaults.reasoningEffort,
};
// Tool-calling always uses streaming (only used by agent loop, not by writer/auditor)
if (client.provider === "anthropic") {
@@ -324,7 +329,7 @@ async function chatCompletionOpenAIChat(
client: OpenAI,
model: string,
messages: ReadonlyArray<LLMMessage>,
options: { readonly temperature: number; readonly maxTokens: number; readonly extra: Record<string, unknown> },
options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string; readonly extra: Record<string, unknown> },
webSearch?: boolean,
onStreamProgress?: OnStreamProgress,
): Promise<LLMResponse> {
@@ -335,6 +340,7 @@ async function chatCompletionOpenAIChat(
temperature: options.temperature,
max_tokens: options.maxTokens,
stream: true,
...(options.reasoningEffort ? { reasoning_effort: options.reasoningEffort } : {}),
...(webSearch ? { web_search_options: { search_context_size: "medium" as const } } : {}),
...options.extra,
};
@@ -386,7 +392,7 @@ async function chatCompletionOpenAIChatSync(
client: OpenAI,
model: string,
messages: ReadonlyArray<LLMMessage>,
options: { readonly temperature: number; readonly maxTokens: number; readonly extra: Record<string, unknown> },
options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string; readonly extra: Record<string, unknown> },
_webSearch?: boolean,
): Promise<LLMResponse> {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
@@ -396,6 +402,7 @@
temperature: options.temperature,
max_tokens: options.maxTokens,
stream: false,
...(options.reasoningEffort ? { reasoning_effort: options.reasoningEffort } : {}),
...options.extra,
};
const response = await client.chat.completions.create(syncParams);
@@ -512,7 +519,7 @@ async function chatCompletionOpenAIResponses(
client: OpenAI,
model: string,
messages: ReadonlyArray<LLMMessage>,
options: { readonly temperature: number; readonly maxTokens: number },
options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string },
webSearch?: boolean,
onStreamProgress?: OnStreamProgress,
): Promise<LLMResponse> {
@@ -531,6 +538,7 @@
temperature: options.temperature,
max_output_tokens: options.maxTokens,
stream: true,
...(options.reasoningEffort ? { reasoning: { effort: options.reasoningEffort as any } } : {}),
...(tools ? { tools } : {}),
});

@@ -578,7 +586,7 @@ async function chatCompletionOpenAIResponsesSync(
client: OpenAI,
model: string,
messages: ReadonlyArray<LLMMessage>,
options: { readonly temperature: number; readonly maxTokens: number },
options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string },
_webSearch?: boolean,
): Promise<LLMResponse> {
const input: OpenAI.Responses.ResponseInputItem[] = messages.map((m) => ({
@@ -592,6 +600,7 @@
temperature: options.temperature,
max_output_tokens: options.maxTokens,
stream: false,
...(options.reasoningEffort ? { reasoning: { effort: options.reasoningEffort as any } } : {}),
});

const content = response.output
@@ -618,7 +627,7 @@ async function chatWithToolsOpenAIResponses(
model: string,
messages: ReadonlyArray<AgentMessage>,
tools: ReadonlyArray<ToolDefinition>,
options: { readonly temperature: number; readonly maxTokens: number },
options: { readonly temperature: number; readonly maxTokens: number; readonly reasoningEffort?: string },
): Promise<ChatWithToolsResult> {
const input = agentMessagesToResponsesInput(messages);
const responsesTools: OpenAI.Responses.Tool[] = tools.map((t) => ({
@@ -636,6 +645,7 @@
temperature: options.temperature,
max_output_tokens: options.maxTokens,
stream: true,
...(options.reasoningEffort ? { reasoning: { effort: options.reasoningEffort as any } } : {}),
});

let content = "";
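Across these hunks the same setting is serialized two different ways depending on the API format: the Chat Completions payload takes a flat `reasoning_effort` string, while the Responses API nests it under `reasoning.effort`. A condensed sketch of the two shapes; the helper names are ours, while the spread-with-guard pattern comes straight from the diff:

```ts
type Effort = "low" | "medium" | "high";

// Chat Completions: flat snake_case key, only present when set.
function chatCompletionsParams(effort?: Effort): Record<string, unknown> {
  return {
    // ...model, messages, temperature, max_tokens...
    ...(effort ? { reasoning_effort: effort } : {}),
  };
}

// Responses API: nested object, only present when set.
function responsesParams(effort?: Effort): Record<string, unknown> {
  return {
    // ...model, input, temperature, max_output_tokens...
    ...(effort ? { reasoning: { effort } } : {}),
  };
}
```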
5 changes: 5 additions & 0 deletions packages/core/src/models/project.ts
@@ -8,6 +8,7 @@ export const LLMConfigSchema = z.object({
temperature: z.number().min(0).max(2).default(0.7),
maxTokens: z.number().int().min(1).default(8192),
thinkingBudget: z.number().int().min(0).default(0),
reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
extra: z.record(z.unknown()).optional(),
apiFormat: z.enum(["chat", "responses"]).default("chat"),
stream: z.boolean().default(true),
@@ -65,6 +66,10 @@ export const AgentLLMOverrideSchema = z.object({
baseUrl: z.string().url().optional(),
apiKeyEnv: z.string().optional(),
stream: z.boolean().optional(),
temperature: z.number().min(0).max(2).optional(),
maxTokens: z.number().int().min(1).optional(),
thinkingBudget: z.number().int().min(0).optional(),
reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
});

export type AgentLLMOverride = z.infer<typeof AgentLLMOverrideSchema>;
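With the widened override schema, a per-agent entry can now carry its own sampling and reasoning parameters. A sketch of what now parses; the `model` field and the example values are assumptions inferred from how the runner consumes overrides, not visible in this hunk:

```ts
import { z } from "zod";

// Re-declaration for illustration; matches the fields visible in the diff,
// plus `model`, which the runner reads but which sits above this hunk.
const AgentLLMOverrideSchema = z.object({
  model: z.string(),
  baseUrl: z.string().url().optional(),
  apiKeyEnv: z.string().optional(),
  stream: z.boolean().optional(),
  temperature: z.number().min(0).max(2).optional(),
  maxTokens: z.number().int().min(1).optional(),
  thinkingBudget: z.number().int().min(0).optional(),
  reasoningEffort: z.enum(["low", "medium", "high"]).optional(),
});

// An override that was impossible before this change (values illustrative):
const override = AgentLLMOverrideSchema.parse({
  model: "some-reasoning-model",
  temperature: 0.3,        // new: per-agent sampling
  reasoningEffort: "high", // new: per-agent reasoning effort
});
```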
32 changes: 22 additions & 10 deletions packages/core/src/pipeline/runner.ts
@@ -132,37 +132,49 @@ export class PipelineRunner {
if (typeof override === "string") {
return { model: override, client: this.config.client };
}
// Full override — needs its own client if baseUrl differs
if (!override.baseUrl) {
// Full override — needs its own client if baseUrl or other parameters differ
if (
!override.baseUrl &&
!override.temperature &&
!override.maxTokens &&
!override.thinkingBudget &&
!override.reasoningEffort
) {
return { model: override.model, client: this.config.client };
}

const base = this.config.defaultLLMConfig;
const provider = override.provider ?? base?.provider ?? "custom";
const baseUrl = override.baseUrl ?? base?.baseUrl;
const apiKeySource = override.apiKeyEnv
? `env:${override.apiKeyEnv}`
: `base:${base?.apiKey ?? ""}`;
const stream = override.stream ?? base?.stream ?? true;
const apiFormat = base?.apiFormat ?? "chat";

const cacheKey = [
provider,
override.baseUrl,
baseUrl,
apiKeySource,
`stream:${stream}`,
`format:${apiFormat}`,
`temp:${override.temperature ?? ""}`,
`max:${override.maxTokens ?? ""}`,
`re:${override.reasoningEffort ?? ""}`,
].join("|");

let client = this.agentClients.get(cacheKey);
if (!client) {
const apiKey = override.apiKeyEnv
? process.env[override.apiKeyEnv] ?? ""
: base?.apiKey ?? "";
const apiKey = override.apiKeyEnv ? process.env[override.apiKeyEnv] ?? "" : base?.apiKey ?? "";
client = createLLMClient({
provider,
baseUrl: override.baseUrl,
baseUrl: baseUrl!,
apiKey,
model: override.model,
temperature: base?.temperature ?? 0.7,
maxTokens: base?.maxTokens ?? 8192,
thinkingBudget: base?.thinkingBudget ?? 0,
temperature: override.temperature ?? base?.temperature ?? 0.7,
maxTokens: override.maxTokens ?? base?.maxTokens ?? 8192,
thinkingBudget: override.thinkingBudget ?? base?.thinkingBudget ?? 0,
reasoningEffort: override.reasoningEffort ?? base?.reasoningEffort,
apiFormat,
stream,
});
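Because defaults live on the client, two agents that differ only in these parameters can no longer share a cached client; hence both the widened short-circuit and the extra cache-key segments. A standalone restatement of the key construction (field plumbing simplified):

```ts
// Sketch of the cache key from the diff. `?? ""` keeps "unset" distinct
// from any real value, so e.g. maxTokens 8192 vs unset key differently.
function agentClientCacheKey(o: {
  provider: string;
  baseUrl?: string;
  apiKeySource: string;
  stream: boolean;
  apiFormat: string;
  temperature?: number;
  maxTokens?: number;
  reasoningEffort?: string;
}): string {
  return [
    o.provider,
    o.baseUrl, // undefined renders as "" in join()
    o.apiKeySource,
    `stream:${o.stream}`,
    `format:${o.apiFormat}`,
    `temp:${o.temperature ?? ""}`,
    `max:${o.maxTokens ?? ""}`,
    `re:${o.reasoningEffort ?? ""}`,
  ].join("|");
}

// agentClientCacheKey({ provider: "custom", apiKeySource: "base:",
//   stream: true, apiFormat: "chat", reasoningEffort: "high" })
// → "custom||base:|stream:true|format:chat|temp:|max:|re:high"
```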
1 change: 1 addition & 0 deletions packages/core/src/utils/config-loader.ts
@@ -44,6 +44,7 @@ export async function loadProjectConfig(root: string): Promise<ProjectConfig> {
if (env.INKOS_LLM_TEMPERATURE) llm.temperature = parseFloat(env.INKOS_LLM_TEMPERATURE);
if (env.INKOS_LLM_MAX_TOKENS) llm.maxTokens = parseInt(env.INKOS_LLM_MAX_TOKENS, 10);
if (env.INKOS_LLM_THINKING_BUDGET) llm.thinkingBudget = parseInt(env.INKOS_LLM_THINKING_BUDGET, 10);
if (env.INKOS_LLM_REASONING_EFFORT) llm.reasoningEffort = env.INKOS_LLM_REASONING_EFFORT;
// Extra params from env: INKOS_LLM_EXTRA_<key>=<value>
const extraFromEnv: Record<string, unknown> = {};
for (const [key, value] of Object.entries(env)) {
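The loader copies the raw env string straight onto `llm.reasoningEffort`; presumably the zod schema, which only admits low/medium/high, rejects anything else at parse time. A sketch of a defensive variant that narrows at the boundary instead; the helper is ours, not the loader's:

```ts
type ReasoningEffort = "low" | "medium" | "high";

// Narrow the raw env string to the enum the schema expects, dropping
// anything else rather than deferring the failure to schema parsing.
function reasoningEffortFromEnv(
  env: NodeJS.ProcessEnv,
): ReasoningEffort | undefined {
  const raw = env.INKOS_LLM_REASONING_EFFORT;
  return raw === "low" || raw === "medium" || raw === "high"
    ? raw
    : undefined;
}
```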