From cea9383b99e28b8ca6b204e88eb73f727f401fb0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20Bi=C5=82as?= <maciej@bilas.io>
Date: Wed, 25 Mar 2026 16:40:23 +0100
Subject: [PATCH] feat: allow configuring default summary length

---
 README.md                                  |  4 ++
 docs/config.md                             | 14 +++-
 docs/llm.md                                |  3 +-
 src/config/sections.ts                     | 26 ++++++-
 src/config/types.ts                        |  6 ++
 src/daemon/flow-context.ts                 |  2 +-
 src/run/help.ts                            |  2 +-
 src/run/runner-plan.ts                     |  6 +-
 tests/cli.config-precedence.test.ts        | 84 +++++++++++++++++++++-
 tests/config.more-branches.test.ts         |  5 ++
 tests/config.test.ts                       |  6 +-
 tests/daemon.run-context-overrides.test.ts | 33 +++++++++
 12 files changed, 179 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 47f99b73..9c4873a4 100644
--- a/README.md
+++ b/README.md
@@ -211,6 +211,8 @@ summarize "https://open.spotify.com/episode/5auotqWAXhhKyb9ymCuBJY"
 
 `--length` controls how much output we ask for (guideline), not a hard cap.
 
+To change the default (`xl`), set `output.length` in `~/.summarize/config.json`; an explicit `--length` still wins.
+
 ```bash
 summarize "https://example.com" --length long
 summarize "https://example.com" --length 20k
@@ -511,6 +513,7 @@ Supported keys today:
 {
   "model": { "id": "openai/gpt-5-mini" },
   "env": { "OPENAI_API_KEY": "sk-..." },
+  "output": { "length": "long" },
   "ui": { "theme": "ember" }
 }
 ```
@@ -530,6 +533,7 @@ Also supported:
 - `models` (define presets selectable via `--model <preset>`)
 - `env` (generic env var defaults; process env still wins)
 - `apiKeys` (legacy shortcut, mapped to env names; prefer `env` for new configs)
+- `output.length` (default for `--length`: `short|medium|long|xl|xxl` or a character count such as `20k`)
 - `cache.media` (media download cache: TTL 7 days, 2048 MB cap by default; `--no-media-cache` disables)
 - `media.videoMode: "auto"|"transcript"|"understand"`
 - `slides.enabled` / `slides.max` / `slides.ocr` / `slides.dir` (defaults for `--slides`)
diff --git a/docs/config.md b/docs/config.md
index fce487e2..ac1a0a7b 100644
--- a/docs/config.md
+++ b/docs/config.md
@@ -29,6 +29,12 @@ For output language:
 2. Config file `output.language` (preferred) or `language` (legacy)
 3. Built-in default (`auto` = match source content language)
 
 See `docs/language.md` for supported values.
 
+For output length:
+
+1. CLI flag `--length`
+2. Config file `output.length`
+3. Built-in default (`xl`)
+
 For prompt:
@@ -58,12 +64,18 @@ For UI theme:
 {
   "model": { "id": "google/gemini-3-flash" },
   "env": { "OPENAI_API_KEY": "sk-..." },
-  "output": { "language": "auto" },
+  "output": { "language": "auto", "length": "long" },
   "prompt": "Explain like I am five.",
   "ui": { "theme": "ember" }
 }
 ```
 
+`output.length` accepts the same values as `--length`:
+
+- Presets: `short`, `medium`, `long`, `xl`, `xxl`
+- Shorthand: `s`, `m`, `l`
+- Character targets: `1500`, `20k`, `20000` (minimum 50)
+
 Shorthand (equivalent):
 
 ```json
diff --git a/docs/llm.md b/docs/llm.md
index 333c839b..fa780c4f 100644
--- a/docs/llm.md
+++ b/docs/llm.md
@@ -73,7 +73,8 @@ installed, auto mode can use local CLI models via `cli.enabled` or implicit auto
 - `--length short|medium|long|xl|xxl|<chars>`
   - This is _soft guidance_ to the model (no hard truncation).
   - Minimum numeric value: 50 chars.
-  - Default: `long`.
+  - Built-in default: `xl`.
+  - Config default: `output.length` in `~/.summarize/config.json`.
   - Output format is Markdown; use short paragraphs and only add bullets when they improve scanability.
 - `--force-summary`
   - Always run the LLM even when extracted content is shorter than the requested length.
diff --git a/src/config/sections.ts b/src/config/sections.ts
index 105f7ec6..7bb8066b 100644
--- a/src/config/sections.ts
+++ b/src/config/sections.ts
@@ -1,3 +1,4 @@
+import { parseLengthArg } from "../flags.js";
 import { isCliThemeName, listCliThemes } from "../tty/theme.js";
 import {
   isRecord,
@@ -364,7 +365,30 @@ export function parseOutputConfig(root: Record<string, unknown>, path: string) {
     typeof value.language === "string" && value.language.trim().length > 0
       ? value.language.trim()
       : undefined;
-  return typeof language === "string" ? { language } : undefined;
+  const length = (() => {
+    if (typeof value.length === "undefined") return undefined;
+    if (typeof value.length !== "string") {
+      throw new Error(`Invalid config file ${path}: "output.length" must be a string.`);
+    }
+    const trimmed = value.length.trim();
+    if (!trimmed) {
+      throw new Error(`Invalid config file ${path}: "output.length" must not be empty.`);
+    }
+    try {
+      parseLengthArg(trimmed);
+    } catch (error) {
+      throw new Error(
+        `Invalid config file ${path}: "output.length" is invalid: ${(error as Error).message}`,
+      );
+    }
+    return trimmed;
+  })();
+  return typeof language === "string" || typeof length === "string"
+    ? {
+        ...(typeof language === "string" ? { language } : {}),
+        ...(typeof length === "string" ? { length } : {}),
+      }
+    : undefined;
 }
 
 export function parseUiConfig(root: Record<string, unknown>, path: string) {
diff --git a/src/config/types.ts b/src/config/types.ts
index b6a7ec4a..1ae036a9 100644
--- a/src/config/types.ts
+++ b/src/config/types.ts
@@ -213,6 +213,12 @@ export type SummarizeConfig = {
      * - otherwise: translate the output into the requested language
      */
     language?: string;
+    /**
+     * Default summary length (same values as `--length`).
+     *
+     * Examples: "short", "long", "xl", "20k".
+     */
+    length?: string;
   };
   ui?: {
     /**
diff --git a/src/daemon/flow-context.ts b/src/daemon/flow-context.ts
index 7897dbae..c6ef5c34 100644
--- a/src/daemon/flow-context.ts
+++ b/src/daemon/flow-context.ts
@@ -133,7 +133,6 @@ export function createDaemonUrlFlowContext(args: DaemonUrlFlowContextArgs): UrlF
 
   const languageExplicitlySet = typeof languageRaw === "string" && Boolean(languageRaw.trim());
 
-  const { lengthArg } = resolveSummaryLength(lengthRaw);
   const resolvedOverrides: RunOverrides = overrides ?? {
     firecrawlMode: null,
     markdownMode: null,
@@ -200,6 +199,7 @@ export function createDaemonUrlFlowContext(args: DaemonUrlFlowContextArgs): UrlF
   });
   const configForCliWithMagic = applyAutoCliFallbackOverrides(configForCli, resolvedOverrides);
   const allowAutoCliFallback = resolvedOverrides.autoCliFallbackEnabled === true;
+  const { lengthArg } = resolveSummaryLength(lengthRaw, config?.output?.length ?? "xl");
 
   const {
     requestedModel,
diff --git a/src/run/help.ts b/src/run/help.ts
index 1011204f..99319862 100644
--- a/src/run/help.ts
+++ b/src/run/help.ts
@@ -79,7 +79,7 @@ export function buildProgram() {
     )
     .option(
       "--length <length>",
-      "Summary length: short|medium|long|xl|xxl (or s/m/l) or a character limit like 20000, 20k",
+      "Summary length: short|medium|long|xl|xxl (or s/m/l) or a character limit like 20000, 20k (default: xl; configurable via ~/.summarize/config.json output.length)",
       "xl",
     )
     .option(
diff --git a/src/run/runner-plan.ts b/src/run/runner-plan.ts
index c78a1107..49ff19cc 100644
--- a/src/run/runner-plan.ts
+++ b/src/run/runner-plan.ts
@@ -18,6 +18,7 @@ import { resolveRunInput } from "./run-input.js";
 import { createRunMetrics } from "./run-metrics.js";
 import { resolveModelSelection } from "./run-models.js";
 import { resolveDesiredOutputTokens } from "./run-output.js";
+import { resolveSummaryLength } from "./run-settings.js";
 import { resolveStreamSettings } from "./run-stream.js";
 import { createRunnerFlowContexts } from "./runner-contexts.js";
 import { executeRunnerInput } from "./runner-execution.js";
@@ -87,7 +88,7 @@ export async function createRunnerPlan(options: {
     isYoutubeUrl,
     format,
     youtubeMode,
-    lengthArg,
+    lengthArg: requestedLengthArg,
     maxOutputTokensArg,
     timeoutMs,
     retries,
@@ -176,6 +177,9 @@ export async function createRunnerPlan(options: {
   if (!promptOverride && typeof config?.prompt === "string" && config.prompt.trim().length > 0) {
     promptOverride = config.prompt.trim();
   }
+  const lengthArg = lengthExplicitlySet
+    ? requestedLengthArg
+    : resolveSummaryLength(config?.output?.length).lengthArg;
 
   const slidesSettings = resolveRunnerSlidesSettings({
     normalizedArgv,
diff --git a/tests/cli.config-precedence.test.ts b/tests/cli.config-precedence.test.ts
index 9bd9d969..a69d3e27 100644
--- a/tests/cli.config-precedence.test.ts
+++ b/tests/cli.config-precedence.test.ts
@@ -52,6 +52,12 @@ function captureStream() {
   return { stream, getText: () => text };
 }
 
+function resolveFetchUrl(input: RequestInfo | URL): string {
+  if (typeof input === "string") return input;
+  if (input instanceof URL) return input.toString();
+  return input.url;
+}
+
 describe("cli config precedence", () => {
   it("uses config file model when --model and SUMMARIZE_MODEL are absent", async () => {
     mocks.completeSimple.mockClear();
@@ -61,7 +67,7 @@ describe("cli config precedence", () => {
       "<body><article><p>Hi</p></article></body></html>";
 
     const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
-      const url = typeof input === "string" ? input : input.url;
+      const url = resolveFetchUrl(input);
       if (url === "https://example.com") return htmlResponse(html);
       throw new Error(`Unexpected fetch call: ${url}`);
     });
@@ -89,7 +95,7 @@ describe("cli config precedence", () => {
       "<body><article><p>Hi</p></article></body></html>";
 
     const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
-      const url = typeof input === "string" ? input : input.url;
+      const url = resolveFetchUrl(input);
       if (url === "https://example.com") return htmlResponse(html);
       throw new Error(`Unexpected fetch call: ${url}`);
     });
@@ -138,7 +144,7 @@ describe("cli config precedence", () => {
       "<body><article><p>Hi</p></article></body></html>";
 
     const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
-      const url = typeof input === "string" ? input : input.url;
+      const url = resolveFetchUrl(input);
       if (url === "https://example.com") return htmlResponse(html);
       throw new Error(`Unexpected fetch call: ${url}`);
     });
@@ -170,4 +176,76 @@ describe("cli config precedence", () => {
     // --extract means no LLM calls; ensure we didn't try to init a provider.
     expect(mocks.completeSimple).toHaveBeenCalledTimes(0);
   });
+
+  it("uses config file output.length when --length is absent", async () => {
+    mocks.completeSimple.mockClear();
+
+    const html =
+      "<!doctype html><html><head><title>Hello</title></head>" +
+      "<body><article><p>Hi</p></article></body></html>";
+
+    const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
+      const url = resolveFetchUrl(input);
+      if (url === "https://example.com") return htmlResponse(html);
+      throw new Error(`Unexpected fetch call: ${url}`);
+    });
+
+    const tempRoot = mkdtempSync(join(tmpdir(), "summarize-cli-config-"));
+    const configPath = join(tempRoot, ".summarize", "config.json");
+    mkdirSync(join(tempRoot, ".summarize"), { recursive: true });
+    writeFileSync(
+      configPath,
+      JSON.stringify({ model: { id: "openai/gpt-5.2" }, output: { length: "short" } }),
+      "utf8",
+    );
+
+    const stdout = captureStream();
+
+    await runCli(["--timeout", "2s", "--json", "https://example.com"], {
+      env: { HOME: tempRoot, OPENAI_API_KEY: "test" },
+      fetch: fetchMock as unknown as typeof fetch,
+      stdout: stdout.stream,
+      stderr: noopStream(),
+    });
+
+    const parsed = JSON.parse(stdout.getText()) as { input: { length: { preset: string } } };
+    expect(parsed.input.length).toEqual({ kind: "preset", preset: "short" });
+  });
+
+  it("prefers --length over config file output.length", async () => {
+    mocks.completeSimple.mockClear();
+
+    const html =
+      "<!doctype html><html><head><title>Hello</title></head>" +
+      "<body><article><p>Hi</p></article></body></html>";
+
+    const fetchMock = vi.fn(async (input: RequestInfo | URL) => {
+      const url = resolveFetchUrl(input);
+      if (url === "https://example.com") return htmlResponse(html);
+      throw new Error(`Unexpected fetch call: ${url}`);
+    });
+
+    const tempRoot = mkdtempSync(join(tmpdir(), "summarize-cli-config-"));
+    const configPath = join(tempRoot, ".summarize", "config.json");
+    mkdirSync(join(tempRoot, ".summarize"), { recursive: true });
+    writeFileSync(
+      configPath,
+      JSON.stringify({ model: { id: "openai/gpt-5.2" }, output: { length: "short" } }),
+      "utf8",
+    );
+
+    const stdout = captureStream();
+
+    await runCli(["--timeout", "2s", "--length", "20k", "--json", "https://example.com"], {
+      env: { HOME: tempRoot, OPENAI_API_KEY: "test" },
+      fetch: fetchMock as unknown as typeof fetch,
+      stdout: stdout.stream,
+      stderr: noopStream(),
+    });
+
+    const parsed = JSON.parse(stdout.getText()) as {
+      input: { length: { kind: string; maxCharacters: number } };
+    };
+    expect(parsed.input.length).toEqual({ kind: "chars", maxCharacters: 20000 });
+  });
 });
diff --git a/tests/config.more-branches.test.ts b/tests/config.more-branches.test.ts
index 8ef197f7..fce809f3 100644
--- a/tests/config.more-branches.test.ts
+++ b/tests/config.more-branches.test.ts
@@ -90,6 +90,11 @@ describe("config extra branches", () => {
     );
   });
 
+  it("rejects invalid output.length", () => {
+    const root = writeJsonConfig({ output: { length: "tiny" } });
+    expect(() => loadSummarizeConfig({ env: { HOME: root } })).toThrow(/output\.length/i);
+  });
+
   it("rejects non-object ui config", () => {
     const root = writeJsonConfig({ ui: 1 });
     expect(() => loadSummarizeConfig({ env: { HOME: root } })).toThrow(/"ui" must be an object/i);
diff --git a/tests/config.test.ts b/tests/config.test.ts
index da0788ac..569e0f75 100644
--- a/tests/config.test.ts
+++ b/tests/config.test.ts
@@ -58,16 +58,16 @@ describe("config loading", () => {
     });
   });
 
-  it("supports output.language", () => {
+  it("supports output.language and output.length", () => {
     const { root } = writeJsonConfig({
       model: { id: "openai/gpt-5-mini" },
-      output: { language: "de" },
+      output: { language: "de", length: "long" },
     });
 
     const result = loadSummarizeConfig({ env: { HOME: root } });
     expect(result.config).toEqual({
       model: { id: "openai/gpt-5-mini" },
-      output: { language: "de" },
+      output: { language: "de", length: "long" },
     });
   });
 
diff --git a/tests/daemon.run-context-overrides.test.ts b/tests/daemon.run-context-overrides.test.ts
index ed8589d8..87c475ec 100644
--- a/tests/daemon.run-context-overrides.test.ts
+++ b/tests/daemon.run-context-overrides.test.ts
@@ -104,6 +104,39 @@ describe("daemon/flow-context (overrides)", () => {
     ).toBe("en");
   });
 
+  it("uses config length when request length is unset, then prefers request overrides", () => {
+    const home = makeTempHome();
+    writeConfig(home, { output: { length: "short" } });
+
+    const configCtx = createDaemonUrlFlowContext({
+      env: { HOME: home },
+      fetchImpl: fetch,
+      cache: makeCacheState(),
+      modelOverride: null,
+      promptOverride: null,
+      lengthRaw: "",
+      languageRaw: "auto",
+      maxExtractCharacters: null,
+      runStartedAtMs: Date.now(),
+      stdoutSink: { writeChunk: () => {} },
+    });
+    expect(configCtx.flags.lengthArg).toEqual({ kind: "preset", preset: "short" });
+
+    const requestCtx = createDaemonUrlFlowContext({
+      env: { HOME: home },
+      fetchImpl: fetch,
+      cache: makeCacheState(),
+      modelOverride: null,
+      promptOverride: null,
+      lengthRaw: "20k",
+      languageRaw: "auto",
+      maxExtractCharacters: null,
+      runStartedAtMs: Date.now(),
+      stdoutSink: { writeChunk: () => {} },
+    });
+    expect(requestCtx.flags.lengthArg).toEqual({ kind: "chars", maxCharacters: 20000 });
+  });
+
   it("applies run overrides for daemon contexts", () => {
     const home = makeTempHome();
     const ctx = createDaemonUrlFlowContext({