diff --git a/.gitignore b/.gitignore index fd4f2b0..22e1168 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ node_modules .DS_Store +scripts/test-output/ +scripts/node_modules/ diff --git a/LEARNINGS.md b/LEARNINGS.md new file mode 100644 index 0000000..63933ef --- /dev/null +++ b/LEARNINGS.md @@ -0,0 +1,339 @@ +# Too Many Threads — Complete Learnings Document + +## Play-by-Play: Everything We Discovered + +### Phase 0: Initial Assessment + +**Read the full hank**: 8 codons (including a 3-iteration generation loop). Codons: read-and-explore → story-diversity → visualize-candidates → select-merge-spec → story-review → generation-loop(3x) → comic-review → post-processing. + +**Read the execution**: The original run produced 14 panels (cover + 13 strips) across 3 generation iterations. An Opus-driven review agent picked the best version of each panel, resulting in a mix: 4 from iter-0, 8 from iter-1, 2 from iter-2. + +**First panel review**: Identified 8 issues across 14 panels. Brought in a Codex agent for cross-validation — it caught things we missed, we caught things it missed, and we corrected each other on nuance. 
+ +### Phase 0.5: Three-Way Analysis (Hrishi + Claude + Codex) + +**What Codex caught that Claude missed:** +- Panel-03 P4: the sign revision doesn't land visually — sign still says "Must Be Fast" when caption says "She revised the listing" +- Panel-11 has TWO prompt-leak captions, not one (second leak in P4: "The visible seam reads as beauty...") +- The `--reference-images` CLI flag is parsed but never used in the multi-turn chat path of `generate-all-panels.ts` +- Post-processing prompt assumes 15 strips (says "panel-01 through panel-15") + +**What Claude caught that Codex missed:** +- Systematic pattern: ALL text errors correlate with prompts missing the RENDERING RULES header (5 of 14 prompts) +- Cross-iteration mixing as the PRIMARY cause of style drift (not just the reference image code path) +- The tense fix needed on panel-10 ("has" → "had") + +**Where Codex overclaimed:** +- Said "the main generation loop is not actually using the visual reference path it claims" — partially true for `--reference-images` but `--use-cover-as-ref` DOES work implicitly through chat history (cover is generated first, stays in context) + +**Where Claude initially got it wrong:** +- Misattributed panel-03's issue to sign ORDER rather than visual payoff (Codex corrected) +- Initially missed the second prompt leak in panel-11 P4 + +### Phase 1: Prompt Edits + Targeted Regeneration + +**Added RENDERING RULES to 5 missing prompts** (02, 04, 07, 09, 11). Template: +``` +## RENDERING RULES — READ FIRST +1. ONLY render the EXACT text that appears after "Caption:" or "Speech bubble:" markers +2. Each text appears EXACTLY ONCE. NEVER duplicate. +3. Scene descriptions are INVISIBLE — do NOT render as text +4. ALL characters are INSECTS. NO humans. +5. Caption boxes: pale cream, bold text. Speech bubbles: white, bold black text. 
+``` + +**Story improvements made:** +- Sign flip in Strip 3: "Must Be Fast" first → gnats swarm → revised to "Must Be Consistent" (with messy scratched-out handwriting showing real-time learning) +- Strip 1 P3: Spool changed from deflated to MANIC (still spinning furiously, tangled, oblivious, grinning) — the denial break happens in P4, not P3 +- Frognu: "round workshop window" → "rectangular doorway" (stopped porthole interpretation). Added "fond and slightly worried — like a neighbor who's been watching all week" +- Ported Frognu character spec from Chronicles (spec-3.md): Sanrio-like, oversized hat, overstuffed bag, huge eyes, baby-chibi proportions + +**Regenerated 8 panels via generate-panel.ts** with cover + panel-02 as reference images. + +**Key incident: Panel-11 needed 3 attempts.** +- v1: P2 leaked "Moth wince." from parenthetical stage direction `(Moth, the large moth on the left, wincing after testing a repair strand)` +- v2: Stripped parenthetical → P2 duplicated "Ugh. Strong." in two bubbles +- v3: Added "This panel has exactly ONE speech bubble" → clean + +**Learning: Parenthetical stage directions in speech bubble specs leak into rendered text.** Strip to bare `(Character): "line"`. Also add explicit bubble count when only one expected. + +### Phase 2: Polish Pass + +**Issues found by Hrishi on review:** +- Panel-01 P4: Spool looking at a painting on the wall instead of holding a scroll +- Panel-04: Spool drawn too tiny +- Panel-05 P4: Spool hanging upside-down looking happy instead of exhausted +- Panel-06: Art style completely different (kept from original iter-2) +- Panel-10: Font different, Spool different, needs "However" transition +- Panel-11: Caption wording change ("what to do" → "for instructions") + +**Fixes applied, then regenerated.** + +**Panel-04 pass-2 regression**: Adding "PROMINENT size" language for Spool caused a new duplicate text error ("Then the is wrong" reappeared). Kept pass-1 version instead. 
**Learning: prompt changes can introduce regressions. Always compare before accepting.** + +### Phase 3: Spool Consistency + Cross-Iteration Seams + +**Problem identified**: The 8 regenerated panels had consistent Spool (generated with same references), but the 6 kept originals (from different iterations) had different Spool proportions. The seams between new and old panels were visible. + +**Solution**: Regenerated ALL remaining kept panels (05, 09, 12, 13) with the same references. Only cover and panel-06 (intentionally different moonlit style) stayed from originals. + +**Panel-12 two-Spool bug**: The prompt described Spool doing two sequential actions ("begins to spin... then STOPS"). Generator rendered two separate Spool figures. Fixed with "IMPORTANT: There is only ONE Spool in this panel." + +**Panel-13 attribution bug**: Frognu's line and Spool's line rendered on wrong characters. Fixed with LEFT/RIGHT spatial anchoring: "LEFT speech bubble FROM FROGNU... RIGHT speech bubble FROM SPOOL..." + +**Font drift on panels 11-13**: Added explicit font anchoring: "FONT: Clean, rounded, bold comic book lettering — NOT thin handwriting or irregular script." + +**Tapestry continuity**: Passing panel-10 (storm tapestry) as a 3rd reference image for panels 11-13 helped visual continuity. + +**Chat-history approach** (generate-panel-with-history.ts): Sends existing panels as user-turn images, model acknowledges, then generates new panel. Works well for style anchoring. Initial attempt to put images in model turns failed (Gemini thought_signature requirement) — fixed by keeping images in user turns only. + +**Frognu cuddliness push**: Made even more baby-like in panel-13: "tiny, extremely cute... head nearly HALF total height... huge round front-facing eyes like a baby animal... the cutest thing in the panel" + +### Phase 4: Spool Character Spec Standardization + +**Created standardized Spool spec** applied to all 14 prompts. 
New elements: +- Age: "young-adult, NOT a baby or toddler" +- Head ratio: "30-35%, NOT chibi" +- Limbs: "3 pairs stubby front legs (hands) + 4 pairs prolegs (walking)" +- Scale: "acorn-sized, fits on thimble" +- Consistency: "SAME proportions in every panel of this strip" + +### Phase 5: Clean Regen v1 (Text Spec Only) + +**14/14 generated, 0 failures.** But Spool consistency was all over the place — baby in some panels, mature in others, yellow-green vs apple-green, wildly different head ratios. + +**Finding: Text character specs alone CANNOT control visual consistency.** The model interprets "8 segments, 30-35% head ratio" differently every time based on pose, zoom, emotional context, and accumulated conversation drift. + +### Phase 6: Context Dilution Diagnosis + +**Built diagnostic tool** (diagnose-chat-context.ts) to map exactly what the model sees at each generation step. + +**Key numbers:** +- Turn 0: 5 reference images = 9MB (100% of image context) +- Turn 16 (panel-07): 5 refs + 8 panels = 13 images, refs = 9% of context +- Turn 28 (panel-13): 5 refs + 14 panels = 19 images, refs = 6% of context + +**Finding: Reference images get "drowned out" by accumulated generated panels.** The model anchors on RECENT images, not old references. If panel-05 drifts slightly, panel-06 anchors on that drift, and it compounds. This is worldline rot in a multi-turn image generation context. + +### Phase 7: Clean Regen v2 (With Reference Injection) + +**Modified generate-all-panels.ts** to inject 5 Spool reference images as the first chat message before generating any panels. The model acknowledges the references, then generates. + +**Result: Still inconsistent.** The same context dilution problem — by panel 10+, the references are buried under 100MB+ of generated panels. 
+ +### Phase 8: Front-Loaded Approach (The Breakthrough) + +**Inspired by Hrishi's original workflow**: "What worked was to give it the full set of strips, the character specs, and a few reference images in the first message. Then just say 'Can I please get the first page?'" + +**Built generate-frontloaded.ts:** +1. First message: 5 reference images + ALL 14 strip descriptions compiled into one 69K-char context document +2. Per-panel: simple one-line "Please generate strip N" request + +**Result: BEST character consistency across all panels.** Spool looks like the same character throughout. The model absorbed everything upfront and maintained it. + +**Why it works**: Instead of re-explaining the character in every turn (competing with references for attention), the references are DOMINANT in the initial context. Each per-panel request is tiny (~50 chars), so it doesn't displace the reference images' influence. + +**Text issues**: 3 of 14 panels had prompt leaks (scene descriptions rendered as caption text). But character consistency was solved. + +### Phase 9: Hybrid Approach + +**Built generate-frontloaded-v2.ts**: Same front-loaded context, but per-panel requests now include the EXACT text to render: +``` +Please generate strip 07. The ONLY text in this image: +- Speech: "You see the geometry in everything." +- Speech: "You find what's fragile." +- Speech: "You see where to look." +- Speech: "None of you are the problem. My instructions were." +``` + +**Result: Panel-09 Moth joke FIXED** (was garbled in v3, clean in hybrid). Character consistency maintained. + +**Remaining prompt leaks**: Panels 07, 10, 11, 13 still had occasional scene description text rendering. But these are STOCHASTIC, not systematic. + +### Phase 10: The Re-Roll Strategy (Final Breakthrough) + +**Tested**: Regenerated panels 10 and 11 with ZERO prompt changes. Just re-rolled. + +**Result: Both clean on retry.** The prompt leaks that plagued these panels for multiple generations just... 
weren't there on the next roll. + +**Applied to panels 03, 06, 07**: All clean on re-roll. Panel-13: 3 candidates, picked the best. + +**THE KEY FINDING: Prompt leaks from image generators are stochastic, not systematic.** The correct hank response is not to rewrite prompts — it's to retry and pick clean results. This is exactly the "best picks" strategy the original hank already uses, just applied to text correctness as well as visual quality. + +--- + +## Organized Learnings + +### Architecture + +| # | Learning | Impact | +|---|---------|--------| +| A1 | **Front-load context, simplify requests** — send everything (specs + storyboards + references) in one first message, then "Can I get page N?" per panel | Fundamental architecture change to the generation loop | +| A2 | **Character reference images are essential** — text specs alone cannot control visual consistency | Need a reference-image preparation step | +| A3 | **Context dilution drowns references** — by panel 13, references are 6% of image context | Front-loading mitigates this; periodic re-injection could help more | +| A4 | **Hybrid requests work best** — per-panel requests that specify ONLY the text to render (not scene descriptions) reduce prompt leaks | Change per-panel request format | +| A5 | **Re-roll is the fix for stochastic failures** — don't rewrite prompts for occasional text leaks, just retry | Change the review/regen strategy in the generation loop | +| A6 | **Cross-iteration mixing creates style seams** — picking best panels from different generation sessions creates visible inconsistency | Single-session generation is better; re-rolls should use same references | + +### Prompt Engineering + +| # | Learning | Impact | +|---|---------|--------| +| P1 | **RENDERING RULES header prevents most prompt leaks** — every text error correlated with a missing header | Must be in EVERY prompt | +| P2 | **Parenthetical stage directions leak** — `(Moth, wincing after testing)` renders as text | Strip to 
bare `(Character): "line"` | +| P3 | **Explicit bubble/caption counts prevent duplication** — "This panel has exactly ONE speech bubble" | Add per-panel counts | +| P4 | **"ONE Spool" language prevents character duplication** — describing sequential actions causes the generator to render multiple figures | Be explicit about character count | +| P5 | **Font anchoring reduces font drift** — "Clean, rounded, BOLD comic lettering — NOT thin handwriting" | Add to art style section | +| P6 | **Spatial attribution prevents speech swap** — "LEFT bubble FROM Frognu, RIGHT bubble FROM Spool" | Use for multi-character dialogue panels | +| P7 | **Scene descriptions with narrative prose leak more** — compositional directions are safer than sentences | Keep scene descriptions minimal and spatial | +| P8 | **"INTACT" / "NOT torn" for tapestry** — the generator defaults to dramatic damage unless told otherwise | Explicit state assertions for key objects | +| P9 | **Prompt changes can introduce regressions** — fixing one thing can break another | Always compare before accepting | + +### Character Design + +| # | Learning | Impact | +|---|---------|--------| +| C1 | **Text character specs are necessary but not sufficient** — ratios, segment counts, age all specified but still huge variance | Must pair with visual references | +| C2 | **Visual reference images lock in the character** — 5 curated Spool close-ups as reference dramatically improved consistency | Need a "character reference" preparation step | +| C3 | **Cross-comic character continuity needs explicit spec porting** — Frognu went from generic to recognizable by porting 5 specific traits from the Chronicles spec | Maintain a canonical character database | +| C4 | **Emotional context bleeds into character design** — sad scenes make Spool more baby-like, manic scenes more detailed | Explicit "SAME proportions regardless of emotion" language helps | +| C5 | **Cuddlier = more specific** — "baby-like Sanrio/chibi" is more 
actionable than "cute" for Frognu | Specific proportional language beats adjectives | + +### Pipeline & Code + +| # | Learning | Impact | +|---|---------|--------| +| T1 | **`--reference-images` was dead code in the chat path** — parsed but never injected into multi-turn chat messages | Fixed in our modified generate-all-panels.ts | +| T2 | **Gemini thought_signature blocks fake model turns** — can't put images in model role to fake conversation history | Use user-turn images + chat.sendMessage() instead | +| T3 | **Chat-history approach works for single-panel regen** — send 2-3 good panels as user messages, model acknowledges, then generate | Good for targeted fixes | +| T4 | **Post-processing prompt had wrong strip count** — said 15 strips instead of 13 | Fix the template | +| T5 | **Watermark needs a logo file in the data source** — the pipeline silently skips if missing | Either provide logo or switch to generated back page | +| T6 | **The rerun bundle is a snapshot** — editing workspace prompts doesn't update the bundle | Source of truth is workspace/generation/prompts/ | + +### Story & Creative + +| # | Learning | Impact | +|---|---------|--------| +| S1 | **Sign flip ("Fast" → "Consistent") with messy revision** — visually shows character growth on the sign itself | Better story beat | +| S2 | **Manic-not-deflated Spool** — having P3 be denial and P4 be realization gives sharper contrast | Better pacing | +| S3 | **Warmer Frognu framing** — "neighbor who's been watching all week" vs "stranger at window" changes the emotional read | More connected universe feel | +| S4 | **"However, the swamp had..."** — adding the transition word smooths the flow between strips | Small caption fixes compound | +| S5 | **"Nobody asked for instructions"** — stronger than "what to do" for the team-without-direction beat | Word choice matters | + +--- + +## What Needs to Change in the Hank + +### 1. 
Generation Script: Replace with Front-Loaded Approach + +**Current**: `generate-all-panels.ts` sends full prompt per panel in multi-turn chat. +**New**: `generate-frontloaded-v2.ts` — front-loads ALL context + references in first message, sends short text-focused requests per panel. + +**Changes needed:** +- New script: `generate-frontloaded-v2.ts` (already written and tested) +- Or modify existing `generate-all-panels.ts` to: + a. Compile all prompts into one context document + b. Inject reference images in first message + c. Use short text-focused requests per panel + +### 2. New Rig: Character Reference Preparation + +**Add a rig step before generation** that prepares character reference images: +- Option A: Curated references from the data source (simplest) +- Option B: Generate a character sheet from the spec, then use as reference +- Option C: Generate the cover first, crop character close-ups, use as references + +**Recommended**: Option A — architect provides 3-5 character reference images in the data source. The rig copies them to a known location. The generation script reads them. + +### 3. Prompt Standardization + +**Every panel prompt must have:** +- RENDERING RULES header (verbatim template) +- Standardized character spec blocks (Spool, Frognu, etc.) +- Font anchoring in art style section +- Stripped parenthetical stage directions +- Explicit bubble/caption counts per panel +- "ONE [character]" language where needed +- Spatial attribution for multi-character dialogue + +### 4. Generation Loop Strategy Change + +**Current**: Generate 3 iterations, review each, pick best per panel. +**New**: +1. Front-loaded generation (1 iteration) +2. Review for text correctness (automated or agent-driven) +3. Re-roll ONLY panels with text issues (not all panels) +4. Pick from re-rolls +5. Repeat until all panels clean + +This changes the loop from "generate everything 3 times and pick" to "generate once, fix what's broken." 
Much cheaper and preserves cross-panel consistency. + +### 5. Review Agent Changes + +**The review agent should distinguish:** +- Systematic issues (wrong character design, missing story beat) → fix prompt +- Stochastic issues (prompt leak, garbled text, duplicate bubble) → re-roll + +Currently the review treats all issues the same (fix prompt + regen). The stochastic ones should just be re-rolled. + +### 6. Post-Processing Fixes + +- Fix strip count assumption (13 not 15) +- Consider designed back page instead of watermark overlay +- Verify logo exists before assuming watermark step + +### 7. Data Source Requirements + +Add to the data source input: +- `character-references/` directory with curated character close-ups +- `assets/logo.png` if watermark desired +- `config.json` with comic title, author, strip count + +--- + +## Summary Statistics + +| Metric | Value | +|--------|-------| +| Total panel generations | ~60+ | +| Panels in final comic | 14 | +| Panels from hybrid-v2 run | 8 | +| Panels from targeted re-rolls | 6 (03, 06, 07, 10, 11, 13) | +| Total passes | 3 major + multiple re-rolls | +| Final text error rate | 0/14 (0%) | +| Approaches tested | 5 (per-panel, reference-injected, frontloaded, hybrid, re-roll) | +| Key architectural finding | Front-load context + simple requests + re-roll failures | +| Key character finding | Visual references essential, text specs insufficient alone | +| Key text finding | Prompt leaks are stochastic — retry, don't rewrite | + +--- + +## Missed Details & Addenda + +### Tools Created During This Process +- `generate-panel-with-history.ts` — chat-history approach for single-panel regen with style anchoring +- `generate-frontloaded.ts` — front-loaded context + simple requests (v1) +- `generate-frontloaded-v2.ts` — hybrid: front-loaded context + text-focused requests (v2, best approach) +- `diagnose-chat-context.ts` — diagnostic tool mapping context growth across turns +- `spool-standard-spec.md` — canonical character spec 
reference + +### Debugging Techniques That Worked +- **ImageMagick quadrant crops** (`magick panel.png -crop 2x2@ +repage panel-Q%d.png`) for close text inspection. The `-crop 2x2@` syntax was more reliable than percentage-based cropping. +- **Spool comparison strips** — cropping Spool from each panel and laying them side by side revealed consistency issues invisible when viewing full panels +- **MD5 comparison** across pipeline stages (resized → watermarked → final) to diagnose watermark failure +- **Chat context diagnosis** — calculating image MB at each turn to find context dilution + +### Generator Quirks Discovered +- **Matryoshka Spool** — the generator once rendered a tiny second Spool head emerging from the top of Spool's main head (panel-03 pass 3) +- **Emotional bleeding** — sad scene descriptions cause the generator to draw characters with baby-like proportions (bigger eyes, rounder head). Manic scenes produce more detailed/mature renderings. +- **Sequential action = multiple characters** — "she spins, then STOPS" rendered as two Spools in different poses +- **Narrative prose = caption text** — any sentence in a scene description that reads like narration will eventually be rendered as a visible caption +- **The generator forgets references over time** — even with multi-turn chat, images from turn 1 lose influence by turn 15+ + +### What We DIDN'T Try (Future Experiments) +- **Periodic reference re-injection** — re-sending character references every 4-5 panels as "reminder" messages +- **Batch splitting** — generating in 3 batches of 5 panels, each with fresh reference injection +- **Character sheet generation** — dedicated panel of Spool from multiple angles/expressions as a single reference +- **Post-cover character extraction** — auto-cropping Spool from the generated cover and using as reference for remaining panels +- **Style transfer from the Frognu Chronicles** — using result-5 panels as overall art style references (not just character) +- 
**Different base model** — all testing was on gemini-3.1-flash-image-preview; gemini-2.5-flash-image-preview might behave differently diff --git a/README.md b/README.md deleted file mode 100644 index a931cb5..0000000 --- a/README.md +++ /dev/null @@ -1,304 +0,0 @@ -# Comic Creator - -A Hankweave pipeline that turns creative ideas into finished comic books. - -Give it text about what you care about — blog posts, a manifesto, a one-liner — plus some inspiration images and a few sentences about what kind of comic you want. It reads everything deeply, designs characters, writes a story, generates the art, reviews and iterates, and assembles a PDF. - -The output includes a self-contained rerun package. If a panel isn't right, you can regenerate just that panel without re-running the whole pipeline. You can also translate the comic by rewriting the prompts in another language and regenerating. - -## Quick Start - -```bash -# 1. Create a template folder -./create-template.sh my-comic - -# 2. Fill in my-comic/ideas.md -# Drop writing, blog posts, anything into my-comic/inspiration/ -# Drop style reference images there too -# (Optional) Add a logo.png to my-comic/assets/ for watermarking - -# 3. Set your API keys -export GOOGLE_API_KEY="your-gemini-key" -export ANTHROPIC_API_KEY="your-anthropic-key" - -# 4. Make the comic -./make-comic.sh my-comic -``` - -The richer your input, the better the output. A 5-line brief makes a decent comic. Blog posts, philosophy, and writing samples make a great one. - -## Requirements - -- **bun** (https://bun.sh) — runs the TypeScript scripts -- **Node.js** v18+ — for npx/Hankweave -- **GOOGLE_API_KEY** — for Gemini image generation (Nano Banana 2) -- **ANTHROPIC_API_KEY** — for Claude (story writing, review, art direction) - -## How It Works - -``` -your ideas + writing + images - │ - ▼ - ┌─ Read & Explore (Opus) ───────────┐ - │ Reads everything. Keeps a journal │ - │ of reactions, ideas, quotes. 
│ - │ Generates 3 character + 3 story │ - │ candidates. │ - └────────────────┬──────────────────┘ - ▼ - ┌─ Diversity Review (Haiku) ────────┐ - │ Reviews as a child and an adult. │ - │ Challenges assumptions. │ - └────────────────┬──────────────────┘ - ▼ - ┌─ Visualize (Opus + Nano) ────────┐ - │ Generates sample images of each │ - │ character. Opus picks favorites. │ - └────────────────┬──────────────────┘ - ▼ - ┌─ Select, Merge & Spec (Opus) ────┐ - │ Picks the best character + story. │ - │ Writes a full production spec. │ - │ Writes per-panel generation │ - │ prompts with dialogue. │ - └────────────────┬──────────────────┘ - ▼ - ┌─ Story Review (Opus) ────────────┐ - │ Fresh eyes on the narrative. │ - │ Cuts weak strips. Writes an │ - │ emotional blueprint. │ - └────────────────┬──────────────────┘ - ▼ - ╔═ Generation Loop ×3 ═════════════╗ - ║ Rig: Generate panels via Gemini ║ - ║ (multi-turn chat for character ║ - ║ consistency across panels) ║ - ║ ║ - ║ Opus: Review each panel against ║ - ║ the spec + emotional blueprint. ║ - ║ Update prompts. Pick best. ║ - ╚════════════════╦═════════════════╝ - ▼ - ┌─ Comic Review (Opus) ────────────┐ - │ Reads the finished comic cold. │ - │ Catches text errors, visual │ - │ problems, story gaps. │ - └────────────────┬──────────────────┘ - ▼ - ┌─ Post-Processing (rig + Sonnet) ─┐ - │ Normalize colors → Add borders → │ - │ Resize → Watermark → PDF │ - │ Also creates the rerun package. │ - └────────────────┬──────────────────┘ - ▼ - comic.pdf + rerun/ -``` - -## Output - -After a run completes, you get: - -``` -output/ -├── comic.pdf # The finished comic -├── panels/ # Individual panel images (1200×896) -├── summary.md # Title, characters, story summary -└── rerun/ - ├── prompts/ # Per-panel generation prompts (editable!) 
- ├── scripts/ # All processing scripts - ├── cover-reference.png # Style anchor for character consistency - ├── spec.md # The full production spec - ├── regenerate.sh # Regenerate ALL panels from prompts - └── fix-panel.sh # Fix specific panels by name -``` - -## The Execution Directory - -When Hankweave runs, it creates an execution directory (in `~/.hankweave-executions/` or wherever you specify with `-e`). Inside it: - -``` -agentRoot/ -├── read_only_data_source/ # Your input data (symlinked, read-only) -├── scripts/ # Rig scripts (copied at startup) -├── workspace/ -│ ├── reading-journal.md # Opus's notes from reading your input -│ ├── candidates/ -│ │ ├── characters/ # 3 character options + sample images -│ │ └── stories/ # 3 story options + diversity reviews -│ ├── spec/ -│ │ ├── current.md # The production spec (the key artifact) -│ │ ├── story-review.md # Fresh-eyes narrative review -│ │ └── emotional-blueprint.md # Per-strip emotional intent -│ ├── generation/ -│ │ ├── prompts/ # Per-panel generation prompts -│ │ ├── iteration-0/ # First generation (panels/ + review.md) -│ │ ├── iteration-1/ # Second generation -│ │ ├── iteration-2/ # Third generation -│ │ └── best-picks.md # Which iteration won for each panel -│ ├── approved/ # Best version of each panel -│ ├── comic-review.md # Fresh reader review of finished comic -│ └── post-processed/ # normalized/ → bordered/ → resized/ → final/ -└── output/ # Final deliverables (copied out by Hankweave) -``` - -The `spec/current.md` is the most important intermediate — it's the complete production spec that drives everything downstream. If you want to understand what the hank decided, start there. - -The `generation/best-picks.md` shows which iteration produced the best version of each panel and why. - -## Fixing Individual Panels - -The most common issue: a panel where the character looks different (older, different proportions) or text is duplicated/garbled. 
The rerun package handles this: - -```bash -cd output/rerun/ - -# Fix specific panels — regenerates, re-runs post-processing, rebuilds PDF -./fix-panel.sh panel-07 panel-10 - -# Or edit a prompt first, then regenerate -vim prompts/panel-07.md -./fix-panel.sh panel-07 -``` - -The cover image is used as a style reference for regeneration, keeping characters consistent. - -You can also regenerate multiple candidates and pick the best: -```bash -cd output/rerun/scripts/ -for i in 1 2 3; do - bun run generate-panel.ts \ - --prompt-file ../prompts/panel-07.md \ - --output ../candidates/panel-07-v$i.png \ - --reference-images ../cover-reference.png \ - --size 2K --aspect-ratio 3:2 -done -# Review the candidates, pick your favorite, copy to ../approved/panel-07.png -``` - -## Translating a Comic - -Once you have a comic, you can translate it by editing the prompts and regenerating. The art style and characters stay consistent (anchored by the cover reference) while the text changes. We've tested this with Korean — it works. - -**Step 1: Copy the rerun package** -```bash -cp -r output/rerun/ korean-version/ -cd korean-version/ -``` - -**Step 2: Translate the dialogue** - -In each prompt file, change ONLY the text inside `Caption:` and `Speech bubble:` lines: -``` -# English: -Speech bubble (Weewoo): "What happened here?" -Caption: "She couldn't fix it. So she stayed." - -# Korean: -Speech bubble (Weewoo): "여기서 무슨 일이 있었어?" -Caption: "고칠 수 없었어요. 그래서 곁에 있었어요." -``` - -Keep all art descriptions, character descriptions, and scene instructions in English — Nano understands these better in English and they don't appear as visible text. 
- -**Step 3: Translate the cover title** - -The cover prompt has title text — change it too: -``` -# English: -Visible text at the top of the image: **"The Little Guardian"** -Visible text below the title, smaller: **"A Weewoo Story"** - -# Korean: -Visible text at the top of the image: **"작은 수호자"** -Visible text below the title, smaller: **"위우 이야기"** -``` - -**Step 4: Watch for prompt leakage** - -This is the biggest gotcha in translation. Some scene descriptions are written as narrative prose that Nano renders as visible text. Look for lines like: - -``` -# BAD — Nano will render this as a caption: -The corruption is still there. It hasn't been destroyed. But it's contained. - -# GOOD — Nano treats this as a drawing instruction: -Draw the dark corrupted water visible behind the barrier, contained but present. -``` - -If a scene description reads like narration, rewrite it to start with "Draw..." or restructure as a visual instruction. Also check `CRITICAL:` lines — if they contain quoted English text, Nano may render it. - -**Step 5: Regenerate and review** -```bash -mkdir -p generation output/panels -./regenerate.sh -# Review panels, fix any with duplicate text or English leaks: -./fix-panel.sh panel-07 panel-11 -``` - -## Tips - -**For better stories:** -- Give it blog posts, not briefs. The pipeline reads deeply — quotes, philosophy, and voice samples produce much richer characters and narratives than "make a comic about X." -- The pipeline chooses the strip count (10-15). Tighter is usually better. -- Not every strip needs a lesson. The prompt encourages breathing room — some strips should just be funny, or beautiful, or quiet. - -**For better art:** -- Drop style reference images in `inspiration/`. Even 2-3 references dramatically improve consistency. -- Character reference images (poses, expressions) help more than environment references. -- Action scenes tend to "age up" cute characters. 
This is a known Nano Banana behavior — the review loop catches most cases, and `fix-panel.sh` handles the rest. - -**For fixing issues:** -- Duplicate text in a panel → regenerate with `fix-panel.sh` -- Character looks too old/different → regenerate with cover reference (fix-panel.sh does this automatically) -- Speech bubble pointing to wrong character → edit the prompt to specify "Speech bubble pointing to [character] (the [description] on the [left/right])" then regenerate -- Want multiple options → generate 2-3 candidates and pick the best - -## Known Limitations - -- **Character consistency drifts** across panels, especially in dramatic/action scenes. Multi-turn generation and cover references help significantly but don't eliminate it entirely. -- **Text duplication** (same line appearing twice) happens occasionally (~10% of panels). The review loop catches most instances. -- **Speech bubble attribution** can be ambiguous when multiple characters are close together. -- **Art style** varies slightly between panels generated in different iterations. - -All of these can be fixed per-panel using the rerun package. 
- -## File Inventory - -``` -hank.json # The Hankweave config (8 codons, 1 loop, 1 sentinel) -prompts/ # 9 codon prompts - system-final.md # Global system prompt (craft rules, file structure) - read-and-explore.md # Opus reads input, keeps journal, generates candidates - story-diversity.md # Haiku reviews as child + adult - visualize-candidates.md # Opus reviews generated character samples - select-merge-spec.md # Picks best, writes spec + generation prompts - story-review.md # Cuts weak strips, writes emotional blueprint - generate-and-review.md # Reviews panels, updates prompts per iteration - comic-review.md # Fresh reader quality gate - post-processing.md # Verify output, assemble PDF, create rerun package -scripts/ # 10 TypeScript rig scripts - generate-all-panels.ts # Multi-turn Gemini chat for panel generation - generate-panel.ts # Single panel generation - generate-samples.ts # Character candidate samples - post-process-pipeline.ts # Full pipeline: normalize→border→resize→watermark - normalize-images.ts # Color/brightness normalization - normalize-borders.ts # Consistent white borders - resize-images.ts # Uniform dimensions - watermark-images.ts # Logo watermark overlay - add-text-overlay.ts # Deterministic text on images - assemble-pdf.ts # Images → PDF -sentinels/ - narrator.json # Warm storytelling narrator (Haiku, watches progress) -create-template.sh # Creates a blank input template -make-comic.sh # Runs the hank on filled input -README.md # This file -``` - -## Built With - -- [Hankweave](https://hankweave.southbridge.ai) — Runtime for agentic workflows -- [Claude Opus](https://anthropic.com) — Story writing, art direction, review -- [Gemini / Nano Banana 2](https://ai.google.dev/gemini-api/docs/nanobanana) — Image generation -- [sharp](https://sharp.pixelplumbing.com/) — Image processing -- [pdf-lib](https://pdf-lib.js.org/) — PDF assembly diff --git a/create-template.sh b/create-template.sh deleted file mode 100755 index f5c5c54..0000000 --- 
a/create-template.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Creates a template folder for a new comic project. -# Usage: ./create-template.sh [folder-name] - -set -e - -FOLDER="${1:-my-comic}" - -if [ -d "$FOLDER" ]; then - echo "Error: '$FOLDER' already exists." - exit 1 -fi - -mkdir -p "$FOLDER/inspiration" "$FOLDER/assets" - -cat > "$FOLDER/ideas.md" << 'EOF' -# My Comic - -Tell us what you want the comic to be about. -Be as specific or as vague as you want — the more you leave open, -the more creative freedom the pipeline has. - -## What's the story about? - - - - - -## Characters - - - - - -## Style - - - - - -## Anything else? - - - -EOF - -echo "Created template at ./$FOLDER/" -echo "" -echo "Next steps:" -echo " 1. Edit $FOLDER/ideas.md with your creative brief" -echo " 2. Drop style reference images in $FOLDER/inspiration/" -echo " 3. (Optional) Add a logo.png to $FOLDER/assets/" -echo " 4. Run: ./make-comic.sh $FOLDER" diff --git a/hank.json b/hank.json index 57a7a8f..30cebc7 100644 --- a/hank.json +++ b/hank.json @@ -1,14 +1,14 @@ { "$schema": "https://unpkg.com/hankweave@latest/schemas/hank.schema.json", "meta": { - "name": "Comic Creator", - "version": "1.0.0", - "description": "Takes creative ideas and inspiration → produces a finished comic book PDF. Automated end-to-end: character design, story writing, image generation, post-processing." + "name": "Comic Creator v2", + "version": "0.7.0", + "description": "Takes ideas and inspiration → produces a finished mascot comic book PDF. Uses front-loaded generation with character references for visual consistency." }, "requirements": { "env": ["GOOGLE_API_KEY"] }, - "globalSystemPromptFile": "./prompts/system-final.md", + "globalSystemPromptFile": "./prompts/system.md", "hank": [ { "id": "read-and-explore", @@ -16,22 +16,22 @@ "model": "opus", "continuationMode": "fresh", "promptFile": "./prompts/read-and-explore.md", - "description": "Opus reads ALL raw input deeply. Maintains a reading journal. 
Generates 3 character + 3 story candidates.", + "description": "Reads ALL raw input deeply. Maintains a reading journal. Searches web for inspiration. Generates 3 character + 3 story candidates.", "rigSetup": [ { "type": "copy", - "copy": { "from": "./scripts", "to": "scripts" } + "copy": { + "from": "./scripts", + "to": "scripts" + } }, { "type": "command", "command": { - "run": "cd scripts && bun run check-deps.ts --install", + "run": "cd scripts && bun install --frozen-lockfile 2>/dev/null || bun install", "workingDirectory": "project" } } - ], - "sentinels": [ - { "sentinelConfig": "./sentinels/narrator.json" } ] }, { @@ -40,7 +40,7 @@ "model": "haiku", "continuationMode": "fresh", "promptFile": "./prompts/story-diversity.md", - "description": "Reviews stories as a child and as an adult. Wild card suggestion." + "description": "Reviews stories as a child and as an adult. Challenges assumptions. Suggests a wild card." }, { "id": "visualize-candidates", @@ -65,10 +65,7 @@ "model": "opus", "continuationMode": "fresh", "promptFile": "./prompts/select-merge-spec.md", - "description": "Picks best character+story, writes production spec with dialogue, writes generation prompts.", - "sentinels": [ - { "sentinelConfig": "./sentinels/narrator.json" } - ] + "description": "Picks best character+story, writes production spec with dialogue, writes generation prompts. MUST use standardized character spec template with: age, head ratio, limb count, scale, consistency rule. MUST add RENDERING RULES header to every prompt. MUST strip parenthetical stage directions from speech bubble specs." }, { "id": "story-review", @@ -76,34 +73,47 @@ "model": "opus", "continuationMode": "fresh", "promptFile": "./prompts/story-review.md", - "description": "Fresh-eyes story review. Cuts weak strips. Writes emotional blueprint. Updates prompts." + "description": "Fresh-eyes story review. Checks flow, emotional arc, complexity. Writes emotional blueprint. Updates prompts." 
+ }, + { + "id": "generate-panels", + "name": "Front-Loaded Panel Generation", + "model": "opus", + "continuationMode": "fresh", + "promptFile": "./prompts/generate-panels-frontloaded.md", + "rigSetup": [ + { + "type": "command", + "command": { + "run": "mkdir -p workspace/character-references workspace/approved && cp read_only_data_source/character-references/*.png workspace/character-references/ 2>/dev/null || echo 'No character references in data source — will generate without'", + "workingDirectory": "project" + }, + "allowFailure": true + }, + { + "type": "command", + "command": { + "run": "cd scripts && REFS=$(ls ../workspace/character-references/*.png 2>/dev/null | tr '\\n' ',' | sed 's/,$//'); bun run generate-frontloaded-v2.ts --prompts-dir ../workspace/generation/prompts --output-dir ../workspace/generation --reference-images \"$REFS\" --size 2K --aspect-ratio 3:2", + "workingDirectory": "project" + }, + "allowFailure": true + } + ], + "description": "Front-loads ALL context (character specs, storyboards, reference images) in one message, then generates each panel with short text-focused requests. Character reference images from workspace/character-references/ are injected into the first chat message." 
}, { "type": "loop", - "id": "generation-loop", - "name": "Panel Generation & Art Review Loop", + "id": "review-reroll-loop", + "name": "Review & Re-Roll Loop", "terminateOn": { "type": "iterationLimit", "limit": 3 }, "codons": [ { - "id": "generate-and-review", - "name": "Generate Panels & Art Review", + "id": "review-and-reroll", + "name": "Review Panels & Re-Roll Failures", "model": "opus", - "continuationMode": "fresh", - "promptFile": "./prompts/generate-and-review.md", - "rigSetup": [ - { - "type": "command", - "command": { - "run": "cd scripts && bun run generate-all-panels.ts --prompts-dir ../workspace/generation/prompts --output-dir ../workspace/generation --size 2K --aspect-ratio 3:2 --use-cover-as-ref", - "workingDirectory": "project" - }, - "allowFailure": true - } - ], - "description": "Rig generates panels via multi-turn chat. Opus reviews against spec and emotional blueprint.", - "sentinels": [ - { "sentinelConfig": "./sentinels/narrator.json" } - ] + "continuationMode": "continue-previous", + "promptFile": "./prompts/review-and-reroll.md", + "description": "Reviews each panel for: text correctness (prompt leaks, garbled text, duplicates), character consistency, story accuracy. Distinguishes STOCHASTIC issues (re-roll) from SYSTEMATIC issues (fix prompt). Re-rolls failed panels using generate-panel-with-history.ts with cover + adjacent panel as context. Stops when all panels pass." } ] }, @@ -113,16 +123,7 @@ "model": "opus", "continuationMode": "fresh", "promptFile": "./prompts/comic-review.md", - "rigSetup": [ - { - "type": "command", - "command": { - "run": "cd scripts && bun run create-proxies.ts --input ../workspace/approved --max-width 2000", - "workingDirectory": "project" - } - } - ], - "description": "Reads finished comic cold. Catches text problems, story gaps, visual issues." + "description": "Reads the finished comic cold. Notes every issue." 
}, { "id": "post-processing", @@ -134,13 +135,17 @@ { "type": "command", "command": { - "run": "mkdir -p workspace/post-processed output/panels output/rerun && cd scripts && bun run post-process-pipeline.ts --input ../workspace/approved --output ../workspace/post-processed --scripts-dir . --logo ../read_only_data_source/assets/logo.png 2>&1 || echo 'Pipeline completed'", + "run": "mkdir -p workspace/post-processed/normalized workspace/post-processed/bordered workspace/post-processed/resized workspace/post-processed/watermarked workspace/post-processed/final workspace/approved output/panels", "workingDirectory": "project" } } ], - "description": "Rig runs full post-processing pipeline. Agent verifies, assembles PDF, creates rerun package.", - "outputFiles": [{ "copy": ["output/**/*"] }] + "description": "Normalizes, borders, resizes, watermarks, assembles PDF.", + "outputFiles": [ + { + "copy": ["output/**/*"] + } + ] } ] } diff --git a/make-comic.sh b/make-comic.sh deleted file mode 100755 index 535608d..0000000 --- a/make-comic.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# Makes a comic from a filled-in template folder. -# Usage: ./make-comic.sh [options] -# -# Examples: -# ./make-comic.sh ./my-comic -# ./make-comic.sh ./my-comic --validate # Dry run, no generation -# ./make-comic.sh ./my-comic -n # Fresh run (ignore previous) - -set -e - -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -DATA_DIR="${1:?Usage: ./make-comic.sh }" -shift - -# Check prerequisites -if [ -z "$GOOGLE_API_KEY" ]; then - echo "Error: GOOGLE_API_KEY environment variable must be set." - echo "Get one at https://aistudio.google.com/apikey" - exit 1 -fi - -if ! command -v bun &> /dev/null; then - echo "Error: bun is required. Install: https://bun.sh" - exit 1 -fi - -if ! command -v npx &> /dev/null; then - echo "Error: npx (Node.js) is required." 
- exit 1 -fi - -echo "╔══════════════════════════════════════╗" -echo "║ Comic Creator v1.0.0 ║" -echo "╚══════════════════════════════════════╝" -echo "" -echo "Data: $DATA_DIR" -echo "" - -# Run the hank -npx hankweave "$SCRIPT_DIR/hank.json" "$DATA_DIR" "$@" diff --git a/prompts/comic-review.md b/prompts/comic-review.md index 0361559..4b4ab65 100644 --- a/prompts/comic-review.md +++ b/prompts/comic-review.md @@ -1,26 +1,90 @@ -# Comic Review — Fresh Reader +# Comic Review — Reading the Finished Comic -Read the finished comic as if you've never seen it before. No spec, no context. +You are reading a finished comic for the first time. You have NOT seen the creative +process, the spec, or any reviews. You are a fresh reader. -## What To Do +## Your Task -Read every panel image in `workspace/approved/` in order (cover first, then panel-01 through panel-NN). If a `_proxy/` folder exists inside `approved/`, read from there instead — these are compressed versions safe for your API. The full-resolution originals are preserved for final output. +Read every panel image in `workspace/approved/` in order: +- `cover.png` first +- Then `panel-01.png` through `panel-15.png` -Write `workspace/comic-review.md` with notes on EVERY strip: +**Actually read each image.** Look at it. See what's in it. -For each strip: -- **First impression:** What did you feel? What caught your eye? -- **Story clarity:** Could you follow it without a spec? -- **Text check:** Is ALL text legible? Quote any garbled words. Flag any text that looks like a stage direction or prompt instruction leaked into the image. -- **Visual check:** Character consistency, impossible poses, border style. -- **Grade:** A/B/C/D/F +## What to Note -At the end: -- **Overall:** One-sentence summary. Did it make you feel something? 
-- **Strongest strip** and why -- **Weakest strip** and why -- **Every text problem** (table: strip, issue) -- **Every visual problem** (table: strip, issue) -- **Top 3 improvements** from a reader's perspective +As you read, write `workspace/comic-review.md` with notes on EVERY strip: -Be honest. Be specific. This review is the last quality gate before the comic ships. +### For each strip: +``` +## Strip N (panel-NN.png) + +### First impression +What did I feel? What caught my eye? What's the immediate read? + +### Story +- What's happening? Is it clear? +- Does it follow from the previous strip? +- Would I be confused if I hadn't read a spec? +- Is there a moment of surprise, humor, or emotion? + +### Text & Dialogue +- Can I read ALL the text? (If not, quote what's garbled) +- Does the dialogue sound natural? +- Is any text duplicated or misplaced? + +### Visual Issues +- Character floating/in impossible position? +- Speech bubbles pointing to wrong character? +- Wrong character in scene? +- Border style consistent? (white border expected on all strips, not cover) +- Color/mood appropriate? + +### The Needle/Thread +- Is the character's signature tool visible when working? +- How prominent is it? + +### Grade: [A/B/C/D/F] +One letter, honest. +``` + +### At the end, write: + +``` +## Overall Reading Experience + +### The story in one sentence +[What is this comic about, having just read it cold?] + +### Did it make me feel something? +[Honest answer] + +### The strongest strip +[Which one and why] + +### The weakest strip +[Which one and why] + +### Confusing moments +[Anything that broke immersion or confused me] + +### Text problems +[List every garbled/illegible text instance with strip number] + +### Visual problems +[List every visual issue with strip number] + +### Border consistency +[Note any strips that don't match the expected white-border style] + +### What would make this comic better? 
+[Top 3 suggestions, from a reader's perspective] +``` + +## Important + +- You are a READER, not a technical reviewer. React to the comic as a reader would. +- Be honest. If a strip is boring, say so. If one is brilliant, say so. +- Note EVERY text problem — garbled words, duplicates, illegible passages. +- Note EVERY visual problem — even small ones. +- Your review will be used to fix problems in the next iteration. diff --git a/prompts/generate-and-review.md b/prompts/generate-and-review.md index ce52303..fab086c 100644 --- a/prompts/generate-and-review.md +++ b/prompts/generate-and-review.md @@ -16,24 +16,14 @@ the next iteration. ### 1. Find the Latest Iteration Look in `workspace/generation/` for the highest-numbered iteration directory. -### Image Size Note -The full-resolution panel images may be too large to read directly. Check if a -`_proxy/` folder exists inside the panels directory — if it does, **read images -from `_proxy/` instead**. These are compressed versions at the same resolution -that your API can handle. The full-resolution originals are preserved for the -final output. When writing your review, refer to panels by their original names -(e.g., "panel-01.png") even if you read from `_proxy/panel-01.jpg`. - ### 2. Review Every Panel — READ EVERY IMAGE For each panel, **actually look at the image file** (don't skip any!) and evaluate: **HARD FAIL criteria (any of these = NEEDS_WORK):** - Character floating in air or in an impossible position -- Speech bubble pointing to wrong character, nobody, or ambiguously between - two characters. Check EVERY bubble tail — it must clearly point to the speaker. -- Same text appearing twice in the panel — this is ALWAYS a fail, never an - "echo" or "emphasis." If the same words appear twice, it's a generation error. 
+- Speech bubble pointing to wrong character or nobody +- Same text appearing twice in the panel - Text that is garbled or unreadable - Wrong character in the scene (human in non-human world, wrong species) - Panels appear to be in wrong order (story reads out of sequence) @@ -47,10 +37,6 @@ For each panel, **actually look at the image file** (don't skip any!) and evalua **QUALITY checks (flag but may still PASS):** - Character proportions inconsistent with other panels -- Character looks like a different AGE or SPECIES than in other panels — if the - main character is a cub in strip 1 but looks adult in strip 10, that's a problem - even if the "same character" is technically there. Compare against the cover. -- Art style shifts noticeably (more realistic, different linework, different palette) - Color palette shifts (too warm, too cool, too dark, too bright) - Emotional expression doesn't match the emotional blueprint - Supporting character design differs from previous appearances diff --git a/prompts/generate-candidates.md b/prompts/generate-candidates.md new file mode 100644 index 0000000..c15d1fa --- /dev/null +++ b/prompts/generate-candidates.md @@ -0,0 +1,81 @@ +# Generate Character & Story Candidates + +Read the curated context and generate **3 distinct character design options** and **3 distinct story arc options**. + +## Your Workspace + +- Read `workspace/curated/context-for-spec.md` for the full creative brief +- Read `workspace/curated/reference-catalog.md` to know what visual references exist + +## Part 1: Character Candidates + +Generate 3 very different character concepts. For each, write a detailed description file. + +Write to: +- `workspace/candidates/characters/option-a.md` +- `workspace/candidates/characters/option-b.md` +- `workspace/candidates/characters/option-c.md` + +Each character description should include: + +### The Concept +- What species/type of creature? (Don't just default to a frog — be creative!) +- What's their name? 
+- What's their role in the world? +- What's their personality in one sentence? + +### Visual Design +- **Overall shape:** Body proportions, silhouette (remember the Plush Toy Test) +- **Head/Face:** Eye size and shape (30-40% of face for cuteness), mouth style, expression range +- **Body:** Color palette, texture, distinguishing features +- **Accessories:** 1-2 signature items that tell their story (hat, bag, tool, etc.) +- **Size in world:** How big relative to their environment? + +### Emotional Range +Describe 5 key expressions with how eyes, mouth, and body language change: +- Default/neutral +- Happy/excited +- Sad/overwhelmed +- Determined/working +- Surprised/curious + +### Why This Character Works +- How does this character embody the themes from the creative brief? +- What makes them visually memorable? +- Would they work as a plush toy? + +**Make the three options genuinely different** — different species, different aesthetics, different personality flavors. Not three variations of the same idea. One could be warm and round, another angular and energetic, another small and contemplative. + +## Part 2: Story Candidates + +Generate 3 different story arc options. Each is a 15-strip narrative. + +Write to: +- `workspace/candidates/stories/option-a.md` +- `workspace/candidates/stories/option-b.md` +- `workspace/candidates/stories/option-c.md` + +Each story should include: + +### The Arc +- 3-act structure: Setup (strips 1-5), Development (strips 6-10), Resolution (strips 11-15) +- What's the emotional journey? +- What's the core conflict or tension? + +### Strip-by-Strip Outline +For each of the 15 strips, write 2-3 sentences describing: +- What happens in this strip +- What's the emotional beat +- Key dialogue or caption (if any) + +### Supporting Cast +- What 3-5 supporting characters appear? +- When do they show up? +- What role do they play? + +### Why This Story Works +- How does it serve the themes? +- Is there a satisfying payoff? 
+- Does each strip work standalone AND as part of the larger arc? + +**Make the three stories genuinely different** — different narrative structures, different emotional arcs, different approaches to the themes. One could be a journey, another a day-in-the-life, another a crisis-and-recovery. diff --git a/prompts/generate-panels-frontloaded.md b/prompts/generate-panels-frontloaded.md new file mode 100644 index 0000000..32533d3 --- /dev/null +++ b/prompts/generate-panels-frontloaded.md @@ -0,0 +1,32 @@ +# Front-Loaded Panel Generation + +The rig just ran `generate-frontloaded-v2.ts`, which: +1. Compiled ALL panel prompts into one context document +2. Injected character reference images from `workspace/character-references/` +3. Generated each panel with short text-focused requests +4. Saved panels to `workspace/generation/iteration-N/panels/` + +## Your Job + +1. **Check if the generation succeeded.** Look at `workspace/generation/` for the latest `iteration-N` directory. Check its `metadata.json` for success/failure counts. + +2. **If any panels failed to generate**, note them. The re-roll loop will handle them. + +3. **Copy all generated panels to `workspace/approved/`** as the initial set: + ```bash + cp workspace/generation/iteration-*/panels/*.png workspace/approved/ + ``` + +4. **Prepare character references for re-rolling.** If `workspace/character-references/` contains no reference images yet (the rig creates the directory even when none were provided) but the cover looks good, crop Spool from the cover and save as a reference: + ```bash + cd scripts && magick ../workspace/approved/cover.png -crop 50%x50%+25%+25% +repage ../workspace/character-references/spool-from-cover.png + ``` + +5. **Write a brief generation report** to `workspace/generation/generation-report.md` noting: how many panels generated, which (if any) failed, overall quality assessment. 
+ +## Important Notes + +- The front-loaded approach gives the best character consistency because ALL context is absorbed before any generation begins +- Each per-panel request included ONLY the text to render (captions + speech bubbles), NOT scene descriptions +- Text prompt leaks are STOCHASTIC — the same prompt will sometimes leak scene descriptions and sometimes not +- The review-and-reroll loop (next step) will handle any text issues diff --git a/prompts/input-triage.md b/prompts/input-triage.md new file mode 100644 index 0000000..de7712e --- /dev/null +++ b/prompts/input-triage.md @@ -0,0 +1,61 @@ +# Input Triage + +Read EVERYTHING the user has provided in `read_only_data_source/` and organize it for the pipeline. + +## Your Workspace + +The user's input folder may contain any combination of: +- Markdown files (`.md`, `.mdx`) — writing, blog posts, ideas, background +- Image files — inspiration, style references, character sketches, screenshots +- A special file `ideas.md` — the creative brief (if it exists) +- An `assets/` folder — logo, back page, etc. +- An `inspiration/` folder — reference materials + +**Don't assume any specific structure.** Read whatever is there. + +## What To Do + +### 1. Read Everything +Read every text file. Look at every image. Understand what the user has provided. +Some files might be blog posts (rich background context), some might be creative +briefs, some might be completely unexpected. Read them all. + +### 2. Write Rich Context for the Spec Writer +Create `workspace/curated/context-for-spec.md`: + +This is the MAIN input for the creative codon. Make it RICH. Include: +- Who is this for? What do they do? What are their values? +- What themes, ideas, and stories emerged from their writing? +- What specific creative direction did they give (if any)? +- What's their voice like? (Quote actual passages from their writing) +- Key metaphors, concepts, and vocabulary they use +- What audience are they trying to reach? 
+- Any specific characters or story elements they mentioned + +**Be thorough.** Extract insights from EVERY text file. Blog posts contain philosophy +and worldview that should inform the character and story. Don't just summarize — pull +out the juicy bits, the turns of phrase, the ideas that spark stories. + +### 3. Write Compressed Context for Image Generation +Create `workspace/curated/context-for-nano.md`: +- A COMPRESSED version (under 500 words) +- Focus on visual direction: style, color palette, mood, character appearance +- This is for the image generator which has limited context + +### 4. Catalog Reference Images +Create `workspace/curated/reference-catalog.md`: +- List ALL images found in the data source +- For each: filename, what it shows, why it's useful +- Rank by relevance (most useful for this project first) +- Note which would work as style references for image generation (max 6) + +### 5. Check Assets +Create `workspace/curated/assets-check.md`: +- Is there a logo for watermarking? Where? +- Any back page image? +- Note paths for downstream use + +## Important +- Don't invent creative ideas yet — just organize and compress +- If input is sparse, say so — the next codon will work with whatever we have +- Blog posts are GOLD — they contain the voice, philosophy, and worldview diff --git a/prompts/post-processing.md b/prompts/post-processing.md index 04f065f..3cd311d 100644 --- a/prompts/post-processing.md +++ b/prompts/post-processing.md @@ -1,85 +1,145 @@ # Post-Processing & PDF Assembly -The rig has already run the post-processing pipeline (normalize → border → resize → watermark). Your job is to verify the output and create the final deliverables. +The generation loop is complete. You have approved panels. Time to finish the comic. 
## Your Workspace -- `workspace/post-processed/final/` — Processed panels (from the rig) -- `workspace/approved/` — Original approved panels (for comparison) +- `workspace/approved/` — Best version of each panel - `workspace/spec/current.md` — The spec (for title and author) +- `read_only_data_source/assets/` — Optional logo and back page - `scripts/` — Processing scripts -## What To Do +## Step-by-Step Process -### 1. Verify Processed Panels +### 1. Verify Approved Panels -Check `workspace/post-processed/final/` has all the panels. Compare count with `workspace/approved/`. If any are missing, something went wrong in the pipeline — check each stage directory (normalized, bordered, resized, watermarked) to find where it broke. +Check `workspace/approved/` has ALL expected panels: +- `cover.png` +- `panel-01.png` through `panel-15.png` -Look at one or two panels from `workspace/post-processed/final/` to verify they look correct — right dimensions, visible watermark on non-cover panels, consistent borders. +That's 16 files. If any are missing, check `workspace/generation/iteration-*/panels/` +for alternatives and copy the best one to `workspace/approved/`. -### 2. Assemble PDF +### 2. Normalize Colors -Read the spec for the comic title. Then: ```bash -cd scripts && bun run assemble-pdf.ts --input ../workspace/post-processed/final --output ../output/comic.pdf --title "[COMIC TITLE]" --author "[AUTHOR]" +cd scripts && bun run normalize-images.ts --input ../workspace/approved --output ../workspace/post-processed/normalized ``` -### 3. Copy Output Panels +### 3. Normalize Borders + +Ensure all panels (except cover) have consistent white borders: +```bash +cd scripts && bun run normalize-borders.ts --input ../workspace/post-processed/normalized --output ../workspace/post-processed/bordered --skip "cover.png" +``` + +### 4. 
Resize to Uniform Dimensions + +```bash +cd scripts && bun run resize-images.ts --input ../workspace/post-processed/bordered --output ../workspace/post-processed/resized --width 1200 --height 896 +``` + +### 4b. Add Watermark (if logo exists) + +Check if `read_only_data_source/assets/logo.png` exists. + +**If logo exists:** +```bash +cd scripts && bun run watermark-images.ts --input ../workspace/post-processed/resized --output ../workspace/post-processed/watermarked --logo ../read_only_data_source/assets/logo.png --skip "cover.png" --opacity 0.6 +``` + +**VERIFY THE WATERMARK:** After running, look at one watermarked panel image +(e.g., `workspace/post-processed/watermarked/panel-01.png`). Check the bottom-left +corner — the logo should be visibly present. If it's invisible, re-run with higher +opacity (0.8). + +**If no logo:** Copy resized to watermarked: +```bash +cp -r workspace/post-processed/resized/* workspace/post-processed/watermarked/ +``` + +### 5. Add Text Overlay (deterministic branding) + +Check if `read_only_data_source/ideas.md` or the spec mentions any branding text +to add to the cover (e.g., "Made by friends at example.com"). If so, add it to the +COVER ONLY using the text overlay script: + +```bash +cd scripts && bun run add-text-overlay.ts --input ../workspace/post-processed/watermarked --output ../workspace/post-processed/final --text "[BRANDING TEXT from spec or ideas]" --position bottom-left --font-size 14 --color "#555555" --apply-to "cover.png" +``` + +If there's no branding text to add, just copy watermarked to final: +```bash +cp -r workspace/post-processed/watermarked/* workspace/post-processed/final/ +``` + +### 6. Assemble PDF + +Read the spec for the title and author: +```bash +cd scripts && bun run assemble-pdf.ts --input ../workspace/post-processed/final --output ../output/comic.pdf --title "[COMIC TITLE from spec]" --author "[AUTHOR from input]" +``` + +### 7. 
Copy Output Panels ```bash cp workspace/post-processed/final/* output/panels/ ``` -### 4. Write Summary +### 8. Write Summary Create `output/summary.md` with: - Comic title and brief description -- Character name and one-line description +- Character name and description - Story summary (2-3 sentences) - Number of strips, resolution, file sizes -- A note on what makes this comic special - -### 5. Create Rerun Package +- Generation details (model used, iterations, panels regenerated) -Create `output/rerun/` with everything someone needs to regenerate: -- Copy `workspace/spec/current.md` → `output/rerun/spec.md` -- Copy `workspace/generation/prompts/*.md` → `output/rerun/prompts/` -- Copy `scripts/` → `output/rerun/scripts/` +### 9. Create Rerun Package -Also copy the cover panel to the rerun package (`output/rerun/cover-reference.png`). +Create `output/rerun/` — a self-contained folder that lets someone edit the spec and regenerate the comic without running the full Hank. -Write TWO scripts: - -**`output/rerun/regenerate.sh`** — regenerates ALL panels and assembles a PDF. Uses multi-turn chat (`generate-all-panels.ts --use-cover-as-ref`) for full character consistency. Make it executable. +```bash +mkdir -p output/rerun +``` -**`output/rerun/fix-panel.sh`** — regenerates specific panels by name. Usage: `./fix-panel.sh panel-07 panel-10`. Uses the cover-reference.png for consistency. After regenerating, re-runs post-processing and PDF assembly so the fix is immediately visible. Make it executable. 
Example implementation: +Copy into `output/rerun/`: +- The current spec (`workspace/spec/current.md` → `output/rerun/spec.md`) +- The emotional blueprint (`workspace/spec/emotional-blueprint.md` → `output/rerun/emotional-blueprint.md`) +- All generation prompts (`workspace/generation/prompts/*.md` → `output/rerun/prompts/`) +- The scripts directory (copy `scripts/` → `output/rerun/scripts/`) +- The nano reference images (`workspace/nano-references/` → `output/rerun/nano-references/` if exists) +Then write `output/rerun/regenerate.sh`: ```bash #!/bin/bash -# Fix specific panels: ./fix-panel.sh panel-07 panel-10 +# Regenerate comic from edited spec/prompts +# Edit the prompts in ./prompts/ then run this script + set -e -SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" -for panel in "$@"; do - echo "Regenerating $panel..." - cd "$SCRIPT_DIR/scripts" && bun run generate-panel.ts \ - --prompt-file "$SCRIPT_DIR/prompts/$panel.md" \ - --output "$SCRIPT_DIR/approved/$panel.png" \ - --reference-images "$SCRIPT_DIR/cover-reference.png" \ - --size 2K --aspect-ratio 3:2 -done -echo "Re-running post-processing..." -cd "$SCRIPT_DIR/scripts" && bun run post-process-pipeline.ts \ - --input "$SCRIPT_DIR/approved" --output "$SCRIPT_DIR/post-processed" --scripts-dir . +SCRIPTS="./scripts" +cd "$SCRIPTS" && bun install --frozen-lockfile 2>/dev/null || bun install && cd .. + +echo "Generating panels..." +cd scripts && bun run generate-all-panels.ts --prompts-dir ../prompts --output-dir ../generated --size 2K --aspect-ratio 3:2 --use-cover-as-ref && cd .. + +echo "Normalizing..." +cd scripts && bun run normalize-images.ts --input ../generated/iteration-0/panels --output ../normalized && cd .. + +echo "Resizing..." +cd scripts && bun run resize-images.ts --input ../normalized --output ../resized --width 1200 --height 896 && cd .. + echo "Assembling PDF..." 
-cd "$SCRIPT_DIR/scripts" && bun run assemble-pdf.ts \ - --input "$SCRIPT_DIR/post-processed/final" --output "$SCRIPT_DIR/output/comic.pdf" \ - --title "TITLE" --author "AUTHOR" -echo "Done! Check output/comic.pdf" +cd scripts && bun run assemble-pdf.ts --input ../resized --output ../comic.pdf --title "EDIT THIS TITLE" && cd .. + +echo "Done! Output: comic.pdf" ``` -### 6. Verify +Make the script executable: `chmod +x output/rerun/regenerate.sh` + +### 10. Final Verification -- Check `output/comic.pdf` exists, report size -- Count `output/panels/` files -- Look at one panel to verify quality -- Report done +- Check `output/comic.pdf` exists and report its size +- Count files in `output/panels/` +- Look at one panel from `output/panels/` to verify it looks correct +- Report total output diff --git a/prompts/review-and-reroll.md b/prompts/review-and-reroll.md new file mode 100644 index 0000000..62437d3 --- /dev/null +++ b/prompts/review-and-reroll.md @@ -0,0 +1,79 @@ +# Review & Re-Roll Panels + +## Your Workspace + +- `workspace/approved/` — current best version of each panel +- `workspace/generation/prompts/` — the per-panel prompts +- `workspace/spec/current.md` — the production spec +- `workspace/spec/emotional-blueprint.md` — the emotional blueprint +- `workspace/character-references/` — character reference images +- `scripts/generate-panel-with-history.ts` — for re-rolling individual panels + +## Review Process + +Look at EVERY panel in `workspace/approved/`. For each panel, check: + +### 1. Text Correctness (HIGHEST PRIORITY) +- Are ALL captions and speech bubbles present and correct? +- Is any scene description text rendered as visible text? (prompt leak) +- Is any text duplicated? +- Is any text garbled or nonsensical? +- Are speech bubbles attributed to the correct characters? + +### 2. Character Consistency +- Does Spool look the same across all panels? (head ratio, segments, color, scarf) +- Does she look like the character reference images? 
+- Is there only ONE of each character per panel? (check for duplicate Spools) + +### 3. Story Accuracy +- Does the panel match the storyboard description? +- Are the right characters present? +- Is the emotional beat landing? + +## How to Fix Issues + +### Stochastic Issues → RE-ROLL (don't change the prompt) +These are issues that come and go between generations: +- Prompt leaks (scene description rendered as text) +- Garbled/nonsensical text +- Duplicate speech bubbles +- Minor character duplication +- Font style inconsistency + +**To re-roll a panel:** +```bash +cd scripts && bun run generate-panel-with-history.ts \ + --prompt-file ../workspace/generation/prompts/PANEL-NAME.md \ + --output ../workspace/approved/PANEL-NAME.png \ + --history-panels ../workspace/approved/cover.png,../workspace/approved/ADJACENT-PANEL.png \ + --size 2K --aspect-ratio 3:2 +``` + +Use the cover + one adjacent panel (the panel before or after) as history. This gives the model style context without overwhelming it. + +Generate 2-3 candidates, changing `--output` for each attempt (e.g. `../workspace/generation/rerolls/PANEL-NAME-1.png`) so that neither the other candidates nor the current approved panel is overwritten. Pick the best and copy it to `workspace/approved/PANEL-NAME.png`. If none are clean after 3 attempts, note it as a systematic issue. + +### Systematic Issues → FIX PROMPT (then re-generate) +These are issues that persist across multiple generations: +- Wrong character design (consistently) +- Missing story element +- Wrong text content (the prompt itself has the wrong words) +- Spatial layout fundamentally wrong + +Fix the prompt in `workspace/generation/prompts/`, then re-roll. 
+ +## Completion Criteria + +**You're done when:** +- Every panel has correct, clean text (no leaks, no duplicates, no garbles) +- Character consistency is acceptable across the set +- All story beats are present + +**Write your review to** `workspace/generation/review-and-reroll-report.md` including: +- Per-panel status (pass/fail + reason) +- What was re-rolled and how many attempts +- What was fixed in prompts (if anything) +- Final panel status + +**If all panels pass**, write "ALL PANELS APPROVED" at the top of the report. +**If some still have issues after 3 re-roll attempts each**, note them as "ACCEPTED WITH KNOWN FLAWS" and describe the flaws. diff --git a/prompts/select-merge-spec.md b/prompts/select-merge-spec.md index 8292f86..25fb25d 100644 --- a/prompts/select-merge-spec.md +++ b/prompts/select-merge-spec.md @@ -54,7 +54,7 @@ Every prompt should include: 2. **Font instruction** (copy this exactly): "Rounded comic lettering, consistent across all panels. Speech bubbles are white with black text. Caption boxes are pale cream with slightly italicized text." 3. Full character description (don't just say the name — describe what they look like) 4. Scene description for each panel -5. **The actual dialogue and captions** — written as `Speech bubble (Character): "text"` and `Caption: "text"`. The image generator renders these directly into the image. **When multiple characters are in a panel, always specify where the bubble tail points** — e.g., "Speech bubble pointing to Weewoo (the mouse on the left)". Misattributed bubbles are one of the most common generation errors. +5. **The actual dialogue and captions** — written as `Speech bubble (Character): "text"` and `Caption: "text"`. The image generator renders these directly into the image. 6. 
Mood and lighting Example: diff --git a/prompts/system-final.md b/prompts/system-final.md deleted file mode 100644 index 3f87578..0000000 --- a/prompts/system-final.md +++ /dev/null @@ -1,33 +0,0 @@ -# Comic Creator - -You are part of a pipeline that makes mascot comic series. **10-15 strips** (as many as the story needs, no more), each a 4-panel (2x2 grid) image. Plus a cover. Assembled into a PDF. - -## What Makes a Good Comic - -A good comic tells a story that works on two levels: a child should enjoy the pictures and the adventure, an adult should find meaning in the metaphor. The best comic dialogue is short — six words that make you feel something. The best comic panels show one moment clearly, with room to breathe. - -Comics are NOT illustrated essays. Don't explain — show. - -## Key Craft Rules - -- **Speech bubbles, captions, and dialogue are part of the generated image.** The image generator renders all story text. Include exact dialogue in every generation prompt. The post-processing pipeline only adds watermarks and logos — it cannot add dialogue. -- **Every prompt must be self-contained.** The image generator has no memory. Describe the character fully every time. -- **Describe scenes narratively, not as keyword lists.** -- **Separate text from description.** In prompts, clearly mark what should be VISIBLE TEXT (use `Caption:` and `Speech bubble:`) vs what is scene description. Don't phrase descriptions as narration that could be rendered as captions. -- **Font consistency.** Every prompt should specify: "Rounded comic lettering, consistent across all panels. Speech bubbles are white with black text. Caption boxes are pale cream with slightly italicized text." -- **The Plush Toy Test.** If a character wouldn't work as a stuffed animal, simplify. -- **Not every strip needs a lesson.** Some should just be fun, or beautiful, or quiet. 
- -## File Structure - -``` -workspace/ -├── reading-journal.md -├── candidates/ -├── spec/current.md -├── spec/emotional-blueprint.md -├── generation/prompts/ -├── generation/iteration-N/ -├── approved/ -└── post-processed/ -``` diff --git a/prompts/system.md b/prompts/system.md new file mode 100644 index 0000000..d3f337f --- /dev/null +++ b/prompts/system.md @@ -0,0 +1,32 @@ +# Comic Creator + +You are part of a pipeline that makes mascot comic series. **10-15 strips** (as many as the story needs, no more), each a 4-panel (2x2 grid) image. Plus a cover. Assembled into a PDF. + +## What Makes a Good Comic + +A good comic tells a story that works on two levels: a child should enjoy the pictures and the adventure, an adult should find meaning in the metaphor. The best comic dialogue is short — six words that make you feel something. The best comic panels show one moment clearly, with room to breathe. + +Comics are NOT illustrated essays. Don't explain — show. If a caption tells you what to think, it's a bad caption. If a panel makes you think without telling you, it's a good panel. + +## Key Craft Rules + +- **Speech bubbles, captions, and dialogue are part of the generated image.** The image generator (Nano Banana / Gemini) renders all story text. Include exact dialogue in every generation prompt. The post-processing pipeline only adds watermarks and logos — it cannot add dialogue. +- **Every prompt must be self-contained.** The image generator has no memory between panels. Describe the character fully every time. +- **Describe scenes narratively, not as keyword lists.** Gemini responds to stories, not tags. +- **Separate text from description.** In prompts, clearly mark what should be VISIBLE TEXT (speech bubbles, captions) vs what is scene description. Nano sometimes renders descriptive prose as captions. Use `Caption: "text"` and `Speech bubble: "text"` ONLY for text that should appear in the image. 
Scene descriptions should not be phrased as narration that could be mistaken for captions. +- **The Plush Toy Test.** If a character wouldn't work as a stuffed animal, simplify the design. +- **Font consistency.** Every generation prompt should specify the same font style: "Rounded comic lettering, consistent across all panels. Speech bubbles are white with black text. Caption boxes are pale cream with slightly italicized text." Copy this exact instruction into every prompt. + +## File Structure + +``` +workspace/ +├── reading-journal.md # Notes, ideas, aha moments from reading input +├── candidates/ # Character & story options + samples +├── spec/current.md # The production spec +├── spec/emotional-blueprint.md +├── generation/prompts/ # Per-panel generation prompts +├── generation/iteration-N/ # Generated panels per iteration +├── approved/ # Best panels +└── post-processed/ # Final processed panels +``` diff --git a/prompts/visualize-candidates.md b/prompts/visualize-candidates.md index da41942..62aa940 100644 --- a/prompts/visualize-candidates.md +++ b/prompts/visualize-candidates.md @@ -4,7 +4,7 @@ Sample images have been generated for each character candidate by the rig setup. ## What To Do -1. **Look at EVERY sample image** in `workspace/candidates/characters/samples/`. If a `_proxy/` subfolder exists inside each option's directory, read from there instead (these are compressed versions safe for your API). Read each image file. +1. **Look at EVERY sample image** in `workspace/candidates/characters/samples/`. Read each image file. 2. **Also look at the original inspiration images** in `read_only_data_source/inspiration/` — compare the generated samples against the style references the user provided. Are they in the right aesthetic neighborhood? 
/**
 * diagnose-chat-context.ts
 *
 * Reconstructs and logs exactly what the Gemini model sees at each
 * panel generation step in the multi-turn chat. Does NOT generate
 * images — just builds the conversation log to diagnose context issues.
 *
 * Usage:
 *   bun run diagnose-chat-context.ts \
 *     --prompts-dir ../workspace/generation/prompts \
 *     --reference-images ref-a.png,ref-b.png \
 *     --output ../workspace/chat-diagnosis.json \
 *     [--generated-panels-dir ../workspace/generation/iteration-0/panels]
 */

import { readFileSync, writeFileSync, existsSync, readdirSync, statSync } from "fs";
import { join, resolve, basename, extname } from "path";
import { parseArgs } from "util";

/** Size assumed for a generated panel when --generated-panels-dir is not given. */
const ESTIMATED_PANEL_KB = 10000;
/** Assumed length of the model's text acknowledgment of the reference images. */
const ESTIMATED_ACK_CHARS = 200;
/** Assumed length of the text that accompanies each generated image. */
const ESTIMATED_RESPONSE_CHARS = 50;
/** Above this many images in context, a model turn is flagged with a warning note. */
const IMAGE_WARNING_THRESHOLD = 10;

interface DiagnosisConfig {
  promptsDir: string;
  referenceImages: string[];
  outputPath: string;
  generatedPanelsDir: string | null;
}

interface ContextSnapshot {
  totalImages: number;
  totalTextChars: number;
  estimatedImageMB: string;
  estimatedTextTokens?: number;
  note?: string;
}

type TurnPart =
  | { type: "image"; file: string; sizeKB: number }
  | {
      type: "text";
      chars: number;
      preview: string;
      hasRenderingRules?: boolean;
      hasFontAnchoring?: boolean;
      hasSpoolSpec?: boolean;
      hasSpoolConsistency?: boolean;
    };

interface Turn {
  turn: number;
  role: "user" | "model";
  purpose: string;
  parts: TurnPart[];
  contextSnapshot?: ContextSnapshot;
}

/** State of the chat at the moment a panel is requested (before its image exists). */
interface PanelCheckpoint {
  name: string;
  userTurn: number;
  imagesInContext: number;
}

/**
 * Reads the command line.
 *
 * @returns absolute paths for the prompts directory, the reference images
 *   (comma-separated on the CLI, empty entries dropped), the output JSON file,
 *   and the optional directory of already generated panels (null when absent).
 */
function parseCliArgs(): DiagnosisConfig {
  const { values } = parseArgs({
    args: process.argv.slice(2),
    options: {
      "prompts-dir": { type: "string", short: "p" },
      "reference-images": { type: "string", short: "r" },
      output: { type: "string", short: "o" },
      "generated-panels-dir": { type: "string", short: "g" },
    },
  });
  return {
    promptsDir: resolve(values["prompts-dir"] ?? "."),
    referenceImages: (values["reference-images"] ?? "")
      .split(",")
      .filter(Boolean)
      .map((p) => resolve(p.trim())),
    outputPath: resolve(values.output ?? "./chat-diagnosis.json"),
    generatedPanelsDir: values["generated-panels-dir"]
      ? resolve(values["generated-panels-dir"])
      : null,
  };
}

/**
 * Size of a file in whole kilobytes.
 *
 * @returns the rounded size, or 0 when the file cannot be stat'ed.
 */
function getFileSizeKB(path: string): number {
  try {
    return Math.round(statSync(path).size / 1024);
  } catch {
    return 0;
  }
}

/**
 * Orders prompt files the way the chat consumes them: "cover" first, then by
 * the number embedded in the file name, then alphabetically.
 *
 * @returns a new sorted array; the input array is left untouched.
 */
function sortPromptFiles(files: string[]): string[] {
  return [...files].sort((a, b) => {
    const nameA = basename(a, ".md").toLowerCase();
    const nameB = basename(b, ".md").toLowerCase();
    if (nameA === "cover" && nameB === "cover") return 0;
    if (nameA === "cover") return -1;
    if (nameB === "cover") return 1;
    const numA = parseInt(nameA.replace(/\D/g, ""), 10);
    const numB = parseInt(nameB.replace(/\D/g, ""), 10);
    if (!isNaN(numA) && !isNaN(numB)) return numA - numB;
    return nameA.localeCompare(nameB);
  });
}

/** Formats a byte count as megabytes with one decimal, e.g. "9.8". */
function toMB(bytes: number): string {
  return (bytes / 1024 / 1024).toFixed(1);
}

/**
 * Replays the conversation turn by turn (reference injection, then one
 * user/model pair per prompt file), tracks how much text and image data has
 * accumulated, and writes the log plus a summary to the output path.
 *
 * All figures in the summary are derived from the files actually found, so
 * the findings stay correct for any number of references and panels.
 */
function main(): void {
  const config = parseCliArgs();

  if (!existsSync(config.promptsDir)) {
    console.error(`Error: prompts directory not found: ${config.promptsDir}`);
    process.exit(1);
  }

  const promptFiles = sortPromptFiles(
    readdirSync(config.promptsDir).filter((f) => f.endsWith(".md")),
  );

  const turns: Turn[] = [];
  let turnNumber = 0;
  let totalImagesInContext = 0;
  let totalTextCharsInContext = 0;
  let totalImageBytesInContext = 0;

  const snapshot = (): ContextSnapshot => ({
    totalImages: totalImagesInContext,
    totalTextChars: totalTextCharsInContext,
    estimatedImageMB: toMB(totalImageBytesInContext),
  });

  // Turn 0: reference image injection, followed by the model's acknowledgment.
  if (config.referenceImages.length > 0) {
    const parts: TurnPart[] = [];
    for (const imgPath of config.referenceImages) {
      if (!existsSync(imgPath)) {
        console.warn(`Warning: reference image not found (counted as 0 KB): ${imgPath}`);
      }
      const sizeKB = getFileSizeKB(imgPath);
      parts.push({ type: "image", file: basename(imgPath), sizeKB });
      totalImagesInContext++;
      totalImageBytesInContext += sizeKB * 1024;
    }

    const instructionText =
      "These are CHARACTER REFERENCE images for the main character Spool. Study her exact design...";
    parts.push({
      type: "text",
      chars: instructionText.length,
      preview: instructionText.slice(0, 200),
    });
    totalTextCharsInContext += instructionText.length;

    turns.push({
      turn: turnNumber++,
      role: "user",
      purpose: "Character reference injection",
      parts,
      contextSnapshot: snapshot(),
    });

    turns.push({
      turn: turnNumber++,
      role: "model",
      purpose: "Reference acknowledgment",
      parts: [
        {
          type: "text",
          chars: ESTIMATED_ACK_CHARS,
          preview: "(model acknowledges character reference)",
        },
      ],
    });
    totalTextCharsInContext += ESTIMATED_ACK_CHARS; // estimated
  }

  // Everything counted so far is reference material; the rest will be generated panels.
  const referenceImageBytes = totalImageBytesInContext;

  // Panel generation turns
  const checkpoints: PanelCheckpoint[] = [];
  let totalPromptChars = 0;

  for (const promptFile of promptFiles) {
    const panelName = basename(promptFile, ".md");
    const promptText = readFileSync(join(config.promptsDir, promptFile), "utf-8").trim();

    // User turn: prompt
    const userTurn = turnNumber++;
    checkpoints.push({ name: panelName, userTurn, imagesInContext: totalImagesInContext });
    totalTextCharsInContext += promptText.length;
    totalPromptChars += promptText.length;
    turns.push({
      turn: userTurn,
      role: "user",
      purpose: `Generate ${panelName}`,
      parts: [
        {
          type: "text",
          chars: promptText.length,
          preview: promptText.slice(0, 150) + "...",
          hasRenderingRules: promptText.includes("RENDERING RULES"),
          hasFontAnchoring: promptText.includes("FONT:") || promptText.includes("bold comic"),
          hasSpoolSpec: promptText.includes("young-adult"),
          hasSpoolConsistency: promptText.includes("SAME proportions"),
        },
      ],
      contextSnapshot: {
        ...snapshot(),
        estimatedTextTokens: Math.round(totalTextCharsInContext / 4), // rough estimate
      },
    });

    // Model turn: generated panel (measured when a panels dir is given, else estimated)
    const panelSizeKB = config.generatedPanelsDir
      ? getFileSizeKB(join(config.generatedPanelsDir, `${panelName}.png`))
      : ESTIMATED_PANEL_KB;

    const modelTurn = turnNumber++;
    totalImagesInContext++;
    totalImageBytesInContext += panelSizeKB * 1024;
    totalTextCharsInContext += ESTIMATED_RESPONSE_CHARS;

    turns.push({
      turn: modelTurn,
      role: "model",
      purpose: `${panelName} response`,
      parts: [
        { type: "text", chars: ESTIMATED_RESPONSE_CHARS, preview: "(model text response)" },
        { type: "image", file: `${panelName}.png`, sizeKB: panelSizeKB },
      ],
      contextSnapshot: {
        ...snapshot(),
        note:
          totalImagesInContext > IMAGE_WARNING_THRESHOLD
            ? "WARNING: Many images in context — reference images may be losing influence"
            : "OK",
      },
    });
  }

  // Summary — every number below comes from the counters above.
  const refCount = config.referenceImages.length;
  const panelCount = promptFiles.length;
  const generatedImageBytes = totalImageBytesInContext - referenceImageBytes;
  const referenceSharePct =
    totalImageBytesInContext > 0
      ? Math.round((referenceImageBytes / totalImageBytesInContext) * 100)
      : 0;
  const sizeSource = config.generatedPanelsDir
    ? "measured"
    : `estimated at ~${toMB(ESTIMATED_PANEL_KB * 1024)}MB per panel`;

  const findings: string[] = [];
  findings.push(
    refCount > 0
      ? `Reference images: ${refCount} image(s), ~${toMB(referenceImageBytes)}MB, injected in turn 0.`
      : "No reference images were injected.",
  );
  // Report the state of the context half-way through and at the final panel.
  const checkpointIndexes = [...new Set([Math.floor(panelCount / 2), panelCount - 1])].filter(
    (i) => i >= 0 && i < panelCount,
  );
  for (const index of checkpointIndexes) {
    const cp = checkpoints[index];
    findings.push(
      `By ${cp.name} (turn ${cp.userTurn}), there are ${cp.imagesInContext} images in context.`,
    );
  }
  findings.push(
    `Generated panels add ~${toMB(generatedImageBytes)}MB of image data (${sizeSource}).`,
  );
  findings.push(
    `Reference images represent ${referenceSharePct}% of total image context by the end.`,
  );
  if (refCount > 0) {
    findings.push(
      "The model's attention to the reference images DECREASES as more panels accumulate.",
      'This is likely why Spool drifts — the references are "drowned out" by generated panels.',
    );
  } else {
    findings.push("Without reference images there is no fixed character anchor in context.");
  }

  const summary = {
    totalTurns: turnNumber,
    referenceImages: refCount,
    panelsGenerated: panelCount,
    finalContextImages: totalImagesInContext,
    finalContextTextChars: totalTextCharsInContext,
    finalContextEstimatedImageMB: toMB(totalImageBytesInContext),
    finalContextEstimatedTokens: Math.round(totalTextCharsInContext / 4),
    contextGrowthPerPanel: {
      imagesAdded: 1,
      textCharsAdded:
        panelCount > 0
          ? `~${Math.round(totalPromptChars / panelCount)} (prompt) + ~${ESTIMATED_RESPONSE_CHARS} (response)`
          : `~${ESTIMATED_RESPONSE_CHARS} (response)`,
      imageMBAdded:
        panelCount > 0
          ? `~${toMB(generatedImageBytes / panelCount)}MB`
          : `~${toMB(ESTIMATED_PANEL_KB * 1024)}MB`,
    },
    diagnosis: findings,
    recommendations: [
      "Option A: Re-inject reference images periodically (e.g., every 4-5 panels) as new user messages",
      "Option B: Use fewer, larger reference images (one high-quality composite instead of 5 separate)",
      "Option C: Split generation into batches of 4-5 panels, each starting a fresh chat with references",
      "Option D: After generating the cover, crop Spool from it and add as an additional reference for remaining panels",
    ],
  };

  const diagnosis = { summary, turns };

  writeFileSync(config.outputPath, JSON.stringify(diagnosis, null, 2));
  console.log(`Diagnosis written to: ${config.outputPath}`);
  console.log(`\n=== KEY FINDINGS ===\n`);
  for (const line of summary.diagnosis) {
    console.log(`  ${line}`);
  }
  console.log(`\n=== RECOMMENDATIONS ===\n`);
  for (const rec of summary.recommendations) {
    console.log(`  ${rec}`);
  }
}

main();
"4"; - if (proxyMaxMb.toLowerCase() !== "unlimited" && parseFloat(proxyMaxMb) > 0) { - console.log(`\nCreating proxy images (max ${proxyMaxMb}MB)...`); - try { - const scriptsDir = dirname(resolve(process.argv[1])); - execSync( - `bun run create-proxies.ts --input "${panelsDir}" --max-mb ${proxyMaxMb}`, - { cwd: scriptsDir, stdio: "inherit" }, - ); - } catch (err: any) { - console.warn(`Warning: Proxy creation failed: ${err.message?.slice(0, 200)}`); - // Non-fatal — agent can still try to read originals directly - } - } else { - console.log(`\nProxy creation: ${proxyMaxMb === "unlimited" ? "disabled (unlimited)" : "disabled (max-mb=0)"}`); - } - if (failCount > 0) { process.exit(1); } diff --git a/scripts/generate-frontloaded-v2.ts b/scripts/generate-frontloaded-v2.ts new file mode 100644 index 0000000..2dfba7f --- /dev/null +++ b/scripts/generate-frontloaded-v2.ts @@ -0,0 +1,255 @@ +/** + * generate-frontloaded-v2.ts + * + * Hybrid approach: + * - Full scene descriptions + character specs + reference images in first message + * - Per-panel requests include ONLY the text to render (captions + speech bubbles) + * + * This separates "what to draw" (background context) from "what text to show" (foreground request). 
/**
 * generate-frontloaded-v2.ts
 *
 * Hybrid approach:
 * - Full scene descriptions + character specs + reference images in first message
 * - Per-panel requests include ONLY the text to render (captions + speech bubbles)
 *
 * This separates "what to draw" (background context) from "what text to show" (foreground request).
 */

import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from "fs";
import { join, resolve, extname, basename, dirname } from "path";
import { parseArgs } from "util";
import { execSync } from "child_process";

/**
 * Maps an image path to the MIME type sent with its inline data.
 * Unknown extensions fall back to PNG.
 */
function getMimeType(filePath: string): string {
  const ext = extname(filePath).toLowerCase();
  switch (ext) {
    case ".png": return "image/png";
    case ".jpg": case ".jpeg": return "image/jpeg";
    case ".webp": return "image/webp";
    default: return "image/png";
  }
}

/**
 * Orders prompt files for generation: "cover" first, then by the number
 * embedded in the file name, then alphabetically.
 *
 * @returns a new sorted array; the input array is left untouched.
 */
function sortPromptFiles(files: string[]): string[] {
  return [...files].sort((a, b) => {
    const nameA = basename(a, ".md").toLowerCase();
    const nameB = basename(b, ".md").toLowerCase();
    if (nameA === "cover" && nameB === "cover") return 0;
    if (nameA === "cover") return -1;
    if (nameB === "cover") return 1;
    const numA = parseInt(nameA.replace(/\D/g, ""), 10);
    const numB = parseInt(nameB.replace(/\D/g, ""), 10);
    if (!isNaN(numA) && !isNaN(numB)) return numA - numB;
    return nameA.localeCompare(nameB);
  });
}

// Caption: *"text"* or Caption: "text" (quotes and emphasis markers are optional).
const CAPTION_PATTERN = /^Caption:\s*\*?"?(.+?)"?\*?$/;
// Speech bubble: "text", Speech bubble (Character): "text", or
// Speech bubble pointing to X (the one on the left): "text".
// Parentheticals are skipped as a unit so a colon inside them does not end the label.
const SPEECH_PATTERN = /^Speech bubble(?:[^:"(]*\([^)]*\))*[^:"]*:\s*\*?"(.+?)"/;

/**
 * Extracts the lines of a prompt that must appear as visible text in the image.
 *
 * Recognized markers, one per line: `Caption:`, `Speech bubble…:` (with or
 * without a speaker/pointer annotation before the colon), and the cover's
 * `Title text:` / `Subtitle text:` lines, which are passed through verbatim.
 *
 * @returns the text elements in prompt order, formatted as `Caption: text`
 *   or `Speech: "text"`.
 */
function extractTextElements(promptText: string): string[] {
  const elements: string[] = [];
  for (const rawLine of promptText.split("\n")) {
    const line = rawLine.trim();
    if (line.startsWith("Caption:")) {
      const caption = CAPTION_PATTERN.exec(line);
      if (caption) elements.push(`Caption: ${caption[1]}`);
    } else if (line.startsWith("Speech bubble")) {
      const speech = SPEECH_PATTERN.exec(line);
      if (speech) elements.push(`Speech: "${speech[1]}"`);
    } else if (line.startsWith("Title text:") || line.startsWith("Subtitle text:")) {
      elements.push(line);
    }
  }
  return elements;
}
import { GoogleGenAI } from "@google/genai";

/** One element of a multimodal chat message: plain text or an inline base64 image. */
type MessagePart = { text: string } | { inlineData: { mimeType: string; data: string } };

/** One entry of chat-log.json, written for every requested page. */
interface ChatLogEntry {
  panel: string;
  status: "success" | "no_image" | "error";
  request?: string;
  textElements?: string[];
  responseText?: string;
  error?: string;
}

/** Printable message for a value caught from a `catch` clause. */
function describeError(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}

/**
 * Reads the command line.
 *
 * Reference images that do not exist are skipped with a warning. `--panels`
 * optionally restricts generation to the named pages; `--title` sets the
 * comic title used in the storyboard header (defaults to "Too Many Threads").
 */
function parseCliArgs() {
  const { values } = parseArgs({
    args: process.argv.slice(2),
    options: {
      "prompts-dir": { type: "string", short: "p" },
      "output-dir": { type: "string", short: "o" },
      "reference-images": { type: "string", short: "r" },
      model: { type: "string", short: "m" },
      size: { type: "string", short: "s" },
      "aspect-ratio": { type: "string", short: "a" },
      panels: { type: "string" }, // optional: comma-separated panel names to generate (e.g., "panel-07,panel-09")
      title: { type: "string", short: "t" },
    },
  });

  const referenceImages: string[] = [];
  for (const imgPath of (values["reference-images"] ?? "").split(",")) {
    if (!imgPath.trim()) continue;
    const resolved = resolve(imgPath.trim());
    if (existsSync(resolved)) {
      referenceImages.push(resolved);
    } else {
      console.warn(`Warning: reference image not found, skipping: ${resolved}`);
    }
  }

  const panelFilter = values.panels
    ? values.panels.split(",").map((p) => p.trim()).filter(Boolean)
    : null;

  return {
    promptsDir: resolve(values["prompts-dir"] ?? "."),
    outputDir: resolve(values["output-dir"] ?? "."),
    referenceImages,
    model: values.model ?? "gemini-3.1-flash-image-preview",
    imageSize: values.size ?? "2K",
    aspectRatio: values["aspect-ratio"] ?? "3:2",
    panelFilter,
    title: values.title ?? "Too Many Threads",
  };
}

/**
 * Sends the reference images and the complete storyboard as the first chat
 * message, then requests each page with only its visible text listed.
 *
 * Writes `panels/<name>.png`, `context-sent.md` and `chat-log.json` under the
 * output directory. Exits with code 1 if the API key is missing, no prompts
 * are found, the context message fails, or any page fails to produce an image.
 */
async function main(): Promise<void> {
  const config = parseCliArgs();

  const apiKey = process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error("Error: GOOGLE_API_KEY or GEMINI_API_KEY must be set");
    process.exit(1);
  }

  const client = new GoogleGenAI({ apiKey });

  const promptFiles = sortPromptFiles(
    readdirSync(config.promptsDir).filter((f) => f.endsWith(".md")),
  );
  if (promptFiles.length === 0) {
    console.error(`Error: no .md prompt files found in ${config.promptsDir}`);
    process.exit(1);
  }

  // Resolve the optional page filter up front so typos fail before any API call.
  const knownPanels = new Set(promptFiles.map((f) => basename(f, ".md")));
  const panelFilter = config.panelFilter;
  if (panelFilter) {
    const unknown = panelFilter.filter((name) => !knownPanels.has(name));
    if (unknown.length > 0) {
      console.warn(`Warning: no prompt file for requested panel(s): ${unknown.join(", ")}`);
    }
  }
  const panelsToGenerate = panelFilter
    ? promptFiles.filter((f) => panelFilter.includes(basename(f, ".md")))
    : promptFiles;
  if (panelsToGenerate.length === 0) {
    console.error("Error: --panels did not match any prompt file");
    process.exit(1);
  }

  mkdirSync(config.outputDir, { recursive: true });
  const panelsDir = join(config.outputDir, "panels");
  mkdirSync(panelsDir, { recursive: true });

  // Build full context (all strip descriptions). Page counts follow the prompt files found.
  const hasCover = knownPanels.has("cover");
  const stripCount = promptFiles.length - (hasCover ? 1 : 0);
  let fullContext = `# COMIC: "${config.title}" — Complete Storyboard\n\n`;
  fullContext += `You will generate a ${promptFiles.length}-page comic (${hasCover ? "1 cover + " : ""}${stripCount} strips). Each strip is a 2x2 grid of 4 panels.\n`;
  fullContext += `I will ask for one page at a time. Generate EXACTLY the page I ask for.\n\n`;
  fullContext += `## CRITICAL RULES\n`;
  fullContext += `- The ONLY visible text in each image should be the captions and speech bubbles I specify in my request.\n`;
  fullContext += `- Scene descriptions in this storyboard are INVISIBLE drawing instructions. NEVER render them as text.\n`;
  fullContext += `- ALL characters are INSECTS. NO humans.\n`;
  fullContext += `- Art style: hand-drawn storybook, soft watercolor, THICK linework, warm rounded designs\n`;
  fullContext += `- FONT: Clean, rounded, bold comic lettering. Captions in pale cream boxes. Speech bubbles white with bold black text.\n`;
  fullContext += `- The CHARACTER REFERENCE IMAGES define EXACTLY how Spool looks. Match her in EVERY panel.\n\n---\n\n`;

  // Build per-panel text summaries for the requests
  const panelTextMap = new Map<string, string[]>();

  for (const promptFile of promptFiles) {
    const panelName = basename(promptFile, ".md");
    const promptText = readFileSync(join(config.promptsDir, promptFile), "utf-8").trim();
    fullContext += `# PAGE: ${panelName}\n\n${promptText}\n\n---\n\n`;
    panelTextMap.set(panelName, extractTextElements(promptText));
  }

  console.log(`\n=== FRONT-LOADED V2 (hybrid) ===`);
  console.log(`  Context: ${fullContext.length} chars`);
  console.log(`  Reference images: ${config.referenceImages.length}`);
  if (panelFilter) console.log(`  Generating only: ${panelFilter.join(", ")}`);
  console.log(``);

  // Create chat
  const chat = client.chats.create({
    model: config.model,
    config: {
      responseModalities: ["TEXT", "IMAGE"],
      imageConfig: {
        // CLI strings are passed through as given; the casts are kept from the original call.
        aspectRatio: config.aspectRatio as any,
        imageSize: config.imageSize as any,
      },
    },
  });

  // FIRST MESSAGE: reference images + full context
  const firstParts: MessagePart[] = [];
  for (const imgPath of config.referenceImages) {
    const buffer = readFileSync(imgPath);
    firstParts.push({
      inlineData: { mimeType: getMimeType(imgPath), data: buffer.toString("base64") },
    });
    console.log(`  Ref: ${basename(imgPath)}`);
  }
  firstParts.push({ text: fullContext });

  // Save context for debugging (before sending, so it exists even if the send fails)
  writeFileSync(join(config.outputDir, "context-sent.md"), fullContext);

  console.log(`\n  Sending context...`);
  try {
    const resp = await chat.sendMessage({ message: firstParts });
    const ack = resp.candidates?.[0]?.content?.parts?.find((p) => p.text)?.text;
    if (ack) console.log(`  Model: ${ack.slice(0, 150)}\n`);
  } catch (err: unknown) {
    console.error(`  Context failed: ${describeError(err).slice(0, 300)}`);
    process.exit(1);
  }

  // GENERATE panels with text-focused requests
  let successCount = 0;
  let failCount = 0;
  const chatLog: ChatLogEntry[] = [];

  for (const promptFile of panelsToGenerate) {
    const panelName = basename(promptFile, ".md");
    const outputPath = join(panelsDir, `${panelName}.png`);
    const pageLabel = panelName === "cover" ? "the cover page" : `strip ${panelName.replace("panel-", "")}`;
    const textElements = panelTextMap.get(panelName) ?? [];

    // Build the short request with text elements
    let request = `Please generate ${pageLabel}. Follow the storyboard exactly.\n\n`;
    if (textElements.length > 0) {
      request += `The ONLY text that should appear in this image:\n`;
      for (const el of textElements) {
        request += `- ${el}\n`;
      }
      request += `\nNo other text. No scene descriptions as text. Only the lines listed above.`;
    } else {
      // Nothing was extracted: say so explicitly instead of sending an empty list.
      console.warn(`    Warning: no Caption/Speech bubble lines found in ${promptFile}`);
      request += `This page has NO visible text. Do not render any captions, speech bubbles, or scene descriptions as text.`;
    }

    console.log(`  [${successCount + failCount + 1}/${panelsToGenerate.length}] ${panelName}`);
    console.log(`    Text elements: ${textElements.length}`);

    try {
      const response = await chat.sendMessage({ message: request });
      const responseParts = response.candidates?.[0]?.content?.parts ?? [];

      let imageFound = false;
      let responseText = "";
      for (const part of responseParts) {
        if (part.text) responseText += part.text;
        if (part.inlineData?.data) {
          const buf = Buffer.from(part.inlineData.data, "base64");
          writeFileSync(outputPath, buf);
          console.log(`    Saved: ${basename(outputPath)} (${(buf.length / 1024).toFixed(0)} KB)`);
          imageFound = true;
          successCount++;
          break; // only the first image of a response is kept
        }
      }

      chatLog.push({
        panel: panelName,
        status: imageFound ? "success" : "no_image",
        request: request.slice(0, 500),
        textElements,
        responseText: responseText.slice(0, 200),
      });

      if (!imageFound) {
        console.error(`    No image. Text: ${responseText.slice(0, 200)}`);
        failCount++;
      }
    } catch (err: unknown) {
      const message = describeError(err);
      console.error(`    Error: ${message.slice(0, 200)}`);
      failCount++;
      chatLog.push({ panel: panelName, status: "error", error: message.slice(0, 300) });
    }
  }

  writeFileSync(join(config.outputDir, "chat-log.json"), JSON.stringify(chatLog, null, 2));

  // Proxies (best effort; skipped when the helper script is not present)
  const scriptsDir = dirname(resolve(process.argv[1]));
  if (existsSync(join(scriptsDir, "create-proxies.ts"))) {
    try {
      execSync(`bun run create-proxies.ts --input "${panelsDir}" --max-mb 4`,
        { cwd: scriptsDir, stdio: "inherit" });
    } catch (err: unknown) {
      console.warn(`Proxy creation failed (non-fatal): ${describeError(err).slice(0, 200)}`);
    }
  } else {
    console.log("Proxy creation skipped: create-proxies.ts not found");
  }

  console.log(`\n=== Done: ${successCount}/${panelsToGenerate.length} success, ${failCount} failed ===`);
  if (failCount > 0) process.exit(1);
}

main().catch((err: unknown) => {
  console.error("Failed:", describeError(err));
  process.exit(1);
});
/**
 * generate-panel-with-history.ts
 *
 * Generates a panel using multi-turn conversation history.
 * Each existing panel is sent as its own USER message with a "study this
 * style" instruction; the model's replies stay in the chat, and the actual
 * generation prompt is sent last. This anchors style/font/characters.
 *
 * Usage:
 *   bun run generate-panel-with-history.ts \
 *     --prompt-file ./prompts/panel-13.md \
 *     --output ./output/panel-13.png \
 *     --history-panels cover.png,panel-02.png,panel-07.png,panel-10.png \
 *     [--size 2K] [--aspect-ratio 3:2] [--model gemini-3.1-flash-image-preview]
 */

import { GoogleGenAI, type Content } from "@google/genai";
import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
import { resolve, dirname, basename, extname } from "path";
import { parseArgs } from "util";

/**
 * Maps an image path to the MIME type sent with its inline data.
 * Unknown extensions fall back to PNG.
 */
function getMimeType(filePath: string): string {
  const ext = extname(filePath).toLowerCase();
  switch (ext) {
    case ".png": return "image/png";
    case ".jpg": case ".jpeg": return "image/jpeg";
    case ".webp": return "image/webp";
    default: return "image/png";
  }
}

/**
 * Reads and validates the command line.
 *
 * Exits with code 1 when --prompt-file or --history-panels is missing, when
 * the prompt file does not exist, or when a listed history panel does not
 * exist. Empty entries in --history-panels (e.g. a trailing comma) are ignored.
 */
function parseCliArgs() {
  const { values } = parseArgs({
    args: process.argv.slice(2),
    options: {
      "prompt-file": { type: "string" },
      output: { type: "string", short: "o" },
      "history-panels": { type: "string" },
      model: { type: "string", short: "m" },
      size: { type: "string", short: "s" },
      "aspect-ratio": { type: "string", short: "a" },
    },
  });

  if (!values["prompt-file"]) {
    console.error("Error: --prompt-file required");
    process.exit(1);
  }
  if (!values["history-panels"]) {
    console.error("Error: --history-panels required (comma-separated image paths)");
    process.exit(1);
  }

  const promptPath = resolve(values["prompt-file"]);
  if (!existsSync(promptPath)) {
    console.error(`Prompt file not found: ${promptPath}`);
    process.exit(1);
  }
  const prompt = readFileSync(promptPath, "utf-8").trim();

  const historyPanels = values["history-panels"]
    .split(",")
    .map((p) => p.trim())
    // An empty entry would resolve to the working directory and fail later on read.
    .filter(Boolean)
    .map((p) => {
      const resolved = resolve(p);
      if (!existsSync(resolved)) {
        console.error(`History panel not found: ${resolved}`);
        process.exit(1);
      }
      return resolved;
    });
  if (historyPanels.length === 0) {
    console.error("Error: --history-panels required (comma-separated image paths)");
    process.exit(1);
  }

  return {
    prompt,
    outputPath: resolve(values.output ?? "./output.png"),
    historyPanels,
    model: values.model ?? "gemini-3.1-flash-image-preview",
    imageSize: values.size ?? "2K",
    aspectRatio: values["aspect-ratio"] ?? "3:2",
  };
}

/**
 * Replays the history panels into a fresh chat, sends the generation prompt,
 * and writes the first image of the response to the output path.
 *
 * Exits with code 1 when the API key is missing or the response has no image.
 */
async function main() {
  const config = parseCliArgs();

  const apiKey = process.env.GOOGLE_API_KEY ?? process.env.GEMINI_API_KEY;
  if (!apiKey) {
    console.error("Error: GOOGLE_API_KEY or GEMINI_API_KEY must be set");
    process.exit(1);
  }

  const client = new GoogleGenAI({ apiKey });

  // Use chat API: send reference panels as user messages, let model acknowledge,
  // then send the actual generation prompt.
  const chat = client.chats.create({
    model: config.model,
    config: {
      responseModalities: ["TEXT", "IMAGE"],
      imageConfig: {
        aspectRatio: config.aspectRatio as any,
        imageSize: config.imageSize as any,
      },
    },
  });

  // Send each history panel as a user message with the image
  for (let i = 0; i < config.historyPanels.length; i++) {
    const panelPath = config.historyPanels[i];
    const panelName = basename(panelPath, extname(panelPath));
    const buffer = readFileSync(panelPath);

    console.log(`  Sending history [${i + 1}/${config.historyPanels.length}]: ${panelName}`);

    const resp = await chat.sendMessage({
      message: [
        {
          inlineData: {
            mimeType: getMimeType(panelPath),
            data: buffer.toString("base64"),
          },
        },
        {
          text: `This is ${panelName} from our comic. Study the exact art style, font, thick linework, character designs, and panel layout. Match these EXACTLY when I ask you to generate the next panel.`,
        },
      ],
    });

    // Log the text part of the model's reply to this history message, if any
    const ackText = resp.candidates?.[0]?.content?.parts?.find(p => p.text)?.text;
    if (ackText) console.log(`    Model: ${ackText.slice(0, 100)}`);
  }

  // Now send the actual generation prompt
  console.log(`\nGenerating with ${config.historyPanels.length} panels in context...`);
  console.log(`  Model: ${config.model}`);
  console.log(`  Size: ${config.imageSize}, Aspect: ${config.aspectRatio}\n`);

  const response = await chat.sendMessage({
    message: config.prompt,
  });

  const responseParts = response.candidates?.[0]?.content?.parts;
  if (!responseParts) {
    console.error("No response parts");
    console.error("Full response:", JSON.stringify(response, null, 2).slice(0, 500));
    process.exit(1);
  }

  let imageFound = false;
  for (const part of responseParts) {
    if (part.text) {
      console.log(`  Model text: ${part.text.slice(0, 200)}`);
    }
    // Keep only the first image so a later part cannot silently overwrite it.
    if (part.inlineData?.data && !imageFound) {
      const outputDir = dirname(config.outputPath);
      if (!existsSync(outputDir)) mkdirSync(outputDir, { recursive: true });
      const imageBuffer = Buffer.from(part.inlineData.data, "base64");
      writeFileSync(config.outputPath, imageBuffer);
      console.log(`  Saved: ${config.outputPath} (${(imageBuffer.length / 1024).toFixed(0)} KB)`);
      imageFound = true;
    }
  }

  if (!imageFound) {
    console.error("No image in response. Parts:", JSON.stringify(responseParts.map(p => p.text ? "text" : "other"), null, 2));
    process.exit(1);
  }

  console.log("\nDone!");
}

main().catch(err => {
  console.error("Failed:", err.message ?? 
err); + process.exit(1); +}); diff --git a/scripts/generate-samples.ts b/scripts/generate-samples.ts index 5a18835..d976976 100644 --- a/scripts/generate-samples.ts +++ b/scripts/generate-samples.ts @@ -21,9 +21,8 @@ import { GoogleGenAI, type GenerateContentConfig } from "@google/genai"; import { readFileSync, writeFileSync, existsSync, mkdirSync, readdirSync } from "fs"; -import { join, resolve, basename, extname, dirname } from "path"; +import { join, resolve, basename, extname } from "path"; import { parseArgs } from "util"; -import { execSync } from "child_process"; const SAMPLE_POSES = [ "standing facing the viewer with a friendly expression, full body visible, simple background", @@ -226,24 +225,7 @@ async function main() { } } - console.log("\nAll samples generated."); - - // Create proxy images for APIs with image size limits - const proxyMaxMb = process.env.IMAGE_PROXY_MAX_MB ?? "4"; - if (proxyMaxMb.toLowerCase() !== "unlimited" && parseFloat(proxyMaxMb) > 0) { - console.log(`\nCreating proxy images (max ${proxyMaxMb}MB)...`); - try { - const scriptsDir = dirname(resolve(process.argv[1])); - execSync( - `bun run create-proxies.ts --input "${config.outputDir}" --max-mb ${proxyMaxMb} --recursive`, - { cwd: scriptsDir, stdio: "inherit" }, - ); - } catch (err: any) { - console.warn(`Warning: Proxy creation failed: ${err.message?.slice(0, 200)}`); - } - } - - console.log("\nDone!"); + console.log("\nDone! 
All samples generated."); } main().catch((err) => { diff --git a/sentinels/narrator.json b/sentinels/narrator.json deleted file mode 100644 index e816486..0000000 --- a/sentinels/narrator.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "id": "narrator", - "name": "Comic Narrator", - "description": "Watches the comic being created and narrates progress in a warm, storytelling voice.", - "trigger": { - "type": "event", - "on": ["assistant.action", "tool.result"] - }, - "execution": { - "strategy": "debounce", - "milliseconds": 8000 - }, - "model": "anthropic/claude-haiku-4-5", - "conversational": { - "trimmingStrategy": { - "type": "maxTurns", - "maxTurns": 30 - } - }, - "systemPromptText": "You are a warm, gentle narrator watching a comic book being made. You speak like a storybook narrator — short, evocative sentences. Note what's happening: characters being born, stories taking shape, panels being drawn. When something goes wrong, note it with gentle humor. When something beautiful emerges, celebrate it briefly. Keep observations to 1-3 sentences. You're writing the story of the story being made.", - "userPromptText": "Recent activity:\n<%= JSON.stringify(it.events.slice(-10).map(e => ({type: e.type, data: typeof e.data === 'string' ? e.data.slice(0,200) : JSON.stringify(e.data).slice(0,200)})), null, 2) %>\n\nCodon: <%= it.codon.name %>\nRunning for: <%= Math.floor((it.world.currentTime - it.codon.startTime) / 1000) %>s", - "output": { - "format": "text", - "file": "./narrator-log.md" - }, - "reportToWebsocket": { - "lifecycle": true, - "errors": true, - "outputs": true, - "triggers": false - }, - "errorHandling": { - "maxConsecutiveFailures": 5, - "unloadOnFatalError": true - } -}