diff --git a/src/content/__tests__/escapeDrawtext.test.ts b/src/content/__tests__/escapeDrawtext.test.ts index f4fb88c..bf6abea 100644 --- a/src/content/__tests__/escapeDrawtext.test.ts +++ b/src/content/__tests__/escapeDrawtext.test.ts @@ -3,63 +3,77 @@ import { describe, it, expect } from "vitest"; import { escapeDrawtext } from "../escapeDrawtext"; describe("escapeDrawtext", () => { - it("replaces straight apostrophes with escaped quote safe for drawtext", () => { + it("removes straight apostrophes", () => { const result = escapeDrawtext("didn't"); - expect(result).not.toContain("'"); - // Must NOT use U+02BC (modifier letter apostrophe) β€” most fonts lack this glyph - expect(result).not.toContain("\u02BC"); - expect(result).toContain("didn"); + expect(result).toBe("didnt"); }); - it("preserves curly right single quotation marks as-is", () => { + it("removes curly right single quotation marks", () => { const result = escapeDrawtext("didn\u2019t"); - - // U+2019 is the replacement char β€” it should remain - expect(result).toContain("\u2019"); - expect(result).not.toContain("\u02BC"); + expect(result).not.toContain("\u2019"); + expect(result).toBe("didnt"); }); - it("replaces curly left single quotation marks with right single quotation mark", () => { + it("removes curly left single quotation marks", () => { const result = escapeDrawtext("\u2018hello\u2019"); - expect(result).not.toContain("\u2018"); - expect(result).not.toContain("\u02BC"); - // Both should become U+2019 - expect(result).toBe("\u2019hello\u2019"); + expect(result).not.toContain("\u2019"); + expect(result).toBe("hello"); }); it("escapes colons for ffmpeg", () => { const result = escapeDrawtext("caption: hello"); - expect(result).toContain("\\\\:"); }); - it("escapes percent to %% for a single literal % in ffmpeg drawtext", () => { + it("escapes percent to %% for ffmpeg drawtext", () => { const result = escapeDrawtext("100%"); - - // ffmpeg drawtext: %% renders as single %. 
So "100%" should become "100%%". expect(result).toBe("100%%"); }); it("escapes backslashes", () => { const result = escapeDrawtext("back\\slash"); - expect(result).toContain("\\\\\\\\"); }); it("strips newlines and carriage returns", () => { expect(escapeDrawtext("line1\nline2")).toBe("line1 line2"); expect(escapeDrawtext("line1\r\nline2")).toBe("line1 line2"); - expect(escapeDrawtext("line1\rline2")).toBe("line1line2"); + }); + + it("produces text safe inside ffmpeg single-quoted drawtext in filter_complex", () => { + const result = escapeDrawtext("you're my addiction"); + expect(result).not.toContain("'"); + expect(result).not.toContain("\u2019"); + expect(result).toBe("youre my addiction"); + }); + + it("removes double quotes", () => { + const result = escapeDrawtext('"hello world"'); + expect(result).not.toContain('"'); + expect(result).toBe("hello world"); + }); + + it("removes emoji", () => { + const result = escapeDrawtext("fire πŸ”₯🎢 music"); + expect(result).not.toContain("πŸ”₯"); + expect(result).not.toContain("🎢"); + }); + + it("handles the exact failing caption from production", () => { + const result = escapeDrawtext('Desire ignites: "Yo quiero un chin, tu eres mΓ­a." 
🎢πŸ”₯ #Intensity #LaEquis'); + expect(result).not.toContain('"'); + expect(result).not.toContain("'"); + expect(result).not.toContain("🎢"); + expect(result).not.toContain("πŸ”₯"); + expect(result).toContain("Desire ignites"); + expect(result).toContain("Yo quiero"); }); it("handles a real caption with apostrophes and special chars", () => { const result = escapeDrawtext("didn't think anyone would hear this: it's real"); - - // Should not contain raw single quotes, left curly quotes, or U+02BC - expect(result).not.toMatch(/['\u2018\u2032\u02BC]/); - // Should contain escaped colon + expect(result).not.toMatch(/['\u2018\u2019\u2032]/); expect(result).toContain("\\\\:"); }); }); diff --git a/src/content/__tests__/generateContentImage.test.ts b/src/content/__tests__/generateContentImage.test.ts deleted file mode 100644 index 0541152..0000000 --- a/src/content/__tests__/generateContentImage.test.ts +++ /dev/null @@ -1,133 +0,0 @@ -import { describe, it, expect, vi, beforeEach } from "vitest"; - -vi.mock("node:fs/promises", () => ({ - default: { readFile: vi.fn() }, -})); - -vi.mock("@fal-ai/client", () => ({ - fal: { - storage: { upload: vi.fn() }, - }, -})); - -vi.mock("@trigger.dev/sdk/v3", () => ({ - logger: { log: vi.fn() }, -})); - -vi.mock("../../sandboxes/logStep", () => ({ - logStep: vi.fn(), -})); - -const mockFalSubscribe = vi.fn(); -vi.mock("../falSubscribe", () => ({ - falSubscribe: (...args: unknown[]) => mockFalSubscribe(...args), -})); - -import fs from "node:fs/promises"; -import { fal } from "@fal-ai/client"; -import { generateContentImage } from "../generateContentImage"; - -describe("generateContentImage", () => { - beforeEach(() => { - vi.clearAllMocks(); - mockFalSubscribe.mockResolvedValue({ - data: { images: [{ url: "https://fal.ai/generated.png" }] }, - }); - }); - - it("passes face guide and reference image to fal", async () => { - vi.mocked(fs.readFile).mockResolvedValue(Buffer.from("ref-image")); - 
vi.mocked(fal.storage.upload).mockResolvedValue("https://fal.ai/ref.png"); - - await generateContentImage({ - faceGuideUrl: "https://fal.ai/face.png", - referenceImagePath: "/path/to/ref-01.png", - prompt: "test prompt", - }); - - const callArgs = mockFalSubscribe.mock.calls[0][1] as Record; - expect(callArgs.image_urls).toEqual([ - "https://fal.ai/face.png", - "https://fal.ai/ref.png", - ]); - }); - - it("includes additionalImageUrls in image_urls after face guide and reference", async () => { - vi.mocked(fs.readFile).mockResolvedValue(Buffer.from("ref-image")); - vi.mocked(fal.storage.upload).mockResolvedValue("https://fal.ai/ref.png"); - - await generateContentImage({ - faceGuideUrl: "https://fal.ai/face.png", - referenceImagePath: "/path/to/ref-01.png", - prompt: "test prompt", - additionalImageUrls: [ - "https://example.com/album-cover.png", - "https://example.com/playlist-cover.png", - ], - }); - - const callArgs = mockFalSubscribe.mock.calls[0][1] as Record; - expect(callArgs.image_urls).toEqual([ - "https://fal.ai/face.png", - "https://fal.ai/ref.png", - "https://example.com/album-cover.png", - "https://example.com/playlist-cover.png", - ]); - }); - - it("works with additionalImageUrls but no face guide or reference", async () => { - await generateContentImage({ - referenceImagePath: null, - prompt: "test prompt", - additionalImageUrls: ["https://example.com/cover.png"], - }); - - const callArgs = mockFalSubscribe.mock.calls[0][1] as Record; - expect(callArgs.image_urls).toEqual(["https://example.com/cover.png"]); - }); - - it("deduplicates additionalImageUrls that match faceGuideUrl", async () => { - await generateContentImage({ - faceGuideUrl: "https://fal.ai/face.png", - referenceImagePath: null, - prompt: "test prompt", - additionalImageUrls: ["https://fal.ai/face.png", "https://example.com/cover.png"], - }); - - const callArgs = mockFalSubscribe.mock.calls[0][1] as Record; - expect(callArgs.image_urls).toEqual([ - "https://fal.ai/face.png", - 
"https://example.com/cover.png", - ]); - }); - - it("ignores empty additionalImageUrls array", async () => { - await generateContentImage({ - faceGuideUrl: "https://fal.ai/face.png", - referenceImagePath: null, - prompt: "test prompt", - additionalImageUrls: [], - }); - - const callArgs = mockFalSubscribe.mock.calls[0][1] as Record; - expect(callArgs.image_urls).toEqual(["https://fal.ai/face.png"]); - }); - - it("deduplicates within additionalImageUrls itself", async () => { - await generateContentImage({ - referenceImagePath: null, - prompt: "test prompt", - additionalImageUrls: [ - "https://example.com/cover.png", - "https://example.com/cover.png", - "https://example.com/other.png", - ], - }); - - const callArgs = mockFalSubscribe.mock.calls[0][1] as Record; - expect(callArgs.image_urls).toEqual([ - "https://example.com/cover.png", - "https://example.com/other.png", - ]); - }); -}); diff --git a/src/content/escapeDrawtext.ts b/src/content/escapeDrawtext.ts index 214fcdb..b96af6a 100644 --- a/src/content/escapeDrawtext.ts +++ b/src/content/escapeDrawtext.ts @@ -1,10 +1,9 @@ /** * Escapes a text string for use in ffmpeg drawtext filters. * - * Handles both -vf and filter_complex contexts by replacing all - * quote-like characters with the right single quotation mark (U+2019), - * which renders as an apostrophe in all standard fonts and is not - * parsed as a delimiter by ffmpeg. + * Handles both -vf and filter_complex contexts by removing all + * characters that could break ffmpeg's parser: quotes, emoji, + * and escaping colons/backslashes/percent signs. 
* * @param text - Raw caption text * @returns Escaped text safe for ffmpeg drawtext @@ -14,7 +13,12 @@ export function escapeDrawtext(text: string): string { .replace(/\r/g, "") .replace(/\n/g, " ") .replace(/\\/g, "\\\\\\\\") - .replace(/['\u2018\u2032]/g, "\u2019") + .replace(/['\u2018\u2019\u2032""\u201C\u201D]/g, "") + .replace(/[\u{1F000}-\u{1FFFF}]/gu, "") + .replace(/[\u{2600}-\u{27BF}]/gu, "") + .replace(/[\u{FE00}-\u{FE0F}]/gu, "") .replace(/:/g, "\\\\:") - .replace(/%/g, "%%"); + .replace(/%/g, "%%") + .replace(/\s{2,}/g, " ") + .trim(); } diff --git a/src/content/generateAudioVideo.ts b/src/content/generateAudioVideo.ts deleted file mode 100644 index f28ea5d..0000000 --- a/src/content/generateAudioVideo.ts +++ /dev/null @@ -1,147 +0,0 @@ -import { execFile } from "node:child_process"; -import { readFile, writeFile, unlink, mkdir, rm } from "node:fs/promises"; -import { randomUUID } from "node:crypto"; -import { tmpdir } from "node:os"; -import { join } from "node:path"; -import { promisify } from "node:util"; -import { fal } from "@fal-ai/client"; -import { logger } from "@trigger.dev/sdk/v3"; -import { DEFAULT_PIPELINE_CONFIG } from "./defaultPipelineConfig"; - -const execFileAsync = promisify(execFile); - -/** - * Generates a video with audio baked in (lipsync mode) using fal.ai LTX-2. - * The model creates lip-synced animation from image + audio. - * - * Matches the content-creation-app's generateAudioVideo.ts behavior. 
- * - * @param imageUrl - URL of the AI-generated image - * @param songBuffer - Raw mp3 bytes of the song - * @param audioStartSeconds - Where to clip the song from - * @param audioDurationSeconds - How long the clip should be - * @param motionPrompt - Describes how the subject should move - * @returns URL of the generated video (with audio baked in) - */ -export async function generateAudioVideo({ - imageUrl, - songBuffer, - audioStartSeconds, - audioDurationSeconds, - motionPrompt, -}: { - imageUrl: string; - songBuffer: Buffer; - audioStartSeconds: number; - audioDurationSeconds: number; - motionPrompt: string; -}): Promise { - const config = DEFAULT_PIPELINE_CONFIG; - const durationSeconds = Math.min( - audioDurationSeconds, - config.audioVideoModelMaxSeconds, - ); - const fps = 25; - const numFrames = Math.round(durationSeconds * fps) + 1; - - // Clip the audio to the right section and upload to fal - const audioUrl = await clipAndUploadAudio( - songBuffer, - audioStartSeconds, - durationSeconds, - ); - - logger.log("Generating audio-to-video (lipsync)", { - model: config.audioVideoModel, - durationSeconds, - numFrames, - }); - - const result = await fal.subscribe(config.audioVideoModel, { - input: { - prompt: motionPrompt, - image_url: imageUrl, - audio_url: audioUrl, - match_audio_length: false, - num_frames: numFrames, - video_size: "landscape_16_9", - fps, - camera_lora: "static", - guidance_scale: 3, - num_inference_steps: 40, - video_quality: "high", - }, - logs: true, - }); - - const data = result.data as Record; - const videoUrl = extractFalUrl(data); - - if (!videoUrl) { - throw new Error( - `Audio-to-video returned no URL. Response: ${JSON.stringify(data).slice(0, 200)}`, - ); - } - - logger.log("Audio-to-video generated", { videoUrl: videoUrl.slice(0, 80) }); - return videoUrl; -} - -/** - * Clips the song mp3 to the specified range and uploads to fal storage. 
- */ -async function clipAndUploadAudio( - songBuffer: Buffer, - startSeconds: number, - durationSeconds: number, -): Promise { - const tempDir = join(tmpdir(), `audio-clip-${randomUUID()}`); - await mkdir(tempDir, { recursive: true }); - const inputPath = join(tempDir, "song.mp3"); - const clippedPath = join(tempDir, "clip.mp3"); - - try { - await writeFile(inputPath, songBuffer); - - await execFileAsync("ffmpeg", [ - "-y", - "-i", inputPath, - "-ss", String(startSeconds), - "-t", String(durationSeconds), - "-c:a", "libmp3lame", - "-q:a", "2", - clippedPath, - ]); - - const clippedBuffer = await readFile(clippedPath); - const file = new File([clippedBuffer], "clip.mp3", { type: "audio/mpeg" }); - const url = await fal.storage.upload(file); - - logger.log("Audio clip uploaded to fal", { - startSeconds, - durationSeconds, - url: url.slice(0, 80), - }); - - return url; - } finally { - await rm(tempDir, { recursive: true, force: true }).catch(() => undefined); - } -} - -function extractFalUrl(data: Record): string | undefined { - for (const key of ["image", "video"]) { - if (data[key] && typeof data[key] === "object") { - const url = (data[key] as Record)?.url; - if (url) return url; - } - } - for (const key of ["images", "videos"]) { - if (Array.isArray(data[key]) && (data[key] as unknown[]).length > 0) { - const url = ((data[key] as unknown[])[0] as Record)?.url; - if (url) return url; - } - } - if (typeof data.url === "string") return data.url; - return undefined; -} diff --git a/src/content/generateCaption.ts b/src/content/generateCaption.ts deleted file mode 100644 index c30f97e..0000000 --- a/src/content/generateCaption.ts +++ /dev/null @@ -1,120 +0,0 @@ -import { logger } from "@trigger.dev/sdk/v3"; -import type { TemplateData } from "./loadTemplate"; -import type { SongLyrics } from "./transcribeSong"; -import type { CaptionLength } from "../schemas/contentCreationSchema"; - -const CAPTION_LENGTH_INSTRUCTIONS: Record = { - short: "Write a SHORT caption (max 10 
words). Punchy, minimal, like a text message. Think: one phrase that hits.", - medium: "Write a MEDIUM caption (15-30 words). A complete thought with feeling. 1-2 sentences max.", - long: "Write a LONG caption (40-80 words). A mini-story or stream of consciousness. Vulnerable, raw, the kind of caption people screenshot.", -}; - -/** - * Generates a TikTok-style caption using the Recoup Chat API. - * Combines template style, artist context, song lyrics, and audience data. - * - * Matches the content-creation-app's generateCaption.ts behavior. - */ -export async function generateCaption({ - template, - songTitle, - fullLyrics, - clipLyrics, - artistContext, - audienceContext, - captionLength = "short", -}: { - template: TemplateData; - songTitle: string; - fullLyrics: string; - clipLyrics: string; - artistContext: string; - audienceContext: string; - captionLength?: CaptionLength; -}): Promise { - const recoupApiKey = process.env.RECOUP_API_KEY; - if (!recoupApiKey) { - throw new Error("RECOUP_API_KEY is required for caption generation"); - } - - const captionGuide = template.captionGuide - ? JSON.stringify(template.captionGuide, null, 2) - : "(no caption guide)"; - - const examples = template.captionExamples.length > 0 - ? template.captionExamples.map(c => `- "${c}"`).join("\n") - : "(no examples)"; - - const lengthInstruction = CAPTION_LENGTH_INSTRUCTIONS[captionLength]; - - const prompt = `Generate ONE caption for a TikTok post. - -## LENGTH REQUIREMENT -${lengthInstruction} - -## Content Style -${captionGuide} - -## Reference Captions (these are examples of what GOOD looks like for this style) -${examples} - -## Artist -${artistContext} - -## Audience -${audienceContext} - -## Song Playing: "${songTitle}" -Full lyrics: ${fullLyrics} -What the viewer hears (first 8 seconds): "${clipLyrics}" - -Generate ONE caption. ${lengthInstruction} Return ONLY the caption text, nothing else. No quotes around it. 
No hashtags unless the caption naturally calls for them.`; - - logger.log("Generating caption", { songTitle }); - - const recoupApiUrl = process.env.RECOUP_API_URL ?? "https://recoup-api.vercel.app"; - const response = await fetch(`${recoupApiUrl}/api/chat/generate`, { - method: "POST", - headers: { - "Content-Type": "application/json", - "x-api-key": recoupApiKey, - }, - body: JSON.stringify({ - prompt, - model: "google/gemini-2.5-flash", - excludeTools: ["create_task"], - }), - }); - - if (!response.ok) { - throw new Error(`Recoup Chat API error: ${response.status}`); - } - - const json = (await response.json()) as { - text?: string | Array<{ type: string; text?: string }>; - }; - - let captionText: string; - if (typeof json.text === "string") { - captionText = json.text.trim(); - } else if (Array.isArray(json.text)) { - captionText = json.text - .filter(p => p.type === "text" && p.text) - .map(p => p.text!) - .join("") - .trim(); - } else { - captionText = ""; - } - - // Clean up β€” remove quotes if the model wrapped it - captionText = captionText.replace(/^["']|["']$/g, "").trim(); - - if (!captionText) { - throw new Error("Caption generation returned empty text"); - } - - logger.log("Caption generated", { caption: captionText.slice(0, 80) }); - return captionText; -} - diff --git a/src/content/generateContentImage.ts b/src/content/generateContentImage.ts deleted file mode 100644 index aef1689..0000000 --- a/src/content/generateContentImage.ts +++ /dev/null @@ -1,107 +0,0 @@ -import fs from "node:fs/promises"; -import { fal } from "@fal-ai/client"; -import { logStep } from "../sandboxes/logStep"; -import { DEFAULT_PIPELINE_CONFIG } from "./defaultPipelineConfig"; -import { falSubscribe } from "./falSubscribe"; - -/** - * Generates an AI image using fal.ai. - * - * Takes up to two images: - * 1. Guide image (face-guide headshot or album cover) β€” the primary subject - * 2. 
Reference image (scene composition from template) β€” the setting - * - * The prompt tells the model how to combine these images. - * - * @param faceGuideUrl - fal storage URL of the guide image (face or album cover) - * @param referenceImagePath - local path to a template reference image (or null) - * @param prompt - Scene/style prompt with instructions for how to use the images - * @returns URL of the generated image - */ -export async function generateContentImage({ - faceGuideUrl, - referenceImagePath, - prompt, - additionalImageUrls, -}: { - /** Guide image URL β€” omit for templates that don't use an input image. */ - faceGuideUrl?: string; - referenceImagePath: string | null; - prompt: string; - /** Extra image URLs (e.g. album covers, playlist covers) to pass to the model. */ - additionalImageUrls?: string[]; -}): Promise { - const config = DEFAULT_PIPELINE_CONFIG; - - // Build image_urls: guide image (if provided) + reference image (if provided) - const imageUrls: string[] = []; - if (faceGuideUrl) imageUrls.push(faceGuideUrl); - - if (referenceImagePath) { - logStep("Uploading reference image to fal storage", false, { - path: referenceImagePath, - }); - const refBuffer = await fs.readFile(referenceImagePath); - const refFile = new File([refBuffer], "reference.png", { type: "image/png" }); - const refUrl = await fal.storage.upload(refFile); - imageUrls.push(refUrl); - } - - if (additionalImageUrls?.length) { - const unique = [...new Set(additionalImageUrls)]; - const deduped = unique.filter((url) => !imageUrls.includes(url)); - logStep("Adding additional image URLs", false, { - count: deduped.length, - urls: deduped.map((u) => u.slice(0, 80)), - }); - imageUrls.push(...deduped); - } - - logStep("Generating image", false, { - model: config.imageModel, - promptLength: prompt.length, - imageCount: imageUrls.length, - hasFaceGuide: Boolean(faceGuideUrl), - hasReferenceImage: Boolean(referenceImagePath), - hasAdditionalImages: 
Boolean(additionalImageUrls?.length), - }); - - const result = await falSubscribe(config.imageModel, { - prompt, - image_urls: imageUrls, - aspect_ratio: config.aspectRatio, - resolution: config.resolution, - output_format: "png", - num_images: 1, - }); - - const data = result.data as Record; - const imageUrl = extractFalUrl(data); - - if (!imageUrl) { - throw new Error( - `Image generation returned no URL. Response: ${JSON.stringify(data).slice(0, 200)}`, - ); - } - - logStep("Image generated", false, { imageUrl: imageUrl.slice(0, 80) }); - return imageUrl; -} - -/** Extracts a media URL from various fal.ai response shapes. */ -function extractFalUrl(data: Record): string | undefined { - for (const key of ["image", "video"]) { - if (data[key] && typeof data[key] === "object") { - const url = (data[key] as Record)?.url; - if (url) return url; - } - } - for (const key of ["images", "videos"]) { - if (Array.isArray(data[key]) && (data[key] as unknown[]).length > 0) { - const url = ((data[key] as unknown[])[0] as Record)?.url; - if (url) return url; - } - } - if (typeof data.url === "string") return data.url; - return undefined; -} diff --git a/src/content/generateContentVideo.ts b/src/content/generateContentVideo.ts deleted file mode 100644 index 44f3210..0000000 --- a/src/content/generateContentVideo.ts +++ /dev/null @@ -1,74 +0,0 @@ -import { fal } from "@fal-ai/client"; -import { logger } from "@trigger.dev/sdk/v3"; -import { DEFAULT_PIPELINE_CONFIG } from "./defaultPipelineConfig"; - -/** - * Generates a video from an AI-generated image using fal.ai image-to-video. 
- * - * @param imageUrl - URL of the source image (from generateContentImage) - * @param motionPrompt - Describes how the subject should move - * @returns URL of the generated video - */ -export async function generateContentVideo({ - imageUrl, - motionPrompt, -}: { - imageUrl: string; - motionPrompt: string; -}): Promise { - const config = DEFAULT_PIPELINE_CONFIG; - const durationSeconds = Math.min( - config.clipDuration, - config.videoModelMaxSeconds, - ); - - logger.log("Generating video", { - model: config.videoModel, - durationSeconds, - motionPrompt: motionPrompt.slice(0, 100), - }); - - const result = await fal.subscribe(config.videoModel, { - input: { - prompt: motionPrompt, - image_url: imageUrl, - aspect_ratio: config.aspectRatio ?? "16:9", - duration: `${durationSeconds}s`, - resolution: config.videoResolution ?? "720p", - generate_audio: false, - }, - logs: true, - }); - - const data = result.data as Record; - const videoUrl = extractFalUrl(data); - - if (!videoUrl) { - throw new Error( - `Video generation returned no URL. Response: ${JSON.stringify(data).slice(0, 200)}`, - ); - } - - logger.log("Video generated", { videoUrl: videoUrl.slice(0, 80) }); - return videoUrl; -} - -/** - * Extracts a media URL from various fal.ai response shapes. 
- */ -function extractFalUrl(data: Record): string | undefined { - for (const key of ["image", "video"]) { - if (data[key] && typeof data[key] === "object") { - const url = (data[key] as Record)?.url; - if (url) return url; - } - } - for (const key of ["images", "videos"]) { - if (Array.isArray(data[key]) && (data[key] as unknown[]).length > 0) { - const url = ((data[key] as unknown[])[0] as Record)?.url; - if (url) return url; - } - } - if (typeof data.url === "string") return data.url; - return undefined; -} diff --git a/src/content/testPipeline.ts b/src/content/testPipeline.ts deleted file mode 100644 index af3dd32..0000000 --- a/src/content/testPipeline.ts +++ /dev/null @@ -1,435 +0,0 @@ -#!/usr/bin/env npx tsx -/** - * Local pipeline tester β€” run individual steps without Trigger.dev. - * - * Usage: - * npx tsx src/content/testPipeline.ts image # test image generation only - * npx tsx src/content/testPipeline.ts video # test video from saved image - * npx tsx src/content/testPipeline.ts upscale-image # test image upscale - * npx tsx src/content/testPipeline.ts upscale-video # test video upscale - * npx tsx src/content/testPipeline.ts audio # test audio selection - * npx tsx src/content/testPipeline.ts caption # test caption generation - * npx tsx src/content/testPipeline.ts render # test ffmpeg final render - * npx tsx src/content/testPipeline.ts render-only # test ffmpeg with a sample video URL - * - * Each step saves its output to .pipeline-state.json so the next step can use it. 
- */ - -import fs from "node:fs/promises"; -import path from "node:path"; -import { fal } from "@fal-ai/client"; -import dotenv from "dotenv"; - -// Load env -dotenv.config({ path: path.resolve(process.cwd(), ".env.local") }); -dotenv.config({ path: path.resolve(process.cwd(), ".env") }); - -const STATE_FILE = path.resolve(process.cwd(), ".pipeline-state.json"); -const GITHUB_REPO = "https://github.com/recoupable/sidney-swift-1ca89eeb-14ab-4a4a-a1c5-2dd41663c039"; -const ARTIST_SLUG = "gatsby-grace"; -const TEMPLATE_NAME = "artist-caption-bedroom"; - -// --- State management --- -interface PipelineState { - faceGuideUrl?: string; - referenceImageUrl?: string; - imageUrl?: string; - upscaledImageUrl?: string; - videoUrl?: string; - upscaledVideoUrl?: string; - songBuffer?: string; // base64 - songTitle?: string; - audioStartSeconds?: number; - clipLyrics?: string; - fullLyrics?: string; - captionText?: string; - finalVideoPath?: string; -} - -async function loadState(): Promise { - try { - return JSON.parse(await fs.readFile(STATE_FILE, "utf-8")); - } catch { - return {}; - } -} - -async function saveState(state: PipelineState): Promise { - await fs.writeFile(STATE_FILE, JSON.stringify(state, null, 2)); - console.log(` πŸ’Ύ State saved to ${STATE_FILE}`); -} - -// --- Configure fal --- -function setupFal(): void { - if (!process.env.FAL_KEY) { - console.error("❌ FAL_KEY not set in .env.local"); - process.exit(1); - } - fal.config({ credentials: process.env.FAL_KEY }); -} - -// --- Steps --- - -async function testImage(): Promise { - setupFal(); - const { fetchGithubFile } = await import("./fetchGithubFile.js"); - const { generateContentImage } = await import("./generateContentImage.js"); - const { loadTemplate, pickRandomReferenceImage, buildImagePrompt } = await import("./loadTemplate.js"); - const { FACE_SWAP_INSTRUCTION } = await import("./contentPrompts.js"); - - console.log("\n🎨 Testing: Image Generation\n"); - - const template = await 
loadTemplate(TEMPLATE_NAME); - console.log(` Template: ${template.name}`); - - // Fetch face-guide - console.log(" Fetching face-guide..."); - const faceGuideBuffer = await fetchGithubFile(GITHUB_REPO, `artists/${ARTIST_SLUG}/context/images/face-guide.png`); - if (!faceGuideBuffer) throw new Error("face-guide.png not found"); - const faceGuideFile = new File([faceGuideBuffer], "face-guide.png", { type: "image/png" }); - const faceGuideUrl = await fal.storage.upload(faceGuideFile); - console.log(` βœ… Face-guide uploaded: ${faceGuideUrl.slice(0, 60)}...`); - - // Pick reference image - const refPath = pickRandomReferenceImage(template); - console.log(` Reference: ${refPath ? path.basename(refPath) : "none"}`); - - // Generate image β€” face-swap instruction + template's scene prompt + style guide - const basePrompt = `${FACE_SWAP_INSTRUCTION} ${template.imagePrompt}`; - const prompt = buildImagePrompt(basePrompt, template.styleGuide); - console.log(` Prompt: "${prompt.slice(0, 80)}..."`); - console.log(" πŸ”„ Generating image...\n"); - - const imageUrl = await generateContentImage({ faceGuideUrl, referenceImagePath: refPath, prompt }); - - console.log(`\n βœ… Image generated!`); - console.log(` πŸ”— ${imageUrl}`); - - const state = await loadState(); - await saveState({ ...state, faceGuideUrl, imageUrl }); -} - -async function testVideo(): Promise { - setupFal(); - const { generateContentVideo } = await import("./generateContentVideo.js"); - const { loadTemplate, buildMotionPrompt } = await import("./loadTemplate.js"); - - const state = await loadState(); - const imageUrl = state.upscaledImageUrl ?? state.imageUrl; - if (!imageUrl) { console.error("❌ No image URL in state. 
Run: image first"); process.exit(1); } - - console.log("\n🎬 Testing: Video Generation\n"); - console.log(` Image: ${imageUrl.slice(0, 60)}...`); - - const template = await loadTemplate(TEMPLATE_NAME); - const motionPrompt = buildMotionPrompt(template); - console.log(` Motion: "${motionPrompt.slice(0, 80)}..."`); - console.log(" πŸ”„ Generating video (2-5 min)...\n"); - - const videoUrl = await generateContentVideo({ imageUrl, motionPrompt }); - - console.log(`\n βœ… Video generated!`); - console.log(` πŸ”— ${videoUrl}`); - - await saveState({ ...state, videoUrl }); -} - -async function testUpscaleImage(): Promise { - setupFal(); - const { upscaleImage } = await import("./upscaleImage.js"); - - const state = await loadState(); - if (!state.imageUrl) { console.error("❌ No image URL in state. Run: image first"); process.exit(1); } - - console.log("\nπŸ” Testing: Image Upscale\n"); - console.log(` Input: ${state.imageUrl.slice(0, 60)}...`); - console.log(" πŸ”„ Upscaling...\n"); - - const upscaledImageUrl = await upscaleImage(state.imageUrl); - - console.log(`\n βœ… Upscaled!`); - console.log(` πŸ”— ${upscaledImageUrl}`); - - await saveState({ ...state, upscaledImageUrl }); -} - -async function testUpscaleVideo(): Promise { - setupFal(); - const { upscaleVideo } = await import("./upscaleVideo.js"); - - const state = await loadState(); - if (!state.videoUrl) { console.error("❌ No video URL in state. 
Run: video first"); process.exit(1); } - - console.log("\nπŸ” Testing: Video Upscale\n"); - console.log(` Input: ${state.videoUrl.slice(0, 60)}...`); - console.log(" πŸ”„ Upscaling (this takes a while)...\n"); - - const upscaledVideoUrl = await upscaleVideo(state.videoUrl); - - console.log(`\n βœ… Upscaled!`); - console.log(` πŸ”— ${upscaledVideoUrl}`); - - await saveState({ ...state, upscaledVideoUrl }); -} - -async function testAudio(): Promise { - setupFal(); - const { selectAudioClip } = await import("./selectAudioClip.js"); - console.log("\n🎡 Testing: Audio Selection\n"); - console.log(" πŸ”„ Finding songs, transcribing, analyzing...\n"); - - const clip = await selectAudioClip({ - githubRepo: GITHUB_REPO, - artistSlug: ARTIST_SLUG, - lipsync: false, - }); - - console.log(`\n βœ… Audio selected!`); - console.log(` 🎡 Song: "${clip.songTitle}"`); - console.log(` ⏱️ Clip: ${clip.startSeconds}s (${clip.durationSeconds}s)`); - console.log(` πŸ“ Lyrics: "${clip.clipLyrics.slice(0, 80)}..."`); - console.log(` 🎭 Mood: ${clip.clipMood}`); - - const state = await loadState(); - await saveState({ - ...state, - songBuffer: clip.songBuffer.toString("base64"), - songTitle: clip.songTitle, - audioStartSeconds: clip.startSeconds, - clipLyrics: clip.clipLyrics, - fullLyrics: clip.lyrics.fullLyrics, - }); -} - -async function testCaption(): Promise { - const { generateCaption, fetchArtistContext, fetchAudienceContext } = await import("./generateCaption.js"); - const { fetchGithubFile } = await import("./fetchGithubFile.js"); - const { loadTemplate } = await import("./loadTemplate.js"); - - const state = await loadState(); - if (!state.songTitle) { console.error("❌ No song in state. 
Run: audio first"); process.exit(1); } - - console.log("\n✍️ Testing: Caption Generation\n"); - - const template = await loadTemplate(TEMPLATE_NAME); - const artistContext = await fetchArtistContext(GITHUB_REPO, ARTIST_SLUG, fetchGithubFile); - const audienceContext = await fetchAudienceContext(GITHUB_REPO, ARTIST_SLUG, fetchGithubFile); - - console.log(` Artist context: ${artistContext.slice(0, 60)}...`); - console.log(` Audience context: ${audienceContext.slice(0, 60)}...`); - console.log(" πŸ”„ Generating caption...\n"); - - const captionText = await generateCaption({ - template, - songTitle: state.songTitle, - fullLyrics: state.fullLyrics ?? "", - clipLyrics: state.clipLyrics ?? "", - artistContext, - audienceContext, - }); - - console.log(`\n βœ… Caption: "${captionText}"`); - - await saveState({ ...state, captionText }); -} - -async function testRender(): Promise { - const { renderFinalVideo } = await import("./renderFinalVideo.js"); - - const state = await loadState(); - const videoUrl = state.upscaledVideoUrl ?? state.videoUrl; - if (!videoUrl) { console.error("❌ No video URL. Run: video first"); process.exit(1); } - if (!state.songBuffer) { console.error("❌ No audio. Run: audio first"); process.exit(1); } - - console.log("\n🎬 Testing: ffmpeg Final Render\n"); - console.log(` Video: ${videoUrl.slice(0, 60)}...`); - console.log(` Caption: "${(state.captionText ?? "test caption").slice(0, 60)}"`); - console.log(` Audio: ${state.songTitle} @ ${state.audioStartSeconds}s`); - console.log(" πŸ”„ Rendering...\n"); - - const result = await renderFinalVideo({ - videoUrl, - songBuffer: Buffer.from(state.songBuffer, "base64"), - audioStartSeconds: state.audioStartSeconds ?? 0, - audioDurationSeconds: 8, - captionText: state.captionText ?? 
"test caption for the video", - hasAudio: false, - }); - - const outPath = path.resolve(process.cwd(), "test-output.mp4"); - const videoBuffer = Buffer.from(result.dataUrl.split(",")[1], "base64"); - await fs.writeFile(outPath, videoBuffer); - - console.log(`\n βœ… Final video rendered!`); - console.log(` πŸ“ ${outPath} (${(result.sizeBytes / 1024).toFixed(0)} KB)`); - console.log(` πŸ‘€ Open: open test-output.mp4`); - - await saveState({ ...state, finalVideoPath: outPath }); -} - -async function testRenderOnly(): Promise { - const { execFile } = await import("node:child_process"); - const { promisify } = await import("node:util"); - const { writeFile: writeFileFn, readFile: readFileFn, unlink: unlinkFn, mkdir: mkdirFn } = await import("node:fs/promises"); - const { randomUUID } = await import("node:crypto"); - const { tmpdir } = await import("node:os"); - const execFileAsync = promisify(execFile); - - console.log("\n🎬 Testing: ffmpeg Render (local test video)\n"); - - // Create test video (colored bars, 3 seconds, 16:9) - const tempDir = path.join(tmpdir(), `render-test-${randomUUID()}`); - await mkdirFn(tempDir, { recursive: true }); - const testVideoPath = path.join(tempDir, "test-video.mp4"); - const silentPath = path.join(tempDir, "silent.mp3"); - const outputPath = path.join(tempDir, "final.mp4"); - - console.log(" Creating test video..."); - await execFileAsync("ffmpeg", [ - "-y", "-f", "lavfi", "-i", "testsrc=s=1280x720:d=3", - "-c:v", "libx264", "-pix_fmt", "yuv420p", testVideoPath, - ]); - - console.log(" Creating test audio..."); - await execFileAsync("ffmpeg", [ - "-y", "-f", "lavfi", "-i", "anullsrc=r=44100:cl=stereo", - "-t", "3", silentPath, - ]); - - // Test with a LONG caption to verify adaptive sizing works - const testCaption = process.argv[3] === "long" - ? 
"sometimes you just gotta sit in the dark and let the playlist do the talking because words ain't gonna fix what's broken inside but at least the bass hits right where it hurts and you wonder if anyone else is sitting in their room right now feeling the exact same thing scrolling through old photos at 3am knowing you should sleep but the music won't let you go" - : process.argv[3] === "medium" - ? "sometimes you just gotta sit in the dark and let the playlist do the talking because words ain't gonna fix what's broken inside" - : "that one drawer holding all the memories you just can't throw away"; - - console.log(` Caption: "${testCaption.slice(0, 60)}${testCaption.length > 60 ? "..." : ""}"`); - console.log(" πŸ”„ Rendering...\n"); - - // Inline the adaptive layout logic for the test - const FRAME_W = 720; - const FRAME_H = 1280; - const MAX_H_RATIO = 0.4; - const MIN_FS = 20; - const MAX_FS = 42; - const BOTTOM_M = 120; - - function wrapLocal(text: string, maxChars: number): string[] { - const words = text.split(" "); - const lines: string[] = []; - let cur = ""; - for (const w of words) { - if (cur.length + w.length + 1 > maxChars && cur.length > 0) { lines.push(cur); cur = w; } - else { cur = cur ? `${cur} ${w}` : w; } - } - if (cur) lines.push(cur); - return lines; - } - - let chosenFs = MAX_FS; - let chosenLh = chosenFs + 10; - let chosenLines: string[] = []; - for (let fs = MAX_FS; fs >= MIN_FS; fs -= 2) { - const cpl = Math.floor(FRAME_W * 0.85 / (fs * 0.55)); - const lh = fs + 10; - const lines = wrapLocal(testCaption.replace(/'/g, "\u2019"), cpl); - if (lines.length * lh <= FRAME_H * MAX_H_RATIO) { - chosenFs = fs; chosenLh = lh; chosenLines = lines; break; - } - chosenFs = fs; chosenLh = lh; chosenLines = lines; - } - - // Determine position based on line count - const position = chosenLines.length <= 3 ? "bottom" : chosenLines.length <= 6 ? 
"center" : "top"; - console.log(` Font size: ${chosenFs}px, Lines: ${chosenLines.length}, Position: ${position}`); - - const FH = 1280; - const totalTH = chosenLines.length * chosenLh; - let blockStartY: number; - if (position === "bottom") { blockStartY = FH - BOTTOM_M - totalTH; } - else if (position === "center") { blockStartY = Math.round((FH - totalTH) / 2); } - else { blockStartY = 180; } - - const cropFilter = "crop=ih*9/16:ih"; - const scaleFilter = "scale=720:1280"; - const bw = Math.max(2, Math.round(chosenFs / 14)); - const captionFilters = chosenLines.map((line, i) => { - const yPos = blockStartY + (i * chosenLh); - return `drawtext=text='${line}':fontsize=${chosenFs}:fontcolor=white:borderw=${bw}:bordercolor=black:x=(w-tw)/2:y=${yPos}`; - }); - - const videoFilter = [cropFilter, scaleFilter, ...captionFilters].join(","); - - console.log(` Filter: ${videoFilter.slice(0, 100)}...`); - console.log(" πŸ”„ Running ffmpeg...\n"); - - try { - await execFileAsync("ffmpeg", [ - "-y", - "-i", testVideoPath, - "-t", "3", - "-i", silentPath, - "-vf", videoFilter, - "-c:v", "libx264", - "-c:a", "aac", - "-map", "0:v:0", - "-map", "1:a:0", - "-pix_fmt", "yuv420p", - "-movflags", "+faststart", - "-shortest", - outputPath, - ]); - - const stat = await fs.stat(outputPath); - const outFile = path.resolve(process.cwd(), "test-output.mp4"); - await fs.copyFile(outputPath, outFile); - - console.log(` βœ… Rendered! 
${(stat.size / 1024).toFixed(0)} KB`); - console.log(` πŸ“ ${outFile}`); - console.log(` πŸ‘€ Run: open test-output.mp4`); - } catch (err) { - console.error(` ❌ ffmpeg failed:`, (err as Error).message); - // Show stderr for debugging - const e = err as { stderr?: string }; - if (e.stderr) console.error(e.stderr.slice(-500)); - } finally { - await unlinkFn(testVideoPath).catch(() => {}); - await unlinkFn(silentPath).catch(() => {}); - await unlinkFn(outputPath).catch(() => {}); - } -} - -// --- Main --- -const step = process.argv[2]; -const steps: Record Promise> = { - image: testImage, - video: testVideo, - "upscale-image": testUpscaleImage, - "upscale-video": testUpscaleVideo, - audio: testAudio, - caption: testCaption, - render: testRender, - "render-only": testRenderOnly, -}; - -if (!step || !steps[step]) { - console.log(` -Usage: npx tsx src/content/testPipeline.ts - -Steps (run in order, each saves state for the next): - image Generate AI image from face-guide - upscale-image Upscale the generated image - video Generate video from image - upscale-video Upscale the video - audio Select audio clip from GitHub songs - caption Generate TikTok caption - render Final ffmpeg render (crop + audio + caption) - render-only Quick test ffmpeg with a sample video -`); - process.exit(0); -} - -steps[step]().catch(err => { - console.error(`\n❌ ${err.message}`); - process.exit(1); -}); diff --git a/src/content/upscaleImage.ts b/src/content/upscaleImage.ts deleted file mode 100644 index 36cb509..0000000 --- a/src/content/upscaleImage.ts +++ /dev/null @@ -1,55 +0,0 @@ -import { fal } from "@fal-ai/client"; -import { logger } from "@trigger.dev/sdk/v3"; -import { DEFAULT_PIPELINE_CONFIG } from "./defaultPipelineConfig"; - -/** - * Upscales an image using fal.ai SeedVR2 for realistic detail and texture. - * Matches the content-creation-app's upscaleImage.ts behavior. 
- * - * @param imageUrl - URL of the image to upscale - * @returns URL of the upscaled image - */ -export async function upscaleImage(imageUrl: string): Promise { - logger.log("Upscaling image", { - model: DEFAULT_PIPELINE_CONFIG.upscaleModel, - }); - - const result = await fal.subscribe(DEFAULT_PIPELINE_CONFIG.upscaleModel, { - input: { - image_url: imageUrl, - upscale_mode: "factor", - upscale_factor: 2, - output_format: "png", - }, - logs: true, - }); - - const data = result.data as Record; - const url = extractFalUrl(data); - - if (!url) { - throw new Error( - `Image upscale returned no URL. Response: ${JSON.stringify(data).slice(0, 200)}`, - ); - } - - logger.log("Image upscaled", { url: url.slice(0, 80) }); - return url; -} - -function extractFalUrl(data: Record): string | undefined { - for (const key of ["image", "video"]) { - if (data[key] && typeof data[key] === "object") { - const url = (data[key] as Record)?.url; - if (url) return url; - } - } - for (const key of ["images", "videos"]) { - if (Array.isArray(data[key]) && (data[key] as unknown[]).length > 0) { - const url = ((data[key] as unknown[])[0] as Record)?.url; - if (url) return url; - } - } - if (typeof data.url === "string") return data.url; - return undefined; -} diff --git a/src/content/upscaleVideo.ts b/src/content/upscaleVideo.ts deleted file mode 100644 index c07d5ea..0000000 --- a/src/content/upscaleVideo.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { fal } from "@fal-ai/client"; -import { logger } from "@trigger.dev/sdk/v3"; - -const VIDEO_UPSCALE_MODEL = "fal-ai/seedvr/upscale/video"; -const TARGET_RESOLUTION = "1080p"; - -/** - * Upscales a video from 720p to 1080p using fal.ai SeedVR2. - * Matches the content-creation-app's upscaleVideo.ts behavior. 
- * - * @param videoUrl - URL of the video to upscale - * @returns URL of the upscaled video - */ -export async function upscaleVideo(videoUrl: string): Promise { - logger.log("Upscaling video", { - model: VIDEO_UPSCALE_MODEL, - target: TARGET_RESOLUTION, - }); - - const result = await fal.subscribe(VIDEO_UPSCALE_MODEL, { - input: { - video_url: videoUrl, - upscale_mode: "target", - target_resolution: TARGET_RESOLUTION, - noise_scale: 0.1, - output_format: "X264 (.mp4)", - output_quality: "high", - output_write_mode: "balanced", - }, - logs: true, - }); - - const data = result.data as Record; - const url = extractFalUrl(data); - - if (!url) { - throw new Error( - `Video upscale returned no URL. Response: ${JSON.stringify(data).slice(0, 200)}`, - ); - } - - logger.log("Video upscaled", { url: url.slice(0, 80) }); - return url; -} - -function extractFalUrl(data: Record): string | undefined { - for (const key of ["image", "video"]) { - if (data[key] && typeof data[key] === "object") { - const url = (data[key] as Record)?.url; - if (url) return url; - } - } - for (const key of ["images", "videos"]) { - if (Array.isArray(data[key]) && (data[key] as unknown[]).length > 0) { - const url = ((data[key] as unknown[])[0] as Record)?.url; - if (url) return url; - } - } - if (typeof data.url === "string") return data.url; - return undefined; -} diff --git a/src/recoup/__tests__/callRecoupApi.test.ts b/src/recoup/__tests__/callRecoupApi.test.ts new file mode 100644 index 0000000..cc683df --- /dev/null +++ b/src/recoup/__tests__/callRecoupApi.test.ts @@ -0,0 +1,97 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +const mockFetch = vi.fn(); +vi.stubGlobal("fetch", mockFetch); + +vi.mock("../../sandboxes/logStep", () => ({ + logStep: vi.fn(), +})); + +describe("callRecoupApi", () => { + beforeEach(() => { + vi.clearAllMocks(); + process.env.RECOUP_API_KEY = "test-key"; + process.env.RECOUP_API_BASE_URL = "https://api.test.com"; + }); + + it("makes a POST request 
with api key and json body", async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ imageUrl: "https://example.com/img.png" }), + }); + + const { callRecoupApi } = await import("../callRecoupApi"); + const result = await callRecoupApi("/api/content/image", { prompt: "sunset" }); + + expect(mockFetch).toHaveBeenCalledWith( + "https://api.test.com/api/content/image", + expect.objectContaining({ + method: "POST", + headers: expect.objectContaining({ + "x-api-key": "test-key", + "Content-Type": "application/json", + }), + body: JSON.stringify({ prompt: "sunset" }), + }), + ); + expect(result).toEqual({ imageUrl: "https://example.com/img.png" }); + }); + + it("throws on non-ok response", async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 400, + json: async () => ({ error: "Bad request" }), + }); + + const { callRecoupApi } = await import("../callRecoupApi"); + await expect(callRecoupApi("/api/content/image", { prompt: "" })) + .rejects.toThrow("API call failed"); + }); + + it("logs the request and response", async () => { + mockFetch.mockResolvedValue({ + ok: true, + status: 200, + json: async () => ({ imageUrl: "https://example.com/img.png" }), + }); + + const { logStep } = await import("../../sandboxes/logStep"); + const { callRecoupApi } = await import("../callRecoupApi"); + await callRecoupApi("/api/content/image", { prompt: "sunset" }); + + expect(vi.mocked(logStep)).toHaveBeenCalledWith( + expect.stringContaining("/api/content/image"), + expect.any(Boolean), + expect.objectContaining({ status: 200 }), + ); + }); + + it("handles non-JSON error responses (e.g. 
502)", async () => { + mockFetch.mockResolvedValue({ + ok: false, + status: 502, + json: async () => { throw new Error("not json"); }, + text: async () => "Bad Gateway", + }); + + const { callRecoupApi } = await import("../callRecoupApi"); + await expect(callRecoupApi("/api/content/image", { prompt: "" })) + .rejects.toThrow("502"); + }); + + it("supports PATCH method", async () => { + mockFetch.mockResolvedValue({ + ok: true, + json: async () => ({ runId: "run_123" }), + }); + + const { callRecoupApi } = await import("../callRecoupApi"); + await callRecoupApi("/api/content", { video_url: "https://example.com/v.mp4" }, "PATCH"); + + expect(mockFetch).toHaveBeenCalledWith( + "https://api.test.com/api/content", + expect.objectContaining({ method: "PATCH" }), + ); + }); +}); diff --git a/src/recoup/__tests__/contentApi.test.ts b/src/recoup/__tests__/contentApi.test.ts new file mode 100644 index 0000000..9676721 --- /dev/null +++ b/src/recoup/__tests__/contentApi.test.ts @@ -0,0 +1,169 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; + +const mockCallRecoupApi = vi.fn(); +vi.mock("../callRecoupApi", () => ({ + callRecoupApi: mockCallRecoupApi, +})); + +describe("generateImage", () => { + beforeEach(() => vi.clearAllMocks()); + + it("calls POST /api/content/image with prompt and reference images", async () => { + mockCallRecoupApi.mockResolvedValue({ imageUrl: "https://fal.media/img.png", images: ["https://fal.media/img.png"] }); + + const { generateImage } = await import("../contentApi"); + const url = await generateImage({ + prompt: "a portrait photo", + referenceImageUrl: "https://example.com/face.png", + images: ["https://example.com/ref1.png"], + }); + + expect(mockCallRecoupApi).toHaveBeenCalledWith("/api/content/image", { + prompt: "a portrait photo", + reference_image_url: "https://example.com/face.png", + images: ["https://example.com/ref1.png"], + }); + expect(url).toBe("https://fal.media/img.png"); + }); + + it("filters out non-URL strings 
from images array", async () => { + mockCallRecoupApi.mockResolvedValue({ imageUrl: "https://fal.media/img.png", images: ["https://fal.media/img.png"] }); + + const { generateImage } = await import("../contentApi"); + await generateImage({ + prompt: "a portrait", + images: ["https://example.com/valid.png", "references/images/local-path.png"], + }); + + const callArgs = mockCallRecoupApi.mock.calls[0][1] as Record; + const images = callArgs.images as string[]; + expect(images).toEqual(["https://example.com/valid.png"]); + }); +}); + +describe("upscaleMedia", () => { + beforeEach(() => vi.clearAllMocks()); + + it("calls POST /api/content/upscale for image", async () => { + mockCallRecoupApi.mockResolvedValue({ url: "https://fal.media/upscaled.png" }); + + const { upscaleMedia } = await import("../contentApi"); + const url = await upscaleMedia("https://example.com/img.png", "image"); + + expect(mockCallRecoupApi).toHaveBeenCalledWith("/api/content/upscale", { + url: "https://example.com/img.png", + type: "image", + }); + expect(url).toBe("https://fal.media/upscaled.png"); + }); + + it("calls POST /api/content/upscale for video", async () => { + mockCallRecoupApi.mockResolvedValue({ url: "https://fal.media/upscaled.mp4" }); + + const { upscaleMedia } = await import("../contentApi"); + const url = await upscaleMedia("https://example.com/vid.mp4", "video"); + + expect(mockCallRecoupApi).toHaveBeenCalledWith("/api/content/upscale", { + url: "https://example.com/vid.mp4", + type: "video", + }); + expect(url).toBe("https://fal.media/upscaled.mp4"); + }); +}); + +describe("generateVideo", () => { + beforeEach(() => vi.clearAllMocks()); + + it("calls POST /api/content/video with image_url and prompt", async () => { + mockCallRecoupApi.mockResolvedValue({ videoUrl: "https://fal.media/vid.mp4", mode: "animate" }); + + const { generateVideo } = await import("../contentApi"); + const url = await generateVideo({ + imageUrl: "https://example.com/img.png", + prompt: "gentle 
breathing motion", + }); + + expect(mockCallRecoupApi).toHaveBeenCalledWith("/api/content/video", { + image_url: "https://example.com/img.png", + prompt: "gentle breathing motion", + }); + expect(url).toBe("https://fal.media/vid.mp4"); + }); + + it("passes audio_url for lipsync mode", async () => { + mockCallRecoupApi.mockResolvedValue({ videoUrl: "https://fal.media/vid.mp4", mode: "lipsync" }); + + const { generateVideo } = await import("../contentApi"); + await generateVideo({ + imageUrl: "https://example.com/img.png", + prompt: "singing", + audioUrl: "https://example.com/song.mp3", + }); + + expect(mockCallRecoupApi).toHaveBeenCalledWith("/api/content/video", { + image_url: "https://example.com/img.png", + prompt: "singing", + audio_url: "https://example.com/song.mp3", + }); + }); +}); + +describe("generateCaption", () => { + beforeEach(() => vi.clearAllMocks()); + + it("calls POST /api/content/caption with topic and template", async () => { + mockCallRecoupApi + .mockResolvedValueOnce({ templates: [{ id: "artist-caption-bedroom" }] }) + .mockResolvedValueOnce({ content: "midnight thoughts hit different" }); + + const { generateCaption } = await import("../contentApi"); + const text = await generateCaption({ + topic: "heartbreak and late nights", + template: "artist-caption-bedroom", + length: "short", + }); + + expect(mockCallRecoupApi).toHaveBeenCalledWith("/api/content/caption", { + topic: "heartbreak and late nights", + template: "artist-caption-bedroom", + length: "short", + }); + expect(text).toBe("midnight thoughts hit different"); + }); + + it("omits template if not in API's template list", async () => { + // First call: GET /api/content/templates returns the known list + mockCallRecoupApi + .mockResolvedValueOnce({ templates: [{ id: "artist-caption-bedroom" }, { id: "album-record-store" }] }) + // Second call: POST /api/content/caption + .mockResolvedValueOnce({ content: "editorial caption" }); + + const { generateCaption } = await 
import("../generateCaption"); + await generateCaption({ + topic: "album release", + template: "artist-release-editorial", + length: "short", + }); + + // Should have called templates API first, then caption without template + expect(mockCallRecoupApi).toHaveBeenCalledTimes(2); + const captionArgs = mockCallRecoupApi.mock.calls[1][1] as Record; + expect(captionArgs).not.toHaveProperty("template"); + }); + + it("passes template when it exists in API's template list", async () => { + mockCallRecoupApi + .mockResolvedValueOnce({ templates: [{ id: "artist-caption-bedroom" }] }) + .mockResolvedValueOnce({ content: "bedroom vibes" }); + + const { generateCaption } = await import("../generateCaption"); + await generateCaption({ + topic: "heartbreak", + template: "artist-caption-bedroom", + length: "short", + }); + + const captionArgs = mockCallRecoupApi.mock.calls[1][1] as Record; + expect(captionArgs.template).toBe("artist-caption-bedroom"); + }); +}); diff --git a/src/recoup/callRecoupApi.ts b/src/recoup/callRecoupApi.ts new file mode 100644 index 0000000..de71e56 --- /dev/null +++ b/src/recoup/callRecoupApi.ts @@ -0,0 +1,47 @@ +import { logStep } from "../sandboxes/logStep"; + +/** + * Call a Recoup API endpoint with authentication. + * + * @param path - API path (e.g. "/api/content/image"). + * @param body - JSON body to send. + * @param method - HTTP method. Defaults to "POST". + * @returns Parsed JSON response. + * @throws Error if the API returns a non-ok status. + */ +export async function callRecoupApi( + path: string, + body: Record, + method: "GET" | "POST" | "PATCH" = "POST", +): Promise> { + const baseUrl = process.env.RECOUP_API_BASE_URL || "https://recoup-api.vercel.app"; + const apiKey = process.env.RECOUP_API_KEY; + if (!apiKey) throw new Error("RECOUP_API_KEY is required"); + + const response = await fetch(`${baseUrl}${path}`, { + method, + headers: { + "x-api-key": apiKey, + "Content-Type": "application/json", + }, + ...(method !== "GET" ? 
{ body: JSON.stringify(body) } : {}), + }); + + let data: Record; + try { + data = await response.json(); + } catch { + const text = await response.text().catch(() => "No response body"); + throw new Error(`API call failed: ${response.status} β€” ${text}`); + } + + logStep(`${method} ${path}`, true, { + status: response.status, + response: data, + }); + + if (!response.ok) { + throw new Error(`API call failed: ${response.status} β€” ${(data.error as string) || "Unknown error"}`); + } + return data; +} diff --git a/src/recoup/contentApi.ts b/src/recoup/contentApi.ts new file mode 100644 index 0000000..9579fe0 --- /dev/null +++ b/src/recoup/contentApi.ts @@ -0,0 +1,4 @@ +export { generateImage } from "./generateImage"; +export { upscaleMedia } from "./upscaleMedia"; +export { generateVideo } from "./generateVideo"; +export { generateCaption } from "./generateCaption"; diff --git a/src/recoup/fetchTemplateIds.ts b/src/recoup/fetchTemplateIds.ts new file mode 100644 index 0000000..e6dfc31 --- /dev/null +++ b/src/recoup/fetchTemplateIds.ts @@ -0,0 +1,12 @@ +import { callRecoupApi } from "./callRecoupApi"; + +/** + * Fetch the list of valid template IDs from the API. + * + * @returns Array of template ID strings. + */ +export async function fetchTemplateIds(): Promise { + const data = await callRecoupApi("/api/content/templates", {}, "GET"); + const templates = data.templates as Array<{ id: string }>; + return templates.map(t => t.id); +} diff --git a/src/recoup/generateCaption.ts b/src/recoup/generateCaption.ts new file mode 100644 index 0000000..4e45c48 --- /dev/null +++ b/src/recoup/generateCaption.ts @@ -0,0 +1,26 @@ +import { callRecoupApi } from "./callRecoupApi"; +import { fetchTemplateIds } from "./fetchTemplateIds"; + +/** + * Generate a caption via POST /api/content/caption. + * + * @param params - Caption generation parameters. + * @returns Generated caption text. 
+ */ +export async function generateCaption(params: { + topic: string; + template?: string; + length?: string; +}): Promise { + const body: Record = { topic: params.topic }; + + if (params.template) { + const validIds = await fetchTemplateIds(); + if (validIds.includes(params.template)) body.template = params.template; + } + + if (params.length) body.length = params.length; + + const data = await callRecoupApi("/api/content/caption", body); + return data.content as string; +} diff --git a/src/recoup/generateImage.ts b/src/recoup/generateImage.ts new file mode 100644 index 0000000..df23232 --- /dev/null +++ b/src/recoup/generateImage.ts @@ -0,0 +1,23 @@ +import { callRecoupApi } from "./callRecoupApi"; + +/** + * Generate an image via POST /api/content/image. + * + * @param params - Image generation parameters. + * @returns URL of the generated image. + */ +export async function generateImage(params: { + prompt: string; + referenceImageUrl?: string; + images?: string[]; +}): Promise { + const body: Record = { prompt: params.prompt }; + if (params.referenceImageUrl) body.reference_image_url = params.referenceImageUrl; + if (params.images) { + const validUrls = params.images.filter(url => url.startsWith("http")); + if (validUrls.length > 0) body.images = validUrls; + } + + const data = await callRecoupApi("/api/content/image", body); + return data.imageUrl as string; +} diff --git a/src/recoup/generateVideo.ts b/src/recoup/generateVideo.ts new file mode 100644 index 0000000..b03ea1e --- /dev/null +++ b/src/recoup/generateVideo.ts @@ -0,0 +1,22 @@ +import { callRecoupApi } from "./callRecoupApi"; + +/** + * Generate a video via POST /api/content/video. + * + * @param params - Video generation parameters. + * @returns URL of the generated video. 
+ */ +export async function generateVideo(params: { + imageUrl: string; + prompt: string; + audioUrl?: string; +}): Promise { + const body: Record = { + image_url: params.imageUrl, + prompt: params.prompt, + }; + if (params.audioUrl) body.audio_url = params.audioUrl; + + const data = await callRecoupApi("/api/content/video", body); + return data.videoUrl as string; +} diff --git a/src/recoup/upscaleMedia.ts b/src/recoup/upscaleMedia.ts new file mode 100644 index 0000000..5064da2 --- /dev/null +++ b/src/recoup/upscaleMedia.ts @@ -0,0 +1,13 @@ +import { callRecoupApi } from "./callRecoupApi"; + +/** + * Upscale an image or video via POST /api/content/upscale. + * + * @param url - URL of the media to upscale. + * @param type - Whether the input is an image or video. + * @returns URL of the upscaled media. + */ +export async function upscaleMedia(url: string, type: "image" | "video"): Promise { + const data = await callRecoupApi("/api/content/upscale", { url, type }); + return data.url as string; +} diff --git a/src/tasks/__tests__/createContentTask.test.ts b/src/tasks/__tests__/createContentTask.test.ts index d0a63d3..673ac96 100644 --- a/src/tasks/__tests__/createContentTask.test.ts +++ b/src/tasks/__tests__/createContentTask.test.ts @@ -43,24 +43,11 @@ vi.mock("../../content/detectFace", () => ({ detectFace: vi.fn().mockResolvedValue(false), })); -vi.mock("../../content/generateContentImage", () => ({ - generateContentImage: vi.fn().mockResolvedValue("https://fal.ai/image.png"), -})); - -vi.mock("../../content/generateContentVideo", () => ({ - generateContentVideo: vi.fn().mockResolvedValue("https://fal.ai/video.mp4"), -})); - -vi.mock("../../content/generateAudioVideo", () => ({ - generateAudioVideo: vi.fn().mockResolvedValue("https://fal.ai/lipsync-video.mp4"), -})); - -vi.mock("../../content/upscaleImage", () => ({ - upscaleImage: vi.fn().mockImplementation((url: string) => Promise.resolve(`${url}?upscaled`)), -})); - -vi.mock("../../content/upscaleVideo", () => 
({ - upscaleVideo: vi.fn().mockImplementation((url: string) => Promise.resolve(`${url}?upscaled`)), +vi.mock("../../recoup/contentApi", () => ({ + generateImage: vi.fn().mockResolvedValue("https://fal.ai/image.png"), + generateVideo: vi.fn().mockResolvedValue("https://fal.ai/video.mp4"), + upscaleMedia: vi.fn().mockImplementation((url: string) => Promise.resolve(`${url}?upscaled`)), + generateCaption: vi.fn().mockResolvedValue("this is the vibe 🎡"), })); vi.mock("../../content/selectAudioClip", () => ({ @@ -77,9 +64,6 @@ vi.mock("../../content/selectAudioClip", () => ({ }), })); -vi.mock("../../content/generateCaption", () => ({ - generateCaption: vi.fn().mockResolvedValue("this is the vibe 🎡"), -})); vi.mock("../../content/fetchArtistContext", () => ({ fetchArtistContext: vi.fn().mockResolvedValue("Artist identity info"), @@ -170,9 +154,12 @@ describe("createContentTask", () => { images: ["https://example.com/cover1.png", "https://example.com/cover2.png"], }); - const { generateContentImage } = await import("../../content/generateContentImage"); - const callArgs = vi.mocked(generateContentImage).mock.calls[0][0]; - expect(callArgs.additionalImageUrls).toBeUndefined(); + const { generateImage } = await import("../../recoup/contentApi"); + const callArgs = vi.mocked(generateImage).mock.calls[0][0] as Record; + // When usesImageOverlay=true, additional images should NOT be passed to generateImage + // (they're used later in renderFinalVideo as overlays instead) + expect(callArgs.images).not.toContain("https://example.com/cover1.png"); + expect(callArgs.images).not.toContain("https://example.com/cover2.png"); }); it("throws when FAL_KEY is missing", async () => { diff --git a/src/tasks/__tests__/createContentTaskApi.test.ts b/src/tasks/__tests__/createContentTaskApi.test.ts new file mode 100644 index 0000000..55f6ec7 --- /dev/null +++ b/src/tasks/__tests__/createContentTaskApi.test.ts @@ -0,0 +1,173 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + 
+const mockRun = vi.fn(); +const mockTagsAdd = vi.fn(); + +vi.mock("@trigger.dev/sdk/v3", () => ({ + logger: { log: vi.fn(), error: vi.fn() }, + metadata: { set: vi.fn() }, + tags: { + add: (...args: unknown[]) => mockTagsAdd(...args), + }, + schemaTask: (config: { run: unknown }) => { + mockRun.mockImplementation(config.run as (...args: unknown[]) => unknown); + return config; + }, +})); + +const mockFalConfig = vi.fn(); +const mockFalStorageUpload = vi.fn(); +vi.mock("@fal-ai/client", () => ({ + fal: { + config: (...args: unknown[]) => mockFalConfig(...args), + storage: { upload: (...args: unknown[]) => mockFalStorageUpload(...args) }, + }, +})); + +vi.mock("../../sandboxes/logStep", () => ({ logStep: vi.fn() })); +vi.mock("../../content/fetchGithubFile", () => ({ + fetchGithubFile: vi.fn().mockResolvedValue(Buffer.from("fake-png")), +})); +vi.mock("../../content/downloadImageBuffer", () => ({ + downloadImageBuffer: vi.fn().mockResolvedValue({ + buffer: Buffer.from("fake-image"), + contentType: "image/png", + }), +})); +vi.mock("../../content/detectFace", () => ({ + detectFace: vi.fn().mockResolvedValue(false), +})); +vi.mock("../../content/selectAudioClip", () => ({ + selectAudioClip: vi.fn().mockResolvedValue({ + songFilename: "song.mp3", + songTitle: "Test Song", + songBuffer: Buffer.from("fake-mp3"), + startSeconds: 30, + durationSeconds: 8, + lyrics: { title: "Test Song", fullLyrics: "full lyrics", segments: [] }, + clipLyrics: "clip lyrics here", + clipReason: "great hook", + clipMood: "energetic", + }), +})); +vi.mock("../../content/fetchArtistContext", () => ({ + fetchArtistContext: vi.fn().mockResolvedValue("Artist identity info"), +})); +vi.mock("../../content/fetchAudienceContext", () => ({ + fetchAudienceContext: vi.fn().mockResolvedValue("Audience info"), +})); +vi.mock("../../content/renderFinalVideo", () => ({ + renderFinalVideo: vi.fn().mockResolvedValue({ + videoUrl: "https://fal.ai/storage/final-video.mp4", + mimeType: "video/mp4", + sizeBytes: 
5000, + }), +})); +vi.mock("../../content/loadTemplate", () => ({ + loadTemplate: vi.fn().mockResolvedValue({ + name: "artist-caption-bedroom", + imagePrompt: "test scene prompt", + usesFaceGuide: true, + styleGuide: null, + captionGuide: null, + captionExamples: [], + videoMoods: [], + videoMovements: [], + referenceImagePaths: [], + }), + pickRandomReferenceImage: vi.fn().mockReturnValue(null), + buildImagePrompt: vi.fn().mockReturnValue("test prompt"), + buildMotionPrompt: vi.fn().mockReturnValue("test motion prompt"), +})); + +// Mock the NEW API wrappers +const mockGenerateImage = vi.fn().mockResolvedValue("https://api.test/image.png"); +const mockUpscaleMedia = vi.fn().mockImplementation((url: string) => Promise.resolve(`${url}?upscaled`)); +const mockGenerateVideo = vi.fn().mockResolvedValue("https://api.test/video.mp4"); +const mockGenerateCaption = vi.fn().mockResolvedValue("caption from api"); + +vi.mock("../../recoup/contentApi", () => ({ + generateImage: (...args: unknown[]) => mockGenerateImage(...args), + upscaleMedia: (...args: unknown[]) => mockUpscaleMedia(...args), + generateVideo: (...args: unknown[]) => mockGenerateVideo(...args), + generateCaption: (...args: unknown[]) => mockGenerateCaption(...args), +})); + +await import("../createContentTask"); + +const VALID_PAYLOAD = { + accountId: "acc_123", + artistSlug: "gatsby-grace", + template: "artist-caption-bedroom", + lipsync: false, + githubRepo: "https://github.com/recoupable/test-repo", +}; + +describe("createContentTask (API integration)", () => { + beforeEach(() => { + vi.clearAllMocks(); + process.env.FAL_KEY = "test-fal-key"; + mockFalStorageUpload.mockResolvedValue("https://fal.storage/face-guide.png"); + }); + + it("calls generateImage API instead of internal function", async () => { + await mockRun(VALID_PAYLOAD); + expect(mockGenerateImage).toHaveBeenCalledTimes(1); + expect(mockGenerateImage).toHaveBeenCalledWith( + expect.objectContaining({ prompt: expect.any(String) }), + ); + }); + 
+ it("calls generateVideo API instead of internal function", async () => { + await mockRun(VALID_PAYLOAD); + expect(mockGenerateVideo).toHaveBeenCalledTimes(1); + expect(mockGenerateVideo).toHaveBeenCalledWith( + expect.objectContaining({ + imageUrl: expect.any(String), + prompt: expect.any(String), + }), + ); + }); + + it("calls generateCaption API with artist and audience context in topic", async () => { + await mockRun(VALID_PAYLOAD); + expect(mockGenerateCaption).toHaveBeenCalledTimes(1); + const callArgs = mockGenerateCaption.mock.calls[0][0] as Record<string, unknown>; + const topic = callArgs.topic as string; + expect(topic).toContain("Artist identity info"); + expect(topic).toContain("Audience info"); + expect(topic).toContain("Test Song"); + }); + + it("calls upscaleMedia API for image when upscale=true", async () => { + await mockRun({ ...VALID_PAYLOAD, upscale: true }); + expect(mockUpscaleMedia).toHaveBeenCalledWith(expect.any(String), "image"); + }); + + it("calls upscaleMedia API for video when upscale=true", async () => { + await mockRun({ ...VALID_PAYLOAD, upscale: true }); + expect(mockUpscaleMedia).toHaveBeenCalledWith(expect.any(String), "video"); + }); + + it("does not call upscaleMedia when upscale=false", async () => { + await mockRun({ ...VALID_PAYLOAD, upscale: false }); + expect(mockUpscaleMedia).not.toHaveBeenCalled(); + }); + + it("passes audioUrl for lipsync mode", async () => { + mockFalStorageUpload.mockResolvedValue("https://fal.storage/song.mp3"); + await mockRun({ ...VALID_PAYLOAD, lipsync: true }); + expect(mockGenerateVideo).toHaveBeenCalledWith( + expect.objectContaining({ + audioUrl: expect.any(String), + }), + ); + }); + + it("returns completed result with API-generated content", async () => { + const result = await mockRun(VALID_PAYLOAD); + expect(result.status).toBe("completed"); + expect(result.imageUrl).toBe("https://api.test/image.png"); + expect(result.captionText).toBe("caption from api"); + }); +}); diff --git
a/src/tasks/createContentTask.ts b/src/tasks/createContentTask.ts index 0ac1285..c5c7e4e 100644 --- a/src/tasks/createContentTask.ts +++ b/src/tasks/createContentTask.ts @@ -3,14 +3,8 @@ import { schemaTask, tags } from "@trigger.dev/sdk/v3"; import { createContentPayloadSchema } from "../schemas/contentCreationSchema"; import { logStep } from "../sandboxes/logStep"; import { fetchGithubFile } from "../content/fetchGithubFile"; -import { generateContentImage } from "../content/generateContentImage"; -import { generateContentVideo } from "../content/generateContentVideo"; -import { generateAudioVideo } from "../content/generateAudioVideo"; -import { upscaleImage } from "../content/upscaleImage"; -import { upscaleVideo } from "../content/upscaleVideo"; import { resolveFaceGuide } from "../content/resolveFaceGuide"; import { resolveAudioClip } from "../content/resolveAudioClip"; -import { generateCaption } from "../content/generateCaption"; import { fetchArtistContext } from "../content/fetchArtistContext"; import { fetchAudienceContext } from "../content/fetchAudienceContext"; import { renderFinalVideo } from "../content/renderFinalVideo"; @@ -21,6 +15,12 @@ import { buildMotionPrompt, } from "../content/loadTemplate"; import { resolveImageInstruction } from "../content/resolveImageInstruction"; +import { + generateImage, + upscaleMedia, + generateVideo, + generateCaption, +} from "../recoup/contentApi"; /** * Content-creation task — full pipeline that generates a social-ready video. @@ -30,13 +30,12 @@ import { resolveImageInstruction } from "../content/resolveImageInstruction"; * 2. Fetch face-guide from artist's GitHub repo * 3. Select audio clip (fetch songs, transcribe, analyze, pick best clip) * 4. Fetch artist + audience context for caption generation - * 5. Generate image (fal.ai — face-guide + template prompt + style guide) - * 6. Upscale image (fal.ai — 2x detail enhancement) - * 7.
Generate video (fal.ai — animate image, or audio-to-video for lipsync) - * 8. Upscale video (fal.ai — 720p → 1080p) - * 9. Generate caption (Recoup Chat API — TikTok-style text) + * 5. Generate image (via POST /api/content/image) + * 6. Upscale image (via POST /api/content/upscale) + * 7. Generate video (via POST /api/content/video) + * 8. Upscale video (via POST /api/content/upscale) + * 9. Generate caption (via POST /api/content/caption) * 10. Final render (ffmpeg — crop 16:9→9:16, overlay audio + caption) - * 11. Return final video for API to persist * * No Supabase access — API handles all storage. */ @@ -89,65 +88,69 @@ export const createContentTask = schemaTask({ payload.githubRepo, payload.artistSlug, fetchGithubFile, ); - // --- Step 5: Generate image --- - logStep("Generating image"); + // --- Step 5: Generate image (API) --- + logStep("Generating image via API"); const referenceImagePath = pickRandomReferenceImage(template); - // Build prompt: custom/face-swap/no-face instruction + template scene + style guide const instruction = resolveImageInstruction(template); const basePrompt = `${instruction} ${template.imagePrompt}`; const fullPrompt = buildImagePrompt(basePrompt, template.styleGuide); - let imageUrl = await generateContentImage({ - faceGuideUrl: faceGuideUrl ?? undefined, - referenceImagePath, + + const imageRefs: string[] = []; + if (faceGuideUrl) imageRefs.push(faceGuideUrl); + if (referenceImagePath) imageRefs.push(referenceImagePath); + if (!template.usesImageOverlay && additionalImageUrls.length) { + imageRefs.push(...additionalImageUrls); + } + + let imageUrl = await generateImage({ prompt: fullPrompt, - additionalImageUrls: template.usesImageOverlay ? undefined : additionalImageUrls, + referenceImageUrl: faceGuideUrl ?? undefined, + images: imageRefs.length > 0 ?
imageRefs : undefined, }); - // --- Step 6: Upscale image (optional) --- + // --- Step 6: Upscale image (API, optional) --- if (payload.upscale) { - logStep("Upscaling image"); - imageUrl = await upscaleImage(imageUrl); + logStep("Upscaling image via API"); + imageUrl = await upscaleMedia(imageUrl, "image"); } - // --- Step 7: Generate video --- - let videoUrl: string; + // --- Step 7: Generate video (API) --- const motionPrompt = buildMotionPrompt(template); + let audioUrl: string | undefined; if (payload.lipsync) { - // Lipsync path: audio baked into video - logStep("Generating audio-to-video (lipsync)"); - videoUrl = await generateAudioVideo({ - imageUrl, - songBuffer: audioClip.songBuffer, - audioStartSeconds: audioClip.startSeconds, - audioDurationSeconds: audioClip.durationSeconds, - motionPrompt, - }); - } else { - // Normal path: image-to-video, audio added in post - logStep("Generating video"); - videoUrl = await generateContentVideo({ - imageUrl, - motionPrompt, - }); + logStep("Uploading audio for lipsync"); + const audioFile = new File([audioClip.songBuffer], "song.mp3", { type: "audio/mpeg" }); + audioUrl = await fal.storage.upload(audioFile); } - // --- Step 8: Upscale video (optional) --- + logStep("Generating video via API"); + let videoUrl = await generateVideo({ + imageUrl, + prompt: motionPrompt, + audioUrl, + }); + + // --- Step 8: Upscale video (API, optional) --- if (payload.upscale) { - logStep("Upscaling video"); - videoUrl = await upscaleVideo(videoUrl); + logStep("Upscaling video via API"); + videoUrl = await upscaleMedia(videoUrl, "video"); } - // --- Step 9: Generate caption --- - logStep("Generating caption"); + // --- Step 9: Generate caption (API) --- + logStep("Generating caption via API"); + const captionTopic = [ + `Song: "${audioClip.songTitle}"`, + audioClip.clipLyrics ? `Lyrics: "${audioClip.clipLyrics}"` : null, + audioClip.clipMood ? `Mood: ${audioClip.clipMood}` : null, + artistContext ? 
`Artist: ${artistContext}` : null, + audienceContext ? `Audience: ${audienceContext}` : null, + ].filter(Boolean).join(". "); + const captionText = await generateCaption({ - template, - songTitle: audioClip.songTitle, - fullLyrics: audioClip.lyrics.fullLyrics, - clipLyrics: audioClip.clipLyrics, - artistContext, - audienceContext, - captionLength: payload.captionLength, + topic: captionTopic, + template: payload.template, + length: payload.captionLength, }); // --- Step 10: Final render (ffmpeg) ---