Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 41 additions & 8 deletions frontend/src/components/ImageManager.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,53 @@ interface ImageManagerProps {
images: string[];
onImagesChange: (images: string[]) => void;
disabled?: boolean;
/** Maximum number of images allowed. When set to 1, replaces instead of adding. */
maxImages?: number;
/** Label for the component */
label?: string;
/** Tooltip for the label */
tooltip?: string;
/** Hide the label */
hideLabel?: boolean;
}

export function ImageManager({
images,
onImagesChange,
disabled,
maxImages,
label = "Reference Images",
tooltip = "Select reference images for VACE conditioning. Images will guide the video generation style and content.",
hideLabel = false,
}: ImageManagerProps) {
const [isMediaPickerOpen, setIsMediaPickerOpen] = useState(false);

const handleAddImage = (imagePath: string) => {
onImagesChange([...images, imagePath]);
if (maxImages === 1) {
// Single image mode - replace
onImagesChange([imagePath]);
} else {
onImagesChange([...images, imagePath]);
}
};

const handleRemoveImage = (index: number) => {
onImagesChange(images.filter((_, i) => i !== index));
};

const canAddMore = maxImages === undefined || images.length < maxImages;

return (
<div>
<LabelWithTooltip
label="Reference Images"
tooltip="Select reference images for VACE conditioning. Images will guide the video generation style and content."
className="text-sm font-medium mb-2 block"
/>
{!hideLabel && (
<LabelWithTooltip
label={label}
tooltip={tooltip}
className="text-sm font-medium mb-2 block"
/>
)}

<div className="grid grid-cols-2 gap-2">
<div className={maxImages === 1 ? "grid grid-cols-1" : "grid grid-cols-2 gap-2"}>
{images.length === 0 && (
<button
onClick={() => setIsMediaPickerOpen(true)}
Expand All @@ -52,7 +73,7 @@ export function ImageManager({
>
<img
src={getAssetUrl(imagePath)}
alt={`Reference ${index + 1}`}
alt={`${label} ${index + 1}`}
className="w-full h-full object-cover"
/>
<button
Expand All @@ -65,6 +86,18 @@ export function ImageManager({
</button>
</div>
))}

{/* Show add button if we have images but can add more (multi-image mode) */}
{images.length > 0 && canAddMore && maxImages !== 1 && (
<button
onClick={() => setIsMediaPickerOpen(true)}
disabled={disabled}
className="aspect-square border-2 border-dashed rounded-lg flex flex-col items-center justify-center hover:bg-accent hover:border-accent-foreground disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
>
<Plus className="h-6 w-6 mb-1 text-muted-foreground" />
<span className="text-xs text-muted-foreground">Add Image</span>
</button>
)}
</div>

<MediaPicker
Expand Down
103 changes: 102 additions & 1 deletion frontend/src/components/InputAndControlsPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import { Upload, ArrowUp } from "lucide-react";
import { LabelWithTooltip } from "./ui/label-with-tooltip";
import type { VideoSourceMode } from "../hooks/useVideoSource";
import type { PromptItem, PromptTransition } from "../lib/api";
import type { InputMode, PipelineInfo } from "../types";
import type { ExtensionMode, InputMode, PipelineInfo } from "../types";
import { PromptInput } from "./PromptInput";
import { TimelinePromptEditor } from "./TimelinePromptEditor";
import type { TimelinePrompt } from "./PromptTimeline";
Expand Down Expand Up @@ -69,6 +69,14 @@ interface InputAndControlsPanelProps {
onRefImagesChange?: (images: string[]) => void;
onSendHints?: (imagePaths: string[]) => void;
isDownloading?: boolean;
// FFLF (First-Frame-Last-Frame) extension mode
firstFrameImage?: string;
onFirstFrameImageChange?: (imagePath: string | undefined) => void;
lastFrameImage?: string;
onLastFrameImageChange?: (imagePath: string | undefined) => void;
extensionMode?: ExtensionMode;
onExtensionModeChange?: (mode: ExtensionMode) => void;
onSendExtensionFrames?: () => void;
}

export function InputAndControlsPanel({
Expand Down Expand Up @@ -115,6 +123,13 @@ export function InputAndControlsPanel({
onRefImagesChange,
onSendHints,
isDownloading = false,
firstFrameImage,
onFirstFrameImageChange,
lastFrameImage,
onLastFrameImageChange,
extensionMode = "firstframe",
onExtensionModeChange,
onSendExtensionFrames,
}: InputAndControlsPanelProps) {
// Helper function to determine if playhead is at the end of timeline
const isAtEndOfTimeline = () => {
Expand Down Expand Up @@ -326,6 +341,92 @@ export function InputAndControlsPanel({
</div>
)}

{/* FFLF Extension Frames - only show when VACE is enabled */}
{vaceEnabled && (
<div>
<LabelWithTooltip
label="Extension Frames"
tooltip="Set reference frames for video extension. First frame starts the video from that image, last frame generates toward that target."
className="text-sm font-medium mb-2 block"
/>
<div className="grid grid-cols-2 gap-2">
<div className="space-y-1">
<span className="text-xs text-muted-foreground">First Frame</span>
<ImageManager
images={firstFrameImage ? [firstFrameImage] : []}
onImagesChange={images => {
onFirstFrameImageChange?.(images[0] || undefined);
}}
disabled={isDownloading}
maxImages={1}
hideLabel
/>
</div>
<div className="space-y-1">
<span className="text-xs text-muted-foreground">Last Frame</span>
<ImageManager
images={lastFrameImage ? [lastFrameImage] : []}
onImagesChange={images => {
onLastFrameImageChange?.(images[0] || undefined);
}}
disabled={isDownloading}
maxImages={1}
hideLabel
/>
</div>
</div>
{(firstFrameImage || lastFrameImage) && (
<div className="space-y-2 mt-2">
<div className="flex items-center justify-between gap-2">
<span className="text-xs text-muted-foreground">Mode:</span>
<Select
value={extensionMode}
onValueChange={value => {
if (value && onExtensionModeChange) {
onExtensionModeChange(value as ExtensionMode);
}
}}
disabled={!firstFrameImage && !lastFrameImage}
>
<SelectTrigger className="w-24 h-6 text-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
{firstFrameImage && (
<SelectItem value="firstframe">First</SelectItem>
)}
{lastFrameImage && (
<SelectItem value="lastframe">Last</SelectItem>
)}
{firstFrameImage && lastFrameImage && (
<SelectItem value="firstlastframe">Both</SelectItem>
)}
</SelectContent>
</Select>
</div>
<div className="flex items-center justify-end">
<Button
onMouseDown={e => {
e.preventDefault();
onSendExtensionFrames?.();
}}
disabled={isDownloading || !isStreaming || (!firstFrameImage && !lastFrameImage)}
size="sm"
className="rounded-full w-8 h-8 p-0 bg-black hover:bg-gray-800 text-white disabled:opacity-50 disabled:cursor-not-allowed"
title={
!isStreaming
? "Start streaming to send extension frames"
: "Send extension frames"
}
>
<ArrowUp className="h-4 w-4" />
</Button>
</div>
</div>
)}
</div>
)}

<div>
{(() => {
// The Input can have two states: Append (default) and Edit (when a prompt is selected and the video is paused)
Expand Down
4 changes: 4 additions & 0 deletions frontend/src/hooks/useWebRTC.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ interface InitialParameters {
kv_cache_attention_bias?: number;
vace_ref_images?: string[];
vace_context_scale?: number;
first_frame_image?: string;
last_frame_image?: string;
}

interface UseWebRTCOptions {
Expand Down Expand Up @@ -328,6 +330,8 @@ export function useWebRTC(options?: UseWebRTCOptions) {
vace_ref_images?: string[];
vace_use_input_video?: boolean;
vace_context_scale?: number;
first_frame_image?: string;
last_frame_image?: string;
}) => {
if (
dataChannelRef.current &&
Expand Down
69 changes: 69 additions & 0 deletions frontend/src/pages/StreamPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { usePipelines } from "../hooks/usePipelines";
import { getDefaultPromptForMode } from "../data/pipelines";
import { adjustResolutionForPipeline } from "../lib/utils";
import type {
ExtensionMode,
InputMode,
PipelineId,
LoRAConfig,
Expand Down Expand Up @@ -524,6 +525,57 @@ export function StreamPage() {
}
};

/**
 * Works out which extension mode matches the frame images currently set.
 *
 * @param first - First-frame image path; any falsy value counts as "not set".
 * @param last - Last-frame image path; any falsy value counts as "not set".
 * @returns "firstlastframe" when both are set, "firstframe" or "lastframe"
 *   when only one is set, and undefined when neither is set.
 */
const deriveExtensionMode = (
  first: string | undefined,
  last: string | undefined
): ExtensionMode | undefined => {
  const hasFirst = Boolean(first);
  const hasLast = Boolean(last);

  // Nothing selected: there is no mode to derive.
  if (!hasFirst && !hasLast) {
    return undefined;
  }
  if (hasFirst && hasLast) {
    return "firstlastframe";
  }
  // Exactly one of the two is set at this point.
  return hasFirst ? "firstframe" : "lastframe";
};

/**
 * Stores a new first-frame image and, in the same settings update, re-derives
 * the extension mode from it together with the current last-frame image.
 *
 * @param imagePath - New first-frame image path, or undefined when none is set.
 */
const handleFirstFrameImageChange = (imagePath: string | undefined) => {
  const nextMode = deriveExtensionMode(imagePath, settings.lastFrameImage);
  updateSettings({ firstFrameImage: imagePath, extensionMode: nextMode });
};

/**
 * Stores a new last-frame image and, in the same settings update, re-derives
 * the extension mode from the current first-frame image together with it.
 *
 * @param imagePath - New last-frame image path, or undefined when none is set.
 */
const handleLastFrameImageChange = (imagePath: string | undefined) => {
  const nextMode = deriveExtensionMode(settings.firstFrameImage, imagePath);
  updateSettings({ lastFrameImage: imagePath, extensionMode: nextMode });
};

/**
 * Writes an explicitly chosen extension mode into the settings.
 *
 * @param mode - The extension mode to store.
 */
const handleExtensionModeChange = (mode: ExtensionMode) => {
  const extensionMode = mode;
  updateSettings({ extensionMode });
};

/**
 * Sends the frame images selected by the current extension mode as a
 * parameter update.
 *
 * The mode falls back to "firstframe" when none is stored. Only the images
 * the mode asks for are included, and only if they are actually set; when
 * that leaves nothing to send, no update is issued.
 */
const handleSendExtensionFrames = () => {
  const { firstFrameImage, lastFrameImage } = settings;
  const activeMode = settings.extensionMode || "firstframe";

  // "firstlastframe" requests both images; the single modes request one each.
  const wantsFirst =
    activeMode === "firstframe" || activeMode === "firstlastframe";
  const wantsLast =
    activeMode === "lastframe" || activeMode === "firstlastframe";

  const payload: Record<string, string> = {};
  if (wantsFirst && firstFrameImage) {
    payload.first_frame_image = firstFrameImage;
  }
  if (wantsLast && lastFrameImage) {
    payload.last_frame_image = lastFrameImage;
  }

  // Skip the call entirely when no image qualified.
  if (Object.keys(payload).length === 0) {
    return;
  }
  sendParameterUpdate(payload);
};

const handleResetCache = () => {
// Send reset cache command to backend
sendParameterUpdate({
Expand Down Expand Up @@ -809,6 +861,8 @@ export function StreamPage() {
vace_ref_images?: string[];
vace_use_input_video?: boolean;
vace_context_scale?: number;
first_frame_image?: string;
last_frame_image?: string;
} = {
// Signal the intended input mode to the backend so it doesn't
// briefly fall back to text mode before video frames arrive
Expand Down Expand Up @@ -850,6 +904,14 @@ export function StreamPage() {
settings.vaceUseInputVideo ?? false;
}

// Add FFLF (first-frame-last-frame) parameters if set
if (settings.firstFrameImage) {
initialParameters.first_frame_image = settings.firstFrameImage;
}
if (settings.lastFrameImage) {
initialParameters.last_frame_image = settings.lastFrameImage;
}

// Video mode parameters - applies to all pipelines in video mode
if (currentMode === "video") {
initialParameters.noise_scale = settings.noiseScale ?? 0.7;
Expand Down Expand Up @@ -942,6 +1004,13 @@ export function StreamPage() {
onRefImagesChange={handleRefImagesChange}
onSendHints={handleSendHints}
isDownloading={isDownloading}
firstFrameImage={settings.firstFrameImage}
onFirstFrameImageChange={handleFirstFrameImageChange}
lastFrameImage={settings.lastFrameImage}
onLastFrameImageChange={handleLastFrameImageChange}
extensionMode={settings.extensionMode || "firstframe"}
onExtensionModeChange={handleExtensionModeChange}
onSendExtensionFrames={handleSendExtensionFrames}
/>
</div>

Expand Down
7 changes: 7 additions & 0 deletions frontend/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ export type InputMode = "text" | "video";
// VAE type for model selection (dynamic from backend registry)
export type VaeType = string;

// Extension mode for the FFLF (First-Frame-Last-Frame) feature.
// One of three string literals: "firstframe", "lastframe", or
// "firstlastframe" (both frames).
export type ExtensionMode = "firstframe" | "lastframe" | "firstlastframe";

// WebRTC ICE server configuration
export interface IceServerConfig {
urls: string | string[];
Expand Down Expand Up @@ -77,6 +80,10 @@ export interface SettingsState {
vaceUseInputVideo?: boolean;
refImages?: string[];
vaceContextScale?: number;
// FFLF (First-Frame-Last-Frame) extension mode
firstFrameImage?: string;
lastFrameImage?: string;
extensionMode?: ExtensionMode;
// VAE type selection
vaeType?: VaeType;
}
Expand Down