diff --git a/frontend/src/components/VideoOutput.tsx b/frontend/src/components/VideoOutput.tsx index 4ed6bb61..8fe95339 100644 --- a/frontend/src/components/VideoOutput.tsx +++ b/frontend/src/components/VideoOutput.tsx @@ -14,6 +14,12 @@ interface VideoOutputProps { onPlayPauseToggle?: () => void; onStartStream?: () => void; onVideoPlaying?: () => void; + // Controller input props + supportsControllerInput?: boolean; + isPointerLocked?: boolean; + onRequestPointerLock?: () => void; + /** Ref to expose the video container element for pointer lock */ + videoContainerRef?: React.RefObject; } export function VideoOutput({ @@ -27,12 +33,20 @@ export function VideoOutput({ onPlayPauseToggle, onStartStream, onVideoPlaying, + supportsControllerInput = false, + isPointerLocked = false, + onRequestPointerLock, + videoContainerRef, }: VideoOutputProps) { const videoRef = useRef(null); + const internalContainerRef = useRef(null); const [showOverlay, setShowOverlay] = useState(false); const [isFadingOut, setIsFadingOut] = useState(false); const overlayTimeoutRef = useRef(null); + // Use external ref if provided, otherwise use internal + const containerRef = videoContainerRef || internalContainerRef; + useEffect(() => { if (videoRef.current && remoteStream) { videoRef.current.srcObject = remoteStream; @@ -87,7 +101,16 @@ export function VideoOutput({ }, [onPlayPauseToggle, remoteStream]); const handleVideoClick = () => { - triggerPlayPause(); + // If controller input is supported and not locked, request pointer lock + if (supportsControllerInput && !isPointerLocked && onRequestPointerLock) { + onRequestPointerLock(); + return; + } + + // Otherwise toggle play/pause + if (!isPointerLocked) { + triggerPlayPause(); + } }; // Handle spacebar press for play/pause @@ -134,6 +157,7 @@ export function VideoOutput({ {remoteStream ? (
@@ -158,6 +182,12 @@ export function VideoOutput({
)} + {/* Controller Input Overlay - only show before pointer lock (browser shows ESC hint) */} + {supportsControllerInput && !isPointerLocked && ( +
+ Click to enable controller input +
+ )} ) : isDownloading ? (
diff --git a/frontend/src/hooks/useControllerInput.ts b/frontend/src/hooks/useControllerInput.ts new file mode 100644 index 00000000..72065aeb --- /dev/null +++ b/frontend/src/hooks/useControllerInput.ts @@ -0,0 +1,252 @@ +import { useEffect, useRef, useCallback, useState } from "react"; + +/** + * Controller input state matching the backend CtrlInput format. + * Uses W3C event.code strings for key identification. + */ +export interface ControllerInputState { + /** Set of currently pressed keys (W3C event.code values) */ + button: string[]; + /** Mouse velocity/delta as [dx, dy] tuple */ + mouse: [number, number]; +} + +/** + * Configuration for the controller input hook. + */ +export interface ControllerInputConfig { + /** Target send rate in Hz (default: 60) */ + sendRateHz?: number; + /** Mouse sensitivity multiplier (default: 1.5) */ + mouseSensitivity?: number; + /** Keys to capture (default: WASD, arrows, space, shift, Q/E/R/F/C/X/Z) */ + capturedKeys?: Set; +} + +/** Default keys to capture */ +const DEFAULT_CAPTURED_KEYS = new Set([ + "KeyW", + "KeyA", + "KeyS", + "KeyD", + "ArrowUp", + "ArrowDown", + "ArrowLeft", + "ArrowRight", + "Space", + "ShiftLeft", + "ShiftRight", + "KeyQ", + "KeyE", + "KeyR", + "KeyF", + "KeyC", + "KeyX", + "KeyZ", +]); + +/** + * Hook for capturing WASD keyboard and mouse input for streaming to backend. 
+ * + * Uses a pygame-inspired state dictionary pattern: + * - Tracks which keys are currently held down (not just press events) + * - Accumulates mouse deltas between send intervals + * - Sends state snapshots at a fixed rate (default 60Hz) + * + * @param sendFn Function to send controller input to backend + * @param enabled Whether controller input capture is enabled + * @param targetRef Ref to the element that should capture input (for pointer lock) + * @param config Optional configuration + */ +export function useControllerInput( + sendFn: (params: { ctrl_input: ControllerInputState }) => void, + enabled: boolean, + targetRef: React.RefObject, + config?: ControllerInputConfig +) { + const { + sendRateHz = 60, + mouseSensitivity = 1.5, + capturedKeys = DEFAULT_CAPTURED_KEYS, + } = config || {}; + + // State for UI feedback + const [isPointerLocked, setIsPointerLocked] = useState(false); + const [pressedKeys, setPressedKeys] = useState>(new Set()); + + // Refs for tracking input state (mutable for performance) + const pressedKeysRef = useRef>(new Set()); + const mouseDeltaRef = useRef<[number, number]>([0, 0]); + const lastSentStateRef = useRef(""); + const sendIntervalRef = useRef(null); + + // Handle keyboard events + const handleKeyDown = useCallback( + (e: KeyboardEvent) => { + if (!enabled || !isPointerLocked) return; + + // Ignore if typing in an input field + const target = e.target as HTMLElement; + if ( + target.tagName === "INPUT" || + target.tagName === "TEXTAREA" || + target.tagName === "SELECT" || + target.isContentEditable + ) { + return; + } + + if (capturedKeys.has(e.code)) { + e.preventDefault(); + pressedKeysRef.current.add(e.code); + setPressedKeys(new Set(pressedKeysRef.current)); + } + }, + [enabled, isPointerLocked, capturedKeys] + ); + + const handleKeyUp = useCallback( + (e: KeyboardEvent) => { + if (!enabled) return; + + if (capturedKeys.has(e.code)) { + e.preventDefault(); + pressedKeysRef.current.delete(e.code); + setPressedKeys(new 
Set(pressedKeysRef.current)); + } + }, + [enabled, capturedKeys] + ); + + // Handle mouse movement (only when pointer is locked) + const handleMouseMove = useCallback( + (e: MouseEvent) => { + if (!enabled || !isPointerLocked) return; + + // Accumulate mouse deltas + mouseDeltaRef.current[0] += e.movementX * mouseSensitivity; + mouseDeltaRef.current[1] += e.movementY * mouseSensitivity; + }, + [enabled, isPointerLocked, mouseSensitivity] + ); + + // Handle pointer lock changes + const handlePointerLockChange = useCallback(() => { + const isLocked = document.pointerLockElement === targetRef.current; + setIsPointerLocked(isLocked); + + if (!isLocked) { + // Clear pressed keys when pointer lock is released + pressedKeysRef.current.clear(); + setPressedKeys(new Set()); + mouseDeltaRef.current = [0, 0]; + } + }, [targetRef]); + + // Request pointer lock + const requestPointerLock = useCallback(() => { + if (targetRef.current && enabled) { + targetRef.current.requestPointerLock(); + } + }, [targetRef, enabled]); + + // Release pointer lock + const releasePointerLock = useCallback(() => { + if (document.pointerLockElement) { + document.exitPointerLock(); + } + }, []); + + // Send controller input at fixed interval + const sendControllerInput = useCallback(() => { + if (!enabled || !isPointerLocked) return; + + const state: ControllerInputState = { + button: Array.from(pressedKeysRef.current), + mouse: [...mouseDeltaRef.current] as [number, number], + }; + + // Skip idle duplicates only: a non-zero mouse delta is relative and must always be sent + const stateStr = JSON.stringify(state); + if (stateStr !== lastSentStateRef.current || state.mouse.some(Boolean)) { + sendFn({ ctrl_input: state }); + lastSentStateRef.current = stateStr; + } + + // Reset mouse delta after sending (it's accumulated between sends) + mouseDeltaRef.current = [0, 0]; + }, [enabled, isPointerLocked, sendFn]); + + // Set up event listeners + useEffect(() => { + if (!enabled) return; + + window.addEventListener("keydown", handleKeyDown); + 
window.addEventListener("keyup", handleKeyUp); + window.addEventListener("mousemove", handleMouseMove); + document.addEventListener("pointerlockchange", handlePointerLockChange); + + return () => { + window.removeEventListener("keydown", handleKeyDown); + window.removeEventListener("keyup", handleKeyUp); + window.removeEventListener("mousemove", handleMouseMove); + document.removeEventListener( + "pointerlockchange", + handlePointerLockChange + ); + }; + }, [ + enabled, + handleKeyDown, + handleKeyUp, + handleMouseMove, + handlePointerLockChange, + ]); + + // Set up send interval + useEffect(() => { + if (!enabled || !isPointerLocked) { + if (sendIntervalRef.current) { + clearInterval(sendIntervalRef.current); + sendIntervalRef.current = null; + } + return; + } + + const intervalMs = 1000 / sendRateHz; + sendIntervalRef.current = window.setInterval( + sendControllerInput, + intervalMs + ); + + return () => { + if (sendIntervalRef.current) { + clearInterval(sendIntervalRef.current); + sendIntervalRef.current = null; + } + }; + }, [enabled, isPointerLocked, sendRateHz, sendControllerInput]); + + // Clean up on unmount + useEffect(() => { + return () => { + if (sendIntervalRef.current) { + clearInterval(sendIntervalRef.current); + } + if (document.pointerLockElement) { + document.exitPointerLock(); + } + }; + }, []); + + return { + /** Whether pointer lock is currently active */ + isPointerLocked, + /** Set of currently pressed keys (for UI display) */ + pressedKeys, + /** Request pointer lock on the target element */ + requestPointerLock, + /** Release pointer lock */ + releasePointerLock, + }; +} diff --git a/frontend/src/hooks/usePipelines.ts b/frontend/src/hooks/usePipelines.ts index 5eee87a7..5287cd2b 100644 --- a/frontend/src/hooks/usePipelines.ts +++ b/frontend/src/hooks/usePipelines.ts @@ -36,6 +36,10 @@ export function usePipelines() { } } + // Check if pipeline supports controller input (has ctrl_input field in schema) + const supportsControllerInput = + 
schema.config_schema?.properties?.ctrl_input !== undefined; + transformed[id] = { name: schema.name, about: schema.description, @@ -60,6 +64,7 @@ export function usePipelines() { schema.recommended_quantization_vram_threshold ?? undefined, modified: schema.modified, vaeTypes, + supportsControllerInput, }; } diff --git a/frontend/src/hooks/useWebRTC.ts b/frontend/src/hooks/useWebRTC.ts index c73d504c..057ec585 100644 --- a/frontend/src/hooks/useWebRTC.ts +++ b/frontend/src/hooks/useWebRTC.ts @@ -329,6 +329,7 @@ export function useWebRTC(options?: UseWebRTCOptions) { vace_ref_images?: string[]; vace_use_input_video?: boolean; vace_context_scale?: number; + ctrl_input?: { button: string[]; mouse: [number, number] }; }) => { if ( dataChannelRef.current && diff --git a/frontend/src/lib/controllerTransport.ts b/frontend/src/lib/controllerTransport.ts new file mode 100644 index 00000000..ff55acd5 --- /dev/null +++ b/frontend/src/lib/controllerTransport.ts @@ -0,0 +1,76 @@ +/** + * Controller Transport Abstraction + * + * This module provides an abstraction layer for sending controller input + * to the backend. Currently uses WebRTC data channel, but can be swapped + * for GPU frame sharing or other transport mechanisms in the future. + */ + +import type { ControllerInputState } from "../hooks/useControllerInput"; + +/** + * Interface for controller input transport. + * Implementations can use different transport mechanisms. + */ +export interface ControllerTransport { + /** Send controller input state to backend */ + send(input: ControllerInputState): void; + /** Check if transport is ready to send */ + isReady(): boolean; +} + +/** + * WebRTC Data Channel transport implementation. + * Uses the existing WebRTC data channel for sending controller input. 
+ */ +export class DataChannelTransport implements ControllerTransport { + sendParameterUpdate: (params: { ctrl_input: ControllerInputState }) => void; + + constructor( + sendParameterUpdate: (params: { ctrl_input: ControllerInputState }) => void + ) { + this.sendParameterUpdate = sendParameterUpdate; + } + + send(input: ControllerInputState): void { + this.sendParameterUpdate({ ctrl_input: input }); + } + + isReady(): boolean { + // The sendParameterUpdate function handles checking if the channel is open + return true; + } +} + +/** + * Create a controller transport using WebRTC data channel. + * + * @param sendParameterUpdate Function from useWebRTC hook + * @returns ControllerTransport instance + */ +export function createDataChannelTransport( + sendParameterUpdate: (params: { ctrl_input: ControllerInputState }) => void +): ControllerTransport { + return new DataChannelTransport(sendParameterUpdate); +} + +/** + * Future: GPU Shared Memory transport + * + * This would be used when running locally with the Electron app + * to bypass WebRTC and share frames directly on the GPU. 
+ * + * Example implementation: + * + * export class SharedMemoryTransport implements ControllerTransport { + * constructor(private sharedBuffer: SharedArrayBuffer) {} + * + * send(input: ControllerInputState): void { + * // Write directly to shared memory + * } + * + * isReady(): boolean { + * return this.sharedBuffer !== null; + * } + * } + */ diff --git a/frontend/src/pages/StreamPage.tsx b/frontend/src/pages/StreamPage.tsx index 919a10ca..a41d60c5 100644 --- a/frontend/src/pages/StreamPage.tsx +++ b/frontend/src/pages/StreamPage.tsx @@ -10,6 +10,7 @@ import { StatusBar } from "../components/StatusBar"; import { useWebRTC } from "../hooks/useWebRTC"; import { useVideoSource } from "../hooks/useVideoSource"; import { useWebRTCStats } from "../hooks/useWebRTCStats"; +import { useControllerInput } from "../hooks/useControllerInput"; import { usePipeline } from "../hooks/usePipeline"; import { useStreamState } from "../hooks/useStreamState"; import { usePipelines } from "../hooks/usePipelines"; @@ -172,6 +173,20 @@ export function StreamPage() { isStreaming, }); + // Video container ref for controller input pointer lock + const videoContainerRef = useRef(null); + + // Check if current pipeline supports controller input + const currentPipelineSupportsController = + pipelines?.[settings.pipelineId]?.supportsControllerInput ?? 
false; + + // Controller input hook - captures WASD/mouse and streams to backend + const { isPointerLocked, requestPointerLock } = useControllerInput( + sendParameterUpdate, + isStreaming && currentPipelineSupportsController, + videoContainerRef + ); + // Video source for preview (camera or video) // Enable based on input mode, not pipeline category const { @@ -1067,6 +1082,11 @@ export function StreamPage() { onVideoPlayingCallbackRef.current = null; // Clear after execution } }} + // Controller input props + supportsControllerInput={currentPipelineSupportsController} + isPointerLocked={isPointerLocked} + onRequestPointerLock={requestPointerLock} + videoContainerRef={videoContainerRef} />
{/* Timeline area - compact, always visible */} diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts index 14572fdf..8a97eb6f 100644 --- a/frontend/src/types/index.ts +++ b/frontend/src/types/index.ts @@ -111,6 +111,8 @@ export interface PipelineInfo { recommendedQuantizationVramThreshold?: number | null; // Available VAE types from config schema enum (derived from vae_type field presence) vaeTypes?: string[]; + // Controller input support - presence of ctrl_input field in pipeline schema + supportsControllerInput?: boolean; } export interface DownloadProgress { diff --git a/src/scope/core/pipelines/base_schema.py b/src/scope/core/pipelines/base_schema.py index 4e19667c..1f1aaa52 100644 --- a/src/scope/core/pipelines/base_schema.py +++ b/src/scope/core/pipelines/base_schema.py @@ -10,6 +10,9 @@ height: int = 320 width: int = 576 denoising_steps: list[int] = [1000, 750, 500, 250] + +For pipelines that support controller input (WASD/mouse), include a ctrl_input field: + ctrl_input: CtrlInput | None = None """ from enum import Enum @@ -18,6 +21,9 @@ from pydantic import BaseModel, ConfigDict, Field from pydantic.fields import FieldInfo +# Re-export CtrlInput for convenient import by pipeline schemas +from scope.core.pipelines.controller import CtrlInput as CtrlInput # noqa: PLC0414 + if TYPE_CHECKING: from .artifacts import Artifact diff --git a/src/scope/core/pipelines/controller.py b/src/scope/core/pipelines/controller.py new file mode 100644 index 00000000..ca5b1ea0 --- /dev/null +++ b/src/scope/core/pipelines/controller.py @@ -0,0 +1,110 @@ +"""Controller input data model for interactive pipelines. + +This module provides the CtrlInput dataclass for capturing keyboard and mouse +input from the frontend. It uses W3C event.code strings as the universal standard +for key identification. 
+ +Pipelines that need different keycode formats (e.g., Windows Virtual Keycodes +for world_engine compatibility) should convert internally using the provided +W3C_TO_WIN mapping. +""" + +from dataclasses import dataclass, field +from typing import Any + + +@dataclass +class CtrlInput: + """Controller input state for interactive pipelines. + + Uses W3C event.code strings for key identification, which is: + - Universal (web standard, not OS-specific) + - Self-documenting ("KeyW" is clearer than 87) + - Layout-independent (physical key position, not character) + + Attributes: + button: Set of currently pressed keys using W3C event.code strings. + Example: {"KeyW", "KeyA", "Space", "ShiftLeft"} + mouse: Mouse velocity/delta as (dx, dy) tuple. + Floats; the web frontend sends sensitivity-scaled movement deltas. + """ + + button: set[str] = field(default_factory=set) + mouse: tuple[float, float] = (0.0, 0.0) + + +def parse_ctrl_input(data: dict[str, Any]) -> CtrlInput: + """Parse controller input from frontend JSON format. 
+ + Args: + data: Dict with 'button' (list of str) and 'mouse' ([dx, dy]); both optional + + Returns: + CtrlInput instance; missing/null fields and mouse components default to empty/0.0 + """ + button = set(data.get("button") or []) + dx, dy = (list(data.get("mouse") or []) + [0.0, 0.0])[:2] + mouse = (float(dx), float(dy)) + return CtrlInput(button=button, mouse=mouse) + + +# W3C event.code to Windows Virtual Keycode mapping +# For pipelines that need Windows keycodes (e.g., world_engine compatibility) +W3C_TO_WIN: dict[str, int] = { + # Letters (WASD) + "KeyW": 87, + "KeyA": 65, + "KeyS": 83, + "KeyD": 68, + # Other common letters + "KeyQ": 81, + "KeyE": 69, + "KeyR": 82, + "KeyF": 70, + "KeyC": 67, + "KeyX": 88, + "KeyZ": 90, + # Space and modifiers + "Space": 32, + "ShiftLeft": 160, + "ShiftRight": 161, + "ControlLeft": 162, + "ControlRight": 163, + "AltLeft": 164, + "AltRight": 165, + # Arrow keys + "ArrowUp": 38, + "ArrowDown": 40, + "ArrowLeft": 37, + "ArrowRight": 39, + # Other common keys + "Enter": 13, + "Escape": 27, + "Tab": 9, + "Backspace": 8, + # Number keys + "Digit1": 49, + "Digit2": 50, + "Digit3": 51, + "Digit4": 52, + "Digit5": 53, + "Digit6": 54, + "Digit7": 55, + "Digit8": 56, + "Digit9": 57, + "Digit0": 48, +} + + +def convert_to_win_keycodes(ctrl_input: CtrlInput) -> set[int]: + """Convert W3C event.code strings to Windows Virtual Keycodes. + + Use this in pipelines that need Windows keycodes (e.g., world_engine). 
+ + Args: + ctrl_input: CtrlInput with W3C event.code strings + + Returns: + Set of Windows Virtual Keycode integers + """ + return {W3C_TO_WIN[code] for code in ctrl_input.button if code in W3C_TO_WIN} diff --git a/src/scope/core/pipelines/controller_viz/__init__.py b/src/scope/core/pipelines/controller_viz/__init__.py new file mode 100644 index 00000000..aa1ad9b9 --- /dev/null +++ b/src/scope/core/pipelines/controller_viz/__init__.py @@ -0,0 +1,6 @@ +"""Controller Visualizer pipeline for testing WASD and mouse input.""" + +from .pipeline import ControllerVisualizerPipeline +from .schema import ControllerVisualizerConfig + +__all__ = ["ControllerVisualizerPipeline", "ControllerVisualizerConfig"] diff --git a/src/scope/core/pipelines/controller_viz/pipeline.py b/src/scope/core/pipelines/controller_viz/pipeline.py new file mode 100644 index 00000000..26dddb98 --- /dev/null +++ b/src/scope/core/pipelines/controller_viz/pipeline.py @@ -0,0 +1,128 @@ +"""Controller Visualizer pipeline implementation. + +Displays 4 directional keys (WASD + arrows combined) in lower-left, +and a red dot tracking the accumulated mouse position across the canvas. +""" + +from typing import TYPE_CHECKING + +import torch + +from scope.core.pipelines.controller import CtrlInput + +from ..interface import Pipeline +from .schema import ControllerVisualizerConfig + +if TYPE_CHECKING: + from ..base_schema import BasePipelineConfig + + +class ControllerVisualizerPipeline(Pipeline): + """Displays 4 directional keys + a mouse position dot for debugging.""" + + @classmethod + def get_config_class(cls) -> type["BasePipelineConfig"]: + return ControllerVisualizerConfig + + def __init__( + self, + height: int = 512, + width: int = 512, + device: torch.device | None = None, + dtype: torch.dtype = torch.float32, + **kwargs, # Accept extra params from pipeline manager (loras, vae_type, etc.) 
+ ): + self.height = height + self.width = width + self.device = ( + device + if device is not None + else torch.device("cuda" if torch.cuda.is_available() else "cpu") + ) + self.dtype = dtype + + # Pre-allocate output buffer (T, H, W, C) - single frame + self._output = torch.zeros( + (1, height, width, 3), dtype=torch.float32, device=self.device + ) + + # 4 directional keys - each triggered by WASD or arrows + # (col, row) positions, (keys that trigger it) + self._directions = { + "up": ((1, 0), {"KeyW", "ArrowUp"}), + "left": ((0, 1), {"KeyA", "ArrowLeft"}), + "down": ((1, 1), {"KeyS", "ArrowDown"}), + "right": ((2, 1), {"KeyD", "ArrowRight"}), + } + + # Mouse cursor position (accumulates over time, starts at center) + self._cursor_x = width / 2.0 + self._cursor_y = height / 2.0 + + def __call__(self, **kwargs) -> torch.Tensor: + """Render controller input visualization. + + Args: + ctrl_input: CtrlInput with button set and mouse tuple + + Returns: + Tensor of shape (1, H, W, C) in [0, 1] range + """ + ctrl_input: CtrlInput = kwargs.get("ctrl_input") or CtrlInput() + + # Clear to dark background + self._output.fill_(0.1) + + # Draw 4 directional keys in lower-left + key_size = 30 + gap = 5 + margin = 20 + base_y = self.height - margin - key_size * 2 - gap + + for _direction, ((col, row), trigger_keys) in self._directions.items(): + x = margin + col * (key_size + gap) + y = base_y + row * (key_size + gap) + + # Check if any trigger key is pressed + is_pressed = bool(ctrl_input.button & trigger_keys) + + if is_pressed: + self._output[0, y : y + key_size, x : x + key_size, :] = 0.9 + else: + # Draw border only + border = 2 + self._output[0, y : y + border, x : x + key_size, :] = 0.3 + self._output[ + 0, y + key_size - border : y + key_size, x : x + key_size, : + ] = 0.3 + self._output[0, y : y + key_size, x : x + border, :] = 0.3 + self._output[ + 0, y : y + key_size, x + key_size - border : x + key_size, : + ] = 0.3 + + # Mouse indicator - red dot that tracks 
cursor position across full canvas + mouse_dx, mouse_dy = ctrl_input.mouse + + # Accumulate mouse deltas into cursor position + self._cursor_x += mouse_dx + self._cursor_y += mouse_dy + + # Clamp to canvas bounds (with margin for dot size) + dot_size = 5 + self._cursor_x = max(dot_size, min(self.width - dot_size, self._cursor_x)) + self._cursor_y = max(dot_size, min(self.height - dot_size, self._cursor_y)) + + dot_x = int(self._cursor_x) + dot_y = int(self._cursor_y) + + # Draw red dot + dot_size = 5 + y1 = max(0, dot_y - dot_size) + y2 = min(self.height, dot_y + dot_size) + x1 = max(0, dot_x - dot_size) + x2 = min(self.width, dot_x + dot_size) + self._output[0, y1:y2, x1:x2, 0] = 0.9 # Red + self._output[0, y1:y2, x1:x2, 1] = 0.1 + self._output[0, y1:y2, x1:x2, 2] = 0.1 + + return self._output.clamp(0, 1) diff --git a/src/scope/core/pipelines/controller_viz/schema.py b/src/scope/core/pipelines/controller_viz/schema.py new file mode 100644 index 00000000..c2ef9a58 --- /dev/null +++ b/src/scope/core/pipelines/controller_viz/schema.py @@ -0,0 +1,27 @@ +"""Schema for Controller Visualizer pipeline.""" + +from ..base_schema import BasePipelineConfig, CtrlInput, ModeDefaults + + +class ControllerVisualizerConfig(BasePipelineConfig): + """Configuration for the Controller Visualizer pipeline. + + This pipeline visualizes WASD keyboard and mouse inputs in real-time, + useful for testing and debugging the controller input system. + """ + + pipeline_id = "controller-viz" + pipeline_name = "Controller Visualizer" + pipeline_description = ( + "Visualizes WASD keyboard and mouse controller inputs in real-time. " + "Useful for testing the controller input system." 
+ ) + + # No prompts needed for visualization + supports_prompts = False + + # Text mode (no video input required) + modes = {"text": ModeDefaults(default=True)} + + # Controller input support - presence of this field enables controller input capture + ctrl_input: CtrlInput | None = None diff --git a/src/scope/core/pipelines/registry.py b/src/scope/core/pipelines/registry.py index cc4d0d8f..b0286e74 100644 --- a/src/scope/core/pipelines/registry.py +++ b/src/scope/core/pipelines/registry.py @@ -137,6 +137,11 @@ def _register_pipelines(): ".video_depth_anything.pipeline", "VideoDepthAnythingPipeline", ), + ( + "controller-viz", + ".controller_viz.pipeline", + "ControllerVisualizerPipeline", + ), ] # Try to import and register each pipeline diff --git a/src/scope/server/pipeline_processor.py b/src/scope/server/pipeline_processor.py index e362a6dd..a42e0ff9 100644 --- a/src/scope/server/pipeline_processor.py +++ b/src/scope/server/pipeline_processor.py @@ -9,6 +9,8 @@ import torch +from scope.core.pipelines.controller import parse_ctrl_input + from .pipeline_manager import PipelineNotAvailableException logger = logging.getLogger(__name__) @@ -337,6 +339,21 @@ def process_chunk(self): if "input_mode" in new_parameters: self._video_mode = new_parameters.get("input_mode") == "video" + # Accumulate ctrl_input: keys = latest, mouse = sum + if "ctrl_input" in new_parameters: + if "ctrl_input" in self.parameters: + existing = self.parameters["ctrl_input"] + new_ctrl = new_parameters["ctrl_input"] + new_parameters["ctrl_input"] = { + "button": new_ctrl.get("button", []), + "mouse": [ + existing.get("mouse", [0, 0])[0] + + new_ctrl.get("mouse", [0, 0])[0], + existing.get("mouse", [0, 0])[1] + + new_ctrl.get("mouse", [0, 0])[1], + ], + } + # Merge new parameters with existing ones self.parameters = {**self.parameters, **new_parameters} except queue.Empty: @@ -404,6 +421,13 @@ def process_chunk(self): if lora_scales is not None: call_params["lora_scales"] = lora_scales + # 
Extract ctrl_input, parse it, and reset mouse for next frame + if "ctrl_input" in self.parameters: + ctrl_data = self.parameters["ctrl_input"] + call_params["ctrl_input"] = parse_ctrl_input(ctrl_data) + # Reset mouse accumulator, keep key state + self.parameters["ctrl_input"]["mouse"] = [0.0, 0.0] + # Route video input based on VACE status # We do not support combining latent initialization and VACE conditioning if video_input is not None: