From f886ac03c147ef2ef0856cb93600b5dee21bfd0a Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Sat, 11 Apr 2026 14:03:59 +0000 Subject: [PATCH] feat: custom sample slicing UI Add a waveform view to `SamplerPanel` that allows users to manually add, move, and remove transient markers for slicing a custom WAV file. This includes updates to `WaveformDisplay` to handle mouse events, `SamplerPanel` to sync the state, and `useAudioEngine` to track the alignment objects via `setAlignment`. Co-authored-by: ford442 <9397845+ford442@users.noreply.github.com> --- agent_plan.md | 4 +- src/components/SamplerPanel.tsx | 22 ++- src/components/WaveformDisplay.tsx | 168 +++++++++++++++++++++- src/hooks/audioEngine/sampleManagement.ts | 10 ++ src/hooks/useAudioEngine.ts | 2 + src/types.ts | 1 + 6 files changed, 201 insertions(+), 6 deletions(-) diff --git a/agent_plan.md b/agent_plan.md index 5a58ea0c..98fa65d1 100644 --- a/agent_plan.md +++ b/agent_plan.md @@ -39,7 +39,7 @@ - [x] **Glissando/Portamento Curves:** Allow users to draw custom pitch curves or select between Linear and Exponential glide types between steps. (Implemented Exponential Glide in `SingingVoice.ts`!) - [x] **Per-Step Breath Intensity:** Allow sequence steps to override global breathiness for rhythmic breathing and whisper effects. (Implemented in `useAudioEngine.ts`!) -- [ ] **Custom Sample Slicing UI:** Add a waveform view to `SamplerPanel` that allows users to manually add, move, and remove transient markers for slicing a custom WAV file instead of just auto-slicing by phoneme. +- [x] **Custom Sample Slicing UI:** Add a waveform view to `SamplerPanel` that allows users to manually add, move, and remove transient markers for slicing a custom WAV file instead of just auto-slicing by phoneme. ### Domain C: Accessibility & Mobile - [x] **Touch Targets:** Audit `Sequencer.tsx` click listeners to ensure mobile drag-to-create works smoothly. 
@@ -74,11 +74,13 @@ * [x] **Idea:** "Dynamic Reverb" - Allow users to draw automation curves for reverb send per step. (Implemented!) * [x] **Idea:** "Global Saturation / Tape Warmth" - Add a master channel saturation unit to glue the mix together. (Implemented via WaveShaperNode!) * **Idea:** "AI Auto-Mix Assistant" - Automatically adjusts levels, panning, and EQ based on track content to maintain a balanced mix. +* **Idea:** "Real-time Convolution Reverb for Vocal Spaces" - Enhance the dynamic reverb by allowing users to select impulse response types. * **Idea:** "Per-Step Breath Intensity" - Allow sequence steps to override global breathiness for rhythmic breathing and whisper effects. (Implemented!) --- ## 📜 Changelog +* [2026-06-21] - Implemented Custom Sample Slicing UI: Enhanced `WaveformDisplay.tsx` to support interactive mousedown, drag, and double-click events, allowing users to manually slice custom WAV files directly on the canvas. Connected to the AudioEngine via `setAlignment`. * [2026-06-20] - Implemented Glissando/Portamento Curves & Per-Step Breath Intensity: Added `slideType` parameter (Linear/Exponential) to allow musical variations of pitch glides in TTS, and allowed individual steps to override global breath noise via `breathIntensity`. Added Custom Sample Slicing UI to Active Backlog. * [2026-06-19] - Implemented Global Saturation: Added a master channel `WaveShaperNode` with a variable distortion curve mapped to a "Warmth" (Saturation) slider in the top utility UI. Routed the entire master mix through it to add glue and presence. Added new idea: "AI Auto-Mix Assistant". * [2026-06-18] - Implemented Dynamic Reverb: Added a `ConvolverNode` hooked up to the master output with a generated exponential decay noise impulse response. Mapped `reverbSend` from individual sequence steps in `NoteSelector` to send audio from the TTS `SingingVoice` into the new global reverb bus. Added new idea: "Global Saturation / Tape Warmth". 
diff --git a/src/components/SamplerPanel.tsx b/src/components/SamplerPanel.tsx
index 26859863..13b045ce 100644
--- a/src/components/SamplerPanel.tsx
+++ b/src/components/SamplerPanel.tsx
@@ -495,9 +495,22 @@ const SamplerPanelComponent: React.FC = ({
   };

   // Get alignment
-  const alignment = (audioEngine?.getAlignment && activeBankIdx >= 0)
-    ? audioEngine.getAlignment(activeBankIdx)
-    : null;
+  const [currentAlignment, setCurrentAlignment] = useState(
+    (audioEngine?.getAlignment && activeBankIdx >= 0) ? audioEngine.getAlignment(activeBankIdx) : null
+  );
+
+  // Re-read the engine's alignment when the bank or engine changes. NOTE(review): an alignment stored later with neither dependency changing is not picked up - confirm.
+  useEffect(() => {
+    const alg = (audioEngine?.getAlignment && activeBankIdx >= 0) ? audioEngine.getAlignment(activeBankIdx) : null;
+    setCurrentAlignment(alg);
+  }, [activeBankIdx, audioEngine]);
+
+  const handleAlignmentChange = useCallback((newAlignment: typeof currentAlignment) => { // typed from the state it updates instead of `any`
+    setCurrentAlignment(newAlignment); // keep local state in step with the edit
+    if (audioEngine?.setAlignment && activeBankIdx >= 0) {
+      audioEngine.setAlignment(activeBankIdx, newAlignment); // hand the edited alignment to the engine for the active bank
+    }
+  }, [audioEngine, activeBankIdx]);

   return (
= ({ {/* 1. Waveform Visualization */} {/* Multisample Generator Progress */}
diff --git a/src/components/WaveformDisplay.tsx b/src/components/WaveformDisplay.tsx
index e7791027..c0a82ca2 100644
--- a/src/components/WaveformDisplay.tsx
+++ b/src/components/WaveformDisplay.tsx
@@ -5,13 +5,18 @@ interface WaveformDisplayProps {
   buffer: AudioBuffer | null;
   alignment: AlignmentResult | null;
   sliceHighlightRef: React.MutableRefObject<((slice: number) => void) | null>;
+  onAlignmentChange?: (alignment: AlignmentResult) => void;
 }

-export const WaveformDisplay: React.FC = ({ buffer, alignment, sliceHighlightRef }) => {
+export const WaveformDisplay: React.FC = ({ buffer, alignment, sliceHighlightRef, onAlignmentChange }) => {
   const canvasRef = useRef(null);
   const containerRef = useRef(null);
   const activeSliceRef = useRef(-1);

+  // State for drag interactions
+  const isDraggingRef = useRef(false);
+  const draggedMarkerIndexRef = useRef(-1);
+
   // Keep latest props in ref to access them inside the imperative callback without stale closures
   const propsRef = useRef({ buffer, alignment });
   useEffect(() => { propsRef.current = { buffer, alignment }; }, [buffer, alignment]);
@@ -147,6 +152,167 @@ export const WaveformDisplay: React.FC = ({ buffer, alignm

   }, [buffer, alignment, sliceHighlightRef]);

+  // Mouse interactions for manual slicing: mousedown on a marker starts a drag,
+  // mousedown elsewhere inserts a marker, double-click on a marker removes it.
+  // Every edit is reported through onAlignmentChange as a new alignment object;
+  // phoneme objects received via props are never mutated in place.
+  useEffect(() => {
+    const canvas = canvasRef.current;
+    if (!canvas) return;
+
+    const MARKER_HIT_PX = 5; // pick radius around a marker, in CSS pixels
+    const MIN_SLICE_SECS = 0.01; // minimum gap kept between adjacent markers
+
+    // Map the pointer x-position to a time in the buffer, clamped to the canvas.
+    const getMouseTime = (e: MouseEvent): number | null => {
+      const { buffer } = propsRef.current;
+      if (!buffer) return null;
+
+      const rect = canvas.getBoundingClientRect();
+      if (rect.width <= 0) return null; // not laid out: avoid dividing by zero
+      const x = Math.max(0, Math.min(e.clientX - rect.left, rect.width));
+      return (x / rect.width) * buffer.duration;
+    };
+
+    // Convert the pixel pick radius to seconds for the current canvas width.
+    const getThresholdSecs = (duration: number): number =>
+      (MARKER_HIT_PX / canvas.getBoundingClientRect().width) * duration;
+
+    // Index of the marker closest to `time` within the threshold, or -1.
+    const getMarkerIndexNearTime = (time: number, thresholdSecs: number): number => {
+      const { alignment } = propsRef.current;
+      if (!alignment) return -1;
+
+      let closestIdx = -1;
+      let minDiff = Infinity;
+
+      // Skip the first marker (index 0) since it's the start of the file
+      for (let i = 1; i < alignment.phonemes.length; i++) {
+        const diff = Math.abs(alignment.phonemes[i].start - time);
+        if (diff < minDiff && diff <= thresholdSecs) {
+          minDiff = diff;
+          closestIdx = i;
+        }
+      }
+      return closestIdx;
+    };
+
+    const stopDragging = () => {
+      isDraggingRef.current = false;
+      draggedMarkerIndexRef.current = -1;
+    };
+
+    const handleMouseDown = (e: MouseEvent) => {
+      const { alignment, buffer } = propsRef.current;
+      if (!onAlignmentChange || !alignment || !buffer) return;
+      if (e.button !== 0) return; // only the primary button edits markers
+
+      const time = getMouseTime(e);
+      if (time === null) return;
+
+      const markerIdx = getMarkerIndexNearTime(time, getThresholdSecs(buffer.duration));
+      if (markerIdx !== -1) {
+        // Clicked on a marker, start dragging
+        isDraggingRef.current = true;
+        draggedMarkerIndexRef.current = markerIdx;
+        e.preventDefault();
+        return;
+      }
+
+      // Clicked in empty space: split the slice that precedes the click.
+      let insertIdx = alignment.phonemes.findIndex(p => p.start > time);
+      if (insertIdx === -1) insertIdx = alignment.phonemes.length;
+      // No slice starts at or before the click (empty list, or a click ahead
+      // of the first slice), so there is nothing to split.
+      if (insertIdx === 0) return;
+
+      const prevPhoneme = alignment.phonemes[insertIdx - 1];
+      const nextPhoneme = alignment.phonemes[insertIdx];
+      // If the click lands past the end of the previous slice, end the new
+      // slice at the next marker (or the buffer end) so that start <= end.
+      const gapEnd = nextPhoneme ? nextPhoneme.start : buffer.duration;
+      const newPhoneme = {
+        phoneme: `S${alignment.phonemes.length + 1}`,
+        start: time,
+        end: prevPhoneme.end > time ? prevPhoneme.end : gapEnd,
+        isVowel: true
+      };
+
+      // Copy the array and the touched slice so the incoming objects stay intact.
+      const newPhonemes = [...alignment.phonemes];
+      newPhonemes[insertIdx - 1] = { ...prevPhoneme, end: time };
+      newPhonemes.splice(insertIdx, 0, newPhoneme);
+
+      onAlignmentChange({ ...alignment, phonemes: newPhonemes });
+    };
+
+    const handleMouseMove = (e: MouseEvent) => {
+      const { alignment, buffer } = propsRef.current;
+      if (!onAlignmentChange || !alignment || !buffer) return;
+
+      const time = getMouseTime(e);
+      if (time === null) return;
+
+      if (isDraggingRef.current && draggedMarkerIndexRef.current !== -1) {
+        const idx = draggedMarkerIndexRef.current;
+        // The alignment prop may have been replaced since the drag started;
+        // abandon the drag if its marker index is no longer valid.
+        if (idx < 1 || idx >= alignment.phonemes.length) {
+          stopDragging();
+          return;
+        }
+
+        // Constrain time between previous and next markers
+        const prevPhoneme = alignment.phonemes[idx - 1];
+        const minTime = prevPhoneme.start + MIN_SLICE_SECS;
+        const maxTime = idx < alignment.phonemes.length - 1
+          ? alignment.phonemes[idx + 1].start - MIN_SLICE_SECS
+          : buffer.duration - MIN_SLICE_SECS;
+        const clampedTime = Math.max(minTime, Math.min(time, maxTime));
+
+        const newPhonemes = [...alignment.phonemes];
+        newPhonemes[idx - 1] = { ...prevPhoneme, end: clampedTime };
+        newPhonemes[idx] = { ...alignment.phonemes[idx], start: clampedTime };
+
+        onAlignmentChange({ ...alignment, phonemes: newPhonemes });
+      } else {
+        // Update cursor
+        const markerIdx = getMarkerIndexNearTime(time, getThresholdSecs(buffer.duration));
+        canvas.style.cursor = markerIdx !== -1 ? 'col-resize' : 'crosshair';
+      }
+    };
+
+    const handleDoubleClick = (e: MouseEvent) => {
+      const { alignment, buffer } = propsRef.current;
+      if (!onAlignmentChange || !alignment || !buffer) return;
+
+      const time = getMouseTime(e);
+      if (time === null) return;
+
+      const markerIdx = getMarkerIndexNearTime(time, getThresholdSecs(buffer.duration));
+      if (markerIdx === -1) return;
+
+      // Remove the marker by merging its slice into the previous one.
+      const newPhonemes = [...alignment.phonemes];
+      newPhonemes[markerIdx - 1] = { ...alignment.phonemes[markerIdx - 1], end: alignment.phonemes[markerIdx].end };
+      newPhonemes.splice(markerIdx, 1);
+
+      onAlignmentChange({ ...alignment, phonemes: newPhonemes });
+    };
+
+    canvas.addEventListener('mousedown', handleMouseDown);
+    window.addEventListener('mousemove', handleMouseMove);
+    window.addEventListener('mouseup', stopDragging);
+    canvas.addEventListener('dblclick', handleDoubleClick);
+
+    return () => {
+      canvas.removeEventListener('mousedown', handleMouseDown);
+      window.removeEventListener('mousemove', handleMouseMove);
+      window.removeEventListener('mouseup', stopDragging);
+      canvas.removeEventListener('dblclick', handleDoubleClick);
+    };
+  }, [onAlignmentChange]);
+
   const label = !buffer
     ? "Waveform visualization: No sample loaded"
     : `Waveform visualization: Sample loaded${alignment ? " with phoneme alignment" : ""}`;
diff --git a/src/hooks/audioEngine/sampleManagement.ts b/src/hooks/audioEngine/sampleManagement.ts
index c31a917f..d0d3dac5 100644
--- a/src/hooks/audioEngine/sampleManagement.ts
+++ b/src/hooks/audioEngine/sampleManagement.ts
@@ -107,12 +107,22 @@ export function createSampleLibraryControls(refs: SampleManagementRefs) {
     return refs.vocalAlignmentsRef.current.get(bankName) || null;
   };

+  const setAlignment = (bankIndex: number, alignment: AlignmentResult | null) => {
+    const bankName = `bank_${bankIndex}`;
+    if (alignment) {
+      refs.vocalAlignmentsRef.current.set(bankName, alignment);
+    } else {
+      refs.vocalAlignmentsRef.current.delete(bankName);
+    }
+  };
+
   return {
     loadSampleToEngine,
     getMultisampleBank,
     isMultisampleReady,
     prepareVocal,
     getAlignment,
+    setAlignment,
   };
 }

diff --git a/src/hooks/useAudioEngine.ts b/src/hooks/useAudioEngine.ts
index 44284b42..871d64d7 100644
--- a/src/hooks/useAudioEngine.ts
+++ b/src/hooks/useAudioEngine.ts
@@ -252,6 +252,7 @@ export const useAudioEngine = (pyodide: unknown) => {
     isMultisampleReady,
     prepareVocal,
     getAlignment,
+    setAlignment,
   } = createSampleLibraryControls({
     loadedSampleBuffersRef,
     multisampleBanksRef,
@@ -786,6 +787,7 @@ export const useAudioEngine = (pyodide: unknown) => {
     processSpoon,
     prepareVocal,
     getAlignment,
+    setAlignment,
     setSustainMode,
     setSustainGrainSize,
     getMultisampleBank,
diff --git a/src/types.ts b/src/types.ts
index 0cd18b89..f8994e8c 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -227,6 +227,7 @@ export interface AudioEngine {
   processSpoon?: (sampleName: string, note: string) => Promise;
   prepareVocal?: (bankIndex: number, text: string) => Promise;
   getAlignment?: (bankIndex: number) => AlignmentResult | null;
+  setAlignment?: (bankIndex: number, alignment: AlignmentResult | null) => void;
   setSustainMode?: (mode: 'loop' | 'stretch' | 'wavetable') => void;
   setSustainGrainSize?: (size: number) => void;
   playSinging?: (buffer: AudioBuffer, targetNote: string, duration: number, sourceNote?: string) => void;