diff --git a/BOLT'S JOURNAL - PERFORMANCE LEARNINGS.md b/BOLT'S JOURNAL - PERFORMANCE LEARNINGS.md index 609417f..1390ba9 100644 --- a/BOLT'S JOURNAL - PERFORMANCE LEARNINGS.md +++ b/BOLT'S JOURNAL - PERFORMANCE LEARNINGS.md @@ -1,6 +1,9 @@ - Direct matrix array manipulation (`instanceMatrix.array`) bypasses expensive Object3D composition and matrix allocations, significantly improving rendering batcher update loops. ## 2024-04-09 - TSL and GC Performance Rules\n**Learning:** In Three.js, TSL math nodes are generally faster and preferred over updating uniforms via JS every frame for performance optimization. For Three.js InstancedMesh objects, colors must be updated via `.setColorAt()`. Modifying the material directly will incorrectly affect all instances. In Candy World, collision detection handled in JavaScript becomes a severe bottleneck at >500 entities. AssemblyScript/WASM handles 2000+ entities efficiently.\n**Action:** Use TSL math nodes instead of JS uniforms whenever possible. Always use `.setColorAt()` for InstancedMesh colors. Use WASM for heavy collision detection. +## 2024-04-10 - Direct InstancedMesh Updates (Batcher Sweep) +**Learning:** Using a dummy `THREE.Object3D` proxy inside a high-frequency loop to update an `InstancedMesh` via `setMatrixAt` causes significant internal allocation churn and a severe CPU bottleneck. +**Action:** Bypass the object proxy completely. Pre-allocate a scratch `THREE.Matrix4()`, compose the position, quaternion, and scale directly into it, and write it straight to the `InstancedMesh`'s flat `Float32Array` via `_scratchMatrix.toArray(this.mesh.instanceMatrix.array, i * 16)`. Set `this.mesh.instanceMatrix.needsUpdate = true` once, after the loop has finished, rather than on every iteration. 
## 2024-05-XX - Zero-Allocation Matrix Batching **Learning:** Calling `Object3D.updateMatrix()` and `mesh.setMatrixAt()` inside update loops or batch generation code causes significant CPU overhead and garbage collection (GC) spikes because they instantiate intermediate objects and allocate arrays under the hood. **Action:** For all `InstancedMesh` batchers, construct `Matrix4` locally using zero-allocation scratch variables (`_scratchMatrix.compose(pos, quat, scale)`) and copy the result directly to the underlying buffer memory using `_scratchMatrix.toArray(mesh.instanceMatrix.array, index * 16)`. Always follow up with `mesh.instanceMatrix.needsUpdate = true`. diff --git a/src/foliage/arpeggio-batcher.ts b/src/foliage/arpeggio-batcher.ts index 61f133a..326cec7 100644 --- a/src/foliage/arpeggio-batcher.ts +++ b/src/foliage/arpeggio-batcher.ts @@ -32,7 +32,6 @@ export class ArpeggioFernBatcher { mesh: THREE.InstancedMesh | null; // Merged mesh // Scratch - dummy: THREE.Object3D; _color: THREE.Color; // GLOBAL TSL Uniform @@ -51,7 +50,6 @@ export class ArpeggioFernBatcher { this.mesh = null; - this.dummy = new THREE.Object3D(); this._color = new THREE.Color(); // Initialize global uniform @@ -311,7 +309,6 @@ export class ArpeggioFernBatcher { this.logicFerns.push(dummy); // Setup Instance (Only ONE matrix per fern now!) - this.dummy.position.copy(dummy.position); // No extra Y offset needed, baked into geometry logic relative to pivot // Base was offset 0.25, Frond 0.4. // But original register() added 0.25 * scale to base, and 0.4 * scale to frond. @@ -330,12 +327,10 @@ export class ArpeggioFernBatcher { // So the merged geometry has its bottom at y=0 (actually base starts at 0, cone height 0.5 centered at 0.25). // So yes, pivot is at bottom. - // So I just need to copy dummy position/rotation/scale. 
- this.dummy.rotation.copy(dummy.rotation); - this.dummy.scale.setScalar(scale); - // ⚡ OPTIMIZATION: Eliminate CPU overhead and GC spikes from Matrix4 composition by writing directly to instanceMatrix.array - _scratchMatrix.compose(this.dummy.position, this.dummy.quaternion, this.dummy.scale); + // Compose directly from the logic object's properties without using a proxy THREE.Object3D + dummy.scale.setScalar(scale); + _scratchMatrix.compose(dummy.position, dummy.quaternion, dummy.scale); _scratchMatrix.toArray(this.mesh!.instanceMatrix.array, i * 16); // Color @@ -351,13 +346,9 @@ export class ArpeggioFernBatcher { updateInstance(index, dummy) { if (!this.initialized) return; - // Simple Matrix Update (1 per fern!) - this.dummy.position.copy(dummy.position); - this.dummy.rotation.copy(dummy.rotation); - this.dummy.scale.copy(dummy.scale); - // ⚡ OPTIMIZATION: Eliminate CPU overhead and GC spikes from Matrix4 composition by writing directly to instanceMatrix.array - _scratchMatrix.compose(this.dummy.position, this.dummy.quaternion, this.dummy.scale); + // Compose directly from the logic object's properties without using a proxy THREE.Object3D + _scratchMatrix.compose(dummy.position, dummy.quaternion, dummy.scale); _scratchMatrix.toArray(this.mesh!.instanceMatrix.array, index * 16); this.mesh!.instanceMatrix.needsUpdate = true; diff --git a/src/foliage/portamento-batcher.ts b/src/foliage/portamento-batcher.ts index 7d7025f..079c2ed 100644 --- a/src/foliage/portamento-batcher.ts +++ b/src/foliage/portamento-batcher.ts @@ -35,7 +35,6 @@ export class PortamentoPineBatcher { bendAttribute: THREE.InstancedBufferAttribute | null = null; // scratch - dummy = new THREE.Object3D(); _color = new THREE.Color(); init() { @@ -168,13 +167,9 @@ export class PortamentoPineBatcher { dummy.userData.bendFactor = 0; this.logicPines[i] = dummy; - // Apply initial transform - this.dummy.position.copy(dummy.position); - this.dummy.quaternion.copy(dummy.quaternion); - 
this.dummy.scale.copy(dummy.scale); - // ⚡ OPTIMIZATION: Eliminate CPU overhead and GC spikes from Matrix4 composition by writing directly to instanceMatrix.array - _scratchMatrix.compose(this.dummy.position, this.dummy.quaternion, this.dummy.scale); + // Compose directly from the logic object's properties without using a proxy THREE.Object3D + _scratchMatrix.compose(dummy.position, dummy.quaternion, dummy.scale); _scratchMatrix.toArray(this.trunkMesh!.instanceMatrix.array, i * 16); _scratchMatrix.toArray(this.needleMesh!.instanceMatrix.array, i * 16); @@ -191,12 +186,10 @@ export class PortamentoPineBatcher { updateInstance(idx: number, dummy: THREE.Object3D) { if (!this.initialized) return; - this.dummy.position.copy(dummy.position); - this.dummy.quaternion.copy(dummy.quaternion); - this.dummy.scale.copy(dummy.scale); // ⚡ OPTIMIZATION: Eliminate CPU overhead and GC spikes from Matrix4 composition by writing directly to instanceMatrix.array - _scratchMatrix.compose(this.dummy.position, this.dummy.quaternion, this.dummy.scale); + // Compose directly from the logic object's properties without using a proxy THREE.Object3D + _scratchMatrix.compose(dummy.position, dummy.quaternion, dummy.scale); _scratchMatrix.toArray(this.trunkMesh!.instanceMatrix.array, idx * 16); _scratchMatrix.toArray(this.needleMesh!.instanceMatrix.array, idx * 16);