From 592890370cc54a8d0ed06c608d09a6629f06fb52 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 13 Mar 2026 15:06:49 +0100 Subject: [PATCH 01/42] Keep master matching strategy, add packed sparse v6 with v5 compatibility --- AGENTS.md | 60 +++++ src/SerumData.cpp | 25 +- src/sparse-vector.h | 563 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 613 insertions(+), 35 deletions(-) create mode 100644 AGENTS.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..5650239 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,60 @@ +# AGENTS.md + +## Purpose +This document summarizes `libserum` runtime flow, cROMc persistence, and compatibility rules. + +Maintenance rule: behavior, format, or API changes must update this file in the same change. + +## Core files +- `src/serum-decode.cpp`: runtime load / identify / colorize pipeline. +- `src/SerumData.h/.cpp`: model + cROMc serialization. +- `src/SceneGenerator.h/.cpp`: PUP CSV scene parsing/generation. +- `src/sparse-vector.h`: sparse storage and payload serialization. + +## Load flow +Entry: `Serum_Load(altcolorpath, romname, flags)`. + +1. Reset runtime state (`Serum_free`). +2. Detect optional `*.pup.csv`. +3. Prefer `*.cROMc` unless `skip-cromc.txt` exists. +4. Fallback to `*.cROM` / `*.cRZ`. +5. Parse CSV scenes when present. +6. Build/refresh frame lookup vectors after final scene data is known. + +## Identification and colorization strategy +- Keep master strategy/semantics for frame matching and colorization. +- Normal matching excludes scene frames. +- Scene and normal trackers remain independent. +- No behavior change here is part of this memory-optimization step. + +## cROMc format +Current concentrate version: **6**. + +### v6 sparse-vector payload layout +- Sparse vectors are serialized in packed form: + - `packedIds` + - `packedOffsets` + - `packedSizes` + - `packedBlob` +- Packed payloads are deduplicated by content during packing. 
+- Optional binary bit-packing is supported for boolean-like `uint8_t` vectors. +- Runtime can still modify vectors via `set()`; packed storage is restored to mutable map form only when needed. + +### v5 compatibility +- v5 cROMc files remain loadable. +- v5 sparse-vector legacy layout is deserialized via a load-time legacy flag and converted to packed runtime representation. +- Backward compatibility is required only for v5. +- Compatibility between unreleased v6 development snapshots is not required. + +## Compression policy notes +- `dyna4cols_v2` and `dyna4cols_v2_extra` are LZ4-compressed sparse vectors. +- `backgroundmask` and `backgroundmask_extra` use binary bit-packing plus LZ4. +- Sprite vectors with known sensitivity (`spritedescriptionso`, `spritedescriptionsc`, `spriteoriginal`, `spritemask_extra`) remain uncompressed/unbitpacked. + +## Validation checklist +1. `cmake --build build -j4` +2. Load tests: +- v5 cROMc +- v6 cROMc +- cROM/cRZ with and without CSV +3. Verify no functional change in master matching/colorization behavior. 
diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 70b4ac1..b0fd430 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -22,8 +22,8 @@ SerumData::SerumData() dynamasks(255, false, true), dynamasks_extra(255, false, true), dyna4cols(0), - dyna4cols_v2(0), - dyna4cols_v2_extra(0), + dyna4cols_v2(0, false, true), + dyna4cols_v2_extra(0, false, true), framesprites(255), spritedescriptionso(0), spritedescriptionsc(0), @@ -49,8 +49,8 @@ SerumData::SerumData() backgroundframes_v2_extra(0, false, true), backgroundIDs(0xffff), backgroundBB(0), - backgroundmask(0, false, true), - backgroundmask_extra(0, false, true), + backgroundmask(0, false, true, true, 0, 1), + backgroundmask_extra(0, false, true, true, 0, 1), dynashadowsdir(0), dynashadowscol(0), dynashadowsdir_extra(0), @@ -269,6 +269,14 @@ bool SerumData::LoadFromFile(const char *filename, const uint8_t flags) { // Create a custom stream that decompresses on the fly DecompressingIStream decompStream(fp, compressedSize, originalSize); + struct LegacyLoadFlagGuard { + explicit LegacyLoadFlagGuard(bool legacy) { + sparse_vector_serialization::SetLegacyLoadExpected(legacy); + } + ~LegacyLoadFlagGuard() { + sparse_vector_serialization::SetLegacyLoadExpected(false); + } + } legacyLoadGuard(concentrateFileVersion <= 5); // Deserialize directly from the decompressing stream { @@ -350,6 +358,15 @@ bool SerumData::LoadFromBuffer(const uint8_t *data, size_t size, } std::istringstream iss(decompressed, std::ios::binary); + struct LegacyLoadFlagGuard { + explicit LegacyLoadFlagGuard(bool legacy) { + sparse_vector_serialization::SetLegacyLoadExpected(legacy); + } + ~LegacyLoadFlagGuard() { + sparse_vector_serialization::SetLegacyLoadExpected(false); + } + } legacyLoadGuard(concentrateFileVersion <= 5); + { cereal::PortableBinaryInputArchive archive(iss); archive(*this); diff --git a/src/sparse-vector.h b/src/sparse-vector.h index aa56a79..cdba923 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -1,13 +1,31 
@@ #pragma once +#include #include #include #include #include +#include +#include #include +#include #include #include "LZ4Stream.h" + +namespace sparse_vector_serialization { +inline bool &LegacyLoadExpectedFlag() { + static bool flag = false; + return flag; +} + +inline void SetLegacyLoadExpected(bool expected) { + LegacyLoadExpectedFlag() = expected; +} + +inline bool IsLegacyLoadExpected() { return LegacyLoadExpectedFlag(); } +} // namespace sparse_vector_serialization + template class SparseVector { static_assert( @@ -23,16 +41,305 @@ class SparseVector { std::vector decompBuffer; bool useIndex; bool useCompression; + bool useBinaryBitPacking; + T bitPackFalseValue; + T bitPackTrueValue; + mutable uint32_t lastPayloadId = UINT32_MAX; + mutable const uint8_t *lastPayloadPtr = nullptr; + mutable uint32_t lastPayloadSize = 0; mutable uint32_t lastAccessedId = UINT32_MAX; + mutable uint32_t secondAccessedId = UINT32_MAX; mutable std::vector lastDecompressed; + mutable std::vector secondDecompressed; + mutable std::vector decodeScratch; + std::vector packedIds; + std::vector packedOffsets; + std::vector packedSizes; + std::vector packedBlob; + mutable std::unordered_map packedIndexById; + mutable std::vector packedDenseIndexById; + + static constexpr uint8_t kBitPackedMagic = 0xB1; + + size_t rawByteSize() const { return elementSize * sizeof(T); } + + size_t bitPackedByteSize() const { return 1 + ((elementSize + 7) / 8); } + + bool isBitPackedPayload(const uint8_t *payload, size_t size) const { + if (!useBinaryBitPacking || !payload) { + return false; + } + if (!std::is_same::value) { + return false; + } + return size == bitPackedByteSize() && payload[0] == kBitPackedMagic; + } + + void encodeBitPacked(const T *values, std::vector &encoded) const { + if (!useBinaryBitPacking) { + encoded.clear(); + return; + } + if (!std::is_same::value) { + throw std::runtime_error( + "Binary bit packing is only supported for uint8_t"); + } + + encoded.assign(bitPackedByteSize(), 
0); + encoded[0] = kBitPackedMagic; + for (size_t i = 0; i < elementSize; ++i) { + if (values[i] != bitPackFalseValue) { + encoded[1 + (i / 8)] |= (1u << (i % 8)); + } + } + } + + T *decodeBitPackedAndCache(uint32_t elementId, const uint8_t *payload) { + if (lastAccessedId != UINT32_MAX && lastAccessedId != elementId && + !lastDecompressed.empty()) { + secondAccessedId = lastAccessedId; + secondDecompressed.swap(lastDecompressed); + } + if (lastDecompressed.size() < elementSize) { + lastDecompressed.resize(elementSize); + } + + for (size_t i = 0; i < elementSize; ++i) { + const bool isSet = (payload[1 + (i / 8)] & (1u << (i % 8))) != 0; + lastDecompressed[i] = isSet ? bitPackTrueValue : bitPackFalseValue; + } + + lastAccessedId = elementId; + return lastDecompressed.data(); + } + + void clearPacked() { + packedIds.clear(); + packedOffsets.clear(); + packedSizes.clear(); + packedBlob.clear(); + packedIndexById.clear(); + packedDenseIndexById.clear(); + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; + } + + void ensurePackedIndex() const { + if (packedIds.empty()) { + packedIndexById.clear(); + packedDenseIndexById.clear(); + return; + } + if (packedIndexById.size() == packedIds.size() && + !packedDenseIndexById.empty()) { + return; + } + packedIndexById.clear(); + packedIndexById.reserve(packedIds.size()); + for (uint32_t i = 0; i < packedIds.size(); ++i) { + packedIndexById.emplace(packedIds[i], i); + } + + // Fast path for dense/small ID spaces (frame IDs, sprite IDs, etc). + // This avoids hash lookup overhead in operator[] hot loops. 
+ packedDenseIndexById.clear(); + const uint32_t maxPackedId = packedIds.back(); + if (maxPackedId <= 1000000 && + maxPackedId <= static_cast(packedIds.size() * 8)) { + packedDenseIndexById.assign(static_cast(maxPackedId) + 1, + UINT32_MAX); + for (uint32_t i = 0; i < packedIds.size(); ++i) { + packedDenseIndexById[packedIds[i]] = i; + } + } + } + + static uint64_t hashPayload(const uint8_t *bytes, size_t size) { + uint64_t hash = 1469598103934665603ull; // FNV-1a 64-bit offset basis + for (size_t i = 0; i < size; ++i) { + hash ^= static_cast(bytes[i]); + hash *= 1099511628211ull; // FNV prime + } + hash ^= static_cast(size); + hash *= 1099511628211ull; + return hash; + } + + void deduplicatePackedBlob() { + if (packedIds.empty() || packedOffsets.size() != packedIds.size() || + packedSizes.size() != packedIds.size()) { + return; + } + + std::vector dedupBlob; + dedupBlob.reserve(packedBlob.size()); + + // hash -> list of (offset,size) candidates in dedupBlob + std::unordered_map>> + dedupIndex; + dedupIndex.reserve(packedIds.size()); + + for (size_t i = 0; i < packedIds.size(); ++i) { + const uint32_t oldOffset = packedOffsets[i]; + const uint32_t size = packedSizes[i]; + if (oldOffset > packedBlob.size() || + size > packedBlob.size() - oldOffset) { + continue; + } + + const uint8_t *payload = packedBlob.data() + oldOffset; + const uint64_t payloadHash = hashPayload(payload, size); + + uint32_t foundOffset = UINT32_MAX; + auto it = dedupIndex.find(payloadHash); + if (it != dedupIndex.end()) { + for (const auto &candidate : it->second) { + const uint32_t candidateOffset = candidate.first; + const uint32_t candidateSize = candidate.second; + if (candidateSize != size) { + continue; + } + if (candidateOffset > dedupBlob.size() || + size > dedupBlob.size() - candidateOffset) { + continue; + } + if (memcmp(payload, dedupBlob.data() + candidateOffset, size) == 0) { + foundOffset = candidateOffset; + break; + } + } + } + + if (foundOffset == UINT32_MAX) { + foundOffset = 
static_cast(dedupBlob.size()); + dedupBlob.insert(dedupBlob.end(), payload, payload + size); + dedupIndex[payloadHash].push_back({foundOffset, size}); + } + + packedOffsets[i] = foundOffset; + packedSizes[i] = size; + } + + packedBlob = std::move(dedupBlob); + } + + void buildPackedFromData() { + if (useIndex || data.empty()) { + return; + } + + std::vector ids; + ids.reserve(data.size()); + for (const auto &entry : data) { + ids.push_back(entry.first); + } + std::sort(ids.begin(), ids.end()); + + clearPacked(); + packedIds.reserve(ids.size()); + packedOffsets.reserve(ids.size()); + packedSizes.reserve(ids.size()); + + uint32_t offset = 0; + for (const uint32_t id : ids) { + const auto it = data.find(id); + if (it == data.end()) { + continue; + } + + const auto &payload = it->second; + packedIds.push_back(id); + packedOffsets.push_back(offset); + packedSizes.push_back(static_cast(payload.size())); + packedBlob.insert(packedBlob.end(), payload.begin(), payload.end()); + offset += static_cast(payload.size()); + } + + data.clear(); + deduplicatePackedBlob(); + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; + } + + void restoreDataFromPacked() { + if (useIndex || packedIds.empty() || !data.empty()) { + return; + } + + for (size_t i = 0; i < packedIds.size(); ++i) { + if (i >= packedOffsets.size() || i >= packedSizes.size()) { + continue; + } + const uint32_t offset = packedOffsets[i]; + const uint32_t size = packedSizes[i]; + if (offset > packedBlob.size() || size > packedBlob.size() - offset) { + continue; + } + data[packedIds[i]].assign(packedBlob.begin() + offset, + packedBlob.begin() + offset + size); + } + } + + const uint8_t *getPackedPayload(uint32_t elementId, + uint32_t *payloadSize) const { + if (packedIds.empty()) { + return nullptr; + } + + ensurePackedIndex(); + uint32_t packedIndex = UINT32_MAX; + if (elementId < packedDenseIndexById.size()) { + packedIndex = packedDenseIndexById[elementId]; + if (packedIndex == UINT32_MAX) 
{ + return nullptr; + } + } else { + auto it = packedIndexById.find(elementId); + if (it == packedIndexById.end()) { + return nullptr; + } + packedIndex = it->second; + } + + const size_t idx = static_cast(packedIndex); + if (idx >= packedOffsets.size() || idx >= packedSizes.size()) { + return nullptr; + } + + const uint32_t offset = packedOffsets[idx]; + const uint32_t size = packedSizes[idx]; + if (offset > packedBlob.size() || size > packedBlob.size() - offset) { + return nullptr; + } + + *payloadSize = size; + return packedBlob.data() + offset; + } public: - SparseVector(T noDataSignature, bool index, bool compress = false) - : useIndex(index), useCompression(compress) { + SparseVector(T noDataSignature, bool index, bool compress = false, + bool binaryBitPack = false, T bitPackFalse = 0, + T bitPackTrue = 1) + : useIndex(index), + useCompression(compress), + useBinaryBitPacking(binaryBitPack), + bitPackFalseValue(bitPackFalse), + bitPackTrueValue(bitPackTrue) { + if (useBinaryBitPacking && !std::is_same::value) { + throw std::runtime_error( + "Binary bit packing is only supported for uint8_t SparseVector"); + } noData.resize(1, noDataSignature); } - SparseVector(T noDataSignature) : useIndex(false), useCompression(false) { + SparseVector(T noDataSignature) + : useIndex(false), + useCompression(false), + useBinaryBitPacking(false), + bitPackFalseValue(noDataSignature), + bitPackTrueValue(static_cast(1)) { noData.resize(1, noDataSignature); } @@ -41,36 +348,107 @@ class SparseVector { if (elementId >= index.size()) return noData.data(); return index[elementId].data(); } else { - auto it = data.find(elementId); - if (it == data.end()) return noData.data(); + if (useCompression && elementId == lastAccessedId && + !lastDecompressed.empty()) { + return lastDecompressed.data(); + } + if (useCompression && elementId == secondAccessedId && + !secondDecompressed.empty()) { + std::swap(lastAccessedId, secondAccessedId); + std::swap(lastDecompressed, secondDecompressed); + 
return lastDecompressed.data(); + } + + const uint8_t *payload = nullptr; + uint32_t payloadSize = 0; + + if (elementId == lastPayloadId && lastPayloadPtr != nullptr) { + payload = lastPayloadPtr; + payloadSize = lastPayloadSize; + } else { + if (!packedIds.empty()) { + payload = getPackedPayload(elementId, &payloadSize); + } else { + auto it = data.find(elementId); + if (it != data.end()) { + payload = it->second.data(); + payloadSize = static_cast(it->second.size()); + } + } + if (payload) { + lastPayloadId = elementId; + lastPayloadPtr = payload; + lastPayloadSize = payloadSize; + } else { + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; + } + } + + if (!payload) return noData.data(); if (useCompression) { - // Cache-Hit + // Cache hit only applies to decoded cache-backed payloads. if (elementId == lastAccessedId) { return lastDecompressed.data(); } - const auto &compressed = it->second; - - // ensure decompBuffer is large enough - if (lastDecompressed.size() < elementSize) { - lastDecompressed.resize(elementSize); + const size_t rawBytes = rawByteSize(); + if (decodeScratch.size() < rawBytes) { + decodeScratch.resize(rawBytes); } int decompressedSize = LZ4_decompress_safe( - reinterpret_cast(compressed.data()), - reinterpret_cast(lastDecompressed.data()), - static_cast(compressed.size()), - static_cast(elementSize * sizeof(T))); + reinterpret_cast(payload), + reinterpret_cast(decodeScratch.data()), + static_cast(payloadSize), static_cast(rawBytes)); + + if (decompressedSize < 0) { + // Backward compatibility: some payloads may be stored raw. + if (isBitPackedPayload(payload, payloadSize)) { + return decodeBitPackedAndCache(elementId, payload); + } - if (decompressedSize < 0) return noData.data(); + // Backward compatibility: older payloads may store raw bytes even if + // this vector now defaults to compression. 
+ if (payloadSize == rawBytes) { + return reinterpret_cast(const_cast(payload)); + } + return noData.data(); + } - // Cache-Update + if (isBitPackedPayload(decodeScratch.data(), + static_cast(decompressedSize))) { + return decodeBitPackedAndCache(elementId, decodeScratch.data()); + } + + if (static_cast(decompressedSize) != rawBytes) { + return noData.data(); + } + + if (lastAccessedId != UINT32_MAX && lastAccessedId != elementId && + !lastDecompressed.empty()) { + secondAccessedId = lastAccessedId; + secondDecompressed.swap(lastDecompressed); + } + if (lastDecompressed.size() < elementSize) { + lastDecompressed.resize(elementSize); + } + memcpy(lastDecompressed.data(), decodeScratch.data(), rawBytes); lastAccessedId = elementId; return lastDecompressed.data(); } - return reinterpret_cast(it->second.data()); + if (isBitPackedPayload(payload, payloadSize)) { + return decodeBitPackedAndCache(elementId, payload); + } + + if (payloadSize != rawByteSize()) { + return noData.data(); + } + + return reinterpret_cast(const_cast(payload)); } } @@ -78,6 +456,13 @@ class SparseVector { if (useIndex) return elementId < index.size() && !index[elementId].empty() && index[elementId][0] != noData[0]; + if (!packedIds.empty()) { + ensurePackedIndex(); + if (elementId < packedDenseIndexById.size()) { + return packedDenseIndexById[elementId] != UINT32_MAX; + } + return packedIndexById.find(elementId) != packedIndexById.end(); + } return data.find(elementId) != data.end(); } @@ -88,7 +473,12 @@ class SparseVector { throw std::runtime_error("set() must not be used for index"); } + restoreDataFromPacked(); elementSize = size; + clearPacked(); + lastAccessedId = UINT32_MAX; + lastDecompressed.clear(); + decodeScratch.clear(); if (decompBuffer.size() < (elementSize * sizeof(T))) { decompBuffer.resize(elementSize * sizeof(T)); @@ -100,15 +490,25 @@ class SparseVector { if (parent == nullptr || parent->hasData(elementId)) { if (memcmp(values, noData.data(), elementSize * sizeof(T)) != 0) { + 
std::vector bitPacked; + const uint8_t *storeBytes = reinterpret_cast(values); + size_t storeByteSize = elementSize * sizeof(T); + + if (useBinaryBitPacking) { + encodeBitPacked(values, bitPacked); + storeBytes = bitPacked.data(); + storeByteSize = bitPacked.size(); + } + if (useCompression) { const size_t maxCompressedSize = - LZ4_compressBound(static_cast(elementSize * sizeof(T))); + LZ4_compressBound(static_cast(storeByteSize)); std::vector compBuffer(maxCompressedSize); int compressedSize = - LZ4_compress_HC(reinterpret_cast(values), + LZ4_compress_HC(reinterpret_cast(storeBytes), reinterpret_cast(compBuffer.data()), - static_cast(elementSize * sizeof(T)), + static_cast(storeByteSize), static_cast(maxCompressedSize), #ifdef WRITE_CROMC LZ4HC_CLEVEL_MAX // max compression level @@ -123,9 +523,7 @@ class SparseVector { } } else { // Without compression, store directly. - const uint8_t *byteValues = reinterpret_cast(values); - data[elementId].assign(byteValues, - byteValues + elementSize * sizeof(T)); + data[elementId].assign(storeBytes, storeBytes + storeByteSize); } } } @@ -169,9 +567,16 @@ class SparseVector { void clear() { index.clear(); data.clear(); + clearPacked(); noData.resize(1); + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; lastAccessedId = UINT32_MAX; + secondAccessedId = UINT32_MAX; lastDecompressed.clear(); + secondDecompressed.clear(); + decodeScratch.clear(); } template @@ -181,34 +586,130 @@ class SparseVector { // is provided } + if (!packedIds.empty()) { + std::vector newIds; + std::vector newOffsets; + std::vector newSizes; + std::vector newBlob; + + newIds.reserve(packedIds.size()); + newOffsets.reserve(packedOffsets.size()); + newSizes.reserve(packedSizes.size()); + newBlob.reserve(packedBlob.size()); + + uint32_t offset = 0; + for (size_t i = 0; i < packedIds.size(); ++i) { + const uint32_t elementId = packedIds[i]; + if (!parent->hasData(elementId)) { + continue; + } + if (i >= packedOffsets.size() || i 
>= packedSizes.size()) { + continue; + } + + const uint32_t oldOffset = packedOffsets[i]; + const uint32_t size = packedSizes[i]; + if (oldOffset > packedBlob.size() || + size > packedBlob.size() - oldOffset) { + continue; + } + + newIds.push_back(elementId); + newOffsets.push_back(offset); + newSizes.push_back(size); + newBlob.insert(newBlob.end(), packedBlob.begin() + oldOffset, + packedBlob.begin() + oldOffset + size); + offset += size; + } + + packedIds = std::move(newIds); + packedOffsets = std::move(newOffsets); + packedSizes = std::move(newSizes); + packedBlob = std::move(newBlob); + deduplicatePackedBlob(); + packedIndexById.clear(); + packedDenseIndexById.clear(); + data.clear(); + + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; + lastAccessedId = UINT32_MAX; + secondAccessedId = UINT32_MAX; + lastDecompressed.clear(); + secondDecompressed.clear(); + decodeScratch.clear(); + return; + } + std::unordered_map> filteredData; for (const auto &entry : data) { uint32_t elementId = entry.first; if (parent->hasData(elementId)) { - filteredData[elementId] = std::move(data[elementId]); + filteredData[elementId] = entry.second; } } data = std::move(filteredData); // Clear cache + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; lastAccessedId = UINT32_MAX; + secondAccessedId = UINT32_MAX; lastDecompressed.clear(); + secondDecompressed.clear(); + decodeScratch.clear(); } friend class cereal::access; template void serialize(Archive &ar) { - ar(index, data, noData, elementSize, decompBuffer, useIndex, - useCompression); + if constexpr (Archive::is_saving::value) { + if (!useIndex && packedIds.empty() && !data.empty()) { + buildPackedFromData(); + } - if constexpr (Archive::is_loading::value) { - // Clear cache - lastAccessedId = UINT32_MAX; - lastDecompressed.clear(); + ar(index, noData, elementSize, useIndex, useCompression, + useBinaryBitPacking, bitPackFalseValue, bitPackTrueValue); + if (!useIndex) { + 
ar(packedIds, packedOffsets, packedSizes, packedBlob); + } + return; } + + if (sparse_vector_serialization::IsLegacyLoadExpected()) { + ar(index, data, noData, elementSize, decompBuffer, useIndex, + useCompression); + clearPacked(); + if (!useIndex && !data.empty()) { + buildPackedFromData(); + ensurePackedIndex(); + } + } else { + ar(index, noData, elementSize, useIndex, useCompression, + useBinaryBitPacking, bitPackFalseValue, bitPackTrueValue); + data.clear(); + decompBuffer.clear(); + clearPacked(); + if (!useIndex) { + ar(packedIds, packedOffsets, packedSizes, packedBlob); + ensurePackedIndex(); + } + } + + // Clear cache + lastPayloadId = UINT32_MAX; + lastPayloadPtr = nullptr; + lastPayloadSize = 0; + lastAccessedId = UINT32_MAX; + secondAccessedId = UINT32_MAX; + lastDecompressed.clear(); + secondDecompressed.clear(); + decodeScratch.clear(); } }; From 01a6a5a20310efff695fd3fcdc70f0f817a314f3 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 13 Mar 2026 16:16:28 +0100 Subject: [PATCH 02/42] adjusted AGENTS.md --- AGENTS.md | 224 ++++++++++++++++++++++++++++++++++++---------- src/SerumData.cpp | 3 + 2 files changed, 178 insertions(+), 49 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5650239..89b7296 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,60 +1,186 @@ # AGENTS.md ## Purpose -This document summarizes `libserum` runtime flow, cROMc persistence, and compatibility rules. +This document explains how `libserum` works end-to-end, with emphasis on runtime flow, scene handling, and cROMc persistence. -Maintenance rule: behavior, format, or API changes must update this file in the same change. +**Maintenance rule:** Any feature change, behavior change, data format change, or API/signature change in this repository **must** be reflected in this file in the same PR/commit. -## Core files -- `src/serum-decode.cpp`: runtime load / identify / colorize pipeline. -- `src/SerumData.h/.cpp`: model + cROMc serialization. 
-- `src/SceneGenerator.h/.cpp`: PUP CSV scene parsing/generation. -- `src/sparse-vector.h`: sparse storage and payload serialization. +## High-level architecture +Core files: +- `src/serum-decode.cpp`: Main runtime engine (load, identify, colorize, rotate, scene orchestration). +- `src/SerumData.h/.cpp`: In-memory model + cROMc serialization/deserialization. +- `src/SceneGenerator.h/.cpp`: PUP scene CSV parsing + runtime scene frame generation. +- `src/sparse-vector.h`: Sparse/compressed storage for frame and asset blocks. +- `src/serum.h`: Public constants/flags/structs. +- `src/serum-decode.h`: Public C API declarations. +- `src/serum-version.h`: Library and concentrate format versions. + +Main global runtime state (in `serum-decode.cpp`): +- Loaded data model: `g_serumData`. +- Current output: `mySerum` (`Serum_Frame_Struc`). +- Scene playback state: `sceneFrameCount`, `sceneCurrentFrame`, duration/flags/repeat, etc. +- Identification state: `lastfound`, `lastfound_normal`, `lastfound_scene`, CRC tracking. +- Scene lookup acceleration: + - `g_serumData.frameIsScene`: frame ID -> scene/non-scene marker. + - `g_serumData.sceneFramesBySignature`: `(mask,shape,hash)` -> matching scene frame IDs. + +## SparseVector storage and compression +`SparseVector` now supports both legacy map payloads and packed sparse blobs. + +Packed sparse payload format (used for v6 save): +- `packedIds` +- `packedOffsets` +- `packedSizes` +- `packedBlob` + +Behavior: +- Packed payloads are deduplicated by payload content at pack time. +- Optional binary bit-packing exists for boolean-like `uint8_t` payloads. +- Packed vectors can still be modified at runtime (`set()`); mutable map storage is restored lazily when needed. +- Runtime lookup uses dense index fast-path when IDs are dense. + +Vector policy currently used in `SerumData`: +- `dyna4cols_v2` and `dyna4cols_v2_extra` are LZ4-compressed sparse vectors. 
+- `backgroundmask` and `backgroundmask_extra` use binary bit-packing + LZ4 compression. +- `spritedescriptionso`, `spritedescriptionsc`, `spriteoriginal`, and `spritemask_extra` are intentionally not compressed/bitpacked due to known sprite path issues. ## Load flow -Entry: `Serum_Load(altcolorpath, romname, flags)`. - -1. Reset runtime state (`Serum_free`). -2. Detect optional `*.pup.csv`. -3. Prefer `*.cROMc` unless `skip-cromc.txt` exists. -4. Fallback to `*.cROM` / `*.cRZ`. -5. Parse CSV scenes when present. -6. Build/refresh frame lookup vectors after final scene data is known. - -## Identification and colorization strategy -- Keep master strategy/semantics for frame matching and colorization. -- Normal matching excludes scene frames. -- Scene and normal trackers remain independent. -- No behavior change here is part of this memory-optimization step. - -## cROMc format +Entry point: `Serum_Load(altcolorpath, romname, flags)`. + +1. Reset all runtime state via `Serum_free()`. +2. Look for optional `*.pup.csv`. +3. Prefer loading `*.cROMc` unless `skip-cromc.txt` exists. + - If `*.cROMc` starts with `CROM` magic, load via `SerumData::LoadFromFile`. + - Otherwise, try encrypted in-memory load (`vault::read` + `SerumData::LoadFromBuffer`). +4. If cROMc load fails or is absent, load `*.cROM`/`*.cRZ`. +5. If CSV exists and format is v2, parse scenes via `SceneGenerator::parseCSV`. +6. Set scene depth from color count when scenes are active. +7. Build or restore frame lookup acceleration: + - If loaded from cROMc v6 and no CSV update in this run: use stored lookup via `InitFrameLookupRuntimeStateFromStoredData()`. + - Otherwise: rebuild via `BuildFrameLookupVectors()`. + +Important: +- `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. +- CSV parsing after loading can invalidate stored scene lookup data and requires rebuild. + +## Frame identification +Main function: `Identify_Frame(uint8_t* frame, bool sceneFrameRequested)`. 
+ +Identification compares incoming original DMD frame against loaded frame definitions using: +- `compmaskID` (mask) +- `shapecompmode` (shape mode) +- `hashcodes` (precomputed CRC32 domain value) + +Behavior: +- Matching starts from the stream-specific last found ID and wraps. +- Stream split is enforced: + - normal search skips scene frames + - scene search skips normal frames + using `g_serumData.frameIsScene`. +- Scene requests use signature lookup in `sceneFramesBySignature` for the current `(mask,shape,hash)`. +- Legacy same-frame behavior (`IDENTIFY_SAME_FRAME`) is preserved with full-frame CRC check. + +Return values: +- `IDENTIFY_NO_FRAME` when no match. +- `IDENTIFY_SAME_FRAME` when same frame detected with same full CRC. +- matched frame ID otherwise. + +## Scene lookup vector build +Function: `BuildFrameLookupVectors()`. + +Goal: classify loaded frame IDs into scene/non-scene and build scene signature index. + +How it works: +1. Initialize `frameIsScene` with all zeros. +2. If scene generator is active (v2 scene mode), pre-generate all scene frames: + - iterate all scenes from `sceneGenerator->getSceneData()` + - iterate all groups (`frameGroups`, default 1) + - iterate all `frameIndex` values + - generate with `generateFrame(..., disableTimer=true)` +3. Build scene signatures in identification domain: + - collect unique `(mask,shape)` combinations from loaded frames + - for every generated scene frame and every unique `(mask,shape)`, compute CRC via `calc_crc32` +4. For each loaded frame ID, if `(mask,shape,hashcodes[id])` signature is in scene signature set: + - mark `frameIsScene[id] = 1` + - add to `sceneFramesBySignature[signature]`. +5. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. + +Log line: +- `Loaded X frames and Y rotation scene frames` + +## Colorization flow (v2) +Entry point: `Serum_ColorizeWithMetadatav2(frame, sceneFrameRequested=false)`. + +Main phases: +1. Identify frame ID via `Identify_Frame`. +2. 
Trigger / monochrome handling. +3. Scene trigger handling. +4. Render base frame via `Colorize_Framev2(...)`. +5. Optional background-scene overlay via second `Colorize_Framev2(..., applySceneBackground=true, ...)`. +6. Optional sprite overlays. +7. Configure color rotations and return next timer. + +Background placeholder policy: +- `Colorize_Framev2` supports `suppressFrameBackgroundImage`. +- When true, frame-level background images are treated as placeholders and existing output pixel is kept in masked background areas. +- This is used when a background scene is active so the scene background can continue while foreground content changes. + +## Scene playback and options +Scene data comes from CSV (`SceneGenerator`). + +Flags (from `serum.h`): +- `1`: black when scene finished +- `2`: show previous frame when scene finished +- `4`: run scene as background +- `8`: only dynamic content in foreground over background scene +- `16`: resume interrupted scene if retriggered within 8s + +`startImmediately` behavior: +- `startImmediately` is honored only for foreground scenes. +- For background scenes (`FLAG_SCENE_AS_BACKGROUND`), `startImmediately` is forced to `false`. + +## cROMc persistence Current concentrate version: **6**. -### v6 sparse-vector payload layout -- Sparse vectors are serialized in packed form: - - `packedIds` - - `packedOffsets` - - `packedSizes` - - `packedBlob` -- Packed payloads are deduplicated by content during packing. -- Optional binary bit-packing is supported for boolean-like `uint8_t` vectors. -- Runtime can still modify vectors via `set()`; packed storage is restored to mutable map form only when needed. - -### v5 compatibility -- v5 cROMc files remain loadable. -- v5 sparse-vector legacy layout is deserialized via a load-time legacy flag and converted to packed runtime representation. -- Backward compatibility is required only for v5. +Stored in v6: +- Full Serum model payload. +- Scene data (`SceneGenerator` scene vector). 
+- Scene lookup acceleration: + - `frameIsScene` + - `sceneFramesBySignature` +- Sparse vectors in packed sparse layout. + +Backward compatibility: +- v5 files are loadable. +- v5 sparse vectors are deserialized with legacy sparse-vector layout and converted to packed representation after load. +- For v5 loads, scene lookup vectors are rebuilt at startup. +- For v6 loads, stored lookup vectors are reused unless scene data changed in this load cycle (for example CSV update), in which case lookup vectors are rebuilt. + +v6 snapshot policy: - Compatibility between unreleased v6 development snapshots is not required. +- Compatibility to released v5 remains required. -## Compression policy notes -- `dyna4cols_v2` and `dyna4cols_v2_extra` are LZ4-compressed sparse vectors. -- `backgroundmask` and `backgroundmask_extra` use binary bit-packing plus LZ4. -- Sprite vectors with known sensitivity (`spritedescriptionso`, `spritedescriptionsc`, `spriteoriginal`, `spritemask_extra`) remain uncompressed/unbitpacked. - -## Validation checklist -1. `cmake --build build -j4` -2. Load tests: -- v5 cROMc -- v6 cROMc -- cROM/cRZ with and without CSV -3. Verify no functional change in master matching/colorization behavior. +## Logging +- Central callback configured by `Serum_SetLogCallback`. +- `serum-decode.cpp` and `SceneGenerator.cpp` both use callback-based `Log(...)`. +- Missing-file logs from `find_case_insensitive_file(...)` use normalized path joining. + +## Safety invariants +- `frameIsScene.size()` must equal `nframes` before identification. +- `sceneFramesBySignature` must correspond to current scene data and current loaded frame definitions. +- Any change to scene generation domain (`mask/shape/hash`), sparse-vector serialization layout, or cROMc schema requires updating this file. + +## How to validate after changes +Minimum validation: +1. Build: `cmake --build build -j4` +2. 
Load scenarios: + - cROM/cRZ without CSV + - cROM/cRZ with CSV + - cROMc v5 with CSV update + - cROMc v6 without CSV update +3. Verify log line: + - `Loaded frames and rotation scene frames` +4. Verify scene behaviors: + - background scene + - end-of-scene behavior flags + - resume flag `16` diff --git a/src/SerumData.cpp b/src/SerumData.cpp index b0fd430..e098002 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -4,6 +4,8 @@ #include "miniz/miniz.h" #include "serum-version.h" +bool is_real_machine(); + SerumData::SerumData() : SerumVersion(0), concentrateFileVersion(SERUM_CONCENTRATE_VERSION), @@ -61,6 +63,7 @@ SerumData::SerumData() dynaspritemasks_extra(255, false, true), sprshapemode(0) { sceneGenerator = new SceneGenerator(); + if (is_real_machine()) storage.assign(384u * 1024u * 1024u, 0xA5); } SerumData::~SerumData() {} From ced988530d237ce9df3e839acc236d1ef59599c6 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 13 Mar 2026 16:29:30 +0100 Subject: [PATCH 03/42] fixed build --- src/SerumData.cpp | 2 +- src/SerumData.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SerumData.cpp b/src/SerumData.cpp index e098002..c625fa3 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -63,7 +63,7 @@ SerumData::SerumData() dynaspritemasks_extra(255, false, true), sprshapemode(0) { sceneGenerator = new SceneGenerator(); - if (is_real_machine()) storage.assign(384u * 1024u * 1024u, 0xA5); + if (is_real_machine()) packingStorage.assign(384u * 1024u * 1024u, 0xA5); } SerumData::~SerumData() {} diff --git a/src/SerumData.h b/src/SerumData.h index e273846..3a2feec 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -73,6 +73,7 @@ class SerumData { uint32_t nsprites; uint16_t nbackgrounds; bool is256x64; + std::vector packingStorage; // Vector data SparseVector hashcodes; From 40709d24c476f298fbb8a6521c3c91d543b5f357 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 13 Mar 2026 18:35:00 +0100 Subject: [PATCH 04/42] 
adaptive bitpacking --- AGENTS.md | 28 +++++- src/SerumData.cpp | 209 +++++++++++++++++++++++++++++++++++++++++-- src/SerumData.h | 29 +++++- src/serum-decode.cpp | 107 ++++++++++++++++------ src/sparse-vector.h | 176 ++++++++++++++++++++++++++++-------- 5 files changed, 475 insertions(+), 74 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 89b7296..84359e6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -35,14 +35,31 @@ Packed sparse payload format (used for v6 save): Behavior: - Packed payloads are deduplicated by payload content at pack time. -- Optional binary bit-packing exists for boolean-like `uint8_t` payloads. +- Optional adaptive value packing exists for `uint8_t` payloads: + - per-payload mode is derived from actual max value and encoded in payload header + - 1-bit mode for values in `0..1` + - 2-bit mode for values in `0..3` + - 4-bit mode for values in `0..15` + - fallback to raw 8-bit payload otherwise +- Value packing preserves exact values for packed modes (no nonzero->1 normalization). - Packed vectors can still be modified at runtime (`set()`); mutable map storage is restored lazily when needed. - Runtime lookup uses dense index fast-path when IDs are dense. Vector policy currently used in `SerumData`: - `dyna4cols_v2` and `dyna4cols_v2_extra` are LZ4-compressed sparse vectors. -- `backgroundmask` and `backgroundmask_extra` use binary bit-packing + LZ4 compression. -- `spritedescriptionso`, `spritedescriptionsc`, `spriteoriginal`, and `spritemask_extra` are intentionally not compressed/bitpacked due known sprite path issues. +- `backgroundmask` and `backgroundmask_extra` use adaptive value packing + LZ4 compression. 
+- Sentinel-based vectors are normalized and packed with boolean sidecars: + - `spriteoriginal` + `spriteoriginal_opaque` + - `spritemask_extra` + `spritemask_extra_opaque` + - `spritedescriptionso` + `spritedescriptionso_opaque` + - `dynamasks` + `dynamasks_active` + - `dynamasks_extra` + `dynamasks_extra_active` + - `dynaspritemasks` + `dynaspritemasks_active` + - `dynaspritemasks_extra` + `dynaspritemasks_extra_active` +- Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. +- `compmasks` and `backgroundmask*` are already boolean-mask domain (`mask==0` + include / `>0` exclude) and therefore do not need separate transparency + sidecar vectors. ## Load flow Entry point: `Serum_Load(altcolorpath, romname, flags)`. @@ -58,6 +75,9 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. 7. Build or restore frame lookup acceleration: - If loaded from cROMc v6 and no CSV update in this run: use stored lookup via `InitFrameLookupRuntimeStateFromStoredData()`. - Otherwise: rebuild via `BuildFrameLookupVectors()`. +8. Build/normalize packing sidecars via `BuildPackingSidecarsAndNormalize()`. + - The normalization step is idempotent and guarded; repeated calls in the + same load/save cycle are no-ops once completed. Important: - `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. @@ -149,6 +169,8 @@ Stored in v6: - `frameIsScene` - `sceneFramesBySignature` - Sparse vectors in packed sparse layout. +- Normalized sentinel vectors plus sidecar flag vectors for transparency and + dynamic-zone activity. Backward compatibility: - v5 files are loadable. 
diff --git a/src/SerumData.cpp b/src/SerumData.cpp index c625fa3..82a2281 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -21,19 +21,22 @@ SerumData::SerumData() cframes(0, false, true), cframes_v2(0, false, true), cframes_v2_extra(0, false, true), - dynamasks(255, false, true), - dynamasks_extra(255, false, true), + dynamasks(0, false, true, true, 0, 1), + dynamasks_active(0, false, true, true, 0, 1), + dynamasks_extra(0, false, true, true, 0, 1), + dynamasks_extra_active(0, false, true, true, 0, 1), dyna4cols(0), dyna4cols_v2(0, false, true), dyna4cols_v2_extra(0, false, true), framesprites(255), spritedescriptionso(0), + spritedescriptionso_opaque(0, false, true, true, 0, 1), spritedescriptionsc(0), isextrasprite(0, true), - spriteoriginal(255), // Do not compress because GetSpriteSize seems to - // have an issue with it. - spritemask_extra(255), // Do not compress because GetSpriteSize seems to - // have an issue with it. + spriteoriginal(0, false, true, true, 0, 1), + spriteoriginal_opaque(0, false, true, true, 0, 1), + spritemask_extra(0, false, true, true, 0, 1), + spritemask_extra_opaque(0, false, true, true, 0, 1), spritecolored(0, false, true), spritecolored_extra(0, false, true), activeframes(1), @@ -59,8 +62,10 @@ SerumData::SerumData() dynashadowscol_extra(0), dynasprite4cols(0), dynasprite4cols_extra(0), - dynaspritemasks(255, false, true), - dynaspritemasks_extra(255, false, true), + dynaspritemasks(0, false, true, true, 0, 1), + dynaspritemasks_active(0, false, true, true, 0, 1), + dynaspritemasks_extra(0, false, true, true, 0, 1), + dynaspritemasks_extra_active(0, false, true, true, 0, 1), sprshapemode(0) { sceneGenerator = new SceneGenerator(); if (is_real_machine()) packingStorage.assign(384u * 1024u * 1024u, 0xA5); @@ -69,6 +74,7 @@ SerumData::SerumData() SerumData::~SerumData() {} void SerumData::Clear() { + m_packingSidecarsNormalized = false; hashcodes.clear(); shapecompmode.clear(); compmaskID.clear(); @@ -79,16 +85,21 @@ void 
SerumData::Clear() { cframes_v2_extra.clear(); cframes.clear(); dynamasks.clear(); + dynamasks_active.clear(); dynamasks_extra.clear(); + dynamasks_extra_active.clear(); dyna4cols.clear(); dyna4cols_v2.clear(); dyna4cols_v2_extra.clear(); framesprites.clear(); spritedescriptionso.clear(); + spritedescriptionso_opaque.clear(); spritedescriptionsc.clear(); isextrasprite.clear(); spriteoriginal.clear(); + spriteoriginal_opaque.clear(); spritemask_extra.clear(); + spritemask_extra_opaque.clear(); spritecolored.clear(); spritecolored_extra.clear(); activeframes.clear(); @@ -115,14 +126,194 @@ void SerumData::Clear() { dynasprite4cols.clear(); dynasprite4cols_extra.clear(); dynaspritemasks.clear(); + dynaspritemasks_active.clear(); dynaspritemasks_extra.clear(); + dynaspritemasks_extra_active.clear(); sprshapemode.clear(); frameIsScene.clear(); sceneFramesBySignature.clear(); } +void SerumData::BuildPackingSidecarsAndNormalize() { + if (m_packingSidecarsNormalized) { + return; + } + + const size_t spritePixels = MAX_SPRITE_WIDTH * MAX_SPRITE_HEIGHT; + const size_t spritePixelsV1 = MAX_SPRITE_SIZE * MAX_SPRITE_SIZE; + const size_t framePixels = static_cast(fwidth) * fheight; + const size_t extraFramePixels = + static_cast(fwidth_extra) * fheight_extra; + + std::vector normalized; + std::vector flags; + + normalized.resize(spritePixels); + flags.resize(spritePixels); + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + const bool hasSourceVector = spriteoriginal.hasData(spriteId); + const bool hasOpaqueVector = spriteoriginal_opaque.hasData(spriteId); + if (!hasSourceVector && !hasOpaqueVector) { + continue; + } + const uint8_t *source = spriteoriginal[spriteId]; + const uint8_t *opaqueSource = spriteoriginal_opaque[spriteId]; + for (size_t i = 0; i < spritePixels; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool opaque = + hasOpaqueVector ? (opaqueSource[i] > 0) : (value != 255); + flags[i] = opaque ? 
1 : 0; + normalized[i] = opaque ? value : 0; + } + spriteoriginal_opaque.set(spriteId, flags.data(), spritePixels); + spriteoriginal.set(spriteId, normalized.data(), spritePixels); + } + + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + if (isextrasprite[spriteId][0] == 0) { + continue; + } + const bool hasSourceVector = spritemask_extra.hasData(spriteId); + const bool hasOpaqueVector = spritemask_extra_opaque.hasData(spriteId); + if (!hasSourceVector && !hasOpaqueVector) { + continue; + } + const uint8_t *source = spritemask_extra[spriteId]; + const uint8_t *opaqueSource = spritemask_extra_opaque[spriteId]; + for (size_t i = 0; i < spritePixels; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool opaque = + hasOpaqueVector ? (opaqueSource[i] > 0) : (value != 255); + flags[i] = opaque ? 1 : 0; + normalized[i] = opaque ? value : 0; + } + spritemask_extra_opaque.set(spriteId, flags.data(), spritePixels, + &isextrasprite); + spritemask_extra.set(spriteId, normalized.data(), spritePixels, + &isextrasprite); + } + + normalized.resize(spritePixelsV1); + flags.resize(spritePixelsV1); + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + const bool hasSourceVector = spritedescriptionso.hasData(spriteId); + const bool hasOpaqueVector = spritedescriptionso_opaque.hasData(spriteId); + if (!hasSourceVector && !hasOpaqueVector) { + continue; + } + const uint8_t *source = spritedescriptionso[spriteId]; + const uint8_t *opaqueSource = spritedescriptionso_opaque[spriteId]; + for (size_t i = 0; i < spritePixelsV1; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool opaque = + hasOpaqueVector ? (opaqueSource[i] > 0) : (value != 255); + flags[i] = opaque ? 1 : 0; + normalized[i] = opaque ? 
value : 0; + } + spritedescriptionso_opaque.set(spriteId, flags.data(), spritePixelsV1); + spritedescriptionso.set(spriteId, normalized.data(), spritePixelsV1); + } + + normalized.resize(framePixels); + flags.resize(framePixels); + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + const bool hasSourceVector = dynamasks.hasData(frameId); + const bool hasActiveVector = dynamasks_active.hasData(frameId); + if (!hasSourceVector && !hasActiveVector) { + continue; + } + const uint8_t *source = dynamasks[frameId]; + const uint8_t *activeSource = dynamasks_active[frameId]; + for (size_t i = 0; i < framePixels; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool active = + hasActiveVector ? (activeSource[i] > 0) : (value != 255); + flags[i] = active ? 1 : 0; + normalized[i] = active ? value : 0; + } + dynamasks_active.set(frameId, flags.data(), framePixels); + dynamasks.set(frameId, normalized.data(), framePixels); + } + + if (extraFramePixels > 0) { + normalized.resize(extraFramePixels); + flags.resize(extraFramePixels); + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + if (isextraframe[frameId][0] == 0) { + continue; + } + const bool hasSourceVector = dynamasks_extra.hasData(frameId); + const bool hasActiveVector = dynamasks_extra_active.hasData(frameId); + if (!hasSourceVector && !hasActiveVector) { + continue; + } + const uint8_t *source = dynamasks_extra[frameId]; + const uint8_t *activeSource = dynamasks_extra_active[frameId]; + for (size_t i = 0; i < extraFramePixels; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool active = + hasActiveVector ? (activeSource[i] > 0) : (value != 255); + flags[i] = active ? 1 : 0; + normalized[i] = active ? 
value : 0; + } + dynamasks_extra_active.set(frameId, flags.data(), extraFramePixels, + &isextraframe); + dynamasks_extra.set(frameId, normalized.data(), extraFramePixels, + &isextraframe); + } + } + + normalized.resize(spritePixels); + flags.resize(spritePixels); + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + const bool hasSourceVector = dynaspritemasks.hasData(spriteId); + const bool hasActiveVector = dynaspritemasks_active.hasData(spriteId); + if (!hasSourceVector && !hasActiveVector) { + continue; + } + const uint8_t *source = dynaspritemasks[spriteId]; + const uint8_t *activeSource = dynaspritemasks_active[spriteId]; + for (size_t i = 0; i < spritePixels; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool active = + hasActiveVector ? (activeSource[i] > 0) : (value != 255); + flags[i] = active ? 1 : 0; + normalized[i] = active ? value : 0; + } + dynaspritemasks_active.set(spriteId, flags.data(), spritePixels); + dynaspritemasks.set(spriteId, normalized.data(), spritePixels); + } + + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + if (isextrasprite[spriteId][0] == 0) { + continue; + } + const bool hasSourceVector = dynaspritemasks_extra.hasData(spriteId); + const bool hasActiveVector = dynaspritemasks_extra_active.hasData(spriteId); + if (!hasSourceVector && !hasActiveVector) { + continue; + } + const uint8_t *source = dynaspritemasks_extra[spriteId]; + const uint8_t *activeSource = dynaspritemasks_extra_active[spriteId]; + for (size_t i = 0; i < spritePixels; ++i) { + const uint8_t value = hasSourceVector ? source[i] : 0; + const bool active = + hasActiveVector ? (activeSource[i] > 0) : (value != 255); + flags[i] = active ? 1 : 0; + normalized[i] = active ? 
value : 0; + } + dynaspritemasks_extra_active.set(spriteId, flags.data(), spritePixels, + &isextrasprite); + dynaspritemasks_extra.set(spriteId, normalized.data(), spritePixels, + &isextrasprite); + } + + m_packingSidecarsNormalized = true; +} + bool SerumData::SaveToFile(const char *filename) { try { + BuildPackingSidecarsAndNormalize(); Log("Writing %s", filename); // Serialize to memory buffer first std::ostringstream ss(std::ios::binary); @@ -183,6 +374,7 @@ bool SerumData::SaveToFile(const char *filename) { bool SerumData::LoadFromFile(const char *filename, const uint8_t flags) { m_loadFlags = flags; + m_packingSidecarsNormalized = false; FILE *fp; try { fp = fopen(filename, "rb"); @@ -303,6 +495,7 @@ bool SerumData::LoadFromFile(const char *filename, const uint8_t flags) { bool SerumData::LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags) { m_loadFlags = flags; + m_packingSidecarsNormalized = false; try { if (!data || size < (4 + sizeof(uint16_t) + sizeof(uint32_t))) { diff --git a/src/SerumData.h b/src/SerumData.h index 3a2feec..3b3ada9 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -60,6 +60,7 @@ class SerumData { bool SaveToFile(const char *filename); bool LoadFromFile(const char *filename, const uint8_t flags); bool LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags); + void BuildPackingSidecarsAndNormalize(); // Header data char rname[64]; @@ -88,16 +89,21 @@ class SerumData { SparseVector cframes_v2; SparseVector cframes_v2_extra; SparseVector dynamasks; + SparseVector dynamasks_active; SparseVector dynamasks_extra; + SparseVector dynamasks_extra_active; SparseVector dyna4cols; SparseVector dyna4cols_v2; SparseVector dyna4cols_v2_extra; SparseVector framesprites; SparseVector spritedescriptionso; + SparseVector spritedescriptionso_opaque; SparseVector spritedescriptionsc; SparseVector isextrasprite; SparseVector spriteoriginal; + SparseVector spriteoriginal_opaque; SparseVector spritemask_extra; + SparseVector 
spritemask_extra_opaque; SparseVector spritecolored; SparseVector spritecolored_extra; SparseVector activeframes; @@ -124,7 +130,9 @@ class SerumData { SparseVector dynasprite4cols; SparseVector dynasprite4cols_extra; SparseVector dynaspritemasks; + SparseVector dynaspritemasks_active; SparseVector dynaspritemasks_extra; + SparseVector dynaspritemasks_extra_active; SparseVector sprshapemode; std::vector frameIsScene; std::unordered_map> sceneFramesBySignature; @@ -138,6 +146,7 @@ class SerumData { const void *m_logUserData = nullptr; uint8_t m_loadFlags = 0; + bool m_packingSidecarsNormalized = false; friend class cereal::access; @@ -162,14 +171,27 @@ class SerumData { if constexpr (Archive::is_saving::value) { if (concentrateFileVersion >= 6) { - ar(frameIsScene, sceneFramesBySignature); + ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, + spritemask_extra_opaque, spritedescriptionso_opaque, + dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, + dynaspritemasks_extra_active); } } else { if (concentrateFileVersion >= 6) { - ar(frameIsScene, sceneFramesBySignature); + ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, + spritemask_extra_opaque, spritedescriptionso_opaque, + dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, + dynaspritemasks_extra_active); } else { frameIsScene.clear(); sceneFramesBySignature.clear(); + spriteoriginal_opaque.clear(); + spritemask_extra_opaque.clear(); + spritedescriptionso_opaque.clear(); + dynamasks_active.clear(); + dynamasks_extra_active.clear(); + dynaspritemasks_active.clear(); + dynaspritemasks_extra_active.clear(); } } @@ -187,8 +209,10 @@ class SerumData { cframes_v2_extra.setParent(&isextraframe); dynamasks_extra.setParent(&isextraframe); + dynamasks_extra_active.setParent(&isextraframe); dyna4cols_v2_extra.setParent(&isextraframe); spritemask_extra.setParent(&isextrasprite); + spritemask_extra_opaque.setParent(&isextrasprite); 
spritecolored_extra.setParent(&isextrasprite); colorrotations_v2_extra.setParent(&isextraframe); framespriteBB.setParent(&framesprites); @@ -199,6 +223,7 @@ class SerumData { dynashadowscol_extra.setParent(&isextraframe); dynasprite4cols_extra.setParent(&isextraframe); dynaspritemasks_extra.setParent(&isextraframe); + dynaspritemasks_extra_active.setParent(&isextrasprite); backgroundBB.setParent(&backgroundIDs); std::vector loadedScenes; diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 377ef88..93798a6 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -540,6 +540,8 @@ static Serum_Frame_Struc* Serum_LoadConcentratePrepared(const uint8_t flags) { return NULL; } + g_serumData.BuildPackingSidecarsAndNormalize(); + // Set requested frame types isoriginalrequested = false; isextrarequested = false; @@ -896,6 +898,8 @@ Serum_Frame_Struc* Serum_LoadFilev2(FILE* pfile, const uint8_t flags, g_serumData.sprshapemode.reserve(g_serumData.nsprites); } + g_serumData.BuildPackingSidecarsAndNormalize(); + fclose(pfile); mySerum.ntriggers = 0; @@ -1189,6 +1193,8 @@ Serum_Frame_Struc* Serum_LoadFilev1(const char* const filename, g_serumData.backgroundBB.readFromCRomFile(4, g_serumData.nframes, pfile, &g_serumData.backgroundIDs); } + + g_serumData.BuildPackingSidecarsAndNormalize(); fclose(pfile); // allocate memory for previous detected frame @@ -1567,14 +1573,66 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { return IDENTIFY_NO_FRAME; // we found no corresponding frame } +static inline bool IsSpriteOpaqueV1(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spritedescriptionso_opaque.hasData(spriteId)) { + return g_serumData.spritedescriptionso[spriteId][pixelIndex] != 255; + } + return g_serumData.spritedescriptionso_opaque[spriteId][pixelIndex] > 0; +} + +static inline bool IsSpriteOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spriteoriginal_opaque.hasData(spriteId)) { + return 
g_serumData.spriteoriginal[spriteId][pixelIndex] != 255; + } + return g_serumData.spriteoriginal_opaque[spriteId][pixelIndex] > 0; +} + +static inline bool IsSpriteExtraOpaqueV2(uint8_t spriteId, + uint32_t pixelIndex) { + if (!g_serumData.spritemask_extra_opaque.hasData(spriteId)) { + return g_serumData.spritemask_extra[spriteId][pixelIndex] != 255; + } + return g_serumData.spritemask_extra_opaque[spriteId][pixelIndex] > 0; +} + +static inline bool IsFrameDynaActive(uint32_t frameId, uint32_t pixelIndex) { + if (!g_serumData.dynamasks_active.hasData(frameId)) { + return g_serumData.dynamasks[frameId][pixelIndex] != 255; + } + return g_serumData.dynamasks_active[frameId][pixelIndex] > 0; +} + +static inline bool IsFrameExtraDynaActive(uint32_t frameId, + uint32_t pixelIndex) { + if (!g_serumData.dynamasks_extra_active.hasData(frameId)) { + return g_serumData.dynamasks_extra[frameId][pixelIndex] != 255; + } + return g_serumData.dynamasks_extra_active[frameId][pixelIndex] > 0; +} + +static inline bool IsSpriteDynaActive(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.dynaspritemasks_active.hasData(spriteId)) { + return g_serumData.dynaspritemasks[spriteId][pixelIndex] != 255; + } + return g_serumData.dynaspritemasks_active[spriteId][pixelIndex] > 0; +} + +static inline bool IsSpriteExtraDynaActive(uint8_t spriteId, + uint32_t pixelIndex) { + if (!g_serumData.dynaspritemasks_extra_active.hasData(spriteId)) { + return g_serumData.dynaspritemasks_extra[spriteId][pixelIndex] != 255; + } + return g_serumData.dynaspritemasks_extra_active[spriteId][pixelIndex] > 0; +} + void GetSpriteSize(uint8_t nospr, int* pswid, int* pshei, uint8_t* spriteData, - int sswid, int sshei) { + int sswid, int sshei, uint8_t* spriteOpaque) { *pswid = *pshei = 0; if (nospr >= g_serumData.nsprites) return; if (!spriteData) return; for (int tj = 0; tj < sshei; tj++) { for (int ti = 0; ti < sswid; ti++) { - if (spriteData[tj * sswid + ti] < 255) { + if (spriteOpaque[tj * sswid + ti] > 
0) { if (tj > *pshei) *pshei = tj; if (ti > *pswid) *pswid = ti; } @@ -1596,7 +1654,8 @@ bool Check_Spritesv1(uint8_t* Frame, uint32_t quelleframe, uint8_t qspr = g_serumData.framesprites[quelleframe][ti]; int spw, sph; GetSpriteSize(qspr, &spw, &sph, g_serumData.spritedescriptionso[qspr], - MAX_SPRITE_SIZE, MAX_SPRITE_SIZE); + MAX_SPRITE_SIZE, MAX_SPRITE_SIZE, + g_serumData.spritedescriptionso_opaque[qspr]); short minxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4]); short minyBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 1]); short maxxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 2]); @@ -1649,11 +1708,11 @@ bool Check_Spritesv1(uint8_t* Frame, uint32_t quelleframe, bool notthere = false; for (uint16_t tk = 0; tk < deth; tk++) { for (uint16_t tl = 0; tl < detw; tl++) { + const uint32_t spritePixelIndex = + (tk + dety) * MAX_SPRITE_SIZE + tl + detx; + if (!IsSpriteOpaqueV1(qspr, spritePixelIndex)) continue; uint8_t val = - g_serumData.spritedescriptionso[qspr][(tk + dety) * - MAX_SPRITE_SIZE + - tl + detx]; - if (val == 255) continue; + g_serumData.spritedescriptionso[qspr][spritePixelIndex]; if (val != Frame[(tk + offsy) * g_serumData.fwidth + tl + offsx]) { notthere = true; @@ -1740,7 +1799,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, } int spw, sph; GetSpriteSize(qspr, &spw, &sph, g_serumData.spriteoriginal[qspr], - MAX_SPRITE_WIDTH, MAX_SPRITE_HEIGHT); + MAX_SPRITE_WIDTH, MAX_SPRITE_HEIGHT, + g_serumData.spriteoriginal_opaque[qspr]); short minxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4]); short minyBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 1]); short maxxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 2]); @@ -1793,11 +1853,11 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, bool notthere = false; for (uint16_t tk = 0; tk < deth; tk++) { for (uint16_t tl = 0; tl < detw; tl++) { + const uint32_t spritePixelIndex = + (tk + dety) * 
MAX_SPRITE_WIDTH + tl + detx; + if (!IsSpriteOpaqueV2(qspr, spritePixelIndex)) continue; uint8_t val = - g_serumData - .spriteoriginal[qspr][(tk + dety) * MAX_SPRITE_WIDTH + - tl + detx]; - if (val == 255) continue; + g_serumData.spriteoriginal[qspr][spritePixelIndex]; if (val != Frame[(tk + offsy) * g_serumData.fwidth + tl + offsx]) { notthere = true; @@ -1874,7 +1934,7 @@ void Colorize_Framev1(uint8_t* frame, uint32_t IDfound) { .backgroundframes[g_serumData.backgroundIDs[IDfound][0]][tk]; else { uint8_t dynacouche = g_serumData.dynamasks[IDfound][tk]; - if (dynacouche == 255) + if (!IsFrameDynaActive(IDfound, tk)) mySerum.frame[tk] = g_serumData.cframes[IDfound][tk]; else mySerum.frame[tk] = @@ -2053,7 +2113,7 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } } else { uint8_t dynacouche = g_serumData.dynamasks[IDfound][tk]; - if (dynacouche == 255) { + if (!IsFrameDynaActive(IDfound, tk)) { if (isdynapix[tk] == 0) { if (blackOutStaticContent && (g_serumData.backgroundIDs[IDfound][0] < @@ -2150,7 +2210,7 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } } else { uint8_t dynacouche = g_serumData.dynamasks_extra[IDfound][tk]; - if (dynacouche == 255) { + if (!IsFrameExtraDynaActive(IDfound, tk)) { if (isdynapix[tk] == 0) { if (blackOutStaticContent && (g_serumData.backgroundIDs[IDfound][0] < @@ -2194,9 +2254,7 @@ void Colorize_Spritev1(uint8_t nosprite, uint16_t frx, uint16_t fry, uint16_t spx, uint16_t spy, uint16_t wid, uint16_t hei) { for (uint16_t tj = 0; tj < hei; tj++) { for (uint16_t ti = 0; ti < wid; ti++) { - if (g_serumData - .spritedescriptionso[nosprite][(tj + spy) * MAX_SPRITE_SIZE + ti + - spx] < 255) { + if (IsSpriteOpaqueV1(nosprite, (tj + spy) * MAX_SPRITE_SIZE + ti + spx)) { mySerum.frame[(fry + tj) * g_serumData.fwidth + frx + ti] = g_serumData .spritedescriptionsc[nosprite] @@ -2231,10 +2289,9 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, for (uint16_t ti = 0; ti < wid; ti++) { uint16_t tk = 
(fry + tj) * g_serumData.fwidth + frx + ti; uint32_t tl = (tj + spy) * MAX_SPRITE_WIDTH + ti + spx; - uint8_t spriteref = g_serumData.spriteoriginal[nosprite][tl]; - if (spriteref < 255) { + if (IsSpriteOpaqueV2(nosprite, tl)) { uint8_t dynacouche = g_serumData.dynaspritemasks[nosprite][tl]; - if (dynacouche == 255) { + if (!IsSpriteDynaActive(nosprite, tl)) { pfr[tk] = g_serumData.spritecolored[nosprite][tl]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], false)) @@ -2287,14 +2344,14 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, for (uint16_t tj = 0; tj < thei; tj++) { for (uint16_t ti = 0; ti < twid; ti++) { uint16_t tk = (tfry + tj) * g_serumData.fwidth_extra + tfrx + ti; - if (g_serumData - .spritemask_extra[nosprite][(tj + tspy) * MAX_SPRITE_WIDTH + - ti + tspx] < 255) { + if (IsSpriteExtraOpaqueV2(nosprite, + (tj + tspy) * MAX_SPRITE_WIDTH + ti + tspx)) { uint8_t dynacouche = g_serumData.dynaspritemasks_extra[nosprite] [(tj + tspy) * MAX_SPRITE_WIDTH + ti + tspx]; - if (dynacouche == 255) { + if (!IsSpriteExtraDynaActive( + nosprite, (tj + tspy) * MAX_SPRITE_WIDTH + ti + tspx)) { pfr[tk] = g_serumData.spritecolored_extra[nosprite] [(tj + tspy) * MAX_SPRITE_WIDTH + diff --git a/src/sparse-vector.h b/src/sparse-vector.h index cdba923..bd2e671 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -59,42 +60,113 @@ class SparseVector { mutable std::unordered_map packedIndexById; mutable std::vector packedDenseIndexById; - static constexpr uint8_t kBitPackedMagic = 0xB1; + static constexpr uint8_t kLegacyBitPackedMagic = 0xB1; + static constexpr uint8_t kValuePackedMagic = 0xB2; + static constexpr uint8_t kValuePackedMode1Bit = 1; + static constexpr uint8_t kValuePackedMode2Bit = 2; + static constexpr uint8_t kValuePackedMode4Bit = 4; size_t rawByteSize() const { return elementSize * sizeof(T); } - size_t bitPackedByteSize() 
const { return 1 + ((elementSize + 7) / 8); } + size_t legacyBitPackedByteSize() const { return 1 + ((elementSize + 7) / 8); } - bool isBitPackedPayload(const uint8_t *payload, size_t size) const { + size_t valuePackedByteSize(uint8_t modeBits) const { + return 2 + (((elementSize * modeBits) + 7) / 8); + } + + size_t maxPackedPayloadByteSize() const { + if (!useBinaryBitPacking || !std::is_same::value) { + return rawByteSize(); + } + const size_t maxPacked = + std::max(legacyBitPackedByteSize(), valuePackedByteSize(4)); + return std::max(rawByteSize(), maxPacked); + } + + bool isLegacyBitPackedPayload(const uint8_t *payload, size_t size) const { + if (!useBinaryBitPacking || !payload) { + return false; + } + if (!std::is_same::value) { + return false; + } + return size == legacyBitPackedByteSize() && + payload[0] == kLegacyBitPackedMagic; + } + + bool isValuePackedPayload(const uint8_t *payload, size_t size) const { if (!useBinaryBitPacking || !payload) { return false; } if (!std::is_same::value) { return false; } - return size == bitPackedByteSize() && payload[0] == kBitPackedMagic; + if (size < 2 || payload[0] != kValuePackedMagic) { + return false; + } + const uint8_t mode = payload[1]; + if (mode != kValuePackedMode1Bit && mode != kValuePackedMode2Bit && + mode != kValuePackedMode4Bit) { + return false; + } + return size == valuePackedByteSize(mode); } - void encodeBitPacked(const T *values, std::vector &encoded) const { + bool encodeValuePacked(const T *values, std::vector &encoded) const { if (!useBinaryBitPacking) { encoded.clear(); - return; + return false; } if (!std::is_same::value) { - throw std::runtime_error( - "Binary bit packing is only supported for uint8_t"); + throw std::runtime_error("Value packing is only supported for uint8_t"); + } + + uint8_t maxValue = 0; + for (size_t i = 0; i < elementSize; ++i) { + const uint8_t value = values[i]; + if (value > maxValue) { + maxValue = value; + } + } + + uint8_t modeBits = 0; + if (maxValue <= 1) { + 
modeBits = kValuePackedMode1Bit; + } else if (maxValue <= 3) { + modeBits = kValuePackedMode2Bit; + } else if (maxValue <= 15) { + modeBits = kValuePackedMode4Bit; + } else { + return false; } - encoded.assign(bitPackedByteSize(), 0); - encoded[0] = kBitPackedMagic; + if (valuePackedByteSize(modeBits) >= rawByteSize()) { + return false; + } + + encoded.assign(valuePackedByteSize(modeBits), 0); + encoded[0] = kValuePackedMagic; + encoded[1] = modeBits; for (size_t i = 0; i < elementSize; ++i) { - if (values[i] != bitPackFalseValue) { - encoded[1 + (i / 8)] |= (1u << (i % 8)); + const uint8_t value = values[i]; + if (modeBits == kValuePackedMode1Bit) { + if (value > 0) { + encoded[2 + (i / 8)] |= static_cast(1u << (i % 8)); + } + } else if (modeBits == kValuePackedMode2Bit) { + const size_t bitPos = i * 2; + encoded[2 + (bitPos / 8)] |= + static_cast((value & 0x3u) << (bitPos % 8)); + } else { + const size_t bitPos = i * 4; + encoded[2 + (bitPos / 8)] |= + static_cast((value & 0xFu) << (bitPos % 8)); } } + return true; } - T *decodeBitPackedAndCache(uint32_t elementId, const uint8_t *payload) { + void prepareDecodedCacheForWrite(uint32_t elementId) { if (lastAccessedId != UINT32_MAX && lastAccessedId != elementId && !lastDecompressed.empty()) { secondAccessedId = lastAccessedId; @@ -103,6 +175,33 @@ class SparseVector { if (lastDecompressed.size() < elementSize) { lastDecompressed.resize(elementSize); } + } + + T *decodeValuePackedAndCache(uint32_t elementId, const uint8_t *payload) { + const uint8_t modeBits = payload[1]; + prepareDecodedCacheForWrite(elementId); + + for (size_t i = 0; i < elementSize; ++i) { + if (modeBits == kValuePackedMode1Bit) { + const bool isSet = (payload[2 + (i / 8)] & (1u << (i % 8))) != 0; + lastDecompressed[i] = static_cast(isSet ? 
1 : 0); + } else if (modeBits == kValuePackedMode2Bit) { + const size_t bitPos = i * 2; + lastDecompressed[i] = + static_cast((payload[2 + (bitPos / 8)] >> (bitPos % 8)) & 0x3u); + } else { + const size_t bitPos = i * 4; + lastDecompressed[i] = + static_cast((payload[2 + (bitPos / 8)] >> (bitPos % 8)) & 0xFu); + } + } + + lastAccessedId = elementId; + return lastDecompressed.data(); + } + + T *decodeLegacyBitPackedAndCache(uint32_t elementId, const uint8_t *payload) { + prepareDecodedCacheForWrite(elementId); for (size_t i = 0; i < elementSize; ++i) { const bool isSet = (payload[1 + (i / 8)] & (1u << (i % 8))) != 0; @@ -395,19 +494,22 @@ class SparseVector { } const size_t rawBytes = rawByteSize(); - if (decodeScratch.size() < rawBytes) { - decodeScratch.resize(rawBytes); + const size_t maxDecodedSize = maxPackedPayloadByteSize(); + if (decodeScratch.size() < maxDecodedSize) { + decodeScratch.resize(maxDecodedSize); } int decompressedSize = LZ4_decompress_safe( reinterpret_cast(payload), reinterpret_cast(decodeScratch.data()), - static_cast(payloadSize), static_cast(rawBytes)); + static_cast(payloadSize), static_cast(maxDecodedSize)); if (decompressedSize < 0) { - // Backward compatibility: some payloads may be stored raw. 
- if (isBitPackedPayload(payload, payloadSize)) { - return decodeBitPackedAndCache(elementId, payload); + if (isValuePackedPayload(payload, payloadSize)) { + return decodeValuePackedAndCache(elementId, payload); + } + if (isLegacyBitPackedPayload(payload, payloadSize)) { + return decodeLegacyBitPackedAndCache(elementId, payload); } // Backward compatibility: older payloads may store raw bytes even if @@ -418,30 +520,31 @@ class SparseVector { return noData.data(); } - if (isBitPackedPayload(decodeScratch.data(), - static_cast(decompressedSize))) { - return decodeBitPackedAndCache(elementId, decodeScratch.data()); + if (isValuePackedPayload(decodeScratch.data(), + static_cast(decompressedSize))) { + return decodeValuePackedAndCache(elementId, decodeScratch.data()); + } + + if (isLegacyBitPackedPayload(decodeScratch.data(), + static_cast(decompressedSize))) { + return decodeLegacyBitPackedAndCache(elementId, decodeScratch.data()); } if (static_cast(decompressedSize) != rawBytes) { return noData.data(); } - if (lastAccessedId != UINT32_MAX && lastAccessedId != elementId && - !lastDecompressed.empty()) { - secondAccessedId = lastAccessedId; - secondDecompressed.swap(lastDecompressed); - } - if (lastDecompressed.size() < elementSize) { - lastDecompressed.resize(elementSize); - } + prepareDecodedCacheForWrite(elementId); memcpy(lastDecompressed.data(), decodeScratch.data(), rawBytes); lastAccessedId = elementId; return lastDecompressed.data(); } - if (isBitPackedPayload(payload, payloadSize)) { - return decodeBitPackedAndCache(elementId, payload); + if (isValuePackedPayload(payload, payloadSize)) { + return decodeValuePackedAndCache(elementId, payload); + } + if (isLegacyBitPackedPayload(payload, payloadSize)) { + return decodeLegacyBitPackedAndCache(elementId, payload); } if (payloadSize != rawByteSize()) { @@ -490,14 +593,15 @@ class SparseVector { if (parent == nullptr || parent->hasData(elementId)) { if (memcmp(values, noData.data(), elementSize * sizeof(T)) != 0) { 
- std::vector bitPacked; + std::vector valuePacked; const uint8_t *storeBytes = reinterpret_cast(values); size_t storeByteSize = elementSize * sizeof(T); if (useBinaryBitPacking) { - encodeBitPacked(values, bitPacked); - storeBytes = bitPacked.data(); - storeByteSize = bitPacked.size(); + if (encodeValuePacked(values, valuePacked)) { + storeBytes = valuePacked.data(); + storeByteSize = valuePacked.size(); + } } if (useCompression) { From b229c3f3257f7cfe88744c1fd9656f5b2b259e3c Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 13 Mar 2026 19:02:09 +0100 Subject: [PATCH 05/42] disabled bitpacking on shadows --- AGENTS.md | 3 +++ src/SerumData.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 84359e6..db55905 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -57,6 +57,9 @@ Vector policy currently used in `SerumData`: - `dynaspritemasks` + `dynaspritemasks_active` - `dynaspritemasks_extra` + `dynaspritemasks_extra_active` - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. +- Dynamic-zone value vectors (`dynamasks*`, `dynaspritemasks*`) remain + compressed but not value-bitpacked (correctness-first to avoid visual + artifacts); only their sidecar active flags are bitpacked. - `compmasks` and `backgroundmask*` are already boolean-mask domain (`mask==0` include / `>0` exclude) and therefore do not need separate transparency sidecar vectors. 
diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 82a2281..53e5382 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -21,9 +21,9 @@ SerumData::SerumData() cframes(0, false, true), cframes_v2(0, false, true), cframes_v2_extra(0, false, true), - dynamasks(0, false, true, true, 0, 1), + dynamasks(0, false, true), dynamasks_active(0, false, true, true, 0, 1), - dynamasks_extra(0, false, true, true, 0, 1), + dynamasks_extra(0, false, true), dynamasks_extra_active(0, false, true, true, 0, 1), dyna4cols(0), dyna4cols_v2(0, false, true), @@ -62,9 +62,9 @@ SerumData::SerumData() dynashadowscol_extra(0), dynasprite4cols(0), dynasprite4cols_extra(0), - dynaspritemasks(0, false, true, true, 0, 1), + dynaspritemasks(0, false, true), dynaspritemasks_active(0, false, true, true, 0, 1), - dynaspritemasks_extra(0, false, true, true, 0, 1), + dynaspritemasks_extra(0, false, true), dynaspritemasks_extra_active(0, false, true, true, 0, 1), sprshapemode(0) { sceneGenerator = new SceneGenerator(); From 87e3e08697c05f5d8ae30f0b163e2ee603e3f3e6 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 10:11:13 +0100 Subject: [PATCH 06/42] fixed checks --- AGENTS.md | 6 +++--- src/SerumData.cpp | 8 ++++---- src/serum-decode.cpp | 21 +++++++++++++++++++++ 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index db55905..6dd5c02 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -57,9 +57,9 @@ Vector policy currently used in `SerumData`: - `dynaspritemasks` + `dynaspritemasks_active` - `dynaspritemasks_extra` + `dynaspritemasks_extra_active` - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. -- Dynamic-zone value vectors (`dynamasks*`, `dynaspritemasks*`) remain - compressed but not value-bitpacked (correctness-first to avoid visual - artifacts); only their sidecar active flags are bitpacked. 
+- Dynamic-zone value vectors (`dynamasks*`, `dynaspritemasks*`) use adaptive + value packing + compression, with sidecar active flags for sentinel-free + semantics. - `compmasks` and `backgroundmask*` are already boolean-mask domain (`mask==0` include / `>0` exclude) and therefore do not need separate transparency sidecar vectors. diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 53e5382..82a2281 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -21,9 +21,9 @@ SerumData::SerumData() cframes(0, false, true), cframes_v2(0, false, true), cframes_v2_extra(0, false, true), - dynamasks(0, false, true), + dynamasks(0, false, true, true, 0, 1), dynamasks_active(0, false, true, true, 0, 1), - dynamasks_extra(0, false, true), + dynamasks_extra(0, false, true, true, 0, 1), dynamasks_extra_active(0, false, true, true, 0, 1), dyna4cols(0), dyna4cols_v2(0, false, true), @@ -62,9 +62,9 @@ SerumData::SerumData() dynashadowscol_extra(0), dynasprite4cols(0), dynasprite4cols_extra(0), - dynaspritemasks(0, false, true), + dynaspritemasks(0, false, true, true, 0, 1), dynaspritemasks_active(0, false, true, true, 0, 1), - dynaspritemasks_extra(0, false, true), + dynaspritemasks_extra(0, false, true, true, 0, 1), dynaspritemasks_extra_active(0, false, true, true, 0, 1), sprshapemode(0) { sceneGenerator = new SceneGenerator(); diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 93798a6..4616cd3 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1574,6 +1574,9 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { } static inline bool IsSpriteOpaqueV1(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spritedescriptionso.hasData(spriteId)) { + return false; + } if (!g_serumData.spritedescriptionso_opaque.hasData(spriteId)) { return g_serumData.spritedescriptionso[spriteId][pixelIndex] != 255; } @@ -1581,6 +1584,9 @@ static inline bool IsSpriteOpaqueV1(uint8_t spriteId, uint32_t pixelIndex) { } static inline bool 
IsSpriteOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spriteoriginal.hasData(spriteId)) { + return false; + } if (!g_serumData.spriteoriginal_opaque.hasData(spriteId)) { return g_serumData.spriteoriginal[spriteId][pixelIndex] != 255; } @@ -1589,6 +1595,9 @@ static inline bool IsSpriteOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { static inline bool IsSpriteExtraOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spritemask_extra.hasData(spriteId)) { + return false; + } if (!g_serumData.spritemask_extra_opaque.hasData(spriteId)) { return g_serumData.spritemask_extra[spriteId][pixelIndex] != 255; } @@ -1596,6 +1605,9 @@ static inline bool IsSpriteExtraOpaqueV2(uint8_t spriteId, } static inline bool IsFrameDynaActive(uint32_t frameId, uint32_t pixelIndex) { + if (!g_serumData.dynamasks.hasData(frameId)) { + return false; + } if (!g_serumData.dynamasks_active.hasData(frameId)) { return g_serumData.dynamasks[frameId][pixelIndex] != 255; } @@ -1604,6 +1616,9 @@ static inline bool IsFrameDynaActive(uint32_t frameId, uint32_t pixelIndex) { static inline bool IsFrameExtraDynaActive(uint32_t frameId, uint32_t pixelIndex) { + if (!g_serumData.dynamasks_extra.hasData(frameId)) { + return false; + } if (!g_serumData.dynamasks_extra_active.hasData(frameId)) { return g_serumData.dynamasks_extra[frameId][pixelIndex] != 255; } @@ -1611,6 +1626,9 @@ static inline bool IsFrameExtraDynaActive(uint32_t frameId, } static inline bool IsSpriteDynaActive(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.dynaspritemasks.hasData(spriteId)) { + return false; + } if (!g_serumData.dynaspritemasks_active.hasData(spriteId)) { return g_serumData.dynaspritemasks[spriteId][pixelIndex] != 255; } @@ -1619,6 +1637,9 @@ static inline bool IsSpriteDynaActive(uint8_t spriteId, uint32_t pixelIndex) { static inline bool IsSpriteExtraDynaActive(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.dynaspritemasks_extra.hasData(spriteId)) { + return 
false; + } if (!g_serumData.dynaspritemasks_extra_active.hasData(spriteId)) { return g_serumData.dynaspritemasks_extra[spriteId][pixelIndex] != 255; } From 3e36d2f7242635f4b07679f584ebf02467d0f5b3 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 10:18:32 +0100 Subject: [PATCH 07/42] removed fallbacks --- AGENTS.md | 3 +++ src/serum-decode.cpp | 43 +------------------------------------------ 2 files changed, 4 insertions(+), 42 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 6dd5c02..adc9571 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -57,6 +57,9 @@ Vector policy currently used in `SerumData`: - `dynaspritemasks` + `dynaspritemasks_active` - `dynaspritemasks_extra` + `dynaspritemasks_extra_active` - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. +- Runtime does not include sentinel-based fallback in sprite/dynamic helpers; + missing/incorrect sidecars are treated as a conversion/load bug and are not + masked by `255` compatibility logic. - Dynamic-zone value vectors (`dynamasks*`, `dynaspritemasks*`) use adaptive value packing + compression, with sidecar active flags for sentinel-free semantics. diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 4616cd3..d174c8e 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1574,75 +1574,34 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { } static inline bool IsSpriteOpaqueV1(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.spritedescriptionso.hasData(spriteId)) { - return false; - } - if (!g_serumData.spritedescriptionso_opaque.hasData(spriteId)) { - return g_serumData.spritedescriptionso[spriteId][pixelIndex] != 255; - } + // Sidecar flags are the single source of truth after load-time normalization. 
return g_serumData.spritedescriptionso_opaque[spriteId][pixelIndex] > 0; } static inline bool IsSpriteOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.spriteoriginal.hasData(spriteId)) { - return false; - } - if (!g_serumData.spriteoriginal_opaque.hasData(spriteId)) { - return g_serumData.spriteoriginal[spriteId][pixelIndex] != 255; - } return g_serumData.spriteoriginal_opaque[spriteId][pixelIndex] > 0; } static inline bool IsSpriteExtraOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.spritemask_extra.hasData(spriteId)) { - return false; - } - if (!g_serumData.spritemask_extra_opaque.hasData(spriteId)) { - return g_serumData.spritemask_extra[spriteId][pixelIndex] != 255; - } return g_serumData.spritemask_extra_opaque[spriteId][pixelIndex] > 0; } static inline bool IsFrameDynaActive(uint32_t frameId, uint32_t pixelIndex) { - if (!g_serumData.dynamasks.hasData(frameId)) { - return false; - } - if (!g_serumData.dynamasks_active.hasData(frameId)) { - return g_serumData.dynamasks[frameId][pixelIndex] != 255; - } return g_serumData.dynamasks_active[frameId][pixelIndex] > 0; } static inline bool IsFrameExtraDynaActive(uint32_t frameId, uint32_t pixelIndex) { - if (!g_serumData.dynamasks_extra.hasData(frameId)) { - return false; - } - if (!g_serumData.dynamasks_extra_active.hasData(frameId)) { - return g_serumData.dynamasks_extra[frameId][pixelIndex] != 255; - } return g_serumData.dynamasks_extra_active[frameId][pixelIndex] > 0; } static inline bool IsSpriteDynaActive(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.dynaspritemasks.hasData(spriteId)) { - return false; - } - if (!g_serumData.dynaspritemasks_active.hasData(spriteId)) { - return g_serumData.dynaspritemasks[spriteId][pixelIndex] != 255; - } return g_serumData.dynaspritemasks_active[spriteId][pixelIndex] > 0; } static inline bool IsSpriteExtraDynaActive(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.dynaspritemasks_extra.hasData(spriteId)) { - 
return false; - } - if (!g_serumData.dynaspritemasks_extra_active.hasData(spriteId)) { - return g_serumData.dynaspritemasks_extra[spriteId][pixelIndex] != 255; - } return g_serumData.dynaspritemasks_extra_active[spriteId][pixelIndex] > 0; } From 399d795c74bfe6c39329fbf41e41c8f6b2748043 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 18:54:09 +0100 Subject: [PATCH 08/42] fixed checks --- src/serum-decode.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index d174c8e..3da4da3 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1575,33 +1575,40 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { static inline bool IsSpriteOpaqueV1(uint8_t spriteId, uint32_t pixelIndex) { // Sidecar flags are the single source of truth after load-time normalization. + if (!g_serumData.spritedescriptionso_opaque.hasData(spriteId)) return false; return g_serumData.spritedescriptionso_opaque[spriteId][pixelIndex] > 0; } static inline bool IsSpriteOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spriteoriginal_opaque.hasData(spriteId)) return false; return g_serumData.spriteoriginal_opaque[spriteId][pixelIndex] > 0; } static inline bool IsSpriteExtraOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.spritemask_extra_opaque.hasData(spriteId)) return false; return g_serumData.spritemask_extra_opaque[spriteId][pixelIndex] > 0; } static inline bool IsFrameDynaActive(uint32_t frameId, uint32_t pixelIndex) { + if (!g_serumData.dynamasks_active.hasData(frameId)) return false; return g_serumData.dynamasks_active[frameId][pixelIndex] > 0; } static inline bool IsFrameExtraDynaActive(uint32_t frameId, uint32_t pixelIndex) { + if (!g_serumData.dynamasks_extra_active.hasData(frameId)) return false; return g_serumData.dynamasks_extra_active[frameId][pixelIndex] > 0; } static inline bool IsSpriteDynaActive(uint8_t spriteId, uint32_t pixelIndex) { + if 
(!g_serumData.dynaspritemasks_active.hasData(spriteId)) return false; return g_serumData.dynaspritemasks_active[spriteId][pixelIndex] > 0; } static inline bool IsSpriteExtraDynaActive(uint8_t spriteId, uint32_t pixelIndex) { + if (!g_serumData.dynaspritemasks_extra_active.hasData(spriteId)) return false; return g_serumData.dynaspritemasks_extra_active[spriteId][pixelIndex] > 0; } From e233b6907e6c53d1c1a784fb2ece96924cea21db Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 20:28:29 +0100 Subject: [PATCH 09/42] added profiling --- AGENTS.md | 18 +++ src/SerumData.cpp | 97 ++++++++++++++ src/SerumData.h | 12 +- src/serum-decode.cpp | 298 +++++++++++++++++++++++++++---------------- src/sparse-vector.h | 104 +++++++++++++++ 5 files changed, 420 insertions(+), 109 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index adc9571..2daecbc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -56,6 +56,11 @@ Vector policy currently used in `SerumData`: - `dynamasks_extra` + `dynamasks_extra_active` - `dynaspritemasks` + `dynaspritemasks_active` - `dynaspritemasks_extra` + `dynaspritemasks_extra_active` +- Precomputed frame-level dynamic fast flags are persisted: + - `frameHasDynamic` + - `frameHasDynamicExtra` +- `Colorize_Framev1/v2` uses these flags to bypass dynamic-mask branches + entirely for frames without active dynamic pixels. - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. - Runtime does not include sentinel-based fallback in sprite/dynamic helpers; missing/incorrect sidecars are treated as a conversion/load bug and are not @@ -84,6 +89,11 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. 8. Build/normalize packing sidecars via `BuildPackingSidecarsAndNormalize()`. - The normalization step is idempotent and guarded; repeated calls in the same load/save cycle are no-ops once completed. +9. 
Optional runtime A/B switch for dynamic packed-read overhead: + - If env `SERUM_DISABLE_DYNAMIC_PACKED_READS` is enabled (`1/true/on/yes`), + `PrepareRuntimeDynamicHotCache()` predecodes dynamic vectors + (`dynamasks*`, `dynaspritemasks*`) into runtime hot caches. + - Default runtime behavior is unchanged when this env var is not set. Important: - `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. @@ -192,6 +202,14 @@ v6 snapshot policy: - Central callback configured by `Serum_SetLogCallback`. - `serum-decode.cpp` and `SceneGenerator.cpp` both use callback-based `Log(...)`. - Missing-file logs from `find_case_insensitive_file(...)` use normalized path joining. +- Optional runtime profiling: + - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS` is enabled (`1/true/on/yes`), + periodic average timings for `Colorize_Framev2` and `Colorize_Spritev2` + hot paths are logged. + - If env `SERUM_PROFILE_SPARSE_VECTORS=1`, sparse-vector access snapshots are + logged at the same cadence (accesses, decode count, cache hits, direct hits) + for key runtime vectors (`cframes_v2*`, `backgroundmask*`, `dynamasks*`, + `dynaspritemasks*`). ## Safety invariants - `frameIsScene.size()` must equal `nframes` before identification. 
diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 82a2281..a7023a5 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -67,6 +67,18 @@ SerumData::SerumData() dynaspritemasks_extra(0, false, true, true, 0, 1), dynaspritemasks_extra_active(0, false, true, true, 0, 1), sprshapemode(0) { + cframes_v2.setProfileLabel("cframes_v2"); + cframes_v2_extra.setProfileLabel("cframes_v2_extra"); + dynamasks.setProfileLabel("dynamasks"); + dynamasks_active.setProfileLabel("dynamasks_active"); + dynamasks_extra.setProfileLabel("dynamasks_extra"); + dynamasks_extra_active.setProfileLabel("dynamasks_extra_active"); + backgroundmask.setProfileLabel("backgroundmask"); + backgroundmask_extra.setProfileLabel("backgroundmask_extra"); + dynaspritemasks.setProfileLabel("dynaspritemasks"); + dynaspritemasks_active.setProfileLabel("dynaspritemasks_active"); + dynaspritemasks_extra.setProfileLabel("dynaspritemasks_extra"); + dynaspritemasks_extra_active.setProfileLabel("dynaspritemasks_extra_active"); sceneGenerator = new SceneGenerator(); if (is_real_machine()) packingStorage.assign(384u * 1024u * 1024u, 0xA5); } @@ -130,6 +142,8 @@ void SerumData::Clear() { dynaspritemasks_extra.clear(); dynaspritemasks_extra_active.clear(); sprshapemode.clear(); + frameHasDynamic.clear(); + frameHasDynamicExtra.clear(); frameIsScene.clear(); sceneFramesBySignature.clear(); } @@ -216,6 +230,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { normalized.resize(framePixels); flags.resize(framePixels); + frameHasDynamic.assign(nframes, 0); for (uint32_t frameId = 0; frameId < nframes; ++frameId) { const bool hasSourceVector = dynamasks.hasData(frameId); const bool hasActiveVector = dynamasks_active.hasData(frameId); @@ -224,20 +239,24 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } const uint8_t *source = dynamasks[frameId]; const uint8_t *activeSource = dynamasks_active[frameId]; + bool anyActive = false; for (size_t i = 0; i < framePixels; ++i) { const uint8_t value = 
hasSourceVector ? source[i] : 0; const bool active = hasActiveVector ? (activeSource[i] > 0) : (value != 255); flags[i] = active ? 1 : 0; normalized[i] = active ? value : 0; + anyActive = anyActive || active; } dynamasks_active.set(frameId, flags.data(), framePixels); dynamasks.set(frameId, normalized.data(), framePixels); + frameHasDynamic[frameId] = anyActive ? 1 : 0; } if (extraFramePixels > 0) { normalized.resize(extraFramePixels); flags.resize(extraFramePixels); + frameHasDynamicExtra.assign(nframes, 0); for (uint32_t frameId = 0; frameId < nframes; ++frameId) { if (isextraframe[frameId][0] == 0) { continue; @@ -249,18 +268,23 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } const uint8_t *source = dynamasks_extra[frameId]; const uint8_t *activeSource = dynamasks_extra_active[frameId]; + bool anyActive = false; for (size_t i = 0; i < extraFramePixels; ++i) { const uint8_t value = hasSourceVector ? source[i] : 0; const bool active = hasActiveVector ? (activeSource[i] > 0) : (value != 255); flags[i] = active ? 1 : 0; normalized[i] = active ? value : 0; + anyActive = anyActive || active; } dynamasks_extra_active.set(frameId, flags.data(), extraFramePixels, &isextraframe); dynamasks_extra.set(frameId, normalized.data(), extraFramePixels, &isextraframe); + frameHasDynamicExtra[frameId] = anyActive ? 
1 : 0; } + } else { + frameHasDynamicExtra.assign(nframes, 0); } normalized.resize(spritePixels); @@ -311,6 +335,79 @@ void SerumData::BuildPackingSidecarsAndNormalize() { m_packingSidecarsNormalized = true; } +void SerumData::PrepareRuntimeDynamicHotCache() { + std::vector frameIds; + frameIds.reserve(nframes); + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + if (frameId < frameHasDynamic.size() && frameHasDynamic[frameId] > 0) { + frameIds.push_back(frameId); + } + } + dynamasks.enableForcedDecodedReadsForIds(frameIds); + dynamasks_active.enableForcedDecodedReadsForIds(frameIds); + + std::vector extraFrameIds; + extraFrameIds.reserve(nframes); + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + if (frameId < frameHasDynamicExtra.size() && + frameHasDynamicExtra[frameId] > 0) { + extraFrameIds.push_back(frameId); + } + } + dynamasks_extra.enableForcedDecodedReadsForIds(extraFrameIds); + dynamasks_extra_active.enableForcedDecodedReadsForIds(extraFrameIds); + + std::vector spriteIds; + spriteIds.reserve(nsprites); + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + if (dynaspritemasks.hasData(spriteId) || + dynaspritemasks_active.hasData(spriteId) || + dynaspritemasks_extra.hasData(spriteId) || + dynaspritemasks_extra_active.hasData(spriteId)) { + spriteIds.push_back(spriteId); + } + } + dynaspritemasks.enableForcedDecodedReadsForIds(spriteIds); + dynaspritemasks_active.enableForcedDecodedReadsForIds(spriteIds); + dynaspritemasks_extra.enableForcedDecodedReadsForIds(spriteIds); + dynaspritemasks_extra_active.enableForcedDecodedReadsForIds(spriteIds); + + Log("Prepared runtime dynamic hot cache: %u frame masks, %u extra frame masks," + " %u sprite masks", + (uint32_t)frameIds.size(), (uint32_t)extraFrameIds.size(), + (uint32_t)spriteIds.size()); +} + +void SerumData::LogSparseVectorProfileSnapshot() { + auto logCounters = [&](auto& vec) { + uint64_t accesses = 0; + uint64_t decodes = 0; + uint64_t cacheHits = 0; + uint64_t 
directHits = 0; + vec.consumeProfileCounters(accesses, decodes, cacheHits, directHits); + const char* label = vec.getProfileLabel(); + if (!label || accesses == 0) { + return; + } + Log("SparseProfile %s: accesses=%llu decodes=%llu cacheHits=%llu direct=%llu", + label, (unsigned long long)accesses, (unsigned long long)decodes, + (unsigned long long)cacheHits, (unsigned long long)directHits); + }; + + logCounters(cframes_v2); + logCounters(cframes_v2_extra); + logCounters(backgroundmask); + logCounters(backgroundmask_extra); + logCounters(dynamasks); + logCounters(dynamasks_active); + logCounters(dynamasks_extra); + logCounters(dynamasks_extra_active); + logCounters(dynaspritemasks); + logCounters(dynaspritemasks_active); + logCounters(dynaspritemasks_extra); + logCounters(dynaspritemasks_extra_active); +} + bool SerumData::SaveToFile(const char *filename) { try { BuildPackingSidecarsAndNormalize(); diff --git a/src/SerumData.h b/src/SerumData.h index 3b3ada9..bfdd996 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -61,6 +61,8 @@ class SerumData { bool LoadFromFile(const char *filename, const uint8_t flags); bool LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags); void BuildPackingSidecarsAndNormalize(); + void PrepareRuntimeDynamicHotCache(); + void LogSparseVectorProfileSnapshot(); // Header data char rname[64]; @@ -134,6 +136,8 @@ class SerumData { SparseVector dynaspritemasks_extra; SparseVector dynaspritemasks_extra_active; SparseVector sprshapemode; + std::vector frameHasDynamic; + std::vector frameHasDynamicExtra; std::vector frameIsScene; std::unordered_map> sceneFramesBySignature; @@ -174,14 +178,16 @@ class SerumData { ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, - dynaspritemasks_extra_active); + dynaspritemasks_extra_active, frameHasDynamic, + frameHasDynamicExtra); } } else { if 
(concentrateFileVersion >= 6) { ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, - dynaspritemasks_extra_active); + dynaspritemasks_extra_active, frameHasDynamic, + frameHasDynamicExtra); } else { frameIsScene.clear(); sceneFramesBySignature.clear(); @@ -192,6 +198,8 @@ class SerumData { dynamasks_extra_active.clear(); dynaspritemasks_active.clear(); dynaspritemasks_extra_active.clear(); + frameHasDynamic.clear(); + frameHasDynamicExtra.clear(); } } diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 3da4da3..a9b7da5 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -73,6 +73,22 @@ void Log(const char* format, ...) { va_end(args); } +static bool IsEnvFlagEnabled(const char* name) { + const char* value = std::getenv(name); + if (!value || value[0] == '\0') { + return false; + } + return strcmp(value, "1") == 0 || strcasecmp(value, "true") == 0 || + strcasecmp(value, "yes") == 0 || strcasecmp(value, "on") == 0; +} + +static bool g_profileDynamicHotPaths = false; +static bool g_profileSparseVectors = false; +static bool g_disableDynamicPackedReads = false; +static uint64_t g_profileColorizeFrameV2Ns = 0; +static uint64_t g_profileColorizeSpriteV2Ns = 0; +static uint64_t g_profileColorizeCalls = 0; + static SerumData g_serumData; uint16_t sceneFrameCount = 0; uint16_t sceneCurrentFrame = 0; @@ -1227,6 +1243,13 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, const char* const romname, uint8_t flags) { Serum_free(); + g_profileDynamicHotPaths = IsEnvFlagEnabled("SERUM_PROFILE_DYNAMIC_HOTPATHS"); + g_profileSparseVectors = IsEnvFlagEnabled("SERUM_PROFILE_SPARSE_VECTORS"); + g_disableDynamicPackedReads = + IsEnvFlagEnabled("SERUM_DISABLE_DYNAMIC_PACKED_READS"); + g_profileColorizeFrameV2Ns = 0; + g_profileColorizeSpriteV2Ns = 0; + g_profileColorizeCalls = 0; mySerum.SerumVersion = 
g_serumData.SerumVersion = 0; mySerum.flags = 0; @@ -1334,6 +1357,11 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, } else { InitFrameLookupRuntimeStateFromStoredData(); } + if (g_disableDynamicPackedReads) { + g_serumData.PrepareRuntimeDynamicHotCache(); + Log("Dynamic packed reads disabled for runtime via " + "SERUM_DISABLE_DYNAMIC_PACKED_READS"); + } } if (is_real_machine()) { monochromeMode = true; @@ -1573,47 +1601,9 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { return IDENTIFY_NO_FRAME; // we found no corresponding frame } -static inline bool IsSpriteOpaqueV1(uint8_t spriteId, uint32_t pixelIndex) { - // Sidecar flags are the single source of truth after load-time normalization. - if (!g_serumData.spritedescriptionso_opaque.hasData(spriteId)) return false; - return g_serumData.spritedescriptionso_opaque[spriteId][pixelIndex] > 0; -} - -static inline bool IsSpriteOpaqueV2(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.spriteoriginal_opaque.hasData(spriteId)) return false; - return g_serumData.spriteoriginal_opaque[spriteId][pixelIndex] > 0; -} - -static inline bool IsSpriteExtraOpaqueV2(uint8_t spriteId, - uint32_t pixelIndex) { - if (!g_serumData.spritemask_extra_opaque.hasData(spriteId)) return false; - return g_serumData.spritemask_extra_opaque[spriteId][pixelIndex] > 0; -} - -static inline bool IsFrameDynaActive(uint32_t frameId, uint32_t pixelIndex) { - if (!g_serumData.dynamasks_active.hasData(frameId)) return false; - return g_serumData.dynamasks_active[frameId][pixelIndex] > 0; -} - -static inline bool IsFrameExtraDynaActive(uint32_t frameId, - uint32_t pixelIndex) { - if (!g_serumData.dynamasks_extra_active.hasData(frameId)) return false; - return g_serumData.dynamasks_extra_active[frameId][pixelIndex] > 0; -} - -static inline bool IsSpriteDynaActive(uint8_t spriteId, uint32_t pixelIndex) { - if (!g_serumData.dynaspritemasks_active.hasData(spriteId)) return false; - return 
g_serumData.dynaspritemasks_active[spriteId][pixelIndex] > 0; -} - -static inline bool IsSpriteExtraDynaActive(uint8_t spriteId, - uint32_t pixelIndex) { - if (!g_serumData.dynaspritemasks_extra_active.hasData(spriteId)) return false; - return g_serumData.dynaspritemasks_extra_active[spriteId][pixelIndex] > 0; -} - -void GetSpriteSize(uint8_t nospr, int* pswid, int* pshei, uint8_t* spriteData, - int sswid, int sshei, uint8_t* spriteOpaque) { +void GetSpriteSize(uint8_t nospr, int* pswid, int* pshei, + const uint8_t* spriteData, int sswid, int sshei, + const uint8_t* spriteOpaque) { *pswid = *pshei = 0; if (nospr >= g_serumData.nsprites) return; if (!spriteData) return; @@ -1639,10 +1629,16 @@ bool Check_Spritesv1(uint8_t* Frame, uint32_t quelleframe, while ((ti < MAX_SPRITES_PER_FRAME) && (g_serumData.framesprites[quelleframe][ti] < 255)) { uint8_t qspr = g_serumData.framesprites[quelleframe][ti]; + if (!g_serumData.spritedescriptionso.hasData(qspr) || + !g_serumData.spritedescriptionso_opaque.hasData(qspr)) { + ti++; + continue; + } + const uint8_t* spriteDescription = g_serumData.spritedescriptionso[qspr]; + const uint8_t* spriteOpaque = g_serumData.spritedescriptionso_opaque[qspr]; int spw, sph; - GetSpriteSize(qspr, &spw, &sph, g_serumData.spritedescriptionso[qspr], - MAX_SPRITE_SIZE, MAX_SPRITE_SIZE, - g_serumData.spritedescriptionso_opaque[qspr]); + GetSpriteSize(qspr, &spw, &sph, spriteDescription, MAX_SPRITE_SIZE, + MAX_SPRITE_SIZE, spriteOpaque); short minxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4]); short minyBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 1]); short maxxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 2]); @@ -1697,9 +1693,8 @@ bool Check_Spritesv1(uint8_t* Frame, uint32_t quelleframe, for (uint16_t tl = 0; tl < detw; tl++) { const uint32_t spritePixelIndex = (tk + dety) * MAX_SPRITE_SIZE + tl + detx; - if (!IsSpriteOpaqueV1(qspr, spritePixelIndex)) continue; - uint8_t val = - 
g_serumData.spritedescriptionso[qspr][spritePixelIndex]; + if (spriteOpaque[spritePixelIndex] == 0) continue; + uint8_t val = spriteDescription[spritePixelIndex]; if (val != Frame[(tk + offsy) * g_serumData.fwidth + tl + offsx]) { notthere = true; @@ -1769,6 +1764,13 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, while ((ti < MAX_SPRITES_PER_FRAME) && (g_serumData.framesprites[quelleframe][ti] < 255)) { uint8_t qspr = g_serumData.framesprites[quelleframe][ti]; + if (!g_serumData.spriteoriginal.hasData(qspr) || + !g_serumData.spriteoriginal_opaque.hasData(qspr)) { + ti++; + continue; + } + const uint8_t* spriteOriginal = g_serumData.spriteoriginal[qspr]; + const uint8_t* spriteOpaque = g_serumData.spriteoriginal_opaque[qspr]; uint8_t* Frame = recframe; bool isshapecheck = false; if (g_serumData.sprshapemode[qspr][0] > 0) { @@ -1785,9 +1787,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, Frame = frameshape; } int spw, sph; - GetSpriteSize(qspr, &spw, &sph, g_serumData.spriteoriginal[qspr], - MAX_SPRITE_WIDTH, MAX_SPRITE_HEIGHT, - g_serumData.spriteoriginal_opaque[qspr]); + GetSpriteSize(qspr, &spw, &sph, spriteOriginal, MAX_SPRITE_WIDTH, + MAX_SPRITE_HEIGHT, spriteOpaque); short minxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4]); short minyBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 1]); short maxxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 2]); @@ -1842,9 +1843,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, for (uint16_t tl = 0; tl < detw; tl++) { const uint32_t spritePixelIndex = (tk + dety) * MAX_SPRITE_WIDTH + tl + detx; - if (!IsSpriteOpaqueV2(qspr, spritePixelIndex)) continue; - uint8_t val = - g_serumData.spriteoriginal[qspr][spritePixelIndex]; + if (spriteOpaque[spritePixelIndex] == 0) continue; + uint8_t val = spriteOriginal[spritePixelIndex]; if (val != Frame[(tk + offsy) * g_serumData.fwidth + tl + offsx]) { notthere = true; @@ -1907,6 +1907,13 @@ void 
Colorize_Framev1(uint8_t* frame, uint32_t IDfound) { uint16_t tj, ti; // Generate the colorized version of a frame once identified in the crom // frames + const bool frameHasDynamic = + IDfound < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[IDfound] > 0; + const uint8_t* frameDyna = frameHasDynamic ? g_serumData.dynamasks[IDfound] : nullptr; + const uint8_t* frameDynaActive = frameHasDynamic + ? g_serumData.dynamasks_active[IDfound] + : nullptr; for (tj = 0; tj < g_serumData.fheight; tj++) { for (ti = 0; ti < g_serumData.fwidth; ti++) { uint16_t tk = tj * g_serumData.fwidth + ti; @@ -1920,13 +1927,14 @@ void Colorize_Framev1(uint8_t* frame, uint32_t IDfound) { g_serumData .backgroundframes[g_serumData.backgroundIDs[IDfound][0]][tk]; else { - uint8_t dynacouche = g_serumData.dynamasks[IDfound][tk]; - if (!IsFrameDynaActive(IDfound, tk)) + if (!frameHasDynamic || frameDynaActive[tk] == 0) mySerum.frame[tk] = g_serumData.cframes[IDfound][tk]; - else + else { + const uint8_t dynacouche = frameDyna[tk]; mySerum.frame[tk] = g_serumData.dyna4cols[IDfound][dynacouche * g_serumData.nocolors + frame[tk]]; + } } } } @@ -2054,6 +2062,22 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, if (((mySerum.frame32 && g_serumData.fheight == 32) || (mySerum.frame64 && g_serumData.fheight == 64)) && isoriginalrequested) { + const uint16_t backgroundId = g_serumData.backgroundIDs[IDfound][0]; + const bool hasBackground = backgroundId < g_serumData.nbackgrounds; + const uint8_t* frameBackgroundMask = g_serumData.backgroundmask[IDfound]; + const uint16_t* frameBackground = hasBackground + ? g_serumData.backgroundframes_v2[backgroundId] + : nullptr; + const uint16_t* frameColors = g_serumData.cframes_v2[IDfound]; + const bool frameHasDynamic = + IDfound < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[IDfound] > 0; + const uint8_t* frameDyna = + frameHasDynamic ? 
g_serumData.dynamasks[IDfound] : nullptr; + const uint8_t* frameDynaActive = + frameHasDynamic ? g_serumData.dynamasks_active[IDfound] : nullptr; + const uint16_t* frameDynaColors = + frameHasDynamic ? g_serumData.dyna4cols_v2[IDfound] : nullptr; // create the original res frame if (g_serumData.fheight == 32) { pfr = mySerum.frame32; @@ -2079,15 +2103,12 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, for (tj = 0; tj < g_serumData.fheight; tj++) { for (ti = 0; ti < g_serumData.fwidth; ti++) { uint16_t tk = tj * g_serumData.fwidth + ti; - if ((g_serumData.backgroundIDs[IDfound][0] < - g_serumData.nbackgrounds) && - (frame[tk] == 0) && (g_serumData.backgroundmask[IDfound][tk] > 0)) { + if (hasBackground && (frame[tk] == 0) && (frameBackgroundMask[tk] > 0)) { if (isdynapix[tk] == 0) { if (applySceneBackground) { pfr[tk] = sceneBackgroundFrame[tk]; } else if (!suppressFrameBackgroundImage) { - pfr[tk] = g_serumData.backgroundframes_v2 - [g_serumData.backgroundIDs[IDfound][0]][tk]; + pfr[tk] = frameBackground[tk]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], false)) pfr[tk] = @@ -2099,17 +2120,14 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } } } else { - uint8_t dynacouche = g_serumData.dynamasks[IDfound][tk]; - if (!IsFrameDynaActive(IDfound, tk)) { + if (!frameHasDynamic || frameDynaActive[tk] == 0) { if (isdynapix[tk] == 0) { if (blackOutStaticContent && - (g_serumData.backgroundIDs[IDfound][0] < - g_serumData.nbackgrounds) && - (frame[tk] > 0) && - (g_serumData.backgroundmask[IDfound][tk] > 0)) { + hasBackground && (frame[tk] > 0) && + (frameBackgroundMask[tk] > 0)) { pfr[tk] = sceneBackgroundFrame[tk]; } else { - pfr[tk] = g_serumData.cframes_v2[IDfound][tk]; + pfr[tk] = frameColors[tk]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], false)) pfr[tk] = @@ -2119,19 +2137,16 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } } } else { + const uint8_t dynacouche = frameDyna[tk]; if 
(frame[tk] > 0) { CheckDynaShadow(pfr, IDfound, dynacouche, isdynapix, ti, tj, g_serumData.fwidth, g_serumData.fheight, false); isdynapix[tk] = 1; - pfr[tk] = - g_serumData - .dyna4cols_v2[IDfound][dynacouche * g_serumData.nocolors + - frame[tk]]; + pfr[tk] = frameDynaColors[dynacouche * g_serumData.nocolors + + frame[tk]]; } else if (isdynapix[tk] == 0) - pfr[tk] = - g_serumData - .dyna4cols_v2[IDfound][dynacouche * g_serumData.nocolors + - frame[tk]]; + pfr[tk] = frameDynaColors[dynacouche * g_serumData.nocolors + + frame[tk]]; prot[tk * 2] = prot[tk * 2 + 1] = 0xffff; } } @@ -2142,6 +2157,26 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, ((mySerum.frame32 && g_serumData.fheight_extra == 32) || (mySerum.frame64 && g_serumData.fheight_extra == 64)) && isextrarequested) { + const uint16_t backgroundId = g_serumData.backgroundIDs[IDfound][0]; + const bool hasBackground = backgroundId < g_serumData.nbackgrounds; + const uint8_t* frameBackgroundMaskExtra = + g_serumData.backgroundmask_extra[IDfound]; + const uint16_t* frameBackgroundExtra = + hasBackground ? g_serumData.backgroundframes_v2_extra[backgroundId] + : nullptr; + const uint16_t* frameColorsExtra = g_serumData.cframes_v2_extra[IDfound]; + const bool frameHasDynamicExtra = + IDfound < g_serumData.frameHasDynamicExtra.size() && + g_serumData.frameHasDynamicExtra[IDfound] > 0; + const uint8_t* frameDynaExtra = frameHasDynamicExtra + ? g_serumData.dynamasks_extra[IDfound] + : nullptr; + const uint8_t* frameDynaExtraActive = frameHasDynamicExtra + ? g_serumData.dynamasks_extra_active[IDfound] + : nullptr; + const uint16_t* frameDynaColorsExtra = + frameHasDynamicExtra ? 
g_serumData.dyna4cols_v2_extra[IDfound] + : nullptr; // create the extra res frame if (g_serumData.fheight_extra == 32) { pfr = mySerum.frame32; @@ -2174,16 +2209,13 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, else tl = tj * 2 * g_serumData.fwidth + ti * 2; - if ((g_serumData.backgroundIDs[IDfound][0] < - g_serumData.nbackgrounds) && - (frame[tl] == 0) && - (g_serumData.backgroundmask_extra[IDfound][tk] > 0)) { + if (hasBackground && (frame[tl] == 0) && + (frameBackgroundMaskExtra[tk] > 0)) { if (isdynapix[tk] == 0) { if (applySceneBackground) { pfr[tk] = sceneBackgroundFrame[tk]; } else if (!suppressFrameBackgroundImage) { - pfr[tk] = g_serumData.backgroundframes_v2_extra - [g_serumData.backgroundIDs[IDfound][0]][tk]; + pfr[tk] = frameBackgroundExtra[tk]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], true)) { pfr[tk] = @@ -2196,17 +2228,14 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } } } else { - uint8_t dynacouche = g_serumData.dynamasks_extra[IDfound][tk]; - if (!IsFrameExtraDynaActive(IDfound, tk)) { + if (!frameHasDynamicExtra || frameDynaExtraActive[tk] == 0) { if (isdynapix[tk] == 0) { if (blackOutStaticContent && - (g_serumData.backgroundIDs[IDfound][0] < - g_serumData.nbackgrounds) && - (frame[tl] > 0) && - (g_serumData.backgroundmask_extra[IDfound][tk] > 0)) { + hasBackground && (frame[tl] > 0) && + (frameBackgroundMaskExtra[tk] > 0)) { pfr[tk] = sceneBackgroundFrame[tk]; } else { - pfr[tk] = g_serumData.cframes_v2_extra[IDfound][tk]; + pfr[tk] = frameColorsExtra[tk]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], true)) { pfr[tk] = @@ -2217,18 +2246,17 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } } } else { + const uint8_t dynacouche = frameDynaExtra[tk]; if (frame[tl] > 0) { CheckDynaShadow(pfr, IDfound, dynacouche, isdynapix, ti, tj, g_serumData.fwidth_extra, g_serumData.fheight_extra, true); isdynapix[tk] = 1; - pfr[tk] = - g_serumData.dyna4cols_v2_extra 
- [IDfound][dynacouche * g_serumData.nocolors + frame[tl]]; + pfr[tk] = frameDynaColorsExtra[dynacouche * g_serumData.nocolors + + frame[tl]]; } else if (isdynapix[tk] == 0) - pfr[tk] = - g_serumData.dyna4cols_v2_extra - [IDfound][dynacouche * g_serumData.nocolors + frame[tl]]; + pfr[tk] = frameDynaColorsExtra[dynacouche * g_serumData.nocolors + + frame[tl]]; prot[tk * 2] = prot[tk * 2 + 1] = 0xffff; } } @@ -2239,9 +2267,11 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, void Colorize_Spritev1(uint8_t nosprite, uint16_t frx, uint16_t fry, uint16_t spx, uint16_t spy, uint16_t wid, uint16_t hei) { + if (!g_serumData.spritedescriptionso_opaque.hasData(nosprite)) return; + const uint8_t* spriteOpaque = g_serumData.spritedescriptionso_opaque[nosprite]; for (uint16_t tj = 0; tj < hei; tj++) { for (uint16_t ti = 0; ti < wid; ti++) { - if (IsSpriteOpaqueV1(nosprite, (tj + spy) * MAX_SPRITE_SIZE + ti + spx)) { + if (spriteOpaque[(tj + spy) * MAX_SPRITE_SIZE + ti + spx] > 0) { mySerum.frame[(fry + tj) * g_serumData.fwidth + frx + ti] = g_serumData .spritedescriptionsc[nosprite] @@ -2257,6 +2287,15 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, uint16_t *pfr, *prot; uint16_t* prt; uint32_t* cshft; + if (!g_serumData.spriteoriginal_opaque.hasData(nosprite) || + !g_serumData.dynaspritemasks_active.hasData(nosprite) || + !g_serumData.dynaspritemasks.hasData(nosprite)) { + return; + } + const uint8_t* spriteOpaque = g_serumData.spriteoriginal_opaque[nosprite]; + const uint8_t* spriteDyna = g_serumData.dynaspritemasks[nosprite]; + const uint8_t* spriteDynaActive = + g_serumData.dynaspritemasks_active[nosprite]; if (((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && g_serumData.fheight == 32) || ((mySerum.flags & FLAG_RETURNED_64P_FRAME_OK) && @@ -2276,9 +2315,8 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, for (uint16_t ti = 0; ti < wid; ti++) { uint16_t tk = (fry + tj) * g_serumData.fwidth + frx + ti; uint32_t tl = 
(tj + spy) * MAX_SPRITE_WIDTH + ti + spx; - if (IsSpriteOpaqueV2(nosprite, tl)) { - uint8_t dynacouche = g_serumData.dynaspritemasks[nosprite][tl]; - if (!IsSpriteDynaActive(nosprite, tl)) { + if (spriteOpaque[tl] > 0) { + if (spriteDynaActive[tl] == 0) { pfr[tk] = g_serumData.spritecolored[nosprite][tl]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], false)) @@ -2286,6 +2324,7 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, (prot[tk * 2 + 1] + cshft[prot[tk * 2]]) % prt[prot[tk * 2] * MAX_LENGTH_COLOR_ROTATION]]; } else { + const uint8_t dynacouche = spriteDyna[tl]; pfr[tk] = g_serumData.dynasprite4cols[nosprite] [dynacouche * g_serumData.nocolors + @@ -2304,6 +2343,17 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, g_serumData.fheight_extra == 32) || ((mySerum.flags & FLAG_RETURNED_64P_FRAME_OK) && g_serumData.fheight_extra == 64)) { + if (!g_serumData.spritemask_extra_opaque.hasData(nosprite) || + !g_serumData.dynaspritemasks_extra_active.hasData(nosprite) || + !g_serumData.dynaspritemasks_extra.hasData(nosprite)) { + return; + } + const uint8_t* spriteExtraOpaque = + g_serumData.spritemask_extra_opaque[nosprite]; + const uint8_t* spriteExtraDyna = + g_serumData.dynaspritemasks_extra[nosprite]; + const uint8_t* spriteExtraDynaActive = + g_serumData.dynaspritemasks_extra_active[nosprite]; uint16_t thei, twid, tfrx, tfry, tspy, tspx; if (g_serumData.fheight_extra == 32) { pfr = mySerum.frame32; @@ -2331,14 +2381,9 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, for (uint16_t tj = 0; tj < thei; tj++) { for (uint16_t ti = 0; ti < twid; ti++) { uint16_t tk = (tfry + tj) * g_serumData.fwidth_extra + tfrx + ti; - if (IsSpriteExtraOpaqueV2(nosprite, - (tj + tspy) * MAX_SPRITE_WIDTH + ti + tspx)) { - uint8_t dynacouche = - g_serumData.dynaspritemasks_extra[nosprite] - [(tj + tspy) * MAX_SPRITE_WIDTH + - ti + tspx]; - if (!IsSpriteExtraDynaActive( - nosprite, (tj + 
tspy) * MAX_SPRITE_WIDTH + ti + tspx)) { + const uint32_t spritePixel = (tj + tspy) * MAX_SPRITE_WIDTH + ti + tspx; + if (spriteExtraOpaque[spritePixel] > 0) { + if (spriteExtraDynaActive[spritePixel] == 0) { pfr[tk] = g_serumData.spritecolored_extra[nosprite] [(tj + tspy) * MAX_SPRITE_WIDTH + @@ -2349,6 +2394,7 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, (prot[tk * 2 + 1] + cshft[prot[tk * 2]]) % prt[prot[tk * 2] * MAX_LENGTH_COLOR_ROTATION]]; } else { + const uint8_t dynacouche = spriteExtraDyna[spritePixel]; uint16_t tl; if (g_serumData.fheight_extra == 64) tl = (tj / 2 + fry) * g_serumData.fwidth + ti / 2 + frx; @@ -2799,6 +2845,11 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { nosprite, &nspr, frx, fry, spx, spy, wid, hei); if (((frameID < MAX_NUMBER_FRAMES) || isspr) && g_serumData.activeframes[lastfound][0] != 0) { + const bool profileNow = g_profileDynamicHotPaths; + std::chrono::steady_clock::time_point profStart; + if (profileNow) { + profStart = std::chrono::steady_clock::now(); + } if (!sceneIsLastBackgroundFrame) { Colorize_Framev2(frame, lastfound, false, false, suppressPlaceholderBackground); @@ -2809,7 +2860,17 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { (sceneOptionFlags & FLAG_SCENE_ONLY_DYNAMIC_CONTENT) == FLAG_SCENE_ONLY_DYNAMIC_CONTENT); } + if (profileNow) { + g_profileColorizeFrameV2Ns += + (uint64_t)std::chrono::duration_cast( + std::chrono::steady_clock::now() - profStart) + .count(); + } if (isspr) { + std::chrono::steady_clock::time_point spriteStart; + if (profileNow) { + spriteStart = std::chrono::steady_clock::now(); + } uint8_t ti = 0; while (ti < nspr) { Colorize_Spritev2( @@ -2818,6 +2879,29 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { isBackgroundSceneRequested ? 
lastFrameId : lastfound); ti++; } + if (profileNow) { + g_profileColorizeSpriteV2Ns += + (uint64_t)std::chrono::duration_cast( + std::chrono::steady_clock::now() - spriteStart) + .count(); + } + } + if (profileNow) { + ++g_profileColorizeCalls; + if ((g_profileColorizeCalls % 240u) == 0u) { + const double frameMs = + (double)g_profileColorizeFrameV2Ns / + (double)g_profileColorizeCalls / 1000000.0; + const double spriteMs = + (double)g_profileColorizeSpriteV2Ns / + (double)g_profileColorizeCalls / 1000000.0; + Log("Perf dynamic avg: Colorize_Framev2=%.3fms " + "Colorize_Spritev2=%.3fms over %u frames", + frameMs, spriteMs, (uint32_t)g_profileColorizeCalls); + if (g_profileSparseVectors) { + g_serumData.LogSparseVectorProfileSnapshot(); + } + } } bool allowParallelRotations = diff --git a/src/sparse-vector.h b/src/sparse-vector.h index bd2e671..adb3a9b 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -53,6 +54,13 @@ class SparseVector { mutable std::vector lastDecompressed; mutable std::vector secondDecompressed; mutable std::vector decodeScratch; + mutable bool forceDecodedReads = false; + mutable std::unordered_map> forcedDecoded; + const char* profileLabel = nullptr; + mutable uint64_t profileAccessCount = 0; + mutable uint64_t profileDecodeCount = 0; + mutable uint64_t profileCacheHitCount = 0; + mutable uint64_t profileDirectHitCount = 0; std::vector packedIds; std::vector packedOffsets; std::vector packedSizes; @@ -66,6 +74,22 @@ class SparseVector { static constexpr uint8_t kValuePackedMode2Bit = 2; static constexpr uint8_t kValuePackedMode4Bit = 4; + static bool isProfilingEnabled() { + static bool initialized = false; + static bool enabled = false; + if (!initialized) { + const char* value = std::getenv("SERUM_PROFILE_SPARSE_VECTORS"); + enabled = + value && value[0] != '\0' && + (strcmp(value, "1") == 0 || strcmp(value, "true") == 0 || + strcmp(value, "TRUE") == 0 
|| strcmp(value, "yes") == 0 || + strcmp(value, "YES") == 0 || strcmp(value, "on") == 0 || + strcmp(value, "ON") == 0); + initialized = true; + } + return enabled; + } + size_t rawByteSize() const { return elementSize * sizeof(T); } size_t legacyBitPackedByteSize() const { return 1 + ((elementSize + 7) / 8); } @@ -443,18 +467,40 @@ class SparseVector { } T *operator[](const uint32_t elementId) { + if (isProfilingEnabled()) { + ++profileAccessCount; + } if (useIndex) { if (elementId >= index.size()) return noData.data(); + if (isProfilingEnabled()) { + ++profileDirectHitCount; + } return index[elementId].data(); } else { + if (forceDecodedReads) { + auto cached = forcedDecoded.find(elementId); + if (cached != forcedDecoded.end()) { + if (isProfilingEnabled()) { + ++profileCacheHitCount; + } + return cached->second.data(); + } + return noData.data(); + } if (useCompression && elementId == lastAccessedId && !lastDecompressed.empty()) { + if (isProfilingEnabled()) { + ++profileCacheHitCount; + } return lastDecompressed.data(); } if (useCompression && elementId == secondAccessedId && !secondDecompressed.empty()) { std::swap(lastAccessedId, secondAccessedId); std::swap(lastDecompressed, secondDecompressed); + if (isProfilingEnabled()) { + ++profileCacheHitCount; + } return lastDecompressed.data(); } @@ -488,8 +534,14 @@ class SparseVector { if (!payload) return noData.data(); if (useCompression) { + if (isProfilingEnabled()) { + ++profileDecodeCount; + } // Cache hit only applies to decoded cache-backed payloads. if (elementId == lastAccessedId) { + if (isProfilingEnabled()) { + ++profileCacheHitCount; + } return lastDecompressed.data(); } @@ -515,6 +567,9 @@ class SparseVector { // Backward compatibility: older payloads may store raw bytes even if // this vector now defaults to compression. 
if (payloadSize == rawBytes) { + if (isProfilingEnabled()) { + ++profileDirectHitCount; + } return reinterpret_cast(const_cast(payload)); } return noData.data(); @@ -551,6 +606,9 @@ class SparseVector { return noData.data(); } + if (isProfilingEnabled()) { + ++profileDirectHitCount; + } return reinterpret_cast(const_cast(payload)); } } @@ -681,6 +739,8 @@ class SparseVector { lastDecompressed.clear(); secondDecompressed.clear(); decodeScratch.clear(); + forceDecodedReads = false; + forcedDecoded.clear(); } template @@ -743,6 +803,8 @@ class SparseVector { lastDecompressed.clear(); secondDecompressed.clear(); decodeScratch.clear(); + forceDecodedReads = false; + forcedDecoded.clear(); return; } @@ -767,6 +829,46 @@ class SparseVector { lastDecompressed.clear(); secondDecompressed.clear(); decodeScratch.clear(); + forceDecodedReads = false; + forcedDecoded.clear(); + } + + void clearForcedDecodedCache() { + forceDecodedReads = false; + forcedDecoded.clear(); + } + + void setProfileLabel(const char* label) { profileLabel = label; } + + const char* getProfileLabel() const { return profileLabel; } + + void consumeProfileCounters(uint64_t& accesses, uint64_t& decodes, + uint64_t& cacheHits, uint64_t& directHits) { + accesses = profileAccessCount; + decodes = profileDecodeCount; + cacheHits = profileCacheHitCount; + directHits = profileDirectHitCount; + profileAccessCount = 0; + profileDecodeCount = 0; + profileCacheHitCount = 0; + profileDirectHitCount = 0; + } + + void enableForcedDecodedReadsForIds(const std::vector &ids) { + forcedDecoded.clear(); + forcedDecoded.reserve(ids.size()); + forceDecodedReads = false; + for (uint32_t id : ids) { + if (!hasData(id)) { + continue; + } + T *decoded = (*this)[id]; + if (!decoded) { + continue; + } + forcedDecoded.emplace(id, std::vector(decoded, decoded + elementSize)); + } + forceDecodedReads = true; } friend class cereal::access; @@ -815,5 +917,7 @@ class SparseVector { lastDecompressed.clear(); secondDecompressed.clear(); 
decodeScratch.clear(); + forceDecodedReads = false; + forcedDecoded.clear(); } }; From 226534e5bdf0b3e29acb2bcf98411c0ac5debb1f Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 23:11:24 +0100 Subject: [PATCH 10/42] scene triplet keys --- AGENTS.md | 15 ++- src/SceneGenerator.cpp | 72 ++++++++--- src/SceneGenerator.h | 3 + src/SerumData.cpp | 14 +- src/SerumData.h | 12 +- src/serum-decode.cpp | 283 +++++++++++++++++++++++++++-------------- src/sparse-vector.h | 23 ++-- 7 files changed, 286 insertions(+), 136 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 2daecbc..e267168 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,6 +23,7 @@ Main global runtime state (in `serum-decode.cpp`): - Scene lookup acceleration: - `g_serumData.frameIsScene`: frame ID -> scene/non-scene marker. - `g_serumData.sceneFramesBySignature`: `(mask,shape,hash)` -> matching scene frame IDs. + - `g_serumData.sceneFrameIdByTriplet`: `(sceneId,group,frameIndex)` -> frame ID. ## SparseVector storage and compression `SparseVector` now supports both legacy map payloads and packed sparse blobs. @@ -114,6 +115,11 @@ Behavior: - scene search skips normal frames using `g_serumData.frameIsScene`. - Scene requests use signature lookup in `sceneFramesBySignature` for the current `(mask,shape,hash)`. +- Scene rendering can bypass generic scene identification when a direct triplet + entry exists in `sceneFrameIdByTriplet`. +- During scene playback, direct-triplet mode uses lightweight group progression + (`SceneGenerator::updateAndGetCurrentGroup`) and does not call + `generateFrame(...)` per tick unless fallback is needed. - Legacy same-frame behavior (`IDENTIFY_SAME_FRAME`) is preserved with full-frame CRC check. Return values: @@ -139,7 +145,11 @@ How it works: 4. For each loaded frame ID, if `(mask,shape,hashcodes[id])` signature is in scene signature set: - mark `frameIsScene[id] = 1` - add to `sceneFramesBySignature[signature]`. -5. 
Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. +5. For v6 (`concentrateFileVersion >= 6`), precompute direct scene frame IDs: + - generate each `(sceneId,group,frameIndex)` scene marker frame + - identify it once + - persist mapping in `sceneFrameIdByTriplet`. +6. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. Log line: - `Loaded X frames and Y rotation scene frames` @@ -184,6 +194,7 @@ Stored in v6: - Scene lookup acceleration: - `frameIsScene` - `sceneFramesBySignature` + - `sceneFrameIdByTriplet` - Sparse vectors in packed sparse layout. - Normalized sentinel vectors plus sidecar flag vectors for transparency and dynamic-zone activity. @@ -193,6 +204,7 @@ Backward compatibility: - v5 sparse vectors are deserialized with legacy sparse-vector layout and converted to packed representation after load. - For v5 loads, scene lookup vectors are rebuilt at startup. - For v6 loads, stored lookup vectors are reused unless scene data changed in this load cycle (for example CSV update), in which case lookup vectors are rebuilt. +- Direct scene-triplet preprocessing is only executed for v6. v6 snapshot policy: - Compatibility between unreleased v6 development snapshots is not required. @@ -214,6 +226,7 @@ v6 snapshot policy: ## Safety invariants - `frameIsScene.size()` must equal `nframes` before identification. - `sceneFramesBySignature` must correspond to current scene data and current loaded frame definitions. +- `sceneFrameIdByTriplet` (when present) must correspond to current scene data. - Any change to scene generation domain (`mask/shape/hash`), sparse-vector serialization layout, or cROMc schema requires updating this file. 
## How to validate after changes diff --git a/src/SceneGenerator.cpp b/src/SceneGenerator.cpp index 5514e21..e92b51b 100644 --- a/src/SceneGenerator.cpp +++ b/src/SceneGenerator.cpp @@ -209,6 +209,58 @@ bool SceneGenerator::getSceneInfo(uint16_t sceneId, uint16_t &frameCount, return true; } +bool SceneGenerator::getCurrentGroup(uint16_t sceneId, uint8_t &group) const { + auto it = std::find_if( + m_sceneData.begin(), m_sceneData.end(), + [sceneId](const SceneData &data) { return data.sceneId == sceneId; }); + if (it == m_sceneData.end()) { + group = 1; + return false; + } + group = it->currentGroup > 0 ? it->currentGroup : 1; + return true; +} + +bool SceneGenerator::updateAndGetCurrentGroup(uint16_t sceneId, + uint16_t frameIndex, + int requestedGroup, + uint8_t &group) { + auto it = std::find_if( + m_sceneData.begin(), m_sceneData.end(), + [sceneId](const SceneData &data) { return data.sceneId == sceneId; }); + if (it == m_sceneData.end()) { + group = 1; + return false; + } + + if (frameIndex >= it->frameCount) { + group = 1; + return false; + } + + if (frameIndex == 0) { + if (requestedGroup == -1) { + if (it->frameGroups > 1) { + if (it->random) { + it->currentGroup = rand() % it->frameGroups + 1; + } else { + it->currentGroup++; + if (it->currentGroup > it->frameGroups) it->currentGroup = 1; + } + } else { + it->currentGroup = 1; + } + } else { + it->currentGroup = (uint8_t)requestedGroup; + } + } else if (it->currentGroup == 0) { + it->currentGroup = 1; + } + + group = it->currentGroup > 0 ? 
it->currentGroup : 1; + return true; +} + bool SceneGenerator::getSceneEndHoldDurationMs(uint16_t sceneId, uint32_t &durationMs) const { auto holdIt = m_sceneEndHoldDurationMs.find(sceneId); @@ -269,21 +321,9 @@ uint16_t SceneGenerator::generateFrame(uint16_t sceneId, uint16_t frameIndex, } lastTime = now; - if (frameIndex == 0) { - if (group == -1) { - if (it->frameGroups > 1) { - if (it->random) { - it->currentGroup = rand() % it->frameGroups + 1; - } else { - it->currentGroup++; - if (it->currentGroup > it->frameGroups) it->currentGroup = 1; - } - } else { - it->currentGroup = 1; - } - } else { - it->currentGroup = (uint8_t)group; - } + uint8_t currentGroup = 1; + if (!updateAndGetCurrentGroup(sceneId, frameIndex, group, currentGroup)) { + return 0; } // Copy pre-rendered template @@ -293,7 +333,7 @@ uint16_t SceneGenerator::generateFrame(uint16_t sceneId, uint16_t frameIndex, std::string sceneIdStr = formatNumber(sceneId, NUMBER_WIDTH); renderString(buffer, sceneIdStr, NUM_X, SCENE_Y); - std::string groupStr = formatNumber(it->currentGroup, NUMBER_WIDTH); + std::string groupStr = formatNumber(currentGroup, NUMBER_WIDTH); renderString(buffer, groupStr, NUM_X, GROUP_Y); std::string frameStr = formatNumber(frameIndex + 1, NUMBER_WIDTH); diff --git a/src/SceneGenerator.h b/src/SceneGenerator.h index 6db0c82..7d6405d 100644 --- a/src/SceneGenerator.h +++ b/src/SceneGenerator.h @@ -54,6 +54,9 @@ class SceneGenerator { uint16_t &durationPerFrame, bool &interruptable, bool &startImmediately, uint8_t &repeat, uint8_t &sceneOptions) const; + bool updateAndGetCurrentGroup(uint16_t sceneId, uint16_t frameIndex, + int requestedGroup, uint8_t &group); + bool getCurrentGroup(uint16_t sceneId, uint8_t &group) const; bool getSceneEndHoldDurationMs(uint16_t sceneId, uint32_t &durationMs) const; bool getAutoStartSceneInfo(uint16_t &frameCount, uint16_t &durationPerFrame, bool &interruptable, bool &startImmediately, diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 
a7023a5..4a6704c 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -80,7 +80,8 @@ SerumData::SerumData() dynaspritemasks_extra.setProfileLabel("dynaspritemasks_extra"); dynaspritemasks_extra_active.setProfileLabel("dynaspritemasks_extra_active"); sceneGenerator = new SceneGenerator(); - if (is_real_machine()) packingStorage.assign(384u * 1024u * 1024u, 0xA5); + if (is_real_machine()) + m_packingSidecarsStorage.assign(384u * 1024u * 1024u, 0xA5); } SerumData::~SerumData() {} @@ -146,6 +147,7 @@ void SerumData::Clear() { frameHasDynamicExtra.clear(); frameIsScene.clear(); sceneFramesBySignature.clear(); + sceneFrameIdByTriplet.clear(); } void SerumData::BuildPackingSidecarsAndNormalize() { @@ -372,24 +374,26 @@ void SerumData::PrepareRuntimeDynamicHotCache() { dynaspritemasks_extra.enableForcedDecodedReadsForIds(spriteIds); dynaspritemasks_extra_active.enableForcedDecodedReadsForIds(spriteIds); - Log("Prepared runtime dynamic hot cache: %u frame masks, %u extra frame masks," + Log("Prepared runtime dynamic hot cache: %u frame masks, %u extra frame " + "masks," " %u sprite masks", (uint32_t)frameIds.size(), (uint32_t)extraFrameIds.size(), (uint32_t)spriteIds.size()); } void SerumData::LogSparseVectorProfileSnapshot() { - auto logCounters = [&](auto& vec) { + auto logCounters = [&](auto &vec) { uint64_t accesses = 0; uint64_t decodes = 0; uint64_t cacheHits = 0; uint64_t directHits = 0; vec.consumeProfileCounters(accesses, decodes, cacheHits, directHits); - const char* label = vec.getProfileLabel(); + const char *label = vec.getProfileLabel(); if (!label || accesses == 0) { return; } - Log("SparseProfile %s: accesses=%llu decodes=%llu cacheHits=%llu direct=%llu", + Log("SparseProfile %s: accesses=%llu decodes=%llu cacheHits=%llu " + "direct=%llu", label, (unsigned long long)accesses, (unsigned long long)decodes, (unsigned long long)cacheHits, (unsigned long long)directHits); }; diff --git a/src/SerumData.h b/src/SerumData.h index bfdd996..6730f66 100644 --- 
a/src/SerumData.h +++ b/src/SerumData.h @@ -76,7 +76,6 @@ class SerumData { uint32_t nsprites; uint16_t nbackgrounds; bool is256x64; - std::vector packingStorage; // Vector data SparseVector hashcodes; @@ -140,6 +139,7 @@ class SerumData { std::vector frameHasDynamicExtra; std::vector frameIsScene; std::unordered_map> sceneFramesBySignature; + std::unordered_map sceneFrameIdByTriplet; SceneGenerator *sceneGenerator; @@ -151,6 +151,7 @@ class SerumData { uint8_t m_loadFlags = 0; bool m_packingSidecarsNormalized = false; + std::vector m_packingSidecarsStorage; friend class cereal::access; @@ -178,19 +179,20 @@ class SerumData { ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, - dynaspritemasks_extra_active, frameHasDynamic, - frameHasDynamicExtra); + dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, + sceneFrameIdByTriplet); } } else { if (concentrateFileVersion >= 6) { ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, - dynaspritemasks_extra_active, frameHasDynamic, - frameHasDynamicExtra); + dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, + sceneFrameIdByTriplet); } else { frameIsScene.clear(); sceneFramesBySignature.clear(); + sceneFrameIdByTriplet.clear(); spriteoriginal_opaque.clear(); spritemask_extra_opaque.clear(); spritedescriptionso_opaque.clear(); diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index a9b7da5..20baf6c 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -146,6 +146,8 @@ uint8_t monochromePaletteV2Length = 0; uint32_t Serum_RenderScene(void); static void BuildFrameLookupVectors(void); static uint64_t MakeFrameSignature(uint8_t mask, uint8_t shape, uint32_t hash); +static uint64_t MakeSceneTripletKey(uint16_t sceneId, 
uint8_t group, + uint16_t frameIndex); static void InitFrameLookupRuntimeStateFromStoredData(void); static void StopV2ColorRotations(void); static bool CaptureMonochromePaletteFromFrameV2(uint32_t frameId); @@ -154,6 +156,7 @@ static void ConfigureSceneEndHold(uint16_t sceneId, bool interruptable, uint8_t sceneOptions); static void ForceNormalFrameRefreshAfterSceneEnd(void); static bool ValidateLoadedGeometry(bool isV2, const char* sourceTag); +uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested); struct SceneResumeState { uint16_t nextFrame = 0; @@ -170,6 +173,9 @@ uint32_t lastfound_normal = 0; // last frame ID for non-scene frames uint32_t lastfound_scene = 0; // last frame ID for scene frames uint32_t lastframe_full_crc_normal = 0; uint32_t lastframe_full_crc_scene = 0; +bool first_match_normal = true; +bool first_match_scene = true; +uint32_t forced_scene_frame_id = IDENTIFY_NO_FRAME; uint32_t lastframe_found = GetMonotonicTimeMs(); uint32_t lastTriggerID = 0xffffffff; // last trigger ID found uint32_t lasttriggerTimestamp = 0; @@ -370,6 +376,9 @@ void Serum_free(void) { lastfound_scene = 0; lastframe_full_crc_normal = 0; lastframe_full_crc_scene = 0; + first_match_normal = true; + first_match_scene = true; + forced_scene_frame_id = IDENTIFY_NO_FRAME; sceneEndHoldUntilMs = 0; sceneEndHoldDurationMs = 0; monochromeMode = false; @@ -1376,6 +1385,7 @@ static void BuildFrameLookupVectors(void) { uint32_t numSceneFrames = 0; g_serumData.frameIsScene.clear(); g_serumData.sceneFramesBySignature.clear(); + g_serumData.sceneFrameIdByTriplet.clear(); if (g_serumData.nframes == 0) return; g_serumData.frameIsScene.resize(g_serumData.nframes, 0); @@ -1439,6 +1449,56 @@ static void BuildFrameLookupVectors(void) { numSceneFrames++; } } + + if (g_serumData.concentrateFileVersion >= 6) { + // Build direct lookup table: (sceneId, group, frameIndex) -> frameId. 
+ // Keep this as a preprocessing step only; runtime scene rendering can + // use it to bypass generic scene identification. + const uint32_t saved_lastfound = lastfound; + const uint32_t saved_lastfound_scene = lastfound_scene; + const uint32_t saved_lastframe_full_crc_scene = lastframe_full_crc_scene; + const bool saved_first_match_scene = first_match_scene; + const uint32_t saved_forced_scene_frame_id = forced_scene_frame_id; + + first_match_scene = true; + lastfound_scene = 0; + lastframe_full_crc_scene = 0; + forced_scene_frame_id = IDENTIFY_NO_FRAME; + + for (const auto& scene : scenes) { + const int groups = scene.frameGroups > 0 ? scene.frameGroups : 1; + for (int group = 1; group <= groups; ++group) { + for (uint16_t frameIndex = 0; frameIndex < scene.frameCount; + ++frameIndex) { + if (g_serumData.sceneGenerator->generateFrame( + scene.sceneId, frameIndex, generatedSceneFrame, group, + true) != 0xffff) { + continue; + } + const uint32_t identified = + Identify_Frame(generatedSceneFrame, true); + if (identified == IDENTIFY_NO_FRAME) { + continue; + } + const uint32_t frameId = (identified == IDENTIFY_SAME_FRAME) + ? 
lastfound_scene + : identified; + if (frameId >= g_serumData.nframes) { + continue; + } + g_serumData.sceneFrameIdByTriplet[MakeSceneTripletKey( + scene.sceneId, static_cast(group), frameIndex)] = + frameId; + } + } + } + + lastfound = saved_lastfound; + lastfound_scene = saved_lastfound_scene; + lastframe_full_crc_scene = saved_lastframe_full_crc_scene; + first_match_scene = saved_first_match_scene; + forced_scene_frame_id = saved_forced_scene_frame_id; + } } Log("Loaded %d frames and %d rotation scene frames", @@ -1465,6 +1525,12 @@ static uint64_t MakeFrameSignature(uint8_t mask, uint8_t shape, uint32_t hash) { return (uint64_t(mask) << 40) | (uint64_t(shape) << 32) | hash; } +static uint64_t MakeSceneTripletKey(uint16_t sceneId, uint8_t group, + uint16_t frameIndex) { + return (uint64_t(sceneId) << 24) | (uint64_t(group) << 16) | + uint64_t(frameIndex); +} + static void InitFrameLookupRuntimeStateFromStoredData(void) { if (g_serumData.frameIsScene.size() != g_serumData.nframes) { BuildFrameLookupVectors(); @@ -1496,12 +1562,23 @@ static void InitFrameLookupRuntimeStateFromStoredData(void) { } uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { - // Usually the first frame has the ID 0, but lastfound is also initialized - // with 0. So we need a helper to be able to detect frame 0 as new. - static bool first_match_normal = true; - static bool first_match_scene = true; - if (!cromloaded) return IDENTIFY_NO_FRAME; + uint32_t tj = sceneFrameRequested + ? lastfound_scene + : lastfound_normal; // stream-local search start + const uint32_t pixels = g_serumData.is256x64 + ? 
(256 * 64) + : (g_serumData.fwidth * g_serumData.fheight); + if (sceneFrameRequested && forced_scene_frame_id < g_serumData.nframes && + g_serumData.frameIsScene.size() == g_serumData.nframes && + g_serumData.frameIsScene[forced_scene_frame_id]) { + lastfound_scene = forced_scene_frame_id; + lastfound = forced_scene_frame_id; + lastframe_full_crc_scene = crc32_fast(frame, pixels); + first_match_scene = false; + return forced_scene_frame_id; + } + memset(framechecked, false, g_serumData.nframes); uint32_t& lastfound_stream = sceneFrameRequested ? lastfound_scene : lastfound_normal; @@ -1510,11 +1587,6 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint32_t& lastframe_full_crc = sceneFrameRequested ? lastframe_full_crc_scene : lastframe_full_crc_normal; - uint32_t tj = - lastfound_stream; // we start from the last found frame in this stream - const uint32_t pixels = g_serumData.is256x64 - ? (256 * 64) - : (g_serumData.fwidth * g_serumData.fheight); do { if (g_serumData.frameIsScene[tj] != (sceneFrameRequested ? 1 : 0)) { if (++tj >= g_serumData.nframes) tj = 0; @@ -1907,13 +1979,12 @@ void Colorize_Framev1(uint8_t* frame, uint32_t IDfound) { uint16_t tj, ti; // Generate the colorized version of a frame once identified in the crom // frames - const bool frameHasDynamic = - IDfound < g_serumData.frameHasDynamic.size() && - g_serumData.frameHasDynamic[IDfound] > 0; - const uint8_t* frameDyna = frameHasDynamic ? g_serumData.dynamasks[IDfound] : nullptr; - const uint8_t* frameDynaActive = frameHasDynamic - ? g_serumData.dynamasks_active[IDfound] - : nullptr; + const bool frameHasDynamic = IDfound < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[IDfound] > 0; + const uint8_t* frameDyna = + frameHasDynamic ? g_serumData.dynamasks[IDfound] : nullptr; + const uint8_t* frameDynaActive = + frameHasDynamic ? 
g_serumData.dynamasks_active[IDfound] : nullptr; for (tj = 0; tj < g_serumData.fheight; tj++) { for (ti = 0; ti < g_serumData.fwidth; ti++) { uint16_t tk = tj * g_serumData.fwidth + ti; @@ -2065,13 +2136,11 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, const uint16_t backgroundId = g_serumData.backgroundIDs[IDfound][0]; const bool hasBackground = backgroundId < g_serumData.nbackgrounds; const uint8_t* frameBackgroundMask = g_serumData.backgroundmask[IDfound]; - const uint16_t* frameBackground = hasBackground - ? g_serumData.backgroundframes_v2[backgroundId] - : nullptr; + const uint16_t* frameBackground = + hasBackground ? g_serumData.backgroundframes_v2[backgroundId] : nullptr; const uint16_t* frameColors = g_serumData.cframes_v2[IDfound]; - const bool frameHasDynamic = - IDfound < g_serumData.frameHasDynamic.size() && - g_serumData.frameHasDynamic[IDfound] > 0; + const bool frameHasDynamic = IDfound < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[IDfound] > 0; const uint8_t* frameDyna = frameHasDynamic ? 
g_serumData.dynamasks[IDfound] : nullptr; const uint8_t* frameDynaActive = @@ -2103,7 +2172,8 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, for (tj = 0; tj < g_serumData.fheight; tj++) { for (ti = 0; ti < g_serumData.fwidth; ti++) { uint16_t tk = tj * g_serumData.fwidth + ti; - if (hasBackground && (frame[tk] == 0) && (frameBackgroundMask[tk] > 0)) { + if (hasBackground && (frame[tk] == 0) && + (frameBackgroundMask[tk] > 0)) { if (isdynapix[tk] == 0) { if (applySceneBackground) { pfr[tk] = sceneBackgroundFrame[tk]; @@ -2122,8 +2192,7 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } else { if (!frameHasDynamic || frameDynaActive[tk] == 0) { if (isdynapix[tk] == 0) { - if (blackOutStaticContent && - hasBackground && (frame[tk] > 0) && + if (blackOutStaticContent && hasBackground && (frame[tk] > 0) && (frameBackgroundMask[tk] > 0)) { pfr[tk] = sceneBackgroundFrame[tk]; } else { @@ -2168,12 +2237,11 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, const bool frameHasDynamicExtra = IDfound < g_serumData.frameHasDynamicExtra.size() && g_serumData.frameHasDynamicExtra[IDfound] > 0; - const uint8_t* frameDynaExtra = frameHasDynamicExtra - ? g_serumData.dynamasks_extra[IDfound] - : nullptr; - const uint8_t* frameDynaExtraActive = frameHasDynamicExtra - ? g_serumData.dynamasks_extra_active[IDfound] - : nullptr; + const uint8_t* frameDynaExtra = + frameHasDynamicExtra ? g_serumData.dynamasks_extra[IDfound] : nullptr; + const uint8_t* frameDynaExtraActive = + frameHasDynamicExtra ? g_serumData.dynamasks_extra_active[IDfound] + : nullptr; const uint16_t* frameDynaColorsExtra = frameHasDynamicExtra ? 
g_serumData.dyna4cols_v2_extra[IDfound] : nullptr; @@ -2230,8 +2298,7 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } else { if (!frameHasDynamicExtra || frameDynaExtraActive[tk] == 0) { if (isdynapix[tk] == 0) { - if (blackOutStaticContent && - hasBackground && (frame[tl] > 0) && + if (blackOutStaticContent && hasBackground && (frame[tl] > 0) && (frameBackgroundMaskExtra[tk] > 0)) { pfr[tk] = sceneBackgroundFrame[tk]; } else { @@ -2268,7 +2335,8 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, void Colorize_Spritev1(uint8_t nosprite, uint16_t frx, uint16_t fry, uint16_t spx, uint16_t spy, uint16_t wid, uint16_t hei) { if (!g_serumData.spritedescriptionso_opaque.hasData(nosprite)) return; - const uint8_t* spriteOpaque = g_serumData.spritedescriptionso_opaque[nosprite]; + const uint8_t* spriteOpaque = + g_serumData.spritedescriptionso_opaque[nosprite]; for (uint16_t tj = 0; tj < hei; tj++) { for (uint16_t ti = 0; ti < wid; ti++) { if (spriteOpaque[(tj + spy) * MAX_SPRITE_SIZE + ti + spx] > 0) { @@ -2889,12 +2957,10 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { if (profileNow) { ++g_profileColorizeCalls; if ((g_profileColorizeCalls % 240u) == 0u) { - const double frameMs = - (double)g_profileColorizeFrameV2Ns / - (double)g_profileColorizeCalls / 1000000.0; - const double spriteMs = - (double)g_profileColorizeSpriteV2Ns / - (double)g_profileColorizeCalls / 1000000.0; + const double frameMs = (double)g_profileColorizeFrameV2Ns / + (double)g_profileColorizeCalls / 1000000.0; + const double spriteMs = (double)g_profileColorizeSpriteV2Ns / + (double)g_profileColorizeCalls / 1000000.0; Log("Perf dynamic avg: Colorize_Framev2=%.3fms " "Colorize_Spritev2=%.3fms over %u frames", frameMs, spriteMs, (uint32_t)g_profileColorizeCalls); @@ -3156,77 +3222,100 @@ uint32_t Serum_RenderScene(void) { return FLAG_RETURNED_V2_SCENE; } - uint16_t result = g_serumData.sceneGenerator->generateFrame( - lastTriggerID, 
sceneCurrentFrame, sceneFrame); - // if result is 0xffff, the frame was generated and we can go - if (0xffff == result) { + bool renderedFromDirectTriplet = false; + uint8_t currentGroup = 1; + bool hasGroup = g_serumData.sceneGenerator->updateAndGetCurrentGroup( + static_cast(lastTriggerID), sceneCurrentFrame, -1, + currentGroup); + if (hasGroup && !g_serumData.sceneFrameIdByTriplet.empty()) { + auto it = g_serumData.sceneFrameIdByTriplet.find( + MakeSceneTripletKey(static_cast(lastTriggerID), + currentGroup, sceneCurrentFrame)); + if (it != g_serumData.sceneFrameIdByTriplet.end() && + it->second < g_serumData.nframes) { + memset(sceneFrame, 0, sizeof(sceneFrame)); + mySerum.rotationtimer = sceneDurationPerFrame; + forced_scene_frame_id = it->second; + Serum_ColorizeWithMetadatav2(sceneFrame, true); + forced_scene_frame_id = IDENTIFY_NO_FRAME; + renderedFromDirectTriplet = true; + } + } + + if (!renderedFromDirectTriplet) { + uint16_t result = g_serumData.sceneGenerator->generateFrame( + lastTriggerID, sceneCurrentFrame, sceneFrame, + hasGroup ? 
currentGroup : -1); + if (result > 0 && result < 0xffff) { + // frame not ready yet, return the time to wait + mySerum.rotationtimer = result; + return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; + } + if (result != 0xffff) { + sceneFrameCount = 0; // error generating scene frame, stop the scene + mySerum.rotationtimer = 0; + ForceNormalFrameRefreshAfterSceneEnd(); + return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | + FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; + } mySerum.rotationtimer = sceneDurationPerFrame; Serum_ColorizeWithMetadatav2(sceneFrame, true); - sceneCurrentFrame++; - if (sceneCurrentFrame >= sceneFrameCount && sceneRepeatCount > 0) { - if (sceneRepeatCount == 1) { - sceneCurrentFrame = 0; // loop - } else { - sceneCurrentFrame = 0; // repeat the scene - if (--sceneRepeatCount <= 1) { - sceneRepeatCount = 0; // no more repeat - } + } + + sceneCurrentFrame++; + if (sceneCurrentFrame >= sceneFrameCount && sceneRepeatCount > 0) { + if (sceneRepeatCount == 1) { + sceneCurrentFrame = 0; // loop + } else { + sceneCurrentFrame = 0; // repeat the scene + if (--sceneRepeatCount <= 1) { + sceneRepeatCount = 0; // no more repeat } } + } - if (sceneCurrentFrame >= sceneFrameCount) { - if (sceneEndHoldDurationMs > 0) { - sceneEndHoldUntilMs = now + sceneEndHoldDurationMs; - mySerum.rotationtimer = sceneEndHoldDurationMs; - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_SCENE; - } + if (sceneCurrentFrame >= sceneFrameCount) { + if (sceneEndHoldDurationMs > 0) { + sceneEndHoldUntilMs = now + sceneEndHoldDurationMs; + mySerum.rotationtimer = sceneEndHoldDurationMs; + return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_SCENE; + } - sceneFrameCount = 0; // scene ended - mySerum.rotationtimer = 0; - ForceNormalFrameRefreshAfterSceneEnd(); + sceneFrameCount = 0; // scene ended + mySerum.rotationtimer = 0; + ForceNormalFrameRefreshAfterSceneEnd(); + + switch (sceneOptionFlags) { + case FLAG_SCENE_BLACK_WHEN_FINISHED: + 
if (mySerum.frame32) memset(mySerum.frame32, 0, 32 * mySerum.width32); + if (mySerum.frame64) memset(mySerum.frame64, 0, 64 * mySerum.width64); + break; - switch (sceneOptionFlags) { - case FLAG_SCENE_BLACK_WHEN_FINISHED: + case FLAG_SCENE_SHOW_PREVIOUS_FRAME_WHEN_FINISHED: + if (lastfound < MAX_NUMBER_FRAMES && + g_serumData.activeframes[lastfound][0] != 0) { + Serum_ColorizeWithMetadatav2(lastFrame); + } else { if (mySerum.frame32) memset(mySerum.frame32, 0, 32 * mySerum.width32); if (mySerum.frame64) memset(mySerum.frame64, 0, 64 * mySerum.width64); - break; - - case FLAG_SCENE_SHOW_PREVIOUS_FRAME_WHEN_FINISHED: - if (lastfound < MAX_NUMBER_FRAMES && - g_serumData.activeframes[lastfound][0] != 0) { - Serum_ColorizeWithMetadatav2(lastFrame); - } else { - if (mySerum.frame32) - memset(mySerum.frame32, 0, 32 * mySerum.width32); - if (mySerum.frame64) - memset(mySerum.frame64, 0, 64 * mySerum.width64); - } - break; + } + break; - case 0: // keep the last frame of the scene - default: - if (sceneEndHoldDurationMs > 0 && !sceneInterruptable) { - // autoStart+flag0 for non-interruptable scene means timed - // end-hold. - break; - } - if (sceneOptionFlags & FLAG_SCENE_AS_BACKGROUND) { - sceneIsLastBackgroundFrame = true; - } + case 0: // keep the last frame of the scene + default: + if (sceneEndHoldDurationMs > 0 && !sceneInterruptable) { + // autoStart+flag0 for non-interruptable scene means timed end-hold. 
break; - } + } + if (sceneOptionFlags & FLAG_SCENE_AS_BACKGROUND) { + sceneIsLastBackgroundFrame = true; + } + break; } - } else if (result > 0) { - // frame not ready yet, return the time to wait - mySerum.rotationtimer = result; - return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; - } else { - sceneFrameCount = 0; // error generating scene frame, stop the scene - mySerum.rotationtimer = 0; - ForceNormalFrameRefreshAfterSceneEnd(); } + return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; // scene frame, so we consider both frames diff --git a/src/sparse-vector.h b/src/sparse-vector.h index adb3a9b..8845979 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -56,7 +56,7 @@ class SparseVector { mutable std::vector decodeScratch; mutable bool forceDecodedReads = false; mutable std::unordered_map> forcedDecoded; - const char* profileLabel = nullptr; + const char *profileLabel = nullptr; mutable uint64_t profileAccessCount = 0; mutable uint64_t profileDecodeCount = 0; mutable uint64_t profileCacheHitCount = 0; @@ -78,13 +78,12 @@ class SparseVector { static bool initialized = false; static bool enabled = false; if (!initialized) { - const char* value = std::getenv("SERUM_PROFILE_SPARSE_VECTORS"); - enabled = - value && value[0] != '\0' && - (strcmp(value, "1") == 0 || strcmp(value, "true") == 0 || - strcmp(value, "TRUE") == 0 || strcmp(value, "yes") == 0 || - strcmp(value, "YES") == 0 || strcmp(value, "on") == 0 || - strcmp(value, "ON") == 0); + const char *value = std::getenv("SERUM_PROFILE_SPARSE_VECTORS"); + enabled = value && value[0] != '\0' && + (strcmp(value, "1") == 0 || strcmp(value, "true") == 0 || + strcmp(value, "TRUE") == 0 || strcmp(value, "yes") == 0 || + strcmp(value, "YES") == 0 || strcmp(value, "on") == 0 || + strcmp(value, "ON") == 0); initialized = true; } return enabled; @@ -838,12 +837,12 @@ class SparseVector { forcedDecoded.clear(); } - void 
setProfileLabel(const char* label) { profileLabel = label; } + void setProfileLabel(const char *label) { profileLabel = label; } - const char* getProfileLabel() const { return profileLabel; } + const char *getProfileLabel() const { return profileLabel; } - void consumeProfileCounters(uint64_t& accesses, uint64_t& decodes, - uint64_t& cacheHits, uint64_t& directHits) { + void consumeProfileCounters(uint64_t &accesses, uint64_t &decodes, + uint64_t &cacheHits, uint64_t &directHits) { accesses = profileAccessCount; decodes = profileDecodeCount; cacheHits = profileCacheHitCount; From e2d38c7385c24ca1990a285c42346dd9a0ab9661 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 23:20:02 +0100 Subject: [PATCH 11/42] removed obsolete code --- src/serum-decode.cpp | 55 ++++++++++++++++++++++++++------------------ src/sparse-vector.h | 2 ++ 2 files changed, 35 insertions(+), 22 deletions(-) diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 20baf6c..8186b06 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -175,7 +175,6 @@ uint32_t lastframe_full_crc_normal = 0; uint32_t lastframe_full_crc_scene = 0; bool first_match_normal = true; bool first_match_scene = true; -uint32_t forced_scene_frame_id = IDENTIFY_NO_FRAME; uint32_t lastframe_found = GetMonotonicTimeMs(); uint32_t lastTriggerID = 0xffffffff; // last trigger ID found uint32_t lasttriggerTimestamp = 0; @@ -378,7 +377,6 @@ void Serum_free(void) { lastframe_full_crc_scene = 0; first_match_normal = true; first_match_scene = true; - forced_scene_frame_id = IDENTIFY_NO_FRAME; sceneEndHoldUntilMs = 0; sceneEndHoldDurationMs = 0; monochromeMode = false; @@ -1458,12 +1456,10 @@ static void BuildFrameLookupVectors(void) { const uint32_t saved_lastfound_scene = lastfound_scene; const uint32_t saved_lastframe_full_crc_scene = lastframe_full_crc_scene; const bool saved_first_match_scene = first_match_scene; - const uint32_t saved_forced_scene_frame_id = forced_scene_frame_id; 
first_match_scene = true; lastfound_scene = 0; lastframe_full_crc_scene = 0; - forced_scene_frame_id = IDENTIFY_NO_FRAME; for (const auto& scene : scenes) { const int groups = scene.frameGroups > 0 ? scene.frameGroups : 1; @@ -1497,7 +1493,6 @@ static void BuildFrameLookupVectors(void) { lastfound_scene = saved_lastfound_scene; lastframe_full_crc_scene = saved_lastframe_full_crc_scene; first_match_scene = saved_first_match_scene; - forced_scene_frame_id = saved_forced_scene_frame_id; } } @@ -1569,16 +1564,6 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { const uint32_t pixels = g_serumData.is256x64 ? (256 * 64) : (g_serumData.fwidth * g_serumData.fheight); - if (sceneFrameRequested && forced_scene_frame_id < g_serumData.nframes && - g_serumData.frameIsScene.size() == g_serumData.nframes && - g_serumData.frameIsScene[forced_scene_frame_id]) { - lastfound_scene = forced_scene_frame_id; - lastfound = forced_scene_frame_id; - lastframe_full_crc_scene = crc32_fast(frame, pixels); - first_match_scene = false; - return forced_scene_frame_id; - } - memset(framechecked, false, g_serumData.nframes); uint32_t& lastfound_stream = sceneFrameRequested ? lastfound_scene : lastfound_normal; @@ -1829,6 +1814,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, uint8_t* pquelsprites, uint8_t* nspr, uint16_t* pfrx, uint16_t* pfry, uint16_t* pspx, uint16_t* pspy, uint16_t* pwid, uint16_t* phei) { + // TODO(perf #4): add a cheap prefilter/early-out before full detection-area + // matching to reduce sprite scan cost on frames without sprites. uint8_t ti = 0; uint32_t mdword; *nspr = 0; @@ -2028,6 +2015,8 @@ bool CheckExtraFrameAvailable(uint32_t frID) { bool ColorInRotation(uint32_t IDfound, uint16_t col, uint16_t* norot, uint16_t* posinrot, bool isextra) { + // TODO(perf #1): replace this per-pixel linear scan with a precomputed + // O(1) color->rotation lookup table persisted in v6 cROMc. 
uint16_t* pcol = NULL; if (isextra) pcol = g_serumData.colorrotations_v2_extra[IDfound]; @@ -2052,6 +2041,8 @@ bool ColorInRotation(uint32_t IDfound, uint16_t col, uint16_t* norot, void CheckDynaShadow(uint16_t* pfr, uint32_t nofr, uint8_t dynacouche, uint8_t* isdynapix, uint16_t fx, uint16_t fy, uint32_t fw, uint32_t fh, bool isextra) { + // TODO(perf #3): precompute active neighbor offsets per dynacouche to reduce + // branch-heavy checks in this hot path. uint8_t dsdir; if (isextra) dsdir = g_serumData.dynashadowsdir_extra[nofr][dynacouche]; @@ -2737,8 +2728,8 @@ static void ForceNormalFrameRefreshAfterSceneEnd(void) { lastframe_full_crc_normal = 0xffffffff; } -SERUM_API uint32_t -Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { +static uint32_t Serum_ColorizeWithMetadatav2Internal( + uint8_t* frame, bool sceneFrameRequested, uint32_t knownFrameId) { // return IDENTIFY_NO_FRAME if no new frame detected // return 0 if new frame with no rotation detected // return > 0 if new frame with rotations detected, the value is the delay @@ -2746,8 +2737,23 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { mySerum.triggerID = 0xffffffff; mySerum.frameID = IDENTIFY_NO_FRAME; - // Let's first identify the incoming frame among the ones we have in the crom - uint32_t frameID = Identify_Frame(frame, sceneFrameRequested); + // Identify frame unless caller already resolved a concrete frame ID. 
+ uint32_t frameID = IDENTIFY_NO_FRAME; + if (knownFrameId < g_serumData.nframes) { + frameID = knownFrameId; + lastfound = knownFrameId; + if (sceneFrameRequested) { + lastfound_scene = knownFrameId; + first_match_scene = false; + lastframe_full_crc_scene = 0; + } else { + lastfound_normal = knownFrameId; + first_match_normal = false; + lastframe_full_crc_normal = 0; + } + } else { + frameID = Identify_Frame(frame, sceneFrameRequested); + } uint32_t now = GetMonotonicTimeMs(); bool rotationIsScene = false; if (is_real_machine() && !showStatusMessages) { @@ -3127,6 +3133,13 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { return IDENTIFY_NO_FRAME; // no new frame, client has to update rotations! } +SERUM_API uint32_t +Serum_ColorizeWithMetadatav2(uint8_t* frame, + bool sceneFrameRequested = false) { + return Serum_ColorizeWithMetadatav2Internal(frame, sceneFrameRequested, + IDENTIFY_NO_FRAME); +} + SERUM_API uint32_t Serum_Colorize(uint8_t* frame) { // return IDENTIFY_NO_FRAME if no new frame detected // return 0 if new frame with no rotation detected @@ -3235,9 +3248,7 @@ uint32_t Serum_RenderScene(void) { it->second < g_serumData.nframes) { memset(sceneFrame, 0, sizeof(sceneFrame)); mySerum.rotationtimer = sceneDurationPerFrame; - forced_scene_frame_id = it->second; - Serum_ColorizeWithMetadatav2(sceneFrame, true); - forced_scene_frame_id = IDENTIFY_NO_FRAME; + Serum_ColorizeWithMetadatav2Internal(sceneFrame, true, it->second); renderedFromDirectTriplet = true; } } diff --git a/src/sparse-vector.h b/src/sparse-vector.h index 8845979..992eb0c 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -53,6 +53,8 @@ class SparseVector { mutable uint32_t secondAccessedId = UINT32_MAX; mutable std::vector lastDecompressed; mutable std::vector secondDecompressed; + // TODO(perf #5): evaluate a tiny bounded LRU decoded cache (4-8 entries) + // for alternating hot IDs to reduce decode churn with minimal RAM overhead. 
mutable std::vector decodeScratch; mutable bool forceDecodedReads = false; mutable std::unordered_map> forcedDecoded; From 39ef6b67f279b22330590c92d40b72f3aa5b5496 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Sun, 15 Mar 2026 23:35:51 +0100 Subject: [PATCH 12/42] color rotation lookups and scene fix --- AGENTS.md | 12 ++++++++++ src/SerumData.cpp | 48 +++++++++++++++++++++++++++++++++++++++ src/SerumData.h | 53 +++++++++++++++++++++++++++++++++++++++----- src/serum-decode.cpp | 27 +++++++--------------- 4 files changed, 116 insertions(+), 24 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index e267168..574c42b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -62,6 +62,10 @@ Vector policy currently used in `SerumData`: - `frameHasDynamicExtra` - `Colorize_Framev1/v2` uses these flags to bypass dynamic-mask branches entirely for frames without active dynamic pixels. +- Color rotations use a precomputed lookup index: + `colorRotationLookupByFrameAndColor[(frameId,isExtra,color)] -> (rotation,position)` + restored from v6 cROMc when present and rebuilt at load time otherwise. + - `ColorInRotation` uses lookup-only runtime path (no linear scan fallback). - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. - Runtime does not include sentinel-based fallback in sprite/dynamic helpers; missing/incorrect sidecars are treated as a conversion/load bug and are not @@ -195,6 +199,10 @@ Stored in v6: - `frameIsScene` - `sceneFramesBySignature` - `sceneFrameIdByTriplet` +- Color-rotation lookup acceleration: + - `colorRotationLookupByFrameAndColor` +- Scene data block uses guarded encoding (`SCD1` magic + bounded count) to + prevent unbounded allocations on corrupted/misaligned input. - Sparse vectors in packed sparse layout. - Normalized sentinel vectors plus sidecar flag vectors for transparency and dynamic-zone activity. @@ -205,6 +213,8 @@ Backward compatibility: - For v5 loads, scene lookup vectors are rebuilt at startup. 
- For v6 loads, stored lookup vectors are reused unless scene data changed in this load cycle (for example CSV update), in which case lookup vectors are rebuilt. - Direct scene-triplet preprocessing is only executed for v6. +- v6 scene-data deserialization validates block magic and count before + allocation. v6 snapshot policy: - Compatibility between unreleased v6 development snapshots is not required. @@ -243,3 +253,5 @@ Minimum validation: - background scene - end-of-scene behavior flags - resume flag `16` +10. Build color-rotation lookup index via `BuildColorRotationLookup()` for + O(1) `ColorInRotation` checks only when missing from loaded v6 data. diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 4a6704c..55ffb0c 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -148,6 +148,7 @@ void SerumData::Clear() { frameIsScene.clear(); sceneFramesBySignature.clear(); sceneFrameIdByTriplet.clear(); + colorRotationLookupByFrameAndColor.clear(); } void SerumData::BuildPackingSidecarsAndNormalize() { @@ -412,6 +413,53 @@ void SerumData::LogSparseVectorProfileSnapshot() { logCounters(dynaspritemasks_extra_active); } +void SerumData::BuildColorRotationLookup() { + colorRotationLookupByFrameAndColor.clear(); + if (SerumVersion != SERUM_V2 || nframes == 0) { + return; + } + + colorRotationLookupByFrameAndColor.reserve(nframes * 8); + auto buildPlane = [&](bool isextra) { + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + uint16_t *pcol = isextra ? colorrotations_v2_extra[frameId] + : colorrotations_v2[frameId]; + for (uint32_t rot = 0; rot < MAX_COLOR_ROTATION_V2; ++rot) { + const uint32_t base = rot * MAX_LENGTH_COLOR_ROTATION; + const uint16_t length = pcol[base]; + for (uint16_t pos = 0; pos < length; ++pos) { + const uint16_t color = pcol[base + 2 + pos]; + const uint64_t key = (uint64_t(frameId) << 17) | + (uint64_t(isextra ? 1 : 0) << 16) | color; + // Keep first assignment in case of duplicates. 
+ if (colorRotationLookupByFrameAndColor.find(key) == + colorRotationLookupByFrameAndColor.end()) { + colorRotationLookupByFrameAndColor[key] = + static_cast((rot << 8) | (pos & 0xff)); + } + } + } + } + }; + + buildPlane(false); + buildPlane(true); +} + +bool SerumData::TryGetColorRotation(uint32_t frameId, uint16_t color, + bool isextra, uint16_t &rotationIndex, + uint16_t &positionInRotation) const { + const uint64_t key = + (uint64_t(frameId) << 17) | (uint64_t(isextra ? 1 : 0) << 16) | color; + auto it = colorRotationLookupByFrameAndColor.find(key); + if (it == colorRotationLookupByFrameAndColor.end()) { + return false; + } + rotationIndex = static_cast((it->second >> 8) & 0xff); + positionInRotation = static_cast(it->second & 0xff); + return true; +} + bool SerumData::SaveToFile(const char *filename) { try { BuildPackingSidecarsAndNormalize(); diff --git a/src/SerumData.h b/src/SerumData.h index 6730f66..56da6a8 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -62,6 +62,10 @@ class SerumData { bool LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags); void BuildPackingSidecarsAndNormalize(); void PrepareRuntimeDynamicHotCache(); + void BuildColorRotationLookup(); + bool TryGetColorRotation(uint32_t frameId, uint16_t color, bool isextra, + uint16_t &rotationIndex, + uint16_t &positionInRotation) const; void LogSparseVectorProfileSnapshot(); // Header data @@ -140,6 +144,7 @@ class SerumData { std::vector frameIsScene; std::unordered_map> sceneFramesBySignature; std::unordered_map sceneFrameIdByTriplet; + std::unordered_map colorRotationLookupByFrameAndColor; SceneGenerator *sceneGenerator; @@ -180,7 +185,7 @@ class SerumData { spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneFrameIdByTriplet); + sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor); } } else { if (concentrateFileVersion >= 
6) { @@ -188,11 +193,12 @@ class SerumData { spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneFrameIdByTriplet); + sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor); } else { frameIsScene.clear(); sceneFramesBySignature.clear(); sceneFrameIdByTriplet.clear(); + colorRotationLookupByFrameAndColor.clear(); spriteoriginal_opaque.clear(); spritemask_extra_opaque.clear(); spritedescriptionso_opaque.clear(); @@ -206,8 +212,25 @@ class SerumData { } if constexpr (Archive::is_saving::value) { - ar(sceneGenerator ? sceneGenerator->getSceneData() - : std::vector{}); + if (concentrateFileVersion >= 6) { + constexpr uint32_t kSceneDataMagic = 0x53434431; // "SCD1" + constexpr uint32_t kMaxSceneDataEntries = 100000; + uint32_t magic = kSceneDataMagic; + const std::vector scenes = + sceneGenerator ? sceneGenerator->getSceneData() + : std::vector{}; + uint32_t count = static_cast(scenes.size()); + if (count > kMaxSceneDataEntries) { + count = kMaxSceneDataEntries; + } + ar(magic, count); + for (uint32_t i = 0; i < count; ++i) { + ar(scenes[i]); + } + } else { + ar(sceneGenerator ? 
sceneGenerator->getSceneData() + : std::vector{}); + } } else { if (SERUM_V2 == SerumVersion && ((fheight == 32 && !(m_loadFlags & FLAG_REQUEST_64P_FRAMES)) || @@ -237,7 +260,27 @@ class SerumData { backgroundBB.setParent(&backgroundIDs); std::vector loadedScenes; - ar(loadedScenes); + if (concentrateFileVersion >= 6) { + constexpr uint32_t kSceneDataMagic = 0x53434431; // "SCD1" + constexpr uint32_t kMaxSceneDataEntries = 100000; + uint32_t magic = 0; + uint32_t count = 0; + ar(magic, count); + if (magic != kSceneDataMagic) { + throw std::runtime_error("Invalid scene data block in cROMc"); + } + if (count > kMaxSceneDataEntries) { + throw std::runtime_error("Scene data count exceeds hard limit"); + } + loadedScenes.reserve(count); + for (uint32_t i = 0; i < count; ++i) { + SceneData scene; + ar(scene); + loadedScenes.push_back(scene); + } + } else { + ar(loadedScenes); + } if (sceneGenerator) { sceneGenerator->setSceneData(std::move(loadedScenes)); sceneGenerator->setDepth(nocolors == 16 ? 4 : 2); diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 8186b06..8310316 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1364,6 +1364,9 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, } else { InitFrameLookupRuntimeStateFromStoredData(); } + if (g_serumData.colorRotationLookupByFrameAndColor.empty()) { + g_serumData.BuildColorRotationLookup(); + } if (g_disableDynamicPackedReads) { g_serumData.PrepareRuntimeDynamicHotCache(); Log("Dynamic packed reads disabled for runtime via " @@ -2015,26 +2018,12 @@ bool CheckExtraFrameAvailable(uint32_t frID) { bool ColorInRotation(uint32_t IDfound, uint16_t col, uint16_t* norot, uint16_t* posinrot, bool isextra) { - // TODO(perf #1): replace this per-pixel linear scan with a precomputed - // O(1) color->rotation lookup table persisted in v6 cROMc. 
- uint16_t* pcol = NULL; - if (isextra) - pcol = g_serumData.colorrotations_v2_extra[IDfound]; - else - pcol = g_serumData.colorrotations_v2[IDfound]; - *norot = 0xffff; - for (uint32_t ti = 0; ti < MAX_COLOR_ROTATION_V2; ti++) { - for (uint32_t tj = 2; tj < 2u + pcol[ti * MAX_LENGTH_COLOR_ROTATION]; - tj++) // val [0] is for length and val [1] is for duration in ms - { - if (col == pcol[ti * MAX_LENGTH_COLOR_ROTATION + tj]) { - *norot = ti; - *posinrot = - tj - 2; // val [0] is for length and val [1] is for duration in ms - return true; - } - } + // Fast path: precomputed O(1) lookup built at load time. + if (g_serumData.TryGetColorRotation(IDfound, col, isextra, *norot, + *posinrot)) { + return true; } + *norot = 0xffff; return false; } From 305c5897d0c1d5ab8ea60aae8cdf0b3f71030b4a Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Mon, 16 Mar 2026 00:17:03 +0100 Subject: [PATCH 13/42] more efficient shadows, LRU cache for sparse vector --- AGENTS.md | 10 ++++ src/serum-decode.cpp | 99 +++++++++++++------------------------ src/sparse-vector.h | 113 +++++++++++++++++++++++++++++++++---------- 3 files changed, 131 insertions(+), 91 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 574c42b..cedbfde 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -45,6 +45,9 @@ Behavior: - Value packing preserves exact values for packed modes (no nonzero->1 normalization). - Packed vectors can still be modified at runtime (`set()`); mutable map storage is restored lazily when needed. - Runtime lookup uses dense index fast-path when IDs are dense. +- Compressed sparse vectors keep a tiny bounded decoded cache (6 entries, LRU + replacement) in addition to the last/second hot-entry cache, reducing decode + churn for alternating IDs without large RAM growth. Vector policy currently used in `SerumData`: - `dyna4cols_v2` and `dyna4cols_v2_extra` are LZ4-compressed sparse vectors. @@ -170,6 +173,13 @@ Main phases: 6. Optional sprite overlays. 7. 
Configure color rotations and return next timer. +Dynamic-shadow hot path: +- `CheckDynaShadow(...)` receives pre-fetched per-frame shadow vectors + (`dynashadowsdir*`, `dynashadowscol*`) from `Colorize_Framev2` instead of + loading sparse vectors per pixel. +- Neighbor probing is done by iterating a compact offset table + (8-connected neighbors) rather than repeated hand-written branch blocks. + Background placeholder policy: - `Colorize_Framev2` supports `suppressFrameBackgroundImage`. - When true, frame-level background images are treated as placeholders and existing output pixel is kept in masked background areas. diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 8310316..042efdb 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -2027,69 +2027,26 @@ bool ColorInRotation(uint32_t IDfound, uint16_t col, uint16_t* norot, return false; } -void CheckDynaShadow(uint16_t* pfr, uint32_t nofr, uint8_t dynacouche, +void CheckDynaShadow(uint16_t* pfr, const uint8_t* shadowDirByLayer, + const uint16_t* shadowColorByLayer, uint8_t dynacouche, uint8_t* isdynapix, uint16_t fx, uint16_t fy, uint32_t fw, - uint32_t fh, bool isextra) { - // TODO(perf #3): precompute active neighbor offsets per dynacouche to reduce - // branch-heavy checks in this hot path. 
- uint8_t dsdir; - if (isextra) - dsdir = g_serumData.dynashadowsdir_extra[nofr][dynacouche]; - else - dsdir = g_serumData.dynashadowsdir[nofr][dynacouche]; + uint32_t fh) { + if (!shadowDirByLayer || !shadowColorByLayer) return; + const uint8_t dsdir = shadowDirByLayer[dynacouche]; if (dsdir == 0) return; - uint16_t tcol; - if (isextra) - tcol = g_serumData.dynashadowscol_extra[nofr][dynacouche]; - else - tcol = g_serumData.dynashadowscol[nofr][dynacouche]; - if ((dsdir & 0b1) > 0 && fx > 0 && fy > 0 && - isdynapix[(fy - 1) * fw + fx - 1] == 0) // dyna shadow top left - { - isdynapix[(fy - 1) * fw + fx - 1] = 1; - pfr[(fy - 1) * fw + fx - 1] = tcol; - } - if ((dsdir & 0b10) > 0 && fy > 0 && - isdynapix[(fy - 1) * fw + fx] == 0) // dyna shadow top - { - isdynapix[(fy - 1) * fw + fx] = 1; - pfr[(fy - 1) * fw + fx] = tcol; - } - if ((dsdir & 0b100) > 0 && fx < fw - 1 && fy > 0 && - isdynapix[(fy - 1) * fw + fx + 1] == 0) // dyna shadow top right - { - isdynapix[(fy - 1) * fw + fx + 1] = 1; - pfr[(fy - 1) * fw + fx + 1] = tcol; - } - if ((dsdir & 0b1000) > 0 && fx < fw - 1 && - isdynapix[fy * fw + fx + 1] == 0) // dyna shadow right - { - isdynapix[fy * fw + fx + 1] = 1; - pfr[fy * fw + fx + 1] = tcol; - } - if ((dsdir & 0b10000) > 0 && fx < fw - 1 && fy < fh - 1 && - isdynapix[(fy + 1) * fw + fx + 1] == 0) // dyna shadow bottom right - { - isdynapix[(fy + 1) * fw + fx + 1] = 1; - pfr[(fy + 1) * fw + fx + 1] = tcol; - } - if ((dsdir & 0b100000) > 0 && fy < fh - 1 && - isdynapix[(fy + 1) * fw + fx] == 0) // dyna shadow bottom - { - isdynapix[(fy + 1) * fw + fx] = 1; - pfr[(fy + 1) * fw + fx] = tcol; - } - if ((dsdir & 0b1000000) > 0 && fx > 0 && fy < fh - 1 && - isdynapix[(fy + 1) * fw + fx - 1] == 0) // dyna shadow bottom left - { - isdynapix[(fy + 1) * fw + fx - 1] = 1; - pfr[(fy + 1) * fw + fx - 1] = tcol; - } - if ((dsdir & 0b10000000) > 0 && fx > 0 && - isdynapix[fy * fw + fx - 1] == 0) // dyna shadow left - { - isdynapix[fy * fw + fx - 1] = 1; - pfr[fy * fw + fx - 
1] = tcol; + const uint16_t tcol = shadowColorByLayer[dynacouche]; + + static const int8_t kNeighborDx[8] = {-1, 0, 1, 1, 1, 0, -1, -1}; + static const int8_t kNeighborDy[8] = {-1, -1, -1, 0, 1, 1, 1, 0}; + for (uint8_t bit = 0; bit < 8; ++bit) { + if ((dsdir & (1u << bit)) == 0) continue; + const int32_t nx = (int32_t)fx + kNeighborDx[bit]; + const int32_t ny = (int32_t)fy + kNeighborDy[bit]; + if (nx < 0 || ny < 0 || nx >= (int32_t)fw || ny >= (int32_t)fh) continue; + const uint32_t neighborIndex = (uint32_t)ny * fw + (uint32_t)nx; + if (isdynapix[neighborIndex] != 0) continue; + isdynapix[neighborIndex] = 1; + pfr[neighborIndex] = tcol; } } @@ -2127,6 +2084,10 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, frameHasDynamic ? g_serumData.dynamasks_active[IDfound] : nullptr; const uint16_t* frameDynaColors = frameHasDynamic ? g_serumData.dyna4cols_v2[IDfound] : nullptr; + const uint8_t* frameShadowDir = + frameHasDynamic ? g_serumData.dynashadowsdir[IDfound] : nullptr; + const uint16_t* frameShadowColor = + frameHasDynamic ? g_serumData.dynashadowscol[IDfound] : nullptr; // create the original res frame if (g_serumData.fheight == 32) { pfr = mySerum.frame32; @@ -2188,8 +2149,9 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } else { const uint8_t dynacouche = frameDyna[tk]; if (frame[tk] > 0) { - CheckDynaShadow(pfr, IDfound, dynacouche, isdynapix, ti, tj, - g_serumData.fwidth, g_serumData.fheight, false); + CheckDynaShadow(pfr, frameShadowDir, frameShadowColor, dynacouche, + isdynapix, ti, tj, g_serumData.fwidth, + g_serumData.fheight); isdynapix[tk] = 1; pfr[tk] = frameDynaColors[dynacouche * g_serumData.nocolors + frame[tk]]; @@ -2225,6 +2187,12 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, const uint16_t* frameDynaColorsExtra = frameHasDynamicExtra ? g_serumData.dyna4cols_v2_extra[IDfound] : nullptr; + const uint8_t* frameShadowDirExtra = frameHasDynamicExtra + ? 
g_serumData.dynashadowsdir_extra[IDfound] + : nullptr; + const uint16_t* frameShadowColorExtra = + frameHasDynamicExtra ? g_serumData.dynashadowscol_extra[IDfound] + : nullptr; // create the extra res frame if (g_serumData.fheight_extra == 32) { pfr = mySerum.frame32; @@ -2295,9 +2263,10 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, } else { const uint8_t dynacouche = frameDynaExtra[tk]; if (frame[tl] > 0) { - CheckDynaShadow(pfr, IDfound, dynacouche, isdynapix, ti, tj, + CheckDynaShadow(pfr, frameShadowDirExtra, frameShadowColorExtra, + dynacouche, isdynapix, ti, tj, g_serumData.fwidth_extra, - g_serumData.fheight_extra, true); + g_serumData.fheight_extra); isdynapix[tk] = 1; pfr[tk] = frameDynaColorsExtra[dynacouche * g_serumData.nocolors + frame[tl]]; diff --git a/src/sparse-vector.h b/src/sparse-vector.h index 992eb0c..45be10a 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -53,8 +53,14 @@ class SparseVector { mutable uint32_t secondAccessedId = UINT32_MAX; mutable std::vector lastDecompressed; mutable std::vector secondDecompressed; - // TODO(perf #5): evaluate a tiny bounded LRU decoded cache (4-8 entries) - // for alternating hot IDs to reduce decode churn with minimal RAM overhead. 
+ struct DecodedCacheEntry { + uint32_t id = UINT32_MAX; + uint64_t stamp = 0; + std::vector values; + }; + static constexpr size_t kDecodedCacheCapacity = 6; + mutable std::vector decodedCache; + mutable uint64_t decodedCacheStamp = 0; mutable std::vector decodeScratch; mutable bool forceDecodedReads = false; mutable std::unordered_map> forcedDecoded; @@ -202,6 +208,68 @@ class SparseVector { } } + DecodedCacheEntry *findDecodedCacheEntry(uint32_t elementId) const { + for (auto &entry : decodedCache) { + if (entry.id == elementId && !entry.values.empty()) { + entry.stamp = ++decodedCacheStamp; + return &entry; + } + } + return nullptr; + } + + DecodedCacheEntry *reserveDecodedCacheEntry(uint32_t elementId) const { + DecodedCacheEntry *target = nullptr; + + for (auto &entry : decodedCache) { + if (entry.id == elementId) { + target = &entry; + break; + } + if (!target && entry.id == UINT32_MAX) { + target = &entry; + } + } + + if (!target) { + target = &decodedCache.front(); + for (auto &entry : decodedCache) { + if (entry.stamp < target->stamp) { + target = &entry; + } + } + } + + target->id = elementId; + target->stamp = ++decodedCacheStamp; + if (target->values.size() < elementSize) { + target->values.resize(elementSize); + } + return target; + } + + T *cacheDecodedFromBytes(uint32_t elementId, const uint8_t *bytes, + size_t bytesSize) const { + const size_t rawBytes = rawByteSize(); + if (!bytes || bytesSize != rawBytes) { + return const_cast(noData.data()); + } + auto *entry = reserveDecodedCacheEntry(elementId); + memcpy(entry->values.data(), bytes, rawBytes); + return entry->values.data(); + } + + void resetDecodedCaches() { + lastAccessedId = UINT32_MAX; + secondAccessedId = UINT32_MAX; + lastDecompressed.clear(); + secondDecompressed.clear(); + decodedCache.clear(); + decodedCache.resize(kDecodedCacheCapacity); + decodedCacheStamp = 0; + decodeScratch.clear(); + } + T *decodeValuePackedAndCache(uint32_t elementId, const uint8_t *payload) { const uint8_t 
modeBits = payload[1]; prepareDecodedCacheForWrite(elementId); @@ -456,6 +524,7 @@ class SparseVector { "Binary bit packing is only supported for uint8_t SparseVector"); } noData.resize(1, noDataSignature); + decodedCache.resize(kDecodedCacheCapacity); } SparseVector(T noDataSignature) @@ -465,6 +534,7 @@ class SparseVector { bitPackFalseValue(noDataSignature), bitPackTrueValue(static_cast(1)) { noData.resize(1, noDataSignature); + decodedCache.resize(kDecodedCacheCapacity); } T *operator[](const uint32_t elementId) { @@ -504,6 +574,14 @@ class SparseVector { } return lastDecompressed.data(); } + if (useCompression) { + if (auto *entry = findDecodedCacheEntry(elementId)) { + if (isProfilingEnabled()) { + ++profileCacheHitCount; + } + return entry->values.data(); + } + } const uint8_t *payload = nullptr; uint32_t payloadSize = 0; @@ -571,7 +649,7 @@ class SparseVector { if (isProfilingEnabled()) { ++profileDirectHitCount; } - return reinterpret_cast(const_cast(payload)); + return cacheDecodedFromBytes(elementId, payload, payloadSize); } return noData.data(); } @@ -593,6 +671,7 @@ class SparseVector { prepareDecodedCacheForWrite(elementId); memcpy(lastDecompressed.data(), decodeScratch.data(), rawBytes); lastAccessedId = elementId; + cacheDecodedFromBytes(elementId, decodeScratch.data(), rawBytes); return lastDecompressed.data(); } @@ -638,9 +717,7 @@ class SparseVector { restoreDataFromPacked(); elementSize = size; clearPacked(); - lastAccessedId = UINT32_MAX; - lastDecompressed.clear(); - decodeScratch.clear(); + resetDecodedCaches(); if (decompBuffer.size() < (elementSize * sizeof(T))) { decompBuffer.resize(elementSize * sizeof(T)); @@ -735,11 +812,7 @@ class SparseVector { lastPayloadId = UINT32_MAX; lastPayloadPtr = nullptr; lastPayloadSize = 0; - lastAccessedId = UINT32_MAX; - secondAccessedId = UINT32_MAX; - lastDecompressed.clear(); - secondDecompressed.clear(); - decodeScratch.clear(); + resetDecodedCaches(); forceDecodedReads = false; forcedDecoded.clear(); 
} @@ -799,11 +872,7 @@ class SparseVector { lastPayloadId = UINT32_MAX; lastPayloadPtr = nullptr; lastPayloadSize = 0; - lastAccessedId = UINT32_MAX; - secondAccessedId = UINT32_MAX; - lastDecompressed.clear(); - secondDecompressed.clear(); - decodeScratch.clear(); + resetDecodedCaches(); forceDecodedReads = false; forcedDecoded.clear(); return; @@ -825,11 +894,7 @@ class SparseVector { lastPayloadId = UINT32_MAX; lastPayloadPtr = nullptr; lastPayloadSize = 0; - lastAccessedId = UINT32_MAX; - secondAccessedId = UINT32_MAX; - lastDecompressed.clear(); - secondDecompressed.clear(); - decodeScratch.clear(); + resetDecodedCaches(); forceDecodedReads = false; forcedDecoded.clear(); } @@ -913,11 +978,7 @@ class SparseVector { lastPayloadId = UINT32_MAX; lastPayloadPtr = nullptr; lastPayloadSize = 0; - lastAccessedId = UINT32_MAX; - secondAccessedId = UINT32_MAX; - lastDecompressed.clear(); - secondDecompressed.clear(); - decodeScratch.clear(); + resetDecodedCaches(); forceDecodedReads = false; forcedDecoded.clear(); } From 77339631cdf1bc03a777b968f4b0187a72e4e567 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Mon, 16 Mar 2026 00:48:08 +0100 Subject: [PATCH 14/42] new sprite matching --- AGENTS.md | 30 ++++++- src/SerumData.cpp | 202 ++++++++++++++++++++++++++++++++++++++++++ src/SerumData.h | 58 ++++++++++++- src/serum-decode.cpp | 203 ++++++++++++++++++++++++++++++++----------- 4 files changed, 441 insertions(+), 52 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index cedbfde..d2e34ed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -69,6 +69,15 @@ Vector policy currently used in `SerumData`: `colorRotationLookupByFrameAndColor[(frameId,isExtra,color)] -> (rotation,position)` restored from v6 cROMc when present and rebuilt at load time otherwise. - `ColorInRotation` uses lookup-only runtime path (no linear scan fallback). 
+- Sprite runtime sidecars are precomputed and used by `Check_Spritesv2`: + - frame candidate list with sprite slot indices (`spriteCandidateOffsets`, + `spriteCandidateIds`, `spriteCandidateSlots`) + - frame-level shaped-sprite marker (`frameHasShapeSprite`) + - per-sprite dimensions and shape flags (`spriteWidth`, `spriteHeight`, + `spriteUsesShape`) + - flattened detection metadata (`spriteDetectOffsets`, `spriteDetectMeta`) + - per-sprite opaque row-segment runs (`spriteOpaqueRowSegmentStart`, + `spriteOpaqueRowSegmentCount`, `spriteOpaqueSegments`) - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. - Runtime does not include sentinel-based fallback in sprite/dynamic helpers; missing/incorrect sidecars are treated as a conversion/load bug and are not @@ -97,7 +106,11 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. 8. Build/normalize packing sidecars via `BuildPackingSidecarsAndNormalize()`. - The normalization step is idempotent and guarded; repeated calls in the same load/save cycle are no-ops once completed. -9. Optional runtime A/B switch for dynamic packed-read overhead: +9. Build or restore sprite runtime sidecars via `BuildSpriteRuntimeSidecars()`. + - For v6 cROMc loads, sidecars are restored from file when present. + - For v5 loads (and any missing/corrupt sidecar case), sidecars are rebuilt + from loaded sprite vectors at startup. +10. Optional runtime A/B switch for dynamic packed-read overhead: - If env `SERUM_DISABLE_DYNAMIC_PACKED_READS` is enabled (`1/true/on/yes`), `PrepareRuntimeDynamicHotCache()` predecodes dynamic vectors (`dynamasks*`, `dynaspritemasks*`) into runtime hot caches. @@ -180,6 +193,14 @@ Dynamic-shadow hot path: - Neighbor probing is done by iterating a compact offset table (8-connected neighbors) rather than repeated hand-written branch blocks. 
+Sprite matching prefilter: +- `Check_Spritesv2` builds an exact per-frame 32-bit dword index and skips + detection-area scans for detection words that are not present. +- This replaces the Bloom prefilter path (no false positives from hash + collisions). +- Detection-area verification uses precomputed opaque row-segment runs to avoid + per-pixel checks on transparent sprite zones. + Background placeholder policy: - `Colorize_Framev2` supports `suppressFrameBackgroundImage`. - When true, frame-level background images are treated as placeholders and existing output pixel is kept in masked background areas. @@ -211,6 +232,13 @@ Stored in v6: - `sceneFrameIdByTriplet` - Color-rotation lookup acceleration: - `colorRotationLookupByFrameAndColor` +- Sprite runtime sidecars: + - `spriteCandidateOffsets`, `spriteCandidateIds`, `spriteCandidateSlots` + - `frameHasShapeSprite` + - `spriteWidth`, `spriteHeight`, `spriteUsesShape` + - `spriteDetectOffsets`, `spriteDetectMeta` + - `spriteOpaqueRowSegmentStart`, `spriteOpaqueRowSegmentCount`, + `spriteOpaqueSegments` - Scene data block uses guarded encoding (`SCD1` magic + bounded count) to prevent unbounded allocations on corrupted/misaligned input. - Sparse vectors in packed sparse layout. 
diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 55ffb0c..32b2cd4 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -4,6 +4,8 @@ #include "miniz/miniz.h" #include "serum-version.h" +#include + bool is_real_machine(); SerumData::SerumData() @@ -146,6 +148,18 @@ void SerumData::Clear() { frameHasDynamic.clear(); frameHasDynamicExtra.clear(); frameIsScene.clear(); + spriteCandidateOffsets.clear(); + spriteCandidateIds.clear(); + spriteCandidateSlots.clear(); + frameHasShapeSprite.clear(); + spriteWidth.clear(); + spriteHeight.clear(); + spriteUsesShape.clear(); + spriteDetectOffsets.clear(); + spriteDetectMeta.clear(); + spriteOpaqueRowSegmentStart.clear(); + spriteOpaqueRowSegmentCount.clear(); + spriteOpaqueSegments.clear(); sceneFramesBySignature.clear(); sceneFrameIdByTriplet.clear(); colorRotationLookupByFrameAndColor.clear(); @@ -382,6 +396,191 @@ void SerumData::PrepareRuntimeDynamicHotCache() { (uint32_t)spriteIds.size()); } +bool SerumData::HasSpriteRuntimeSidecars() const { + if (nframes == 0 || nsprites == 0) { + return false; + } + if (spriteCandidateOffsets.size() != static_cast(nframes) + 1) { + return false; + } + if (spriteCandidateIds.size() != spriteCandidateSlots.size()) { + return false; + } + if (frameHasShapeSprite.size() != nframes) { + return false; + } + if (spriteWidth.size() != nsprites || spriteHeight.size() != nsprites || + spriteUsesShape.size() != nsprites) { + return false; + } + if (spriteDetectOffsets.size() != static_cast(nsprites) + 1) { + return false; + } + if (spriteOpaqueRowSegmentStart.size() != + static_cast(nsprites) * MAX_SPRITE_HEIGHT || + spriteOpaqueRowSegmentCount.size() != + static_cast(nsprites) * MAX_SPRITE_HEIGHT) { + return false; + } + return true; +} + +void SerumData::BuildSpriteRuntimeSidecars() { + spriteCandidateOffsets.assign(static_cast(nframes) + 1, 0); + spriteCandidateIds.clear(); + spriteCandidateSlots.clear(); + frameHasShapeSprite.assign(nframes, 0); + + spriteWidth.assign(nsprites, 0); 
+ spriteHeight.assign(nsprites, 0); + spriteUsesShape.assign(nsprites, 0); + spriteDetectOffsets.assign(static_cast(nsprites) + 1, 0); + spriteDetectMeta.clear(); + spriteOpaqueRowSegmentStart.assign(static_cast(nsprites) * + MAX_SPRITE_HEIGHT, + 0); + spriteOpaqueRowSegmentCount.assign(static_cast(nsprites) * + MAX_SPRITE_HEIGHT, + 0); + spriteOpaqueSegments.clear(); + + const size_t spritePixels = MAX_SPRITE_WIDTH * MAX_SPRITE_HEIGHT; + for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + if (sprshapemode.hasData(spriteId) && sprshapemode[spriteId][0] > 0) { + spriteUsesShape[spriteId] = 1; + } + + if (!spriteoriginal_opaque.hasData(spriteId) || + !spriteoriginal.hasData(spriteId)) { + spriteDetectOffsets[spriteId + 1] = spriteDetectOffsets[spriteId]; + continue; + } + + const uint8_t *spriteOpaque = spriteoriginal_opaque[spriteId]; + int maxX = -1; + int maxY = -1; + for (int y = 0; y < MAX_SPRITE_HEIGHT; ++y) { + const uint32_t rowIndex = + static_cast(spriteId) * MAX_SPRITE_HEIGHT + y; + spriteOpaqueRowSegmentStart[rowIndex] = + static_cast(spriteOpaqueSegments.size()); + uint16_t rowSegmentCount = 0; + + int x = 0; + while (x < MAX_SPRITE_WIDTH) { + const uint32_t pixelIndex = y * MAX_SPRITE_WIDTH + x; + if (spriteOpaque[pixelIndex] == 0) { + ++x; + continue; + } + const int segmentStart = x; + while (x < MAX_SPRITE_WIDTH && + spriteOpaque[y * MAX_SPRITE_WIDTH + x] > 0) { + if (x > maxX) { + maxX = x; + } + if (y > maxY) { + maxY = y; + } + ++x; + } + const int segmentLength = x - segmentStart; + spriteOpaqueSegments.push_back(static_cast(segmentStart)); + spriteOpaqueSegments.push_back(static_cast(segmentLength)); + ++rowSegmentCount; + } + spriteOpaqueRowSegmentCount[rowIndex] = rowSegmentCount; + } + + spriteWidth[spriteId] = static_cast(maxX >= 0 ? maxX + 1 : 0); + spriteHeight[spriteId] = static_cast(maxY >= 0 ? 
maxY + 1 : 0); + + const uint32_t detectStart = static_cast(spriteDetectMeta.size()); + for (uint32_t detectIndex = 0; detectIndex < MAX_SPRITE_DETECT_AREAS; + ++detectIndex) { + const uint16_t *areas = spritedetareas[spriteId]; + if (areas[detectIndex * 4] == 0xffff) { + continue; + } + SpriteDetectMeta meta; + meta.detectionWord = spritedetdwords[spriteId][detectIndex]; + meta.detectionWordPos = spritedetdwordpos[spriteId][detectIndex]; + meta.detectX = areas[detectIndex * 4]; + meta.detectY = areas[detectIndex * 4 + 1]; + meta.detectWidth = areas[detectIndex * 4 + 2]; + meta.detectHeight = areas[detectIndex * 4 + 3]; + + if (meta.detectWidth == 0 || meta.detectHeight == 0) { + continue; + } + + // Guard malformed data that would read beyond sprite storage. + const uint32_t maxDetectX = static_cast(meta.detectX) + + static_cast(meta.detectWidth); + const uint32_t maxDetectY = static_cast(meta.detectY) + + static_cast(meta.detectHeight); + if (maxDetectX > MAX_SPRITE_WIDTH || maxDetectY > MAX_SPRITE_HEIGHT) { + continue; + } + + // Skip detect zones with no opaque pixels after normalization. 
+ bool hasOpaque = false; + for (uint16_t dy = 0; dy < meta.detectHeight && !hasOpaque; ++dy) { + const uint32_t row = static_cast(meta.detectY + dy); + for (uint16_t dx = 0; dx < meta.detectWidth; ++dx) { + const uint32_t x = static_cast(meta.detectX + dx); + const uint32_t idx = row * MAX_SPRITE_WIDTH + x; + if (idx < spritePixels && spriteOpaque[idx] > 0) { + hasOpaque = true; + break; + } + } + } + if (hasOpaque) { + spriteDetectMeta.push_back(meta); + } + } + spriteDetectOffsets[spriteId + 1] = + static_cast(spriteDetectMeta.size()); + if (spriteDetectOffsets[spriteId + 1] < detectStart) { + spriteDetectOffsets[spriteId + 1] = detectStart; + } + } + + spriteCandidateIds.reserve(static_cast(nframes) * + MAX_SPRITES_PER_FRAME / 2); + spriteCandidateSlots.reserve(static_cast(nframes) * + MAX_SPRITES_PER_FRAME / 2); + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + spriteCandidateOffsets[frameId] = + static_cast(spriteCandidateIds.size()); + std::unordered_set dedupe; + dedupe.reserve(MAX_SPRITES_PER_FRAME); + for (uint32_t i = 0; i < MAX_SPRITES_PER_FRAME; ++i) { + const uint8_t spriteId = framesprites[frameId][i]; + if (spriteId >= 255 || spriteId >= nsprites) { + break; + } + if (!spriteoriginal.hasData(spriteId) || + !spriteoriginal_opaque.hasData(spriteId)) { + continue; + } + if (spriteDetectOffsets[spriteId] == spriteDetectOffsets[spriteId + 1]) { + continue; + } + if (!dedupe.insert(spriteId).second) { + continue; + } + spriteCandidateIds.push_back(spriteId); + spriteCandidateSlots.push_back(static_cast(i)); + frameHasShapeSprite[frameId] = + frameHasShapeSprite[frameId] || spriteUsesShape[spriteId]; + } + } + spriteCandidateOffsets[nframes] = + static_cast(spriteCandidateIds.size()); +} + void SerumData::LogSparseVectorProfileSnapshot() { auto logCounters = [&](auto &vec) { uint64_t accesses = 0; @@ -463,6 +662,9 @@ bool SerumData::TryGetColorRotation(uint32_t frameId, uint16_t color, bool SerumData::SaveToFile(const char *filename) { try { 
BuildPackingSidecarsAndNormalize(); + if (!HasSpriteRuntimeSidecars()) { + BuildSpriteRuntimeSidecars(); + } Log("Writing %s", filename); // Serialize to memory buffer first std::ostringstream ss(std::ios::binary); diff --git a/src/SerumData.h b/src/SerumData.h index 56da6a8..7036cb5 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -44,6 +44,21 @@ inline uint32_t FromLittleEndian32(uint32_t value) { class SerumData { public: + struct SpriteDetectMeta { + uint32_t detectionWord = 0; + uint16_t detectionWordPos = 0; + uint16_t detectX = 0; + uint16_t detectY = 0; + uint16_t detectWidth = 0; + uint16_t detectHeight = 0; + + template + void serialize(Archive &ar) { + ar(detectionWord, detectionWordPos, detectX, detectY, detectWidth, + detectHeight); + } + }; + SerumData(); ~SerumData(); @@ -62,6 +77,8 @@ class SerumData { bool LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags); void BuildPackingSidecarsAndNormalize(); void PrepareRuntimeDynamicHotCache(); + void BuildSpriteRuntimeSidecars(); + bool HasSpriteRuntimeSidecars() const; void BuildColorRotationLookup(); bool TryGetColorRotation(uint32_t frameId, uint16_t color, bool isextra, uint16_t &rotationIndex, @@ -142,6 +159,18 @@ class SerumData { std::vector frameHasDynamic; std::vector frameHasDynamicExtra; std::vector frameIsScene; + std::vector spriteCandidateOffsets; + std::vector spriteCandidateIds; + std::vector spriteCandidateSlots; + std::vector frameHasShapeSprite; + std::vector spriteWidth; + std::vector spriteHeight; + std::vector spriteUsesShape; + std::vector spriteDetectOffsets; + std::vector spriteDetectMeta; + std::vector spriteOpaqueRowSegmentStart; + std::vector spriteOpaqueRowSegmentCount; + std::vector spriteOpaqueSegments; std::unordered_map> sceneFramesBySignature; std::unordered_map sceneFrameIdByTriplet; std::unordered_map colorRotationLookupByFrameAndColor; @@ -185,7 +214,12 @@ class SerumData { spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, 
dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor); + sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor, + spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, + frameHasShapeSprite, + spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, + spriteDetectMeta, spriteOpaqueRowSegmentStart, + spriteOpaqueRowSegmentCount, spriteOpaqueSegments); } } else { if (concentrateFileVersion >= 6) { @@ -193,7 +227,12 @@ class SerumData { spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor); + sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor, + spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, + frameHasShapeSprite, + spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, + spriteDetectMeta, spriteOpaqueRowSegmentStart, + spriteOpaqueRowSegmentCount, spriteOpaqueSegments); } else { frameIsScene.clear(); sceneFramesBySignature.clear(); @@ -208,6 +247,18 @@ class SerumData { dynaspritemasks_extra_active.clear(); frameHasDynamic.clear(); frameHasDynamicExtra.clear(); + spriteCandidateOffsets.clear(); + spriteCandidateIds.clear(); + spriteCandidateSlots.clear(); + frameHasShapeSprite.clear(); + spriteWidth.clear(); + spriteHeight.clear(); + spriteUsesShape.clear(); + spriteDetectOffsets.clear(); + spriteDetectMeta.clear(); + spriteOpaqueRowSegmentStart.clear(); + spriteOpaqueRowSegmentCount.clear(); + spriteOpaqueSegments.clear(); } } @@ -285,6 +336,9 @@ class SerumData { sceneGenerator->setSceneData(std::move(loadedScenes)); sceneGenerator->setDepth(nocolors == 16 ? 
4 : 2); } + if (!HasSpriteRuntimeSidecars()) { + BuildSpriteRuntimeSidecars(); + } } } }; diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 042efdb..dd93d44 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1367,6 +1367,9 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, if (g_serumData.colorRotationLookupByFrameAndColor.empty()) { g_serumData.BuildColorRotationLookup(); } + if (!g_serumData.HasSpriteRuntimeSidecars()) { + g_serumData.BuildSpriteRuntimeSidecars(); + } if (g_disableDynamicPackedReads) { g_serumData.PrepareRuntimeDynamicHotCache(); Log("Dynamic packed reads disabled for runtime via " @@ -1817,46 +1820,119 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, uint8_t* pquelsprites, uint8_t* nspr, uint16_t* pfrx, uint16_t* pfry, uint16_t* pspx, uint16_t* pspy, uint16_t* pwid, uint16_t* phei) { - // TODO(perf #4): add a cheap prefilter/early-out before full detection-area - // matching to reduce sprite scan cost on frames without sprites. - uint8_t ti = 0; - uint32_t mdword; *nspr = 0; - bool isshapedframe = false; - while ((ti < MAX_SPRITES_PER_FRAME) && - (g_serumData.framesprites[quelleframe][ti] < 255)) { - uint8_t qspr = g_serumData.framesprites[quelleframe][ti]; + if (g_serumData.fwidth < 4 || quelleframe >= g_serumData.nframes) { + return false; + } + + // Exact dword index for this frame (replaces Bloom false-positive path). 
+ std::unordered_set frameDwords; + frameDwords.reserve(static_cast(g_serumData.fheight) * + std::max(1u, g_serumData.fwidth - 3)); + for (uint32_t y = 0; y < g_serumData.fheight; ++y) { + const uint32_t rowBase = y * g_serumData.fwidth; + uint32_t dword = (uint32_t)(recframe[rowBase] << 8) | + (uint32_t)(recframe[rowBase + 1] << 16) | + (uint32_t)(recframe[rowBase + 2] << 24); + for (uint32_t x = 0; x <= g_serumData.fwidth - 4; ++x) { + dword = (dword >> 8) | (uint32_t)(recframe[rowBase + x + 3] << 24); + frameDwords.insert(dword); + } + } + + const uint16_t* frameSpriteBoundingBoxes = g_serumData.framespriteBB[quelleframe]; + uint32_t candidateStart = 0; + uint32_t candidateEnd = 0; + const bool hasCandidateSidecars = + g_serumData.spriteCandidateOffsets.size() == + static_cast(g_serumData.nframes) + 1 && + g_serumData.spriteCandidateIds.size() == + g_serumData.spriteCandidateSlots.size(); + if (hasCandidateSidecars) { + candidateStart = g_serumData.spriteCandidateOffsets[quelleframe]; + candidateEnd = g_serumData.spriteCandidateOffsets[quelleframe + 1]; + if (candidateEnd > g_serumData.spriteCandidateIds.size()) { + candidateEnd = static_cast(g_serumData.spriteCandidateIds.size()); + } + } + + uint32_t mdword; + bool hasShapeFrameBuffer = false; + const bool frameHasShapeCandidates = + hasCandidateSidecars && + quelleframe < g_serumData.frameHasShapeSprite.size() && + g_serumData.frameHasShapeSprite[quelleframe] > 0; + const uint32_t candidateCount = + hasCandidateSidecars ? 
(candidateEnd - candidateStart) + : MAX_SPRITES_PER_FRAME; + for (uint32_t candidateIndex = 0; candidateIndex < candidateCount; + ++candidateIndex) { + uint8_t qspr = 255; + uint8_t spriteSlot = 0; + if (hasCandidateSidecars) { + qspr = g_serumData.spriteCandidateIds[candidateStart + candidateIndex]; + spriteSlot = + g_serumData.spriteCandidateSlots[candidateStart + candidateIndex]; + } else { + qspr = g_serumData.framesprites[quelleframe][candidateIndex]; + if (qspr >= 255) { + break; + } + spriteSlot = static_cast(candidateIndex); + } + if (qspr >= g_serumData.nsprites || spriteSlot >= MAX_SPRITES_PER_FRAME) { + continue; + } + if (!g_serumData.spriteoriginal.hasData(qspr) || !g_serumData.spriteoriginal_opaque.hasData(qspr)) { - ti++; continue; } const uint8_t* spriteOriginal = g_serumData.spriteoriginal[qspr]; const uint8_t* spriteOpaque = g_serumData.spriteoriginal_opaque[qspr]; uint8_t* Frame = recframe; - bool isshapecheck = false; - if (g_serumData.sprshapemode[qspr][0] > 0) { - isshapecheck = true; - if (!isshapedframe) { + const bool isshapecheck = + qspr < g_serumData.spriteUsesShape.size() + ? (g_serumData.spriteUsesShape[qspr] > 0) + : (g_serumData.sprshapemode[qspr][0] > 0); + if (isshapecheck && frameHasShapeCandidates) { + if (!hasShapeFrameBuffer) { for (int i = 0; i < g_serumData.fwidth * g_serumData.fheight; i++) { - if (Frame[i] > 0) - frameshape[i] = 1; - else - frameshape[i] = 0; + frameshape[i] = (Frame[i] > 0) ? 
1 : 0; } - isshapedframe = true; + hasShapeFrameBuffer = true; } Frame = frameshape; } - int spw, sph; - GetSpriteSize(qspr, &spw, &sph, spriteOriginal, MAX_SPRITE_WIDTH, - MAX_SPRITE_HEIGHT, spriteOpaque); - short minxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4]); - short minyBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 1]); - short maxxBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 2]); - short maxyBB = (short)(g_serumData.framespriteBB[quelleframe][ti * 4 + 3]); - for (uint32_t tm = 0; tm < MAX_SPRITE_DETECT_AREAS; tm++) { - if (g_serumData.spritedetareas[qspr][tm * 4] == 0xffff) continue; + + const int spw = (qspr < g_serumData.spriteWidth.size()) + ? g_serumData.spriteWidth[qspr] + : MAX_SPRITE_WIDTH; + const int sph = (qspr < g_serumData.spriteHeight.size()) + ? g_serumData.spriteHeight[qspr] + : MAX_SPRITE_HEIGHT; + + short minxBB = (short)(frameSpriteBoundingBoxes[spriteSlot * 4]); + short minyBB = (short)(frameSpriteBoundingBoxes[spriteSlot * 4 + 1]); + short maxxBB = (short)(frameSpriteBoundingBoxes[spriteSlot * 4 + 2]); + short maxyBB = (short)(frameSpriteBoundingBoxes[spriteSlot * 4 + 3]); + if (minxBB > maxxBB || minyBB > maxyBB || maxxBB - minxBB < 3) { + continue; + } + + const uint32_t detectStart = qspr < g_serumData.spriteDetectOffsets.size() + ? g_serumData.spriteDetectOffsets[qspr] + : 0; + const uint32_t detectEnd = + (qspr + 1) < g_serumData.spriteDetectOffsets.size() + ? 
g_serumData.spriteDetectOffsets[qspr + 1] + : detectStart; + for (uint32_t tm = detectStart; tm < detectEnd; tm++) { + const auto& detMeta = g_serumData.spriteDetectMeta[tm]; + if (frameDwords.find(detMeta.detectionWord) == frameDwords.end()) { + continue; + } + // we look for the sprite in the frame sent for (short ty = minyBB; ty <= maxyBB; ty++) { mdword = (uint32_t)(Frame[ty * g_serumData.fwidth + minxBB] << 8) | @@ -1866,8 +1942,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, uint32_t tj = ty * g_serumData.fwidth + tx; mdword = (mdword >> 8) | (uint32_t)(Frame[tj + 3] << 24); // we look for the magic dword first: - uint16_t sddp = g_serumData.spritedetdwordpos[qspr][tm]; - if (mdword == g_serumData.spritedetdwords[qspr][tm]) { + const uint16_t sddp = detMeta.detectionWordPos; + if (mdword == detMeta.detectionWord) { short frax = (short)tx; // position in the frame of the detection dword short fray = (short)ty; @@ -1876,16 +1952,10 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, // the detection dword short spry = (short)(sddp / MAX_SPRITE_WIDTH); // details of the det area: - short detx = - (short)g_serumData - .spritedetareas[qspr][tm * 4]; // position of the detection - // area in the sprite - short dety = (short)g_serumData.spritedetareas[qspr][tm * 4 + 1]; - short detw = - (short)g_serumData - .spritedetareas[qspr] - [tm * 4 + 2]; // size of the detection area - short deth = (short)g_serumData.spritedetareas[qspr][tm * 4 + 3]; + const short detx = static_cast(detMeta.detectX); + const short dety = static_cast(detMeta.detectY); + const short detw = static_cast(detMeta.detectWidth); + const short deth = static_cast(detMeta.detectHeight); // if the detection area starts before the frame (left or top), // continue: if ((frax - minxBB < sprx - detx) || (fray - minyBB < spry - dety)) @@ -1901,19 +1971,55 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, // we can now check if the full detection area is around 
the found // detection dword bool notthere = false; - for (uint16_t tk = 0; tk < deth; tk++) { - for (uint16_t tl = 0; tl < detw; tl++) { - const uint32_t spritePixelIndex = - (tk + dety) * MAX_SPRITE_WIDTH + tl + detx; - if (spriteOpaque[spritePixelIndex] == 0) continue; - uint8_t val = spriteOriginal[spritePixelIndex]; - if (val != - Frame[(tk + offsy) * g_serumData.fwidth + tl + offsx]) { + for (uint16_t tk = 0; tk < deth && !notthere; tk++) { + const uint32_t spriteRow = static_cast(dety + tk); + const uint32_t rowIndex = static_cast(qspr) * + MAX_SPRITE_HEIGHT + + spriteRow; + if (rowIndex >= g_serumData.spriteOpaqueRowSegmentStart.size()) { + notthere = true; + break; + } + const uint32_t segStartIndex = + g_serumData.spriteOpaqueRowSegmentStart[rowIndex]; + const uint16_t segCount = + g_serumData.spriteOpaqueRowSegmentCount[rowIndex]; + for (uint16_t seg = 0; seg < segCount && !notthere; ++seg) { + const uint32_t segIndex = segStartIndex + seg * 2; + if (segIndex + 1 >= g_serumData.spriteOpaqueSegments.size()) { notthere = true; break; } + const uint16_t segmentX = + g_serumData.spriteOpaqueSegments[segIndex]; + const uint16_t segmentLen = + g_serumData.spriteOpaqueSegments[segIndex + 1]; + const uint16_t segFrom = + std::max(segmentX, static_cast(detx)); + const uint16_t segTo = std::min( + static_cast(segmentX + segmentLen), + static_cast(detx + detw)); + if (segFrom >= segTo) { + continue; + } + + const uint32_t spriteBase = + spriteRow * MAX_SPRITE_WIDTH + segFrom; + const uint32_t frameBase = + static_cast(tk + offsy) * g_serumData.fwidth + + static_cast(segFrom - detx + offsx); + for (uint16_t x = segFrom; x < segTo; ++x) { + const uint32_t spriteOffset = spriteBase + (x - segFrom); + const uint32_t frameOffset = frameBase + (x - segFrom); + if (spriteOpaque[spriteOffset] == 0) { + continue; + } + if (spriteOriginal[spriteOffset] != Frame[frameOffset]) { + notthere = true; + break; + } + } } - if (notthere == true) break; } if (!notthere) { 
pquelsprites[*nspr] = qspr; @@ -1959,7 +2065,6 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, } } } - ti++; } if (*nspr > 0) return true; return false; From cc3d32642d4004cc4a4ea5cc4c9e85fec9756c32 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Mon, 16 Mar 2026 09:50:13 +0100 Subject: [PATCH 15/42] fixed sprite shape mode --- AGENTS.md | 3 +++ src/serum-decode.cpp | 25 ++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index d2e34ed..6f15b1b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -198,6 +198,9 @@ Sprite matching prefilter: detection-area scans for detection words that are not present. - This replaces the Bloom prefilter path (no false positives from hash collisions). +- Shape-mode sprites use a separate exact dword index built from the binary + `frameshape` domain, so shape detection words are not filtered against raw + grayscale frame dwords. - Detection-area verification uses precomputed opaque row-segment runs to avoid per-pixel checks on transparent sprite zones. 
diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index dd93d44..6ffb56d 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1839,6 +1839,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, frameDwords.insert(dword); } } + std::unordered_set frameShapeDwords; + bool frameShapeDwordsBuilt = false; const uint16_t* frameSpriteBoundingBoxes = g_serumData.framespriteBB[quelleframe]; uint32_t candidateStart = 0; @@ -1903,6 +1905,22 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, hasShapeFrameBuffer = true; } Frame = frameshape; + if (!frameShapeDwordsBuilt) { + frameShapeDwords.clear(); + frameShapeDwords.reserve(static_cast(g_serumData.fheight) * + std::max(1u, g_serumData.fwidth - 3)); + for (uint32_t y = 0; y < g_serumData.fheight; ++y) { + const uint32_t rowBase = y * g_serumData.fwidth; + uint32_t dword = (uint32_t)(frameshape[rowBase] << 8) | + (uint32_t)(frameshape[rowBase + 1] << 16) | + (uint32_t)(frameshape[rowBase + 2] << 24); + for (uint32_t x = 0; x <= g_serumData.fwidth - 4; ++x) { + dword = (dword >> 8) | (uint32_t)(frameshape[rowBase + x + 3] << 24); + frameShapeDwords.insert(dword); + } + } + frameShapeDwordsBuilt = true; + } } const int spw = (qspr < g_serumData.spriteWidth.size()) @@ -1929,7 +1947,12 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, : detectStart; for (uint32_t tm = detectStart; tm < detectEnd; tm++) { const auto& detMeta = g_serumData.spriteDetectMeta[tm]; - if (frameDwords.find(detMeta.detectionWord) == frameDwords.end()) { + const bool hasDetectionWord = + isshapecheck + ? 
(frameShapeDwords.find(detMeta.detectionWord) != + frameShapeDwords.end()) + : (frameDwords.find(detMeta.detectionWord) != frameDwords.end()); + if (!hasDetectionWord) { continue; } From 139444cac1cd7cb80ec12b8d7aa4e933a023c510 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Mon, 16 Mar 2026 12:01:47 +0100 Subject: [PATCH 16/42] added basic lookahead support --- AGENTS.md | 9 ++++ src/serum-decode.cpp | 123 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 6f15b1b..5c29ea3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -115,6 +115,12 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. `PrepareRuntimeDynamicHotCache()` predecodes dynamic vectors (`dynamasks*`, `dynaspritemasks*`) into runtime hot caches. - Default runtime behavior is unchanged when this env var is not set. +11. Optional normal-frame lookahead prefetch: + - Env `SERUM_LOOKAHEAD_DEPTH=` (`N=0..8`, default `0`) enables cache + warming for up to `N` next non-scene frame IDs after each identified + normal frame. + - Prefetch performs sparse-vector reads only (no matching/render changes). + - Scene frames are skipped and frame-ID wrap-around is respected. Important: - `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. @@ -273,6 +279,9 @@ v6 snapshot policy: logged at the same cadence (accesses, decode count, cache hits, direct hits) for key runtime vectors (`cframes_v2*`, `backgroundmask*`, `dynamasks*`, `dynaspritemasks*`). +- Optional lookahead logging: + - When `SERUM_LOOKAHEAD_DEPTH>0`, load logs + `Frame lookahead prefetch enabled via SERUM_LOOKAHEAD_DEPTH=`. ## Safety invariants - `frameIsScene.size()` must equal `nframes` before identification. 
diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 6ffb56d..d54b178 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -82,9 +82,26 @@ static bool IsEnvFlagEnabled(const char* name) { strcasecmp(value, "yes") == 0 || strcasecmp(value, "on") == 0; } +static uint32_t GetEnvUintClamped(const char* name, uint32_t maxValue) { + const char* value = std::getenv(name); + if (!value || value[0] == '\0') { + return 0; + } + char* endPtr = nullptr; + unsigned long parsed = std::strtoul(value, &endPtr, 10); + if (endPtr == value || *endPtr != '\0') { + return 0; + } + if (parsed > maxValue) { + parsed = maxValue; + } + return static_cast(parsed); +} + static bool g_profileDynamicHotPaths = false; static bool g_profileSparseVectors = false; static bool g_disableDynamicPackedReads = false; +static uint32_t g_frameLookaheadDepth = 0; static uint64_t g_profileColorizeFrameV2Ns = 0; static uint64_t g_profileColorizeSpriteV2Ns = 0; static uint64_t g_profileColorizeCalls = 0; @@ -1254,9 +1271,14 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, g_profileSparseVectors = IsEnvFlagEnabled("SERUM_PROFILE_SPARSE_VECTORS"); g_disableDynamicPackedReads = IsEnvFlagEnabled("SERUM_DISABLE_DYNAMIC_PACKED_READS"); + g_frameLookaheadDepth = GetEnvUintClamped("SERUM_LOOKAHEAD_DEPTH", 8); g_profileColorizeFrameV2Ns = 0; g_profileColorizeSpriteV2Ns = 0; g_profileColorizeCalls = 0; + if (g_frameLookaheadDepth > 0) { + Log("Frame lookahead prefetch enabled via SERUM_LOOKAHEAD_DEPTH=%u", + g_frameLookaheadDepth); + } mySerum.SerumVersion = g_serumData.SerumVersion = 0; mySerum.flags = 0; @@ -1664,6 +1686,104 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { return IDENTIFY_NO_FRAME; // we found no corresponding frame } +static void WarmFrameAssetsForId(uint32_t frameId) { + if (frameId >= g_serumData.nframes) { + return; + } + + (void)g_serumData.activeframes[frameId][0]; + (void)g_serumData.cframes_v2[frameId]; + 
(void)g_serumData.colorrotations_v2[frameId]; + + if (g_serumData.isextraframe[frameId][0] > 0) { + (void)g_serumData.cframes_v2_extra[frameId]; + (void)g_serumData.colorrotations_v2_extra[frameId]; + } + + const uint16_t backgroundId = g_serumData.backgroundIDs[frameId][0]; + if (backgroundId < g_serumData.nbackgrounds) { + (void)g_serumData.backgroundmask[backgroundId]; + (void)g_serumData.backgroundframes_v2[backgroundId]; + if (g_serumData.isextrabackground[backgroundId][0] > 0) { + (void)g_serumData.backgroundmask_extra[backgroundId]; + (void)g_serumData.backgroundframes_v2_extra[backgroundId]; + } + } + + if (frameId < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[frameId] > 0) { + (void)g_serumData.dynamasks[frameId]; + (void)g_serumData.dynamasks_active[frameId]; + (void)g_serumData.dyna4cols_v2[frameId]; + (void)g_serumData.dynashadowsdir[frameId]; + (void)g_serumData.dynashadowscol[frameId]; + } + + if (frameId < g_serumData.frameHasDynamicExtra.size() && + g_serumData.frameHasDynamicExtra[frameId] > 0 && + g_serumData.isextraframe[frameId][0] > 0) { + (void)g_serumData.dynamasks_extra[frameId]; + (void)g_serumData.dynamasks_extra_active[frameId]; + (void)g_serumData.dyna4cols_v2_extra[frameId]; + (void)g_serumData.dynashadowsdir_extra[frameId]; + (void)g_serumData.dynashadowscol_extra[frameId]; + } + + if (g_serumData.spriteCandidateOffsets.size() == + static_cast(g_serumData.nframes) + 1 && + g_serumData.spriteCandidateIds.size() == + g_serumData.spriteCandidateSlots.size()) { + uint32_t start = g_serumData.spriteCandidateOffsets[frameId]; + uint32_t end = g_serumData.spriteCandidateOffsets[frameId + 1]; + if (end > g_serumData.spriteCandidateIds.size()) { + end = static_cast(g_serumData.spriteCandidateIds.size()); + } + for (uint32_t i = start; i < end; ++i) { + const uint8_t spriteId = g_serumData.spriteCandidateIds[i]; + if (spriteId >= g_serumData.nsprites) { + continue; + } + (void)g_serumData.spriteoriginal[spriteId]; + 
(void)g_serumData.spriteoriginal_opaque[spriteId]; + (void)g_serumData.spritecolored[spriteId]; + if (g_serumData.isextrasprite[spriteId][0] > 0) { + (void)g_serumData.spritemask_extra[spriteId]; + (void)g_serumData.spritemask_extra_opaque[spriteId]; + (void)g_serumData.spritecolored_extra[spriteId]; + } + (void)g_serumData.dynaspritemasks[spriteId]; + (void)g_serumData.dynaspritemasks_active[spriteId]; + if (g_serumData.isextrasprite[spriteId][0] > 0) { + (void)g_serumData.dynaspritemasks_extra[spriteId]; + (void)g_serumData.dynaspritemasks_extra_active[spriteId]; + } + } + } +} + +static void PrefetchNextNormalFrameAssets(uint32_t currentFrameId) { + if (g_frameLookaheadDepth == 0 || g_serumData.nframes == 0) { + return; + } + uint32_t cursor = currentFrameId; + for (uint32_t level = 0; level < g_frameLookaheadDepth; ++level) { + bool found = false; + for (uint32_t hop = 0; hop < g_serumData.nframes; ++hop) { + cursor = (cursor + 1 >= g_serumData.nframes) ? 0 : (cursor + 1); + if (g_serumData.frameIsScene.size() == g_serumData.nframes && + g_serumData.frameIsScene[cursor] > 0) { + continue; + } + WarmFrameAssetsForId(cursor); + found = true; + break; + } + if (!found) { + break; + } + } +} + void GetSpriteSize(uint8_t nospr, int* pswid, int* pshei, const uint8_t* spriteData, int sswid, int sshei, const uint8_t* spriteOpaque) { @@ -2888,6 +3008,9 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } mySerum.frameID = frameID; + if (!sceneFrameRequested) { + PrefetchNextNormalFrameAssets(frameID); + } if (!sceneFrameRequested) { memcpy(lastFrame, frame, g_serumData.fwidth * g_serumData.fheight); lastFrameId = frameID; From 9a31cf8d6e7286f979407752471bc3e5726e48b0 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Tue, 17 Mar 2026 09:02:28 +0100 Subject: [PATCH 17/42] GetRuntimeMetadata --- src/serum-decode.cpp | 85 ++++++++++++++++++++++++++++++++++++++++++++ src/serum-decode.h | 11 ++++++ src/serum.h | 25 +++++++++++++ 3 files changed, 121 insertions(+) 
diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index d54b178..6518c72 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1761,6 +1761,71 @@ static void WarmFrameAssetsForId(uint32_t frameId) { } } +static uint32_t BuildRuntimeFeatureFlags(uint32_t frameId) { + uint32_t featureFlags = 0; + + if (frameId == IDENTIFY_NO_FRAME) { + return featureFlags; + } + + if (frameId == 0xfffffffd) { + return SERUM_RUNTIME_FEATURE_MONOCHROME_FALLBACK; + } + + if (frameId >= g_serumData.nframes) { + return featureFlags; + } + + featureFlags |= SERUM_RUNTIME_FEATURE_MATCHED; + + if (g_serumData.backgroundIDs[frameId][0] < g_serumData.nbackgrounds) { + featureFlags |= SERUM_RUNTIME_FEATURE_BACKGROUND; + } + + if (frameId < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[frameId] > 0) { + featureFlags |= SERUM_RUNTIME_FEATURE_DYNAMIC; + } + + if (frameId < g_serumData.frameHasDynamicExtra.size() && + g_serumData.frameHasDynamicExtra[frameId] > 0) { + featureFlags |= SERUM_RUNTIME_FEATURE_DYNAMIC_EXTRA; + } + + for (uint8_t spriteIndex = 0; spriteIndex < MAX_SPRITES_PER_FRAME; + ++spriteIndex) { + if (g_serumData.framesprites[frameId][spriteIndex] < 255) { + featureFlags |= SERUM_RUNTIME_FEATURE_SPRITES; + break; + } + } + + if (frameId < g_serumData.frameHasShapeSprite.size() && + g_serumData.frameHasShapeSprite[frameId] > 0) { + featureFlags |= SERUM_RUNTIME_FEATURE_SHAPE_SPRITES; + } + + const uint16_t* rotations = g_serumData.colorrotations_v2[frameId]; + for (uint8_t rotationIndex = 0; rotationIndex < MAX_COLOR_ROTATION_V2; + ++rotationIndex) { + if (rotations[rotationIndex * MAX_LENGTH_COLOR_ROTATION] > 0) { + featureFlags |= SERUM_RUNTIME_FEATURE_COLOR_ROTATION; + break; + } + } + + if (frameId < g_serumData.frameIsScene.size() && + g_serumData.frameIsScene[frameId] > 0) { + featureFlags |= SERUM_RUNTIME_FEATURE_SCENE; + } + + if (g_serumData.triggerIDs[frameId][0] < 0xffffffff) { + featureFlags |= SERUM_RUNTIME_FEATURE_TRIGGER; + } + 
+ return featureFlags; +} + static void PrefetchNextNormalFrameAssets(uint32_t currentFrameId) { if (g_frameLookaheadDepth == 0 || g_serumData.nframes == 0) { return; @@ -3667,6 +3732,26 @@ SERUM_API void Serum_DisablePupTriggers(void) { keepTriggersInternal = true; } SERUM_API void Serum_EnablePupTrigers(void) { keepTriggersInternal = false; } +SERUM_API bool Serum_GetRuntimeMetadata(Serum_Runtime_Metadata* metadata) { + if (metadata == nullptr) { + return false; + } + + if (metadata->size != 0 && + metadata->size < sizeof(Serum_Runtime_Metadata)) { + return false; + } + + memset(metadata, 0, sizeof(*metadata)); + metadata->size = sizeof(*metadata); + metadata->serumVersion = mySerum.SerumVersion; + metadata->frameID = mySerum.frameID; + metadata->triggerID = mySerum.triggerID; + metadata->rotationtimer = mySerum.rotationtimer; + metadata->featureFlags = BuildRuntimeFeatureFlags(mySerum.frameID); + return true; +} + SERUM_API bool Serum_Scene_ParseCSV(const char* const csv_filename) { if (!g_serumData.sceneGenerator) return false; return g_serumData.sceneGenerator->parseCSV(csv_filename); diff --git a/src/serum-decode.h b/src/serum-decode.h index bf0b0b8..ace485a 100644 --- a/src/serum-decode.h +++ b/src/serum-decode.h @@ -107,6 +107,17 @@ SERUM_API void Serum_DisablePupTriggers(void); */ SERUM_API void Serum_EnablePupTrigers(void); +/** @brief Get runtime metadata for the last Serum colorize/rotate result + * + * Provides frame-level metadata for the most recently processed Serum frame. + * This is intended for diagnostics/profiling tools. + * + * @param metadata: Output structure. metadata->size should be set to + * sizeof(Serum_Runtime_Metadata); zero is also accepted for current versions. 
+ * @return true if metadata was filled, false on invalid arguments + */ +SERUM_API bool Serum_GetRuntimeMetadata(Serum_Runtime_Metadata* metadata); + /** @brief Get the full version of this library * * @return A string formatted "major.minor.patch" diff --git a/src/serum.h b/src/serum.h index 226ce29..3078a41 100644 --- a/src/serum.h +++ b/src/serum.h @@ -62,6 +62,30 @@ enum // returned flags that are added to the timings if there were rotations FLAG_RETURNED_V2_SCENE = 0x40000, }; +enum // runtime metadata feature flags +{ + SERUM_RUNTIME_FEATURE_MATCHED = 1 << 0, + SERUM_RUNTIME_FEATURE_BACKGROUND = 1 << 1, + SERUM_RUNTIME_FEATURE_DYNAMIC = 1 << 2, + SERUM_RUNTIME_FEATURE_DYNAMIC_EXTRA = 1 << 3, + SERUM_RUNTIME_FEATURE_SPRITES = 1 << 4, + SERUM_RUNTIME_FEATURE_SHAPE_SPRITES = 1 << 5, + SERUM_RUNTIME_FEATURE_COLOR_ROTATION = 1 << 6, + SERUM_RUNTIME_FEATURE_SCENE = 1 << 7, + SERUM_RUNTIME_FEATURE_TRIGGER = 1 << 8, + SERUM_RUNTIME_FEATURE_MONOCHROME_FALLBACK = 1 << 9, +}; + +typedef struct _Serum_Runtime_Metadata { + uint32_t size; + uint32_t serumVersion; + uint32_t frameID; + uint32_t triggerID; + uint32_t rotationtimer; + uint32_t featureFlags; + uint32_t reserved; +} Serum_Runtime_Metadata; + typedef struct _Serum_Frame_Struc { // data for v1 Serum format uint8_t* frame; // return the colorized frame @@ -161,6 +185,7 @@ typedef void (*Serum_DisableColorizationFunc)(void); typedef void (*Serum_EnableColorizationFunc)(void); typedef void (*Serum_DisablePupTriggersFunc)(void); typedef void (*Serum_EnablePupTrigersFunc)(void); +typedef bool (*Serum_GetRuntimeMetadataFunc)(Serum_Runtime_Metadata* metadata); typedef bool (*Serum_Scene_ParseCSVFunc)(const char* const csv_filename); typedef bool (*Serum_Scene_GenerateDumpFunc)(const char* const dump_filename, int id); From f0a4a66210ce2162d5e24f8c99ca484cea0defc4 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Tue, 17 Mar 2026 12:05:42 +0100 Subject: [PATCH 18/42] debug tracing --- src/serum-decode.cpp | 236 
+++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 6518c72..1e30b9f 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -105,6 +105,11 @@ static uint32_t g_frameLookaheadDepth = 0; static uint64_t g_profileColorizeFrameV2Ns = 0; static uint64_t g_profileColorizeSpriteV2Ns = 0; static uint64_t g_profileColorizeCalls = 0; +static bool g_debugFrameTracingInitialized = false; +static uint32_t g_debugTargetInputCrc = 0; +static uint32_t g_debugTargetFrameId = 0xffffffffu; +static bool g_debugStageHashes = false; +static uint32_t g_debugCurrentInputCrc = 0; static SerumData g_serumData; uint16_t sceneFrameCount = 0; @@ -238,6 +243,172 @@ Serum_Frame_Struc mySerum; // structure to keep communicate colorization data uint8_t* frameshape = NULL; // memory for shape mode conversion of ythe frame +static uint32_t GetEnvUint32Auto(const char* name, uint32_t defaultValue) { + const char* value = std::getenv(name); + if (!value || value[0] == '\0') { + return defaultValue; + } + char* endPtr = nullptr; + unsigned long parsed = std::strtoul(value, &endPtr, 0); + if (endPtr == value || *endPtr != '\0') { + return defaultValue; + } + return static_cast(parsed); +} + +static void InitDebugFrameTracingFromEnv(void) { + if (g_debugFrameTracingInitialized) { + return; + } + g_debugFrameTracingInitialized = true; + g_debugTargetInputCrc = GetEnvUint32Auto("SERUM_DEBUG_INPUT_CRC", 0); + g_debugTargetFrameId = + GetEnvUint32Auto("SERUM_DEBUG_FRAME_ID", 0xffffffffu); + g_debugStageHashes = IsEnvFlagEnabled("SERUM_DEBUG_STAGE_HASHES"); + if (g_debugTargetInputCrc != 0 || g_debugTargetFrameId != 0xffffffffu || + g_debugStageHashes) { + Log("Serum debug tracing enabled: inputCrc=%u frameId=%u stageHashes=%s", + g_debugTargetInputCrc, g_debugTargetFrameId, + g_debugStageHashes ? 
"on" : "off"); + } +} + +static bool DebugTraceMatches(uint32_t inputCrc, uint32_t frameId) { + InitDebugFrameTracingFromEnv(); + const bool crcMatches = + (g_debugTargetInputCrc == 0) || (inputCrc == g_debugTargetInputCrc); + const bool frameMatches = (g_debugTargetFrameId == 0xffffffffu) || + (frameId == g_debugTargetFrameId); + return crcMatches && frameMatches; +} + +static bool DebugTraceMatchesInputCrc(uint32_t inputCrc) { + InitDebugFrameTracingFromEnv(); + return (g_debugTargetInputCrc == 0) || (inputCrc == g_debugTargetInputCrc); +} + +static uint64_t DebugHashBytesFNV1a64(const void* data, size_t size) { + const uint8_t* bytes = static_cast(data); + uint64_t hash = 1469598103934665603ULL; + for (size_t i = 0; i < size; ++i) { + hash ^= bytes[i]; + hash *= 1099511628211ULL; + } + return hash; +} + +static uint64_t DebugHashCurrentOutputFrame(uint32_t frameId, bool isExtra) { + uint16_t* output = nullptr; + uint32_t width = 0; + uint32_t height = 0; + if ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && mySerum.frame32) { + output = mySerum.frame32; + width = mySerum.width32; + height = 32; + } else if ((mySerum.flags & FLAG_RETURNED_64P_FRAME_OK) && mySerum.frame64) { + output = mySerum.frame64; + width = mySerum.width64; + height = 64; + } + if (!output || width == 0 || height == 0) { + return 0; + } + const uint64_t hash = DebugHashBytesFNV1a64( + output, static_cast(width) * height * sizeof(uint16_t)); + if (g_debugStageHashes && DebugTraceMatches(g_debugCurrentInputCrc, frameId)) { + Log("Serum debug stage hash: frameId=%u inputCrc=%u stage=%s hash=%llu " + "size=%ux%u", + frameId, g_debugCurrentInputCrc, isExtra ? 
"base-extra" : "base", + static_cast(hash), width, height); + } + return hash; +} + +static void DebugLogColorizeFrameV2Assets( + uint32_t frameId, uint32_t inputCrc, bool isExtra, uint32_t width, + uint32_t height, const uint16_t* frameColors, + const uint8_t* frameBackgroundMask, const uint16_t* frameBackground, + bool frameHasDynamic, const uint8_t* frameDyna, + const uint8_t* frameDynaActive, const uint16_t* frameDynaColors, + const uint16_t* colorRotations, uint16_t backgroundId) { + if (!g_debugStageHashes || !DebugTraceMatches(inputCrc, frameId)) { + return; + } + + const uint32_t pixelCount = width * height; + uint32_t backgroundMaskPixels = 0; + uint32_t dynamicActivePixels = 0; + uint32_t dynamicNonZeroPixels = 0; + if (frameBackgroundMask) { + for (uint32_t i = 0; i < pixelCount; ++i) { + if (frameBackgroundMask[i] > 0) { + ++backgroundMaskPixels; + } + } + } + if (frameHasDynamic && frameDynaActive) { + for (uint32_t i = 0; i < pixelCount; ++i) { + if (frameDynaActive[i] > 0) { + ++dynamicActivePixels; + if (frameDyna && frameDyna[i] > 0) { + ++dynamicNonZeroPixels; + } + } + } + } + + const uint64_t colorsHash = + frameColors ? DebugHashBytesFNV1a64(frameColors, + (size_t)pixelCount * sizeof(uint16_t)) + : 0; + const uint64_t backgroundMaskHash = + frameBackgroundMask + ? DebugHashBytesFNV1a64(frameBackgroundMask, (size_t)pixelCount) + : 0; + const uint64_t backgroundHash = + frameBackground + ? DebugHashBytesFNV1a64(frameBackground, + (size_t)pixelCount * sizeof(uint16_t)) + : 0; + const uint64_t dynaHash = + (frameHasDynamic && frameDyna) + ? DebugHashBytesFNV1a64(frameDyna, (size_t)pixelCount) + : 0; + const uint64_t dynaActiveHash = + (frameHasDynamic && frameDynaActive) + ? DebugHashBytesFNV1a64(frameDynaActive, (size_t)pixelCount) + : 0; + const uint64_t dynaColorsHash = + (frameHasDynamic && frameDynaColors) + ? 
DebugHashBytesFNV1a64( + frameDynaColors, + (size_t)MAX_DYNA_SETS_PER_FRAME_V2 * g_serumData.nocolors * + sizeof(uint16_t)) + : 0; + const uint64_t rotationHash = + colorRotations + ? DebugHashBytesFNV1a64( + colorRotations, + (size_t)MAX_COLOR_ROTATION_V2 * MAX_LENGTH_COLOR_ROTATION * + sizeof(uint16_t)) + : 0; + + Log("Serum debug stage assets: frameId=%u inputCrc=%u stage=%s " + "backgroundId=%u colorsHash=%llu backgroundMaskHash=%llu " + "backgroundHash=%llu backgroundPixels=%u dynamic=%s " + "dynaHash=%llu dynaActiveHash=%llu dynaColorsHash=%llu " + "dynamicPixels=%u dynamicNonZero=%u rotationHash=%llu", + frameId, inputCrc, isExtra ? "assets-extra" : "assets", backgroundId, + static_cast(colorsHash), + static_cast(backgroundMaskHash), + static_cast(backgroundHash), backgroundMaskPixels, + frameHasDynamic ? "true" : "false", + static_cast(dynaHash), + static_cast(dynaActiveHash), + static_cast(dynaColorsHash), dynamicActivePixels, + dynamicNonZeroPixels, static_cast(rotationHash)); +} + SERUM_API void Serum_SetLogCallback(Serum_LogCallback callback, const void* userData) { g_serumData.SetLogCallback(callback, userData); @@ -1592,6 +1763,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { const uint32_t pixels = g_serumData.is256x64 ? (256 * 64) : (g_serumData.fwidth * g_serumData.fheight); + const uint32_t inputCrc = crc32_fast(frame, pixels); memset(framechecked, false, g_serumData.nframes); uint32_t& lastfound_stream = sceneFrameRequested ? lastfound_scene : lastfound_normal; @@ -1611,6 +1783,12 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint8_t mask = g_serumData.compmaskID[tj][0]; uint8_t Shape = g_serumData.shapecompmode[tj][0]; uint32_t Hashc = calc_crc32(frame, mask, pixels, Shape); + if (DebugTraceMatches(inputCrc, tj)) { + Log("Serum debug identify seed: inputCrc=%u startFrame=%u " + "sceneRequested=%s mask=%u shape=%u hash=%u", + inputCrc, tj, sceneFrameRequested ? 
"true" : "false", mask, + Shape, Hashc); + } if (sceneFrameRequested) { auto sigIt = g_serumData.sceneFramesBySignature.find( MakeFrameSignature(mask, Shape, Hashc)); @@ -1620,6 +1798,12 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { continue; } for (uint32_t ti : sigIt->second) { + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify scene candidate: inputCrc=%u frameId=%u " + "mask=%u shape=%u hash=%u storedHash=%u lastfound=%u", + inputCrc, ti, mask, Shape, Hashc, + g_serumData.hashcodes[ti][0], lastfound_stream); + } if (first_match || ti != lastfound_stream || mask < 255) { lastfound_stream = ti; lastfound = ti; @@ -1652,6 +1836,12 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { if (!framechecked[ti]) { if ((g_serumData.compmaskID[ti][0] == mask) && (g_serumData.shapecompmode[ti][0] == Shape)) { + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify candidate: inputCrc=%u frameId=%u " + "mask=%u shape=%u hash=%u storedHash=%u lastfound=%u", + inputCrc, ti, mask, Shape, Hashc, + g_serumData.hashcodes[ti][0], lastfound_stream); + } if (Hashc == g_serumData.hashcodes[ti][0]) { if (first_match || ti != lastfound_stream || mask < 255) { // Reset_ColorRotations(); @@ -1683,6 +1873,10 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { if (++tj >= g_serumData.nframes) tj = 0; } while (tj != lastfound_stream); + if (DebugTraceMatchesInputCrc(inputCrc)) { + Log("Serum debug identify miss: inputCrc=%u sceneRequested=%s", + inputCrc, sceneFrameRequested ? 
"true" : "false"); + } return IDENTIFY_NO_FRAME; // we found no corresponding frame } @@ -2419,6 +2613,11 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, cshft = colorshifts64; pSceneBackgroundFrame = mySerum.frame64; } + DebugLogColorizeFrameV2Assets( + IDfound, g_debugCurrentInputCrc, false, g_serumData.fwidth, + g_serumData.fheight, frameColors, frameBackgroundMask, frameBackground, + frameHasDynamic, frameDyna, frameDynaActive, frameDynaColors, prt, + backgroundId); if (applySceneBackground) memcpy(sceneBackgroundFrame, pSceneBackgroundFrame, g_serumData.fwidth * g_serumData.fheight * sizeof(uint16_t)); @@ -2524,6 +2723,11 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, cshft = colorshifts64; pSceneBackgroundFrame = mySerum.frame64; } + DebugLogColorizeFrameV2Assets( + IDfound, g_debugCurrentInputCrc, true, g_serumData.fwidth_extra, + g_serumData.fheight_extra, frameColorsExtra, frameBackgroundMaskExtra, + frameBackgroundExtra, frameHasDynamicExtra, frameDynaExtra, + frameDynaExtraActive, frameDynaColorsExtra, prt, backgroundId); if (applySceneBackground) memcpy(sceneBackgroundFrame, pSceneBackgroundFrame, g_serumData.fwidth_extra * g_serumData.fheight_extra * @@ -3007,6 +3211,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( // before the first rotation in ms mySerum.triggerID = 0xffffffff; mySerum.frameID = IDENTIFY_NO_FRAME; + g_debugCurrentInputCrc = 0; // Identify frame unless caller already resolved a concrete frame ID. 
uint32_t frameID = IDENTIFY_NO_FRAME; @@ -3025,6 +3230,10 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } else { frameID = Identify_Frame(frame, sceneFrameRequested); } + if (frame && g_serumData.fwidth > 0 && g_serumData.fheight > 0) { + g_debugCurrentInputCrc = + crc32_fast(frame, g_serumData.fwidth * g_serumData.fheight); + } uint32_t now = GetMonotonicTimeMs(); bool rotationIsScene = false; if (is_real_machine() && !showStatusMessages) { @@ -3073,6 +3282,13 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } mySerum.frameID = frameID; + if (DebugTraceMatches(g_debugCurrentInputCrc, frameID)) { + Log("Serum debug identify result: inputCrc=%u frameId=%u " + "sceneRequested=%s triggerId=%u", + g_debugCurrentInputCrc, frameID, + sceneFrameRequested ? "true" : "false", + g_serumData.triggerIDs[lastfound][0]); + } if (!sceneFrameRequested) { PrefetchNextNormalFrameAssets(frameID); } @@ -3201,12 +3417,14 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( if (!sceneIsLastBackgroundFrame) { Colorize_Framev2(frame, lastfound, false, false, suppressPlaceholderBackground); + DebugHashCurrentOutputFrame(lastfound, false); } if ((isBackgroundSceneRequested) || sceneIsLastBackgroundFrame) { Colorize_Framev2( lastFrame, lastFrameId, true, (sceneOptionFlags & FLAG_SCENE_ONLY_DYNAMIC_CONTENT) == FLAG_SCENE_ONLY_DYNAMIC_CONTENT); + DebugHashCurrentOutputFrame(lastFrameId, false); } if (profileNow) { g_profileColorizeFrameV2Ns += @@ -3227,6 +3445,24 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( isBackgroundSceneRequested ? lastFrameId : lastfound); ti++; } + if (DebugTraceMatches(g_debugCurrentInputCrc, + isBackgroundSceneRequested ? lastFrameId + : lastfound)) { + uint64_t spriteHash = DebugHashBytesFNV1a64( + ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && mySerum.frame32) + ? static_cast(mySerum.frame32) + : static_cast(mySerum.frame64), + ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && mySerum.frame32) + ? 
static_cast(mySerum.width32) * 32 * + sizeof(uint16_t) + : static_cast(mySerum.width64) * 64 * + sizeof(uint16_t)); + Log("Serum debug stage hash: frameId=%u inputCrc=%u stage=post-sprites " + "hash=%llu sprites=%u", + isBackgroundSceneRequested ? lastFrameId : lastfound, + g_debugCurrentInputCrc, + static_cast(spriteHash), nspr); + } if (profileNow) { g_profileColorizeSpriteV2Ns += (uint64_t)std::chrono::duration_cast( From fbd0b328c782434c5de116b71b08c12f71cf3599 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Tue, 17 Mar 2026 17:17:38 +0100 Subject: [PATCH 19/42] more debug --- src/serum-decode.cpp | 217 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 214 insertions(+), 3 deletions(-) diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 1e30b9f..1ab5948 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -110,6 +110,8 @@ static uint32_t g_debugTargetInputCrc = 0; static uint32_t g_debugTargetFrameId = 0xffffffffu; static bool g_debugStageHashes = false; static uint32_t g_debugCurrentInputCrc = 0; +static bool g_debugTraceAllInputs = false; +static uint32_t g_debugFrameMetaLoggedFor = 0xffffffffu; static SerumData g_serumData; uint16_t sceneFrameCount = 0; @@ -265,11 +267,14 @@ static void InitDebugFrameTracingFromEnv(void) { g_debugTargetFrameId = GetEnvUint32Auto("SERUM_DEBUG_FRAME_ID", 0xffffffffu); g_debugStageHashes = IsEnvFlagEnabled("SERUM_DEBUG_STAGE_HASHES"); + g_debugTraceAllInputs = IsEnvFlagEnabled("SERUM_DEBUG_TRACE_INPUTS"); if (g_debugTargetInputCrc != 0 || g_debugTargetFrameId != 0xffffffffu || - g_debugStageHashes) { - Log("Serum debug tracing enabled: inputCrc=%u frameId=%u stageHashes=%s", + g_debugStageHashes || g_debugTraceAllInputs) { + Log("Serum debug tracing enabled: inputCrc=%u frameId=%u stageHashes=%s " + "traceAllInputs=%s", g_debugTargetInputCrc, g_debugTargetFrameId, - g_debugStageHashes ? "on" : "off"); + g_debugStageHashes ? "on" : "off", + g_debugTraceAllInputs ? 
"on" : "off"); } } @@ -287,6 +292,71 @@ static bool DebugTraceMatchesInputCrc(uint32_t inputCrc) { return (g_debugTargetInputCrc == 0) || (inputCrc == g_debugTargetInputCrc); } +static bool DebugTraceAllInputsEnabled() { + InitDebugFrameTracingFromEnv(); + return g_debugTraceAllInputs; +} + +static void DebugLogFrameMetadataIfRequested(uint32_t frameId) { + InitDebugFrameTracingFromEnv(); + if (g_debugTargetFrameId == 0xffffffffu || frameId != g_debugTargetFrameId || + frameId >= g_serumData.nframes || g_debugFrameMetaLoggedFor == frameId) { + return; + } + g_debugFrameMetaLoggedFor = frameId; + + const uint8_t mask = g_serumData.compmaskID[frameId][0]; + const uint8_t shape = g_serumData.shapecompmode[frameId][0]; + const uint32_t hash = g_serumData.hashcodes[frameId][0]; + const uint8_t active = g_serumData.activeframes[frameId][0]; + const uint32_t triggerId = g_serumData.triggerIDs[frameId][0]; + const uint16_t backgroundId = g_serumData.backgroundIDs[frameId][0]; + const uint8_t isExtra = g_serumData.isextraframe[frameId][0]; + const uint8_t hasDynamic = + (frameId < g_serumData.frameHasDynamic.size()) + ? g_serumData.frameHasDynamic[frameId] + : 0; + const uint8_t hasDynamicExtra = + (frameId < g_serumData.frameHasDynamicExtra.size()) + ? g_serumData.frameHasDynamicExtra[frameId] + : 0; + const uint8_t isScene = + (frameId < g_serumData.frameIsScene.size()) ? 
g_serumData.frameIsScene[frameId] + : 0; + + Log("Serum debug frame meta: frameId=%u mask=%u shape=%u hash=%u active=%u " + "triggerId=%u backgroundId=%u isExtra=%u hasDynamic=%u " + "hasDynamicExtra=%u isScene=%u", + frameId, mask, shape, hash, active, triggerId, backgroundId, isExtra, + hasDynamic, hasDynamicExtra, isScene); + + const uint8_t* spriteSlots = g_serumData.framesprites[frameId]; + const uint16_t* spriteBB = g_serumData.framespriteBB[frameId]; + uint32_t spriteCount = 0; + for (uint32_t i = 0; i < MAX_SPRITES_PER_FRAME; ++i) { + if (spriteSlots[i] >= 255) { + break; + } + ++spriteCount; + } + if (spriteCount == 0) { + Log("Serum debug frame sprites: frameId=%u count=0", frameId); + return; + } + + for (uint32_t i = 0; i < spriteCount; ++i) { + const uint8_t spriteId = spriteSlots[i]; + const uint8_t usesShape = + (spriteId < g_serumData.spriteUsesShape.size()) + ? g_serumData.spriteUsesShape[spriteId] + : g_serumData.sprshapemode[spriteId][0]; + Log("Serum debug frame sprite-slot: frameId=%u slot=%u spriteId=%u " + "bbox=[%u,%u..%u,%u] usesShape=%u", + frameId, i, spriteId, spriteBB[i * 4], spriteBB[i * 4 + 1], + spriteBB[i * 4 + 2], spriteBB[i * 4 + 3], usesShape); + } +} + static uint64_t DebugHashBytesFNV1a64(const void* data, size_t size) { const uint8_t* bytes = static_cast(data); uint64_t hash = 1469598103934665603ULL; @@ -409,6 +479,74 @@ static void DebugLogColorizeFrameV2Assets( dynamicNonZeroPixels, static_cast(rotationHash)); } +static bool DebugTraceSpritesForCurrentInput() { + return DebugTraceMatchesInputCrc(g_debugCurrentInputCrc); +} + +static void DebugLogSpriteCheckStart(uint32_t frameId, uint32_t candidateCount, + bool hasCandidateSidecars, + bool frameHasShapeCandidates) { + if (!DebugTraceSpritesForCurrentInput()) { + return; + } + Log("Serum debug sprites start: frameId=%u inputCrc=%u candidates=%u " + "sidecars=%s shapeCandidates=%s", + frameId, g_debugCurrentInputCrc, candidateCount, + hasCandidateSidecars ? 
"true" : "false", + frameHasShapeCandidates ? "true" : "false"); +} + +static void DebugLogSpriteCandidate(uint32_t frameId, uint8_t spriteId, + uint8_t spriteSlot, bool usesShape, + uint32_t detectCount, short minxBB, + short minyBB, short maxxBB, short maxyBB, + int spriteWidth, int spriteHeight) { + if (!DebugTraceSpritesForCurrentInput()) { + return; + } + Log("Serum debug sprite candidate: frameId=%u inputCrc=%u spriteId=%u " + "slot=%u shape=%s detectCount=%u bbox=[%d,%d..%d,%d] size=%dx%d", + frameId, g_debugCurrentInputCrc, spriteId, spriteSlot, + usesShape ? "true" : "false", detectCount, minxBB, minyBB, maxxBB, maxyBB, + spriteWidth, spriteHeight); +} + +static void DebugLogSpriteDetectionWord(uint32_t frameId, uint8_t spriteId, + uint32_t detectionIndex, + uint32_t detectionWord, short frax, + short fray, short offsx, short offsy, + short detw, short deth) { + if (!DebugTraceSpritesForCurrentInput()) { + return; + } + Log("Serum debug sprite detection: frameId=%u inputCrc=%u spriteId=%u " + "detectIndex=%u word=%u framePos=(%d,%d) area=(%d,%d %dx%d)", + frameId, g_debugCurrentInputCrc, spriteId, detectionIndex, detectionWord, + frax, fray, offsx, offsy, detw, deth); +} + +static void DebugLogSpriteAccepted(uint32_t frameId, uint8_t spriteId, + uint8_t spriteSlot, uint16_t frameX, + uint16_t frameY, uint16_t spriteX, + uint16_t spriteY, uint16_t width, + uint16_t height, bool duplicate) { + if (!DebugTraceSpritesForCurrentInput()) { + return; + } + Log("Serum debug sprite accepted: frameId=%u inputCrc=%u spriteId=%u " + "slot=%u frame=(%u,%u) sprite=(%u,%u) size=%ux%u duplicate=%s", + frameId, g_debugCurrentInputCrc, spriteId, spriteSlot, frameX, frameY, + spriteX, spriteY, width, height, duplicate ? 
"true" : "false"); +} + +static void DebugLogSpriteCheckResult(uint32_t frameId, uint8_t nspr) { + if (!DebugTraceSpritesForCurrentInput()) { + return; + } + Log("Serum debug sprites result: frameId=%u inputCrc=%u matches=%u", frameId, + g_debugCurrentInputCrc, nspr); +} + SERUM_API void Serum_SetLogCallback(Serum_LogCallback callback, const void* userData) { g_serumData.SetLogCallback(callback, userData); @@ -1757,6 +1895,7 @@ static void InitFrameLookupRuntimeStateFromStoredData(void) { uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { if (!cromloaded) return IDENTIFY_NO_FRAME; + DebugLogFrameMetadataIfRequested(g_debugTargetFrameId); uint32_t tj = sceneFrameRequested ? lastfound_scene : lastfound_normal; // stream-local search start @@ -2246,6 +2385,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, const uint32_t candidateCount = hasCandidateSidecars ? (candidateEnd - candidateStart) : MAX_SPRITES_PER_FRAME; + DebugLogSpriteCheckStart(quelleframe, candidateCount, hasCandidateSidecars, + frameHasShapeCandidates); for (uint32_t candidateIndex = 0; candidateIndex < candidateCount; ++candidateIndex) { uint8_t qspr = 255; @@ -2324,6 +2465,9 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, (qspr + 1) < g_serumData.spriteDetectOffsets.size() ? 
g_serumData.spriteDetectOffsets[qspr + 1] : detectStart; + DebugLogSpriteCandidate(quelleframe, qspr, spriteSlot, isshapecheck, + detectEnd - detectStart, minxBB, minyBB, maxxBB, + maxyBB, spw, sph); for (uint32_t tm = detectStart; tm < detectEnd; tm++) { const auto& detMeta = g_serumData.spriteDetectMeta[tm]; const bool hasDetectionWord = @@ -2370,6 +2514,10 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, if ((offsx + detw > (int)maxxBB + 1) || (offsy + deth > (int)maxyBB + 1)) continue; + DebugLogSpriteDetectionWord(quelleframe, qspr, tm - detectStart, + detMeta.detectionWord, frax, fray, + static_cast(offsx), + static_cast(offsy), detw, deth); // we can now check if the full detection area is around the found // detection dword bool notthere = false; @@ -2458,6 +2606,10 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, (pwid[*nspr] == pwid[tk]) && (phei[*nspr] == phei[tk])) identicalfound = true; } + DebugLogSpriteAccepted(quelleframe, qspr, spriteSlot, + pfrx[*nspr], pfry[*nspr], pspx[*nspr], + pspy[*nspr], pwid[*nspr], phei[*nspr], + identicalfound); if (!identicalfound) { (*nspr)++; if (*nspr == MAX_SPRITES_PER_FRAME) return true; @@ -2468,6 +2620,7 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, } } } + DebugLogSpriteCheckResult(quelleframe, *nspr); if (*nspr > 0) return true; return false; } @@ -3000,6 +3153,14 @@ uint32_t Serum_ColorizeWithMetadatav1(uint8_t* frame) { } // Let's first identify the incoming frame among the ones we have in the crom + const uint32_t inputCrc = + (frame && g_serumData.fwidth > 0 && g_serumData.fheight > 0) + ? 
crc32_fast(frame, g_serumData.fwidth * g_serumData.fheight) + : 0; + g_debugCurrentInputCrc = inputCrc; + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input: api=v1 inputCrc=%u", inputCrc); + } uint32_t frameID = Identify_Frame(frame, false); mySerum.frameID = IDENTIFY_NO_FRAME; uint32_t now = GetMonotonicTimeMs(); @@ -3026,6 +3187,17 @@ uint32_t Serum_ColorizeWithMetadatav1(uint8_t* frame) { } if (frameID == IDENTIFY_SAME_FRAME) { + if (DebugTraceMatchesInputCrc(g_debugCurrentInputCrc)) { + Log("Serum debug identify same-frame: inputCrc=%u lastfound=%u " + "sceneRequested=%s triggerId=%u", + g_debugCurrentInputCrc, lastfound, "false", + g_serumData.triggerIDs[lastfound][0]); + } + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input result: api=v1 inputCrc=%u result=same-frame " + "lastfound=%u", + g_debugCurrentInputCrc, lastfound); + } if (keepTriggersInternal || mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) mySerum.triggerID = 0xffffffff; @@ -3034,6 +3206,11 @@ uint32_t Serum_ColorizeWithMetadatav1(uint8_t* frame) { mySerum.frameID = frameID; mySerum.rotationtimer = 0; + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input result: api=v1 inputCrc=%u result=frame " + "frameId=%u", + g_debugCurrentInputCrc, frameID); + } uint8_t nosprite[MAX_SPRITES_PER_FRAME], nspr; uint16_t frx[MAX_SPRITES_PER_FRAME], fry[MAX_SPRITES_PER_FRAME], @@ -3234,6 +3411,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( g_debugCurrentInputCrc = crc32_fast(frame, g_serumData.fwidth * g_serumData.fheight); } + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input: api=v2 inputCrc=%u sceneRequested=%s knownFrameId=%u", + g_debugCurrentInputCrc, sceneFrameRequested ? 
"true" : "false", + knownFrameId); + } uint32_t now = GetMonotonicTimeMs(); bool rotationIsScene = false; if (is_real_machine() && !showStatusMessages) { @@ -3275,6 +3457,19 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } if (frameID == IDENTIFY_SAME_FRAME) { + if (DebugTraceMatchesInputCrc(g_debugCurrentInputCrc)) { + Log("Serum debug identify same-frame: inputCrc=%u lastfound=%u " + "sceneRequested=%s triggerId=%u", + g_debugCurrentInputCrc, lastfound, + sceneFrameRequested ? "true" : "false", + g_serumData.triggerIDs[lastfound][0]); + } + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input result: api=v2 inputCrc=%u result=same-frame " + "lastfound=%u sceneRequested=%s", + g_debugCurrentInputCrc, lastfound, + sceneFrameRequested ? "true" : "false"); + } if (keepTriggersInternal || mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) mySerum.triggerID = 0xffffffff; @@ -3282,6 +3477,12 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } mySerum.frameID = frameID; + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input result: api=v2 inputCrc=%u result=frame " + "frameId=%u sceneRequested=%s", + g_debugCurrentInputCrc, frameID, + sceneFrameRequested ? "true" : "false"); + } if (DebugTraceMatches(g_debugCurrentInputCrc, frameID)) { Log("Serum debug identify result: inputCrc=%u frameId=%u " "sceneRequested=%s triggerId=%u", @@ -3602,6 +3803,12 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } } + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input result: api=v2 inputCrc=%u result=no-frame " + "sceneRequested=%s", + g_debugCurrentInputCrc, sceneFrameRequested ? 
"true" : "false"); + } + mySerum.triggerID = 0xffffffff; if (monochromeMode || monochromePaletteMode || @@ -3629,6 +3836,10 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( mySerum.width32 = g_serumData.fwidth; mySerum.width64 = 0; mySerum.frameID = 0xfffffffd; // monochrome frame ID + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug input result: api=v2 inputCrc=%u result=monochrome", + g_debugCurrentInputCrc); + } // disable render features like rotations for (uint8_t ti = 0; ti < MAX_COLOR_ROTATION_V2; ti++) { From 3b7c0cbed83b659de4272b08bc95c528d3b41e97 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Wed, 18 Mar 2026 10:11:11 +0100 Subject: [PATCH 20/42] more logging --- src/serum-decode.cpp | 50 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 1ab5948..0193784 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1944,6 +1944,17 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { g_serumData.hashcodes[ti][0], lastfound_stream); } if (first_match || ti != lastfound_stream || mask < 255) { + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=%s firstMatch=%s lastfoundStream=%u mask=%u " + "fullCrcBefore=%u", + inputCrc, ti, + first_match ? "first-match" + : (ti != lastfound_stream ? "new-frame-id" + : "mask-lt-255"), + first_match ? 
"true" : "false", lastfound_stream, mask, + lastframe_full_crc); + } lastfound_stream = ti; lastfound = ti; lastframe_full_crc = crc32_fast(frame, pixels); @@ -1953,10 +1964,24 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint32_t full_crc = crc32_fast(frame, pixels); if (full_crc != lastframe_full_crc) { + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=full-crc-diff firstMatch=%s lastfoundStream=%u " + "mask=%u fullCrcBefore=%u fullCrcNow=%u", + inputCrc, ti, first_match ? "true" : "false", + lastfound_stream, mask, lastframe_full_crc, full_crc); + } lastframe_full_crc = full_crc; lastfound = ti; return ti; } + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=same-frame firstMatch=%s lastfoundStream=%u mask=%u " + "fullCrc=%u", + inputCrc, ti, first_match ? "true" : "false", + lastfound_stream, mask, full_crc); + } lastfound = ti; return IDENTIFY_SAME_FRAME; } @@ -1983,6 +2008,17 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { } if (Hashc == g_serumData.hashcodes[ti][0]) { if (first_match || ti != lastfound_stream || mask < 255) { + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=%s firstMatch=%s lastfoundStream=%u mask=%u " + "fullCrcBefore=%u", + inputCrc, ti, + first_match ? "first-match" + : (ti != lastfound_stream ? "new-frame-id" + : "mask-lt-255"), + first_match ? 
"true" : "false", lastfound_stream, mask, + lastframe_full_crc); + } // Reset_ColorRotations(); lastfound_stream = ti; lastfound = ti; @@ -1993,11 +2029,25 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint32_t full_crc = crc32_fast(frame, pixels); if (full_crc != lastframe_full_crc) { + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=full-crc-diff firstMatch=%s lastfoundStream=%u " + "mask=%u fullCrcBefore=%u fullCrcNow=%u", + inputCrc, ti, first_match ? "true" : "false", + lastfound_stream, mask, lastframe_full_crc, full_crc); + } lastframe_full_crc = full_crc; lastfound = ti; return ti; // we found the same frame with shape as before, but // the full frame is different } + if (DebugTraceMatches(inputCrc, ti)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=same-frame firstMatch=%s lastfoundStream=%u " + "mask=%u fullCrc=%u", + inputCrc, ti, first_match ? "true" : "false", + lastfound_stream, mask, full_crc); + } lastfound = ti; return IDENTIFY_SAME_FRAME; // we found the frame, but it is the // same full frame as before (no From f41ab7f3ff46abb04838177dc2af73b03a5f5b6d Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Wed, 18 Mar 2026 19:03:57 +0100 Subject: [PATCH 21/42] in depth sprite debugging --- src/SerumData.cpp | 67 +++++++++++++ src/SerumData.h | 1 + src/serum-decode.cpp | 219 ++++++++++++++++++++++++++++++++++++++----- src/sparse-vector.h | 4 + 4 files changed, 268 insertions(+), 23 deletions(-) diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 32b2cd4..8dd47a8 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -8,6 +8,19 @@ bool is_real_machine(); +static uint32_t GetDebugSpriteIdFromEnv() { + const char *value = std::getenv("SERUM_DEBUG_SPRITE_ID"); + if (!value || value[0] == '\0') { + return 0xffffffffu; + } + char *endPtr = nullptr; + unsigned long parsed = std::strtoul(value, &endPtr, 0); + if (endPtr == value || 
*endPtr != '\0') { + return 0xffffffffu; + } + return static_cast(parsed); +} + SerumData::SerumData() : SerumVersion(0), concentrateFileVersion(SERUM_CONCENTRATE_VERSION), @@ -165,6 +178,58 @@ void SerumData::Clear() { colorRotationLookupByFrameAndColor.clear(); } +void SerumData::DebugLogSpriteDynamicSidecarState(const char *stage, + uint32_t spriteId) { + const uint32_t debugSpriteId = GetDebugSpriteIdFromEnv(); + if (spriteId != debugSpriteId || spriteId >= nsprites) { + return; + } + + const bool hasDyna = dynaspritemasks.hasData(spriteId); + const bool hasActive = dynaspritemasks_active.hasData(spriteId); + uint32_t dyna255 = 0; + uint32_t dyna0 = 0; + uint32_t dynaOther = 0; + uint32_t active0 = 0; + uint32_t active1 = 0; + uint32_t activeOther = 0; + + if (hasDyna) { + const uint8_t *source = dynaspritemasks[spriteId]; + for (size_t i = 0; i < MAX_SPRITE_WIDTH * MAX_SPRITE_HEIGHT; ++i) { + const uint8_t value = source[i]; + if (value == 255) { + ++dyna255; + } else if (value == 0) { + ++dyna0; + } else { + ++dynaOther; + } + } + } + + if (hasActive) { + const uint8_t *active = dynaspritemasks_active[spriteId]; + for (size_t i = 0; i < MAX_SPRITE_WIDTH * MAX_SPRITE_HEIGHT; ++i) { + const uint8_t value = active[i]; + if (value == 0) { + ++active0; + } else if (value == 1) { + ++active1; + } else { + ++activeOther; + } + } + } + + Log("Serum debug sprite sidecar: stage=%s spriteId=%u hasDyna=%s " + "hasActive=%s dyna255=%u dyna0=%u dynaOther=%u active0=%u active1=%u " + "activeOther=%u", + stage ? stage : "unknown", spriteId, hasDyna ? "true" : "false", + hasActive ? 
"true" : "false", dyna255, dyna0, dynaOther, active0, + active1, activeOther); +} + void SerumData::BuildPackingSidecarsAndNormalize() { if (m_packingSidecarsNormalized) { return; @@ -307,6 +372,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { normalized.resize(spritePixels); flags.resize(spritePixels); for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { + DebugLogSpriteDynamicSidecarState("normalize-before", spriteId); const bool hasSourceVector = dynaspritemasks.hasData(spriteId); const bool hasActiveVector = dynaspritemasks_active.hasData(spriteId); if (!hasSourceVector && !hasActiveVector) { @@ -323,6 +389,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } dynaspritemasks_active.set(spriteId, flags.data(), spritePixels); dynaspritemasks.set(spriteId, normalized.data(), spritePixels); + DebugLogSpriteDynamicSidecarState("normalize-after", spriteId); } for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { diff --git a/src/SerumData.h b/src/SerumData.h index 7036cb5..61027bb 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -78,6 +78,7 @@ class SerumData { void BuildPackingSidecarsAndNormalize(); void PrepareRuntimeDynamicHotCache(); void BuildSpriteRuntimeSidecars(); + void DebugLogSpriteDynamicSidecarState(const char *stage, uint32_t spriteId); bool HasSpriteRuntimeSidecars() const; void BuildColorRotationLookup(); bool TryGetColorRotation(uint32_t frameId, uint16_t color, bool isextra, diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 0193784..dc334d9 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -112,6 +112,7 @@ static bool g_debugStageHashes = false; static uint32_t g_debugCurrentInputCrc = 0; static bool g_debugTraceAllInputs = false; static uint32_t g_debugFrameMetaLoggedFor = 0xffffffffu; +static bool g_debugBypassSceneGate = false; static SerumData g_serumData; uint16_t sceneFrameCount = 0; @@ -268,13 +269,16 @@ static void InitDebugFrameTracingFromEnv(void) { 
GetEnvUint32Auto("SERUM_DEBUG_FRAME_ID", 0xffffffffu); g_debugStageHashes = IsEnvFlagEnabled("SERUM_DEBUG_STAGE_HASHES"); g_debugTraceAllInputs = IsEnvFlagEnabled("SERUM_DEBUG_TRACE_INPUTS"); + g_debugBypassSceneGate = IsEnvFlagEnabled("SERUM_DEBUG_BYPASS_SCENE_GATE"); if (g_debugTargetInputCrc != 0 || g_debugTargetFrameId != 0xffffffffu || - g_debugStageHashes || g_debugTraceAllInputs) { + g_debugStageHashes || g_debugTraceAllInputs || + g_debugBypassSceneGate) { Log("Serum debug tracing enabled: inputCrc=%u frameId=%u stageHashes=%s " - "traceAllInputs=%s", + "traceAllInputs=%s bypassSceneGate=%s", g_debugTargetInputCrc, g_debugTargetFrameId, g_debugStageHashes ? "on" : "off", - g_debugTraceAllInputs ? "on" : "off"); + g_debugTraceAllInputs ? "on" : "off", + g_debugBypassSceneGate ? "on" : "off"); } } @@ -367,6 +371,28 @@ static uint64_t DebugHashBytesFNV1a64(const void* data, size_t size) { return hash; } +static uint64_t DebugHashFrameRegionFNV1a64(const uint16_t* frame, + uint32_t stride, uint16_t x, + uint16_t y, uint16_t width, + uint16_t height) { + if (!frame || width == 0 || height == 0) { + return 1469598103934665603ull; + } + uint64_t hash = 1469598103934665603ull; + for (uint16_t row = 0; row < height; ++row) { + const uint16_t* src = frame + static_cast(y + row) * stride + x; + for (uint16_t col = 0; col < width; ++col) { + const uint16_t value = src[col]; + const uint8_t* bytes = reinterpret_cast(&value); + hash ^= bytes[0]; + hash *= 1099511628211ull; + hash ^= bytes[1]; + hash *= 1099511628211ull; + } + } + return hash; +} + static uint64_t DebugHashCurrentOutputFrame(uint32_t frameId, bool isExtra) { uint16_t* output = nullptr; uint32_t width = 0; @@ -525,6 +551,25 @@ static void DebugLogSpriteDetectionWord(uint32_t frameId, uint8_t spriteId, frax, fray, offsx, offsy, detw, deth); } +static void DebugLogSpriteRejected(uint32_t frameId, uint8_t spriteId, + uint8_t spriteSlot, const char* reason, + uint32_t detectionIndex, short frax, + short fray, 
short offsx, short offsy, + uint32_t detailA = 0, + uint32_t detailB = 0, + uint32_t detailC = 0, + uint32_t detailD = 0) { + if (!DebugTraceSpritesForCurrentInput()) { + return; + } + Log("Serum debug sprite rejected: frameId=%u inputCrc=%u spriteId=%u " + "slot=%u reason=%s detectIndex=%u framePos=(%d,%d) area=(%d,%d) " + "detailA=%u detailB=%u detailC=%u detailD=%u", + frameId, g_debugCurrentInputCrc, spriteId, spriteSlot, reason, + detectionIndex, frax, fray, offsx, offsy, detailA, detailB, detailC, + detailD); +} + static void DebugLogSpriteAccepted(uint32_t frameId, uint8_t spriteId, uint8_t spriteSlot, uint16_t frameX, uint16_t frameY, uint16_t spriteX, @@ -890,6 +935,17 @@ static Serum_Frame_Struc* Serum_LoadConcentratePrepared(const uint8_t flags) { } g_serumData.BuildPackingSidecarsAndNormalize(); + { + const char *debugSpriteId = std::getenv("SERUM_DEBUG_SPRITE_ID"); + if (debugSpriteId && debugSpriteId[0] != '\0') { + char *endPtr = nullptr; + unsigned long parsed = std::strtoul(debugSpriteId, &endPtr, 0); + if (endPtr != debugSpriteId && *endPtr == '\0') { + g_serumData.DebugLogSpriteDynamicSidecarState( + "post-load-prepared", static_cast(parsed)); + } + } + } // Set requested frame types isoriginalrequested = false; @@ -2554,16 +2610,34 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, const short deth = static_cast(detMeta.detectHeight); // if the detection area starts before the frame (left or top), // continue: - if ((frax - minxBB < sprx - detx) || (fray - minyBB < spry - dety)) + if ((frax - minxBB < sprx - detx) || + (fray - minyBB < spry - dety)) { + DebugLogSpriteRejected( + quelleframe, qspr, spriteSlot, "bbox-start", + tm - detectStart, frax, fray, 0, 0, + static_cast(frax - minxBB), + static_cast(sprx - detx), + static_cast(fray - minyBB), + static_cast(spry - dety)); continue; + } // position of the detection area in the frame int offsx = frax - sprx + detx; int offsy = fray - spry + dety; // if the detection area extends 
beyond the bounding box (right or // bottom), continue: if ((offsx + detw > (int)maxxBB + 1) || - (offsy + deth > (int)maxyBB + 1)) + (offsy + deth > (int)maxyBB + 1)) { + DebugLogSpriteRejected( + quelleframe, qspr, spriteSlot, "bbox-end", + tm - detectStart, frax, fray, static_cast(offsx), + static_cast(offsy), + static_cast(offsx + detw), + static_cast((int)maxxBB + 1), + static_cast(offsy + deth), + static_cast((int)maxyBB + 1)); continue; + } DebugLogSpriteDetectionWord(quelleframe, qspr, tm - detectStart, detMeta.detectionWord, frax, fray, static_cast(offsx), @@ -2577,6 +2651,13 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, MAX_SPRITE_HEIGHT + spriteRow; if (rowIndex >= g_serumData.spriteOpaqueRowSegmentStart.size()) { + DebugLogSpriteRejected( + quelleframe, qspr, spriteSlot, "row-sidecar-oob", + tm - detectStart, frax, fray, static_cast(offsx), + static_cast(offsy), rowIndex, + static_cast( + g_serumData.spriteOpaqueRowSegmentStart.size()), + spriteRow, static_cast(tk)); notthere = true; break; } @@ -2587,6 +2668,13 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, for (uint16_t seg = 0; seg < segCount && !notthere; ++seg) { const uint32_t segIndex = segStartIndex + seg * 2; if (segIndex + 1 >= g_serumData.spriteOpaqueSegments.size()) { + DebugLogSpriteRejected( + quelleframe, qspr, spriteSlot, "segment-sidecar-oob", + tm - detectStart, frax, fray, static_cast(offsx), + static_cast(offsy), segIndex, + static_cast( + g_serumData.spriteOpaqueSegments.size()), + segStartIndex, segCount); notthere = true; break; } @@ -2614,7 +2702,16 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, if (spriteOpaque[spriteOffset] == 0) { continue; } - if (spriteOriginal[spriteOffset] != Frame[frameOffset]) { + const uint8_t expectedValue = + isshapecheck ? 
static_cast(spriteOriginal[spriteOffset] > 0) + : spriteOriginal[spriteOffset]; + if (expectedValue != Frame[frameOffset]) { + DebugLogSpriteRejected( + quelleframe, qspr, spriteSlot, "opaque-run-mismatch", + tm - detectStart, frax, fray, + static_cast(offsx), static_cast(offsy), + spriteOffset, frameOffset, expectedValue, + Frame[frameOffset]); notthere = true; break; } @@ -2660,6 +2757,12 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, pfrx[*nspr], pfry[*nspr], pspx[*nspr], pspy[*nspr], pwid[*nspr], phei[*nspr], identicalfound); + if (identicalfound) { + DebugLogSpriteRejected( + quelleframe, qspr, spriteSlot, "duplicate", + tm - detectStart, frax, fray, pfrx[*nspr], pfry[*nspr], + pspx[*nspr], pspy[*nspr], pwid[*nspr], phei[*nspr]); + } if (!identicalfound) { (*nspr)++; if (*nspr == MAX_SPRITES_PER_FRAME) return true; @@ -3024,15 +3127,36 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, uint16_t *pfr, *prot; uint16_t* prt; uint32_t* cshft; - if (!g_serumData.spriteoriginal_opaque.hasData(nosprite) || - !g_serumData.dynaspritemasks_active.hasData(nosprite) || - !g_serumData.dynaspritemasks.hasData(nosprite)) { + const bool traceSprite = DebugTraceMatches(g_debugCurrentInputCrc, IDfound); + const bool hasOpaque = g_serumData.spriteoriginal_opaque.hasData(nosprite); + const bool hasDynaActive = + g_serumData.dynaspritemasks_active.hasData(nosprite); + const bool hasDyna = g_serumData.dynaspritemasks.hasData(nosprite); + const bool hasColor = g_serumData.spritecolored.hasData(nosprite); + const bool hasColorExtra = g_serumData.spritecolored_extra.hasData(nosprite); + if (!hasOpaque) { + if (traceSprite) { + Log("Serum debug sprite render skip: frameId=%u inputCrc=%u spriteId=%u " + "reason=missing-base-opaque-sidecar", + IDfound, g_debugCurrentInputCrc, nosprite); + } return; } const uint8_t* spriteOpaque = g_serumData.spriteoriginal_opaque[nosprite]; - const uint8_t* spriteDyna = g_serumData.dynaspritemasks[nosprite]; + 
const uint8_t* spriteDyna = hasDyna ? g_serumData.dynaspritemasks[nosprite] + : nullptr; const uint8_t* spriteDynaActive = - g_serumData.dynaspritemasks_active[nosprite]; + hasDynaActive ? g_serumData.dynaspritemasks_active[nosprite] : nullptr; + const bool hasUsableBaseDyna = spriteDyna != nullptr && spriteDynaActive != nullptr; + if (traceSprite) { + Log("Serum debug sprite render source: frameId=%u inputCrc=%u spriteId=%u " + "frame=(%u,%u) sprite=(%u,%u) size=%ux%u hasOpaque=%s hasColor=%s " + "hasDyna=%s hasDynaActive=%s hasExtraColor=%s", + IDfound, g_debugCurrentInputCrc, nosprite, frx, fry, spx, spy, wid, + hei, hasOpaque ? "true" : "false", hasColor ? "true" : "false", + hasDyna ? "true" : "false", hasDynaActive ? "true" : "false", + hasColorExtra ? "true" : "false"); + } if (((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && g_serumData.fheight == 32) || ((mySerum.flags & FLAG_RETURNED_64P_FRAME_OK) && @@ -3053,7 +3177,15 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, uint16_t tk = (fry + tj) * g_serumData.fwidth + frx + ti; uint32_t tl = (tj + spy) * MAX_SPRITE_WIDTH + ti + spx; if (spriteOpaque[tl] > 0) { - if (spriteDynaActive[tl] == 0) { + if (!hasColor) { + if (traceSprite) { + Log("Serum debug sprite render skip: frameId=%u inputCrc=%u " + "spriteId=%u reason=missing-base-color", + IDfound, g_debugCurrentInputCrc, nosprite); + } + return; + } + if (!hasUsableBaseDyna || spriteDynaActive[tl] == 0) { pfr[tk] = g_serumData.spritecolored[nosprite][tl]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], false)) @@ -3075,22 +3207,39 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, } } } + if (traceSprite) { + Log("Serum debug sprite render result: frameId=%u inputCrc=%u " + "spriteId=%u plane=base rendered=true", + IDfound, g_debugCurrentInputCrc, nosprite); + } } if (((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && g_serumData.fheight_extra == 32) || ((mySerum.flags & 
FLAG_RETURNED_64P_FRAME_OK) && g_serumData.fheight_extra == 64)) { - if (!g_serumData.spritemask_extra_opaque.hasData(nosprite) || - !g_serumData.dynaspritemasks_extra_active.hasData(nosprite) || - !g_serumData.dynaspritemasks_extra.hasData(nosprite)) { + const bool hasExtraOpaque = + g_serumData.spritemask_extra_opaque.hasData(nosprite); + const bool hasExtraDynaActive = + g_serumData.dynaspritemasks_extra_active.hasData(nosprite); + const bool hasExtraDyna = + g_serumData.dynaspritemasks_extra.hasData(nosprite); + if (!hasExtraOpaque) { + if (traceSprite) { + Log("Serum debug sprite render skip: frameId=%u inputCrc=%u " + "spriteId=%u reason=missing-extra-opaque-sidecar", + IDfound, g_debugCurrentInputCrc, nosprite); + } return; } const uint8_t* spriteExtraOpaque = g_serumData.spritemask_extra_opaque[nosprite]; const uint8_t* spriteExtraDyna = - g_serumData.dynaspritemasks_extra[nosprite]; + hasExtraDyna ? g_serumData.dynaspritemasks_extra[nosprite] : nullptr; const uint8_t* spriteExtraDynaActive = - g_serumData.dynaspritemasks_extra_active[nosprite]; + hasExtraDynaActive ? 
g_serumData.dynaspritemasks_extra_active[nosprite] + : nullptr; + const bool hasUsableExtraDyna = + spriteExtraDyna != nullptr && spriteExtraDynaActive != nullptr; uint16_t thei, twid, tfrx, tfry, tspy, tspx; if (g_serumData.fheight_extra == 32) { pfr = mySerum.frame32; @@ -3120,7 +3269,16 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, uint16_t tk = (tfry + tj) * g_serumData.fwidth_extra + tfrx + ti; const uint32_t spritePixel = (tj + tspy) * MAX_SPRITE_WIDTH + ti + tspx; if (spriteExtraOpaque[spritePixel] > 0) { - if (spriteExtraDynaActive[spritePixel] == 0) { + if (!hasColorExtra) { + if (traceSprite) { + Log("Serum debug sprite render skip: frameId=%u inputCrc=%u " + "spriteId=%u reason=missing-extra-color", + IDfound, g_debugCurrentInputCrc, nosprite); + } + return; + } + if (!hasUsableExtraDyna || + spriteExtraDynaActive[spritePixel] == 0) { pfr[tk] = g_serumData.spritecolored_extra[nosprite] [(tj + tspy) * MAX_SPRITE_WIDTH + @@ -3149,6 +3307,11 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, } } } + if (traceSprite) { + Log("Serum debug sprite render result: frameId=%u inputCrc=%u " + "spriteId=%u plane=extra rendered=true", + IDfound, g_debugCurrentInputCrc, nosprite); + } } } @@ -3493,11 +3656,21 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( !sceneFrameRequested && (sceneCurrentFrame < sceneFrameCount || sceneEndHoldUntilMs > 0) && !sceneInterruptable) { - if (keepTriggersInternal || - mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) - mySerum.triggerID = 0xffffffff; - // Scene is active and not interruptable - return IDENTIFY_NO_FRAME; + if (DebugTraceMatches(g_debugCurrentInputCrc, lastfound)) { + Log("Serum debug v2 gate: inputCrc=%u frameId=%u " + "gate=scene-noninterruptable currentFrame=%u sceneFrameCount=%u " + "endHoldUntil=%u bypass=%s", + g_debugCurrentInputCrc, lastfound, sceneCurrentFrame, + sceneFrameCount, sceneEndHoldUntilMs, + g_debugBypassSceneGate ? 
"true" : "false"); + } + if (!g_debugBypassSceneGate) { + if (keepTriggersInternal || + mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) + mySerum.triggerID = 0xffffffff; + // Scene is active and not interruptable + return IDENTIFY_NO_FRAME; + } } // frame identified diff --git a/src/sparse-vector.h b/src/sparse-vector.h index 45be10a..9e304a7 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -728,6 +728,10 @@ class SparseVector { } if (parent == nullptr || parent->hasData(elementId)) { + if (memcmp(values, noData.data(), elementSize * sizeof(T)) == 0) { + data.erase(elementId); + return; + } if (memcmp(values, noData.data(), elementSize * sizeof(T)) != 0) { std::vector valuePacked; const uint8_t *storeBytes = reinterpret_cast(values); From ac406a9c944a95bc990007f92e62e6683d425298 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Wed, 18 Mar 2026 23:18:47 +0100 Subject: [PATCH 22/42] scene debugging --- src/serum-decode.cpp | 157 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 144 insertions(+), 13 deletions(-) diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index dc334d9..b34f0f9 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -301,6 +301,23 @@ static bool DebugTraceAllInputsEnabled() { return g_debugTraceAllInputs; } +static void DebugLogSceneEvent(const char* event, uint16_t sceneId, + uint16_t frameIndex, uint16_t frameCount, + uint16_t durationPerFrame, uint8_t options, + bool interruptable, bool startImmediately, + uint8_t repeatCount, uint8_t group = 0, + int result = -1) { + if (!DebugTraceAllInputsEnabled()) { + return; + } + Log("Serum debug scene event: event=%s sceneId=%u frameIndex=%u " + "frameCount=%u duration=%u options=%u interruptable=%s " + "startImmediately=%s repeat=%u group=%u result=%d", + event ? event : "unknown", sceneId, frameIndex, frameCount, + durationPerFrame, options, interruptable ? "true" : "false", + startImmediately ? 
"true" : "false", repeatCount, group, result); +} + static void DebugLogFrameMetadataIfRequested(uint32_t frameId) { InitDebugFrameTracingFromEnv(); if (g_debugTargetFrameId == 0xffffffffu || frameId != g_debugTargetFrameId || @@ -2141,10 +2158,10 @@ static void WarmFrameAssetsForId(uint32_t frameId) { const uint16_t backgroundId = g_serumData.backgroundIDs[frameId][0]; if (backgroundId < g_serumData.nbackgrounds) { - (void)g_serumData.backgroundmask[backgroundId]; + (void)g_serumData.backgroundmask[frameId]; (void)g_serumData.backgroundframes_v2[backgroundId]; if (g_serumData.isextrabackground[backgroundId][0] > 0) { - (void)g_serumData.backgroundmask_extra[backgroundId]; + (void)g_serumData.backgroundmask_extra[frameId]; (void)g_serumData.backgroundframes_v2_extra[backgroundId]; } } @@ -3147,7 +3164,18 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, : nullptr; const uint8_t* spriteDynaActive = hasDynaActive ? g_serumData.dynaspritemasks_active[nosprite] : nullptr; - const bool hasUsableBaseDyna = spriteDyna != nullptr && spriteDynaActive != nullptr; + if (hasDyna != hasDynaActive || (hasDyna && spriteDyna == nullptr) || + (hasDynaActive && spriteDynaActive == nullptr)) { + if (traceSprite) { + Log("Serum debug sprite render skip: frameId=%u inputCrc=%u spriteId=%u " + "reason=inconsistent-base-dynamic-sidecars hasDyna=%s " + "hasDynaActive=%s ptrDyna=%s ptrDynaActive=%s", + IDfound, g_debugCurrentInputCrc, nosprite, hasDyna ? "true" : "false", + hasDynaActive ? "true" : "false", spriteDyna ? "true" : "false", + spriteDynaActive ? 
"true" : "false"); + } + return; + } if (traceSprite) { Log("Serum debug sprite render source: frameId=%u inputCrc=%u spriteId=%u " "frame=(%u,%u) sprite=(%u,%u) size=%ux%u hasOpaque=%s hasColor=%s " @@ -3185,7 +3213,7 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, } return; } - if (!hasUsableBaseDyna || spriteDynaActive[tl] == 0) { + if (!hasDynaActive || spriteDynaActive[tl] == 0) { pfr[tk] = g_serumData.spritecolored[nosprite][tl]; if (ColorInRotation(IDfound, pfr[tk], &prot[tk * 2], &prot[tk * 2 + 1], false)) @@ -3238,8 +3266,21 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, const uint8_t* spriteExtraDynaActive = hasExtraDynaActive ? g_serumData.dynaspritemasks_extra_active[nosprite] : nullptr; - const bool hasUsableExtraDyna = - spriteExtraDyna != nullptr && spriteExtraDynaActive != nullptr; + if (hasExtraDyna != hasExtraDynaActive || + (hasExtraDyna && spriteExtraDyna == nullptr) || + (hasExtraDynaActive && spriteExtraDynaActive == nullptr)) { + if (traceSprite) { + Log("Serum debug sprite render skip: frameId=%u inputCrc=%u " + "spriteId=%u reason=inconsistent-extra-dynamic-sidecars " + "hasDyna=%s hasDynaActive=%s ptrDyna=%s ptrDynaActive=%s", + IDfound, g_debugCurrentInputCrc, nosprite, + hasExtraDyna ? "true" : "false", + hasExtraDynaActive ? "true" : "false", + spriteExtraDyna ? "true" : "false", + spriteExtraDynaActive ? 
"true" : "false"); + } + return; + } uint16_t thei, twid, tfrx, tfry, tspy, tspx; if (g_serumData.fheight_extra == 32) { pfr = mySerum.frame32; @@ -3277,8 +3318,7 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, } return; } - if (!hasUsableExtraDyna || - spriteExtraDynaActive[spritePixel] == 0) { + if (!hasExtraDynaActive || spriteExtraDynaActive[spritePixel] == 0) { pfr[tk] = g_serumData.spritecolored_extra[nosprite] [(tj + tspy) * MAX_SPRITE_WIDTH + @@ -3712,6 +3752,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( g_debugCurrentInputCrc, frameID, sceneFrameRequested ? "true" : "false", g_serumData.triggerIDs[lastfound][0]); + } else if (DebugTraceAllInputsEnabled() && !sceneFrameRequested) { + Log("Serum debug trigger candidate: inputCrc=%u frameId=%u triggerId=%u " + "lastTriggerId=%u", + g_debugCurrentInputCrc, frameID, g_serumData.triggerIDs[lastfound][0], + lastTriggerID); } if (!sceneFrameRequested) { PrefetchNextNormalFrameAssets(frameID); @@ -3745,6 +3790,13 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } // stop any scene + if (sceneFrameCount > 0 || sceneEndHoldUntilMs > 0) { + DebugLogSceneEvent("stop-normal-frame", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount); + } sceneFrameCount = 0; sceneIsLastBackgroundFrame = false; sceneEndHoldUntilMs = 0; @@ -3759,13 +3811,43 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( lastTriggerID = mySerum.triggerID = g_serumData.triggerIDs[lastfound][0]; lasttriggerTimestamp = now; + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug trigger commit: inputCrc=%u frameId=%u triggerId=%u", + g_debugCurrentInputCrc, lastfound, lastTriggerID); + } + + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug trigger scene-gate: triggerId=%u " + "sceneGeneratorActive=%s triggerValid=%s", + lastTriggerID, + (g_serumData.sceneGenerator && + 
g_serumData.sceneGenerator->isActive()) + ? "true" + : "false", + lastTriggerID < 0xffffffff ? "true" : "false"); + } if (g_serumData.sceneGenerator->isActive() && lastTriggerID < 0xffffffff) { - if (g_serumData.sceneGenerator->getSceneInfo( - lastTriggerID, sceneFrameCount, sceneDurationPerFrame, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, sceneOptionFlags)) { + const bool hasSceneInfo = g_serumData.sceneGenerator->getSceneInfo( + lastTriggerID, sceneFrameCount, sceneDurationPerFrame, + sceneInterruptable, sceneStartImmediately, sceneRepeatCount, + sceneOptionFlags); + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug trigger scene-info: triggerId=%u found=%s " + "frameCount=%u duration=%u interruptable=%s " + "startImmediately=%s repeat=%u options=%u", + lastTriggerID, hasSceneInfo ? "true" : "false", + sceneFrameCount, sceneDurationPerFrame, + sceneInterruptable ? "true" : "false", + sceneStartImmediately ? "true" : "false", + sceneRepeatCount, sceneOptionFlags); + } + if (hasSceneInfo) { + DebugLogSceneEvent("trigger", static_cast(lastTriggerID), + 0, sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, + sceneStartImmediately, sceneRepeatCount); const bool sceneIsBackground = (sceneOptionFlags & FLAG_SCENE_AS_BACKGROUND) == FLAG_SCENE_AS_BACKGROUND; @@ -3799,6 +3881,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( g_sceneResumeState.erase(lastTriggerID); } if (sceneStartImmediately) { + DebugLogSceneEvent("start-immediate", + static_cast(lastTriggerID), 0, + sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, + sceneStartImmediately, sceneRepeatCount); uint32_t sceneRotationResult = Serum_RenderScene(); if (sceneRotationResult & FLAG_RETURNED_V2_SCENE) return sceneRotationResult; @@ -4136,12 +4223,22 @@ uint32_t Serum_RenderScene(void) { const uint32_t now = GetMonotonicTimeMs(); if (sceneEndHoldUntilMs > 0) { if (now < sceneEndHoldUntilMs) { + 
DebugLogSceneEvent("end-hold", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount); mySerum.rotationtimer = sceneEndHoldUntilMs - now; return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_SCENE; } // End hold elapsed: finish scene now. sceneEndHoldUntilMs = 0; + DebugLogSceneEvent("end-hold-finished", + static_cast(lastTriggerID), sceneCurrentFrame, + sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, + sceneStartImmediately, sceneRepeatCount); sceneFrameCount = 0; mySerum.rotationtimer = 0; ForceNormalFrameRefreshAfterSceneEnd(); @@ -4179,12 +4276,15 @@ uint32_t Serum_RenderScene(void) { return FLAG_RETURNED_V2_SCENE; } + const bool debugDisableSceneTriplets = + IsEnvFlagEnabled("SERUM_DEBUG_DISABLE_SCENE_TRIPLETS"); bool renderedFromDirectTriplet = false; uint8_t currentGroup = 1; bool hasGroup = g_serumData.sceneGenerator->updateAndGetCurrentGroup( static_cast(lastTriggerID), sceneCurrentFrame, -1, currentGroup); - if (hasGroup && !g_serumData.sceneFrameIdByTriplet.empty()) { + if (!debugDisableSceneTriplets && hasGroup && + !g_serumData.sceneFrameIdByTriplet.empty()) { auto it = g_serumData.sceneFrameIdByTriplet.find( MakeSceneTripletKey(static_cast(lastTriggerID), currentGroup, sceneCurrentFrame)); @@ -4196,17 +4296,36 @@ uint32_t Serum_RenderScene(void) { renderedFromDirectTriplet = true; } } + if (DebugTraceAllInputsEnabled()) { + Log("Serum debug scene path: sceneId=%u frameIndex=%u group=%u " + "disableTriplets=%s usedTriplet=%s tripletCount=%u", + static_cast(lastTriggerID), sceneCurrentFrame, currentGroup, + debugDisableSceneTriplets ? "true" : "false", + renderedFromDirectTriplet ? 
"true" : "false", + static_cast(g_serumData.sceneFrameIdByTriplet.size())); + } if (!renderedFromDirectTriplet) { uint16_t result = g_serumData.sceneGenerator->generateFrame( lastTriggerID, sceneCurrentFrame, sceneFrame, hasGroup ? currentGroup : -1); + DebugLogSceneEvent("generate", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); if (result > 0 && result < 0xffff) { // frame not ready yet, return the time to wait mySerum.rotationtimer = result; return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; } if (result != 0xffff) { + DebugLogSceneEvent("generate-error", + static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); sceneFrameCount = 0; // error generating scene frame, stop the scene mySerum.rotationtimer = 0; ForceNormalFrameRefreshAfterSceneEnd(); @@ -4215,6 +4334,13 @@ uint32_t Serum_RenderScene(void) { } mySerum.rotationtimer = sceneDurationPerFrame; Serum_ColorizeWithMetadatav2(sceneFrame, true); + } else { + DebugLogSceneEvent("triplet-render", + static_cast(lastTriggerID), sceneCurrentFrame, + sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, + sceneStartImmediately, sceneRepeatCount, currentGroup, + 0xffff); } sceneCurrentFrame++; @@ -4230,6 +4356,11 @@ uint32_t Serum_RenderScene(void) { } if (sceneCurrentFrame >= sceneFrameCount) { + DebugLogSceneEvent("scene-finished", + static_cast(lastTriggerID), sceneCurrentFrame, + sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, + sceneStartImmediately, sceneRepeatCount); if (sceneEndHoldDurationMs > 0) { sceneEndHoldUntilMs = now + sceneEndHoldDurationMs; mySerum.rotationtimer = sceneEndHoldDurationMs; From 049efc413a8e2357b871b12133acaafa54af5d34 Mon Sep 17 00:00:00 
2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 10:28:22 +0100 Subject: [PATCH 23/42] AFM sprite 43 fix --- AGENTS.md | 4 ++++ src/SerumData.cpp | 15 +++++++++++---- src/sparse-vector.h | 9 ++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5c29ea3..cc4834e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -60,6 +60,10 @@ Vector policy currently used in `SerumData`: - `dynamasks_extra` + `dynamasks_extra_active` - `dynaspritemasks` + `dynaspritemasks_active` - `dynaspritemasks_extra` + `dynaspritemasks_extra_active` + - During normalization, dynamic value vectors may preserve an explicit + all-zero payload only when the paired active sidecar still marks active + pixels; this is required because dynamic layer `0` is a valid value and is + not equivalent to "no payload". - Precomputed frame-level dynamic fast flags are persisted: - `frameHasDynamic` - `frameHasDynamicExtra` diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 8dd47a8..8d51228 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -331,7 +331,8 @@ void SerumData::BuildPackingSidecarsAndNormalize() { anyActive = anyActive || active; } dynamasks_active.set(frameId, flags.data(), framePixels); - dynamasks.set(frameId, normalized.data(), framePixels); + dynamasks.set(frameId, normalized.data(), framePixels, + static_cast *>(nullptr), anyActive); frameHasDynamic[frameId] = anyActive ? 1 : 0; } @@ -362,7 +363,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { dynamasks_extra_active.set(frameId, flags.data(), extraFramePixels, &isextraframe); dynamasks_extra.set(frameId, normalized.data(), extraFramePixels, - &isextraframe); + &isextraframe, anyActive); frameHasDynamicExtra[frameId] = anyActive ? 
1 : 0; } } else { @@ -380,15 +381,19 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } const uint8_t *source = dynaspritemasks[spriteId]; const uint8_t *activeSource = dynaspritemasks_active[spriteId]; + bool anyActive = false; for (size_t i = 0; i < spritePixels; ++i) { const uint8_t value = hasSourceVector ? source[i] : 0; const bool active = hasActiveVector ? (activeSource[i] > 0) : (value != 255); flags[i] = active ? 1 : 0; normalized[i] = active ? value : 0; + anyActive = anyActive || active; } dynaspritemasks_active.set(spriteId, flags.data(), spritePixels); - dynaspritemasks.set(spriteId, normalized.data(), spritePixels); + dynaspritemasks.set(spriteId, normalized.data(), spritePixels, + static_cast *>(nullptr), + anyActive); DebugLogSpriteDynamicSidecarState("normalize-after", spriteId); } @@ -403,17 +408,19 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } const uint8_t *source = dynaspritemasks_extra[spriteId]; const uint8_t *activeSource = dynaspritemasks_extra_active[spriteId]; + bool anyActive = false; for (size_t i = 0; i < spritePixels; ++i) { const uint8_t value = hasSourceVector ? source[i] : 0; const bool active = hasActiveVector ? (activeSource[i] > 0) : (value != 255); flags[i] = active ? 1 : 0; normalized[i] = active ? 
value : 0; + anyActive = anyActive || active; } dynaspritemasks_extra_active.set(spriteId, flags.data(), spritePixels, &isextrasprite); dynaspritemasks_extra.set(spriteId, normalized.data(), spritePixels, - &isextrasprite); + &isextrasprite, anyActive); } m_packingSidecarsNormalized = true; diff --git a/src/sparse-vector.h b/src/sparse-vector.h index 9e304a7..b96f3e3 100644 --- a/src/sparse-vector.h +++ b/src/sparse-vector.h @@ -709,7 +709,8 @@ class SparseVector { template void set(uint32_t elementId, const T *values, size_t size, - SparseVector *parent = nullptr) { + SparseVector *parent = nullptr, + bool preserveNoDataPayload = false) { if (useIndex) { throw std::runtime_error("set() must not be used for index"); } @@ -728,11 +729,13 @@ class SparseVector { } if (parent == nullptr || parent->hasData(elementId)) { - if (memcmp(values, noData.data(), elementSize * sizeof(T)) == 0) { + if (!preserveNoDataPayload && + memcmp(values, noData.data(), elementSize * sizeof(T)) == 0) { data.erase(elementId); return; } - if (memcmp(values, noData.data(), elementSize * sizeof(T)) != 0) { + if (preserveNoDataPayload || + memcmp(values, noData.data(), elementSize * sizeof(T)) != 0) { std::vector valuePacked; const uint8_t *storeBytes = reinterpret_cast(values); size_t storeByteSize = elementSize * sizeof(T); From e502753a08dfd7e0c290b512656a216bfa85d6a8 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 11:16:54 +0100 Subject: [PATCH 24/42] profiling --- AGENTS.md | 13 ++++- src/serum-decode.cpp | 126 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 116 insertions(+), 23 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index cc4834e..fbaeabc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -275,10 +275,21 @@ v6 snapshot policy: - Central callback configured by `Serum_SetLogCallback`. - `serum-decode.cpp` and `SceneGenerator.cpp` both use callback-based `Log(...)`. - Missing-file logs from `find_case_insensitive_file(...)` use normalized path joining. 
+- Optional runtime debug tracing is env-gated and split by verbosity: + - `SERUM_DEBUG_TRACE_INPUTS=1` enables high-level lifecycle logs (input, + trigger, scene-info). + - `SERUM_DEBUG_IDENTIFY_VERBOSE=1` enables per-candidate identification logs. + - `SERUM_DEBUG_SPRITE_VERBOSE=1` enables sprite candidate/detection/rejection + logs. + - `SERUM_DEBUG_SCENE_VERBOSE=1` enables scene-path and scene-event logs. + - `SERUM_DEBUG_INPUT_CRC`, `SERUM_DEBUG_FRAME_ID`, and + `SERUM_DEBUG_STAGE_HASHES=1` remain available as output filters and + expensive hash tracing controls. - Optional runtime profiling: - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS` is enabled (`1/true/on/yes`), periodic average timings for `Colorize_Framev2` and `Colorize_Spritev2` - hot paths are logged. + hot paths are logged, along with total average frame render time and + current process RSS memory usage. - If env `SERUM_PROFILE_SPARSE_VECTORS=1`, sparse-vector access snapshots are logged at the same cadence (accesses, decode count, cache hits, direct hits) for key runtime vectors (`cframes_v2*`, `backgroundmask*`, `dynamasks*`, diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index b34f0f9..2dbdd3a 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -24,6 +24,12 @@ #if defined(__APPLE__) #include +#include +#endif + +#if defined(__unix__) || defined(__APPLE__) +#include +#include #endif #if defined(_WIN32) || defined(_WIN64) @@ -113,6 +119,9 @@ static uint32_t g_debugCurrentInputCrc = 0; static bool g_debugTraceAllInputs = false; static uint32_t g_debugFrameMetaLoggedFor = 0xffffffffu; static bool g_debugBypassSceneGate = false; +static bool g_debugVerboseIdentify = false; +static bool g_debugVerboseSprites = false; +static bool g_debugVerboseScenes = false; static SerumData g_serumData; uint16_t sceneFrameCount = 0; @@ -259,6 +268,46 @@ static uint32_t GetEnvUint32Auto(const char* name, uint32_t defaultValue) { return static_cast(parsed); } +static uint64_t 
GetProcessResidentMemoryBytes() { +#if defined(__APPLE__) + mach_task_basic_info info; + mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, + reinterpret_cast(&info), + &count) == KERN_SUCCESS) { + return static_cast(info.resident_size); + } +#elif defined(__unix__) + long rssPages = 0; + FILE* statm = std::fopen("/proc/self/statm", "r"); + if (statm != nullptr) { + if (std::fscanf(statm, "%*s %ld", &rssPages) == 1 && rssPages > 0) { + std::fclose(statm); + const long pageSize = sysconf(_SC_PAGESIZE); + if (pageSize > 0) { + return static_cast(rssPages) * + static_cast(pageSize); + } + } else { + std::fclose(statm); + } + } +#endif + +#if defined(__unix__) || defined(__APPLE__) + struct rusage usage; + if (getrusage(RUSAGE_SELF, &usage) == 0) { +#if defined(__APPLE__) + return static_cast(usage.ru_maxrss); +#else + return static_cast(usage.ru_maxrss) * 1024ull; +#endif + } +#endif + + return 0; +} + static void InitDebugFrameTracingFromEnv(void) { if (g_debugFrameTracingInitialized) { return; @@ -270,15 +319,23 @@ static void InitDebugFrameTracingFromEnv(void) { g_debugStageHashes = IsEnvFlagEnabled("SERUM_DEBUG_STAGE_HASHES"); g_debugTraceAllInputs = IsEnvFlagEnabled("SERUM_DEBUG_TRACE_INPUTS"); g_debugBypassSceneGate = IsEnvFlagEnabled("SERUM_DEBUG_BYPASS_SCENE_GATE"); + g_debugVerboseIdentify = IsEnvFlagEnabled("SERUM_DEBUG_IDENTIFY_VERBOSE"); + g_debugVerboseSprites = IsEnvFlagEnabled("SERUM_DEBUG_SPRITE_VERBOSE"); + g_debugVerboseScenes = IsEnvFlagEnabled("SERUM_DEBUG_SCENE_VERBOSE"); if (g_debugTargetInputCrc != 0 || g_debugTargetFrameId != 0xffffffffu || g_debugStageHashes || g_debugTraceAllInputs || - g_debugBypassSceneGate) { + g_debugBypassSceneGate || g_debugVerboseIdentify || + g_debugVerboseSprites || g_debugVerboseScenes) { Log("Serum debug tracing enabled: inputCrc=%u frameId=%u stageHashes=%s " - "traceAllInputs=%s bypassSceneGate=%s", + "traceAllInputs=%s bypassSceneGate=%s 
identifyVerbose=%s " + "spriteVerbose=%s sceneVerbose=%s", g_debugTargetInputCrc, g_debugTargetFrameId, g_debugStageHashes ? "on" : "off", g_debugTraceAllInputs ? "on" : "off", - g_debugBypassSceneGate ? "on" : "off"); + g_debugBypassSceneGate ? "on" : "off", + g_debugVerboseIdentify ? "on" : "off", + g_debugVerboseSprites ? "on" : "off", + g_debugVerboseScenes ? "on" : "off"); } } @@ -301,13 +358,28 @@ static bool DebugTraceAllInputsEnabled() { return g_debugTraceAllInputs; } +static bool DebugIdentifyVerboseEnabled() { + InitDebugFrameTracingFromEnv(); + return g_debugVerboseIdentify; +} + +static bool DebugSpriteVerboseEnabled() { + InitDebugFrameTracingFromEnv(); + return g_debugVerboseSprites; +} + +static bool DebugSceneVerboseEnabled() { + InitDebugFrameTracingFromEnv(); + return g_debugVerboseScenes; +} + static void DebugLogSceneEvent(const char* event, uint16_t sceneId, uint16_t frameIndex, uint16_t frameCount, uint16_t durationPerFrame, uint8_t options, bool interruptable, bool startImmediately, uint8_t repeatCount, uint8_t group = 0, int result = -1) { - if (!DebugTraceAllInputsEnabled()) { + if (!DebugSceneVerboseEnabled()) { return; } Log("Serum debug scene event: event=%s sceneId=%u frameIndex=%u " @@ -529,7 +601,7 @@ static bool DebugTraceSpritesForCurrentInput() { static void DebugLogSpriteCheckStart(uint32_t frameId, uint32_t candidateCount, bool hasCandidateSidecars, bool frameHasShapeCandidates) { - if (!DebugTraceSpritesForCurrentInput()) { + if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } Log("Serum debug sprites start: frameId=%u inputCrc=%u candidates=%u " @@ -544,7 +616,7 @@ static void DebugLogSpriteCandidate(uint32_t frameId, uint8_t spriteId, uint32_t detectCount, short minxBB, short minyBB, short maxxBB, short maxyBB, int spriteWidth, int spriteHeight) { - if (!DebugTraceSpritesForCurrentInput()) { + if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } Log("Serum debug 
sprite candidate: frameId=%u inputCrc=%u spriteId=%u " @@ -559,7 +631,7 @@ static void DebugLogSpriteDetectionWord(uint32_t frameId, uint8_t spriteId, uint32_t detectionWord, short frax, short fray, short offsx, short offsy, short detw, short deth) { - if (!DebugTraceSpritesForCurrentInput()) { + if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } Log("Serum debug sprite detection: frameId=%u inputCrc=%u spriteId=%u " @@ -576,7 +648,7 @@ static void DebugLogSpriteRejected(uint32_t frameId, uint8_t spriteId, uint32_t detailB = 0, uint32_t detailC = 0, uint32_t detailD = 0) { - if (!DebugTraceSpritesForCurrentInput()) { + if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } Log("Serum debug sprite rejected: frameId=%u inputCrc=%u spriteId=%u " @@ -1995,7 +2067,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint8_t mask = g_serumData.compmaskID[tj][0]; uint8_t Shape = g_serumData.shapecompmode[tj][0]; uint32_t Hashc = calc_crc32(frame, mask, pixels, Shape); - if (DebugTraceMatches(inputCrc, tj)) { + if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, tj)) { Log("Serum debug identify seed: inputCrc=%u startFrame=%u " "sceneRequested=%s mask=%u shape=%u hash=%u", inputCrc, tj, sceneFrameRequested ? 
"true" : "false", mask, @@ -2010,14 +2082,15 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { continue; } for (uint32_t ti : sigIt->second) { - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify scene candidate: inputCrc=%u frameId=%u " "mask=%u shape=%u hash=%u storedHash=%u lastfound=%u", inputCrc, ti, mask, Shape, Hashc, g_serumData.hashcodes[ti][0], lastfound_stream); } if (first_match || ti != lastfound_stream || mask < 255) { - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=%s firstMatch=%s lastfoundStream=%u mask=%u " "fullCrcBefore=%u", @@ -2037,7 +2110,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint32_t full_crc = crc32_fast(frame, pixels); if (full_crc != lastframe_full_crc) { - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=full-crc-diff firstMatch=%s lastfoundStream=%u " "mask=%u fullCrcBefore=%u fullCrcNow=%u", @@ -2048,7 +2122,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { lastfound = ti; return ti; } - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=same-frame firstMatch=%s lastfoundStream=%u mask=%u " "fullCrc=%u", @@ -2073,7 +2147,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { if (!framechecked[ti]) { if ((g_serumData.compmaskID[ti][0] == mask) && (g_serumData.shapecompmode[ti][0] == Shape)) { - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify candidate: inputCrc=%u 
frameId=%u " "mask=%u shape=%u hash=%u storedHash=%u lastfound=%u", inputCrc, ti, mask, Shape, Hashc, @@ -2081,7 +2156,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { } if (Hashc == g_serumData.hashcodes[ti][0]) { if (first_match || ti != lastfound_stream || mask < 255) { - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=%s firstMatch=%s lastfoundStream=%u mask=%u " "fullCrcBefore=%u", @@ -2102,7 +2178,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint32_t full_crc = crc32_fast(frame, pixels); if (full_crc != lastframe_full_crc) { - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=full-crc-diff firstMatch=%s lastfoundStream=%u " "mask=%u fullCrcBefore=%u fullCrcNow=%u", @@ -2114,7 +2191,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { return ti; // we found the same frame with shape as before, but // the full frame is different } - if (DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=same-frame firstMatch=%s lastfoundStream=%u " "mask=%u fullCrc=%u", @@ -2135,7 +2213,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { if (++tj >= g_serumData.nframes) tj = 0; } while (tj != lastfound_stream); - if (DebugTraceMatchesInputCrc(inputCrc)) { + if (DebugIdentifyVerboseEnabled() && DebugTraceMatchesInputCrc(inputCrc)) { Log("Serum debug identify miss: inputCrc=%u sceneRequested=%s", inputCrc, sceneFrameRequested ? 
"true" : "false"); } @@ -3988,9 +4066,13 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( (double)g_profileColorizeCalls / 1000000.0; const double spriteMs = (double)g_profileColorizeSpriteV2Ns / (double)g_profileColorizeCalls / 1000000.0; - Log("Perf dynamic avg: Colorize_Framev2=%.3fms " - "Colorize_Spritev2=%.3fms over %u frames", - frameMs, spriteMs, (uint32_t)g_profileColorizeCalls); + const double totalMs = frameMs + spriteMs; + const uint64_t rssBytes = GetProcessResidentMemoryBytes(); + const double rssMiB = (double)rssBytes / (1024.0 * 1024.0); + Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms " + "Colorize_Spritev2=%.3fms rss=%.1fMiB over %u frames", + totalMs, frameMs, spriteMs, rssMiB, + (uint32_t)g_profileColorizeCalls); if (g_profileSparseVectors) { g_serumData.LogSparseVectorProfileSnapshot(); } @@ -4296,7 +4378,7 @@ uint32_t Serum_RenderScene(void) { renderedFromDirectTriplet = true; } } - if (DebugTraceAllInputsEnabled()) { + if (DebugSceneVerboseEnabled()) { Log("Serum debug scene path: sceneId=%u frameIndex=%u group=%u " "disableTriplets=%s usedTriplet=%s tripletCount=%u", static_cast(lastTriggerID), sceneCurrentFrame, currentGroup, From df963119a26d8d4656f15d5407fd3531ab8d6cf9 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 12:49:40 +0100 Subject: [PATCH 25/42] avoid memory peak on loading --- AGENTS.md | 38 +++++++++++++++++++++++++++++++------- src/serum-decode.cpp | 4 ++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index fbaeabc..8dc2f68 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -108,12 +108,15 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. - If loaded from cROMc v6 and no CSV update in this run: use stored lookup via `InitFrameLookupRuntimeStateFromStoredData()`. - Otherwise: rebuild via `BuildFrameLookupVectors()`. 8. Build/normalize packing sidecars via `BuildPackingSidecarsAndNormalize()`. 
- - The normalization step is idempotent and guarded; repeated calls in the - same load/save cycle are no-ops once completed. + - This normalization/repair path is for source-data build flows and `v5` + compatibility handling. + - Direct `v6` cROMc runtime load is expected to consume already-normalized + runtime-ready data instead of mutating or repairing it on device. 9. Build or restore sprite runtime sidecars via `BuildSpriteRuntimeSidecars()`. - - For v6 cROMc loads, sidecars are restored from file when present. - - For v5 loads (and any missing/corrupt sidecar case), sidecars are rebuilt - from loaded sprite vectors at startup. + - For direct `v6` cROMc loads, runtime sidecars are expected to be restored + from file as final runtime data. + - Rebuild-on-load behavior belongs to `v5` compatibility handling and + authoring-time rebuild flows, not to the final-device direct `v6` path. 10. Optional runtime A/B switch for dynamic packed-read overhead: - If env `SERUM_DISABLE_DYNAMIC_PACKED_READS` is enabled (`1/true/on/yes`), `PrepareRuntimeDynamicHotCache()` predecodes dynamic vectors @@ -129,6 +132,19 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. Important: - `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. - CSV parsing after loading can invalidate stored scene lookup data and requires rebuild. +- Design policy: + - `v5` backward-compatibility logic must remain scoped to `v5` loads. + - `pup.csv`-driven rebuild/update logic is an authoring-time path and is not + the target for memory-sensitive final-device runtime behavior. + - Direct `v6` runtime load is expected to trust the stored runtime-ready data; + do not add safety nets, compatibility shims, or repair logic for unreleased + `v6` snapshot-to-snapshot compatibility on that path. 
+ - If a direct `v6` runtime load still needs mutation/repair to work, that is + a generation/save contract bug and should be fixed at `cROMc` creation time + rather than masked in the final-device load path. + - The final-device direct `v6` load path must not run + `BuildPackingSidecarsAndNormalize()` or rebuild missing sprite runtime + sidecars on load. ## Frame identification Main function: `Identify_Frame(uint8_t* frame, bool sceneFrameRequested)`. @@ -261,8 +277,9 @@ Stored in v6: Backward compatibility: - v5 files are loadable. - v5 sparse vectors are deserialized with legacy sparse-vector layout and converted to packed representation after load. -- For v5 loads, scene lookup vectors are rebuilt at startup. -- For v6 loads, stored lookup vectors are reused unless scene data changed in this load cycle (for example CSV update), in which case lookup vectors are rebuilt. +- For v5 loads, scene lookup vectors and other derived runtime sidecars may be rebuilt at startup. +- For direct `v6` loads, stored lookup vectors and runtime sidecars are expected to be consumed as persisted runtime-ready data. +- A `pup.csv` update in the same authoring-time load cycle may invalidate persisted scene lookup data and requires rebuild before re-save. - Direct scene-triplet preprocessing is only executed for v6. - v6 scene-data deserialization validates block magic and count before allocation. @@ -270,6 +287,13 @@ Backward compatibility: v6 snapshot policy: - Compatibility between unreleased v6 development snapshots is not required. - Compatibility to released v5 remains required. +- Therefore: + - if `v6` data needs new runtime-ready fields or stricter invariants, update + the `v6` generation/load contract directly rather than adding fallback logic + for older `v6` development snapshots. + - do not introduce final-device runtime safety nets, repair paths, or + compatibility shims merely to keep older `v6` development snapshots + loading. 
## Logging - Central callback configured by `Serum_SetLogCallback`. diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 2dbdd3a..9a385d7 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1023,7 +1023,6 @@ static Serum_Frame_Struc* Serum_LoadConcentratePrepared(const uint8_t flags) { return NULL; } - g_serumData.BuildPackingSidecarsAndNormalize(); { const char *debugSpriteId = std::getenv("SERUM_DEBUG_SPRITE_ID"); if (debugSpriteId && debugSpriteId[0] != '\0') { @@ -1843,7 +1842,8 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, if (g_serumData.colorRotationLookupByFrameAndColor.empty()) { g_serumData.BuildColorRotationLookup(); } - if (!g_serumData.HasSpriteRuntimeSidecars()) { + if (!g_serumData.HasSpriteRuntimeSidecars() && + (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6)) { g_serumData.BuildSpriteRuntimeSidecars(); } if (g_disableDynamicPackedReads) { From f2a58fcf656f1419f7fa0b8d24c5e2e9a0bfc220 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 13:50:37 +0100 Subject: [PATCH 26/42] removed lookahead support --- AGENTS.md | 9 ---- src/serum-decode.cpp | 107 ------------------------------------------- 2 files changed, 116 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8dc2f68..82ae4c2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -122,12 +122,6 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. `PrepareRuntimeDynamicHotCache()` predecodes dynamic vectors (`dynamasks*`, `dynaspritemasks*`) into runtime hot caches. - Default runtime behavior is unchanged when this env var is not set. -11. Optional normal-frame lookahead prefetch: - - Env `SERUM_LOOKAHEAD_DEPTH=` (`N=0..8`, default `0`) enables cache - warming for up to `N` next non-scene frame IDs after each identified - normal frame. - - Prefetch performs sparse-vector reads only (no matching/render changes). - - Scene frames are skipped and frame-ID wrap-around is respected. 
Important: - `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. @@ -318,9 +312,6 @@ v6 snapshot policy: logged at the same cadence (accesses, decode count, cache hits, direct hits) for key runtime vectors (`cframes_v2*`, `backgroundmask*`, `dynamasks*`, `dynaspritemasks*`). -- Optional lookahead logging: - - When `SERUM_LOOKAHEAD_DEPTH>0`, load logs - `Frame lookahead prefetch enabled via SERUM_LOOKAHEAD_DEPTH=`. ## Safety invariants - `frameIsScene.size()` must equal `nframes` before identification. diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 9a385d7..d1fa8ee 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -107,7 +107,6 @@ static uint32_t GetEnvUintClamped(const char* name, uint32_t maxValue) { static bool g_profileDynamicHotPaths = false; static bool g_profileSparseVectors = false; static bool g_disableDynamicPackedReads = false; -static uint32_t g_frameLookaheadDepth = 0; static uint64_t g_profileColorizeFrameV2Ns = 0; static uint64_t g_profileColorizeSpriteV2Ns = 0; static uint64_t g_profileColorizeCalls = 0; @@ -1724,14 +1723,9 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, g_profileSparseVectors = IsEnvFlagEnabled("SERUM_PROFILE_SPARSE_VECTORS"); g_disableDynamicPackedReads = IsEnvFlagEnabled("SERUM_DISABLE_DYNAMIC_PACKED_READS"); - g_frameLookaheadDepth = GetEnvUintClamped("SERUM_LOOKAHEAD_DEPTH", 8); g_profileColorizeFrameV2Ns = 0; g_profileColorizeSpriteV2Ns = 0; g_profileColorizeCalls = 0; - if (g_frameLookaheadDepth > 0) { - Log("Frame lookahead prefetch enabled via SERUM_LOOKAHEAD_DEPTH=%u", - g_frameLookaheadDepth); - } mySerum.SerumVersion = g_serumData.SerumVersion = 0; mySerum.flags = 0; @@ -2220,81 +2214,6 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { return IDENTIFY_NO_FRAME; // we found no corresponding frame } -static void WarmFrameAssetsForId(uint32_t frameId) { - if (frameId >= g_serumData.nframes) { - return; - } - - 
(void)g_serumData.activeframes[frameId][0]; - (void)g_serumData.cframes_v2[frameId]; - (void)g_serumData.colorrotations_v2[frameId]; - - if (g_serumData.isextraframe[frameId][0] > 0) { - (void)g_serumData.cframes_v2_extra[frameId]; - (void)g_serumData.colorrotations_v2_extra[frameId]; - } - - const uint16_t backgroundId = g_serumData.backgroundIDs[frameId][0]; - if (backgroundId < g_serumData.nbackgrounds) { - (void)g_serumData.backgroundmask[frameId]; - (void)g_serumData.backgroundframes_v2[backgroundId]; - if (g_serumData.isextrabackground[backgroundId][0] > 0) { - (void)g_serumData.backgroundmask_extra[frameId]; - (void)g_serumData.backgroundframes_v2_extra[backgroundId]; - } - } - - if (frameId < g_serumData.frameHasDynamic.size() && - g_serumData.frameHasDynamic[frameId] > 0) { - (void)g_serumData.dynamasks[frameId]; - (void)g_serumData.dynamasks_active[frameId]; - (void)g_serumData.dyna4cols_v2[frameId]; - (void)g_serumData.dynashadowsdir[frameId]; - (void)g_serumData.dynashadowscol[frameId]; - } - - if (frameId < g_serumData.frameHasDynamicExtra.size() && - g_serumData.frameHasDynamicExtra[frameId] > 0 && - g_serumData.isextraframe[frameId][0] > 0) { - (void)g_serumData.dynamasks_extra[frameId]; - (void)g_serumData.dynamasks_extra_active[frameId]; - (void)g_serumData.dyna4cols_v2_extra[frameId]; - (void)g_serumData.dynashadowsdir_extra[frameId]; - (void)g_serumData.dynashadowscol_extra[frameId]; - } - - if (g_serumData.spriteCandidateOffsets.size() == - static_cast(g_serumData.nframes) + 1 && - g_serumData.spriteCandidateIds.size() == - g_serumData.spriteCandidateSlots.size()) { - uint32_t start = g_serumData.spriteCandidateOffsets[frameId]; - uint32_t end = g_serumData.spriteCandidateOffsets[frameId + 1]; - if (end > g_serumData.spriteCandidateIds.size()) { - end = static_cast(g_serumData.spriteCandidateIds.size()); - } - for (uint32_t i = start; i < end; ++i) { - const uint8_t spriteId = g_serumData.spriteCandidateIds[i]; - if (spriteId >= 
g_serumData.nsprites) { - continue; - } - (void)g_serumData.spriteoriginal[spriteId]; - (void)g_serumData.spriteoriginal_opaque[spriteId]; - (void)g_serumData.spritecolored[spriteId]; - if (g_serumData.isextrasprite[spriteId][0] > 0) { - (void)g_serumData.spritemask_extra[spriteId]; - (void)g_serumData.spritemask_extra_opaque[spriteId]; - (void)g_serumData.spritecolored_extra[spriteId]; - } - (void)g_serumData.dynaspritemasks[spriteId]; - (void)g_serumData.dynaspritemasks_active[spriteId]; - if (g_serumData.isextrasprite[spriteId][0] > 0) { - (void)g_serumData.dynaspritemasks_extra[spriteId]; - (void)g_serumData.dynaspritemasks_extra_active[spriteId]; - } - } - } -} - static uint32_t BuildRuntimeFeatureFlags(uint32_t frameId) { uint32_t featureFlags = 0; @@ -2360,29 +2279,6 @@ static uint32_t BuildRuntimeFeatureFlags(uint32_t frameId) { return featureFlags; } -static void PrefetchNextNormalFrameAssets(uint32_t currentFrameId) { - if (g_frameLookaheadDepth == 0 || g_serumData.nframes == 0) { - return; - } - uint32_t cursor = currentFrameId; - for (uint32_t level = 0; level < g_frameLookaheadDepth; ++level) { - bool found = false; - for (uint32_t hop = 0; hop < g_serumData.nframes; ++hop) { - cursor = (cursor + 1 >= g_serumData.nframes) ? 
0 : (cursor + 1); - if (g_serumData.frameIsScene.size() == g_serumData.nframes && - g_serumData.frameIsScene[cursor] > 0) { - continue; - } - WarmFrameAssetsForId(cursor); - found = true; - break; - } - if (!found) { - break; - } - } -} - void GetSpriteSize(uint8_t nospr, int* pswid, int* pshei, const uint8_t* spriteData, int sswid, int sshei, const uint8_t* spriteOpaque) { @@ -3836,9 +3732,6 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( g_debugCurrentInputCrc, frameID, g_serumData.triggerIDs[lastfound][0], lastTriggerID); } - if (!sceneFrameRequested) { - PrefetchNextNormalFrameAssets(frameID); - } if (!sceneFrameRequested) { memcpy(lastFrame, frame, g_serumData.fwidth * g_serumData.fheight); lastFrameId = frameID; From b5dbbab0a73e473ee46c4e47d1249b998fff4b0e Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 15:09:20 +0100 Subject: [PATCH 27/42] added peak memory usage --- AGENTS.md | 6 +++- src/serum-decode.cpp | 67 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 82ae4c2..36f7bab 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -307,7 +307,11 @@ v6 snapshot policy: - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS` is enabled (`1/true/on/yes`), periodic average timings for `Colorize_Framev2` and `Colorize_Spritev2` hot paths are logged, along with total average frame render time and - current process RSS memory usage. + current process RSS memory usage and process-local peak RSS seen so far. + - The same profiler also logs a one-time startup summary before normal frame + processing begins: + `Perf startup peak: start=...MiB current=...MiB peak=...MiB stage=...` + where `peak` is the highest sampled RSS observed during the load pipeline. 
- If env `SERUM_PROFILE_SPARSE_VECTORS=1`, sparse-vector access snapshots are logged at the same cadence (accesses, decode count, cache hits, direct hits) for key runtime vectors (`cframes_v2*`, `backgroundmask*`, `dynamasks*`, diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index d1fa8ee..9f6e223 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -110,6 +110,10 @@ static bool g_disableDynamicPackedReads = false; static uint64_t g_profileColorizeFrameV2Ns = 0; static uint64_t g_profileColorizeSpriteV2Ns = 0; static uint64_t g_profileColorizeCalls = 0; +static uint64_t g_profilePeakRssBytes = 0; +static uint64_t g_profileStartupStartRssBytes = 0; +static uint64_t g_profileStartupPeakRssBytes = 0; +static const char* g_profileStartupPeakStage = "startup-begin"; static bool g_debugFrameTracingInitialized = false; static uint32_t g_debugTargetInputCrc = 0; static uint32_t g_debugTargetFrameId = 0xffffffffu; @@ -307,6 +311,42 @@ static uint64_t GetProcessResidentMemoryBytes() { return 0; } +static void ResetStartupRssProfile() { + if (!g_profileDynamicHotPaths) { + g_profileStartupStartRssBytes = 0; + g_profileStartupPeakRssBytes = 0; + g_profileStartupPeakStage = "startup-begin"; + return; + } + g_profileStartupStartRssBytes = GetProcessResidentMemoryBytes(); + g_profileStartupPeakRssBytes = g_profileStartupStartRssBytes; + g_profileStartupPeakStage = "startup-begin"; +} + +static void NoteStartupRssSample(const char* stage) { + if (!g_profileDynamicHotPaths) { + return; + } + const uint64_t rssBytes = GetProcessResidentMemoryBytes(); + if (rssBytes >= g_profileStartupPeakRssBytes) { + g_profileStartupPeakRssBytes = rssBytes; + g_profileStartupPeakStage = stage; + } +} + +static void LogStartupRssSummary() { + if (!g_profileDynamicHotPaths) { + return; + } + const uint64_t currentBytes = GetProcessResidentMemoryBytes(); + const double startMiB = + (double)g_profileStartupStartRssBytes / (1024.0 * 1024.0); + const double currentMiB = 
(double)currentBytes / (1024.0 * 1024.0); + const double peakMiB = (double)g_profileStartupPeakRssBytes / (1024.0 * 1024.0); + Log("Perf startup peak: start=%.1fMiB current=%.1fMiB peak=%.1fMiB stage=%s", + startMiB, currentMiB, peakMiB, g_profileStartupPeakStage); +} + static void InitDebugFrameTracingFromEnv(void) { if (g_debugFrameTracingInitialized) { return; @@ -1726,6 +1766,8 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, g_profileColorizeFrameV2Ns = 0; g_profileColorizeSpriteV2Ns = 0; g_profileColorizeCalls = 0; + g_profilePeakRssBytes = 0; + ResetStartupRssProfile(); mySerum.SerumVersion = g_serumData.SerumVersion = 0; mySerum.flags = 0; @@ -1756,6 +1798,7 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, std::optional csvFoundFile = find_case_insensitive_file(pathbuf, std::string(romname) + ".pup.csv"); + NoteStartupRssSample("after-file-scan"); if (csvFoundFile) { Log("Found %s", csvFoundFile->c_str()); #ifdef WRITE_CROMC @@ -1777,13 +1820,16 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, if (pFoundFile) { Log("Found %s", pFoundFile->c_str()); + NoteStartupRssSample("before-cromc-load"); result = Serum_LoadConcentrate(pFoundFile->c_str(), flags); loadedFromConcentrate = (result != NULL); if (result) { + NoteStartupRssSample("after-cromc-load"); Log("Loaded %s", pFoundFile->c_str()); if (csvFoundFile && g_serumData.SerumVersion == SERUM_V2 && g_serumData.sceneGenerator->parseCSV(csvFoundFile->c_str())) { sceneDataUpdatedFromCsv = true; + NoteStartupRssSample("after-csv-update"); #ifdef WRITE_CROMC // Update the concentrate file with new PUP data if (generateCRomC) Serum_SaveConcentrate(pFoundFile->c_str()); @@ -1810,12 +1856,17 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, return NULL; } Log("Found %s", pFoundFile->c_str()); + NoteStartupRssSample("before-crom-load"); result = Serum_LoadFilev1(pFoundFile->c_str(), flags); if (result) { + 
NoteStartupRssSample("after-crom-load"); Log("Loaded %s", pFoundFile->c_str()); if (csvFoundFile && g_serumData.SerumVersion == SERUM_V2) { sceneDataUpdatedFromCsv = g_serumData.sceneGenerator->parseCSV(csvFoundFile->c_str()); + if (sceneDataUpdatedFromCsv) { + NoteStartupRssSample("after-csv-update"); + } } #ifdef WRITE_CROMC if (generateCRomC) Serum_SaveConcentrate(pFoundFile->c_str()); @@ -1830,21 +1881,28 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, if (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6 || sceneDataUpdatedFromCsv) { BuildFrameLookupVectors(); + NoteStartupRssSample("after-frame-lookup-build"); } else { InitFrameLookupRuntimeStateFromStoredData(); + NoteStartupRssSample("after-frame-lookup-restore"); } if (g_serumData.colorRotationLookupByFrameAndColor.empty()) { g_serumData.BuildColorRotationLookup(); + NoteStartupRssSample("after-color-rotation-build"); } if (!g_serumData.HasSpriteRuntimeSidecars() && (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6)) { g_serumData.BuildSpriteRuntimeSidecars(); + NoteStartupRssSample("after-sprite-sidecar-build"); } if (g_disableDynamicPackedReads) { g_serumData.PrepareRuntimeDynamicHotCache(); + NoteStartupRssSample("after-dynamic-hot-cache"); Log("Dynamic packed reads disabled for runtime via " "SERUM_DISABLE_DYNAMIC_PACKED_READS"); } + NoteStartupRssSample("before-runtime"); + LogStartupRssSummary(); } if (is_real_machine()) { monochromeMode = true; @@ -3961,10 +4019,15 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( (double)g_profileColorizeCalls / 1000000.0; const double totalMs = frameMs + spriteMs; const uint64_t rssBytes = GetProcessResidentMemoryBytes(); + if (rssBytes > g_profilePeakRssBytes) { + g_profilePeakRssBytes = rssBytes; + } const double rssMiB = (double)rssBytes / (1024.0 * 1024.0); + const double peakRssMiB = + (double)g_profilePeakRssBytes / (1024.0 * 1024.0); Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms 
" - "Colorize_Spritev2=%.3fms rss=%.1fMiB over %u frames", - totalMs, frameMs, spriteMs, rssMiB, + "Colorize_Spritev2=%.3fms rss=%.1fMiB peak=%.1fMiB over %u frames", + totalMs, frameMs, spriteMs, rssMiB, peakRssMiB, (uint32_t)g_profileColorizeCalls); if (g_profileSparseVectors) { g_serumData.LogSparseVectorProfileSnapshot(); From 1d7eef97d26b09bc05dd58881f6b5feceb5181f6 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 16:23:09 +0100 Subject: [PATCH 28/42] removed SERUM_DISABLE_DYNAMIC_PACKED_READS --- AGENTS.md | 14 +- src/SerumData.cpp | 64 ++------- src/SerumData.h | 14 +- src/serum-decode.cpp | 329 +++++++++++++++++++++---------------------- 4 files changed, 176 insertions(+), 245 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 36f7bab..978d5c3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -71,7 +71,8 @@ Vector policy currently used in `SerumData`: entirely for frames without active dynamic pixels. - Color rotations use a precomputed lookup index: `colorRotationLookupByFrameAndColor[(frameId,isExtra,color)] -> (rotation,position)` - restored from v6 cROMc when present and rebuilt at load time otherwise. + restored from v6 cROMc when present. + - v5 / authoring-time rebuild flows may rebuild the lookup before re-save. - `ColorInRotation` uses lookup-only runtime path (no linear scan fallback). - Sprite runtime sidecars are precomputed and used by `Check_Spritesv2`: - frame candidate list with sprite slot indices (`spriteCandidateOffsets`, @@ -117,12 +118,6 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. from file as final runtime data. - Rebuild-on-load behavior belongs to `v5` compatibility handling and authoring-time rebuild flows, not to the final-device direct `v6` path. -10. 
Optional runtime A/B switch for dynamic packed-read overhead: - - If env `SERUM_DISABLE_DYNAMIC_PACKED_READS` is enabled (`1/true/on/yes`), - `PrepareRuntimeDynamicHotCache()` predecodes dynamic vectors - (`dynamasks*`, `dynaspritemasks*`) into runtime hot caches. - - Default runtime behavior is unchanged when this env var is not set. - Important: - `BuildFrameLookupVectors()` must run after final scene data is known for this load cycle. - CSV parsing after loading can invalidate stored scene lookup data and requires rebuild. @@ -337,5 +332,6 @@ Minimum validation: - background scene - end-of-scene behavior flags - resume flag `16` -10. Build color-rotation lookup index via `BuildColorRotationLookup()` for - O(1) `ColorInRotation` checks only when missing from loaded v6 data. +10. Build color-rotation lookup index via `BuildColorRotationLookup()` during + v5 / authoring-time rebuild flows so persisted v6 data provides O(1) + `ColorInRotation` checks without direct-load fallback. diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 8d51228..ecd84d4 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -1,11 +1,11 @@ #include "SerumData.h" +#include + #include "DecompressingIStream.h" #include "miniz/miniz.h" #include "serum-version.h" -#include - bool is_real_machine(); static uint32_t GetDebugSpriteIdFromEnv() { @@ -96,7 +96,7 @@ SerumData::SerumData() dynaspritemasks_extra_active.setProfileLabel("dynaspritemasks_extra_active"); sceneGenerator = new SceneGenerator(); if (is_real_machine()) - m_packingSidecarsStorage.assign(384u * 1024u * 1024u, 0xA5); + m_packingSidecarsStorage.assign(256u * 1024u * 1024u, 0xA5); } SerumData::~SerumData() {} @@ -226,8 +226,8 @@ void SerumData::DebugLogSpriteDynamicSidecarState(const char *stage, "hasActive=%s dyna255=%u dyna0=%u dynaOther=%u active0=%u active1=%u " "activeOther=%u", stage ? stage : "unknown", spriteId, hasDyna ? "true" : "false", - hasActive ? 
"true" : "false", dyna255, dyna0, dynaOther, active0, - active1, activeOther); + hasActive ? "true" : "false", dyna255, dyna0, dynaOther, active0, active1, + activeOther); } void SerumData::BuildPackingSidecarsAndNormalize() { @@ -426,50 +426,6 @@ void SerumData::BuildPackingSidecarsAndNormalize() { m_packingSidecarsNormalized = true; } -void SerumData::PrepareRuntimeDynamicHotCache() { - std::vector frameIds; - frameIds.reserve(nframes); - for (uint32_t frameId = 0; frameId < nframes; ++frameId) { - if (frameId < frameHasDynamic.size() && frameHasDynamic[frameId] > 0) { - frameIds.push_back(frameId); - } - } - dynamasks.enableForcedDecodedReadsForIds(frameIds); - dynamasks_active.enableForcedDecodedReadsForIds(frameIds); - - std::vector extraFrameIds; - extraFrameIds.reserve(nframes); - for (uint32_t frameId = 0; frameId < nframes; ++frameId) { - if (frameId < frameHasDynamicExtra.size() && - frameHasDynamicExtra[frameId] > 0) { - extraFrameIds.push_back(frameId); - } - } - dynamasks_extra.enableForcedDecodedReadsForIds(extraFrameIds); - dynamasks_extra_active.enableForcedDecodedReadsForIds(extraFrameIds); - - std::vector spriteIds; - spriteIds.reserve(nsprites); - for (uint32_t spriteId = 0; spriteId < nsprites; ++spriteId) { - if (dynaspritemasks.hasData(spriteId) || - dynaspritemasks_active.hasData(spriteId) || - dynaspritemasks_extra.hasData(spriteId) || - dynaspritemasks_extra_active.hasData(spriteId)) { - spriteIds.push_back(spriteId); - } - } - dynaspritemasks.enableForcedDecodedReadsForIds(spriteIds); - dynaspritemasks_active.enableForcedDecodedReadsForIds(spriteIds); - dynaspritemasks_extra.enableForcedDecodedReadsForIds(spriteIds); - dynaspritemasks_extra_active.enableForcedDecodedReadsForIds(spriteIds); - - Log("Prepared runtime dynamic hot cache: %u frame masks, %u extra frame " - "masks," - " %u sprite masks", - (uint32_t)frameIds.size(), (uint32_t)extraFrameIds.size(), - (uint32_t)spriteIds.size()); -} - bool SerumData::HasSpriteRuntimeSidecars() 
const { if (nframes == 0 || nsprites == 0) { return false; @@ -510,12 +466,10 @@ void SerumData::BuildSpriteRuntimeSidecars() { spriteUsesShape.assign(nsprites, 0); spriteDetectOffsets.assign(static_cast(nsprites) + 1, 0); spriteDetectMeta.clear(); - spriteOpaqueRowSegmentStart.assign(static_cast(nsprites) * - MAX_SPRITE_HEIGHT, - 0); - spriteOpaqueRowSegmentCount.assign(static_cast(nsprites) * - MAX_SPRITE_HEIGHT, - 0); + spriteOpaqueRowSegmentStart.assign( + static_cast(nsprites) * MAX_SPRITE_HEIGHT, 0); + spriteOpaqueRowSegmentCount.assign( + static_cast(nsprites) * MAX_SPRITE_HEIGHT, 0); spriteOpaqueSegments.clear(); const size_t spritePixels = MAX_SPRITE_WIDTH * MAX_SPRITE_HEIGHT; diff --git a/src/SerumData.h b/src/SerumData.h index 61027bb..9dc4a1c 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -76,7 +76,6 @@ class SerumData { bool LoadFromFile(const char *filename, const uint8_t flags); bool LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags); void BuildPackingSidecarsAndNormalize(); - void PrepareRuntimeDynamicHotCache(); void BuildSpriteRuntimeSidecars(); void DebugLogSpriteDynamicSidecarState(const char *stage, uint32_t spriteId); bool HasSpriteRuntimeSidecars() const; @@ -217,9 +216,8 @@ class SerumData { dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor, spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, - frameHasShapeSprite, - spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, - spriteDetectMeta, spriteOpaqueRowSegmentStart, + frameHasShapeSprite, spriteWidth, spriteHeight, spriteUsesShape, + spriteDetectOffsets, spriteDetectMeta, spriteOpaqueRowSegmentStart, spriteOpaqueRowSegmentCount, spriteOpaqueSegments); } } else { @@ -230,9 +228,8 @@ class SerumData { dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor, spriteCandidateOffsets, 
spriteCandidateIds, spriteCandidateSlots, - frameHasShapeSprite, - spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, - spriteDetectMeta, spriteOpaqueRowSegmentStart, + frameHasShapeSprite, spriteWidth, spriteHeight, spriteUsesShape, + spriteDetectOffsets, spriteDetectMeta, spriteOpaqueRowSegmentStart, spriteOpaqueRowSegmentCount, spriteOpaqueSegments); } else { frameIsScene.clear(); @@ -337,9 +334,6 @@ class SerumData { sceneGenerator->setSceneData(std::move(loadedScenes)); sceneGenerator->setDepth(nocolors == 16 ? 4 : 2); } - if (!HasSpriteRuntimeSidecars()) { - BuildSpriteRuntimeSidecars(); - } } } }; diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 9f6e223..f2934bf 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -106,7 +106,6 @@ static uint32_t GetEnvUintClamped(const char* name, uint32_t maxValue) { static bool g_profileDynamicHotPaths = false; static bool g_profileSparseVectors = false; -static bool g_disableDynamicPackedReads = false; static uint64_t g_profileColorizeFrameV2Ns = 0; static uint64_t g_profileColorizeSpriteV2Ns = 0; static uint64_t g_profileColorizeCalls = 0; @@ -276,8 +275,7 @@ static uint64_t GetProcessResidentMemoryBytes() { mach_task_basic_info info; mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, - reinterpret_cast(&info), - &count) == KERN_SUCCESS) { + reinterpret_cast(&info), &count) == KERN_SUCCESS) { return static_cast(info.resident_size); } #elif defined(__unix__) @@ -342,7 +340,8 @@ static void LogStartupRssSummary() { const double startMiB = (double)g_profileStartupStartRssBytes / (1024.0 * 1024.0); const double currentMiB = (double)currentBytes / (1024.0 * 1024.0); - const double peakMiB = (double)g_profileStartupPeakRssBytes / (1024.0 * 1024.0); + const double peakMiB = + (double)g_profileStartupPeakRssBytes / (1024.0 * 1024.0); Log("Perf startup peak: start=%.1fMiB current=%.1fMiB peak=%.1fMiB stage=%s", startMiB, 
currentMiB, peakMiB, g_profileStartupPeakStage); } @@ -353,8 +352,7 @@ static void InitDebugFrameTracingFromEnv(void) { } g_debugFrameTracingInitialized = true; g_debugTargetInputCrc = GetEnvUint32Auto("SERUM_DEBUG_INPUT_CRC", 0); - g_debugTargetFrameId = - GetEnvUint32Auto("SERUM_DEBUG_FRAME_ID", 0xffffffffu); + g_debugTargetFrameId = GetEnvUint32Auto("SERUM_DEBUG_FRAME_ID", 0xffffffffu); g_debugStageHashes = IsEnvFlagEnabled("SERUM_DEBUG_STAGE_HASHES"); g_debugTraceAllInputs = IsEnvFlagEnabled("SERUM_DEBUG_TRACE_INPUTS"); g_debugBypassSceneGate = IsEnvFlagEnabled("SERUM_DEBUG_BYPASS_SCENE_GATE"); @@ -362,15 +360,13 @@ static void InitDebugFrameTracingFromEnv(void) { g_debugVerboseSprites = IsEnvFlagEnabled("SERUM_DEBUG_SPRITE_VERBOSE"); g_debugVerboseScenes = IsEnvFlagEnabled("SERUM_DEBUG_SCENE_VERBOSE"); if (g_debugTargetInputCrc != 0 || g_debugTargetFrameId != 0xffffffffu || - g_debugStageHashes || g_debugTraceAllInputs || - g_debugBypassSceneGate || g_debugVerboseIdentify || - g_debugVerboseSprites || g_debugVerboseScenes) { + g_debugStageHashes || g_debugTraceAllInputs || g_debugBypassSceneGate || + g_debugVerboseIdentify || g_debugVerboseSprites || g_debugVerboseScenes) { Log("Serum debug tracing enabled: inputCrc=%u frameId=%u stageHashes=%s " "traceAllInputs=%s bypassSceneGate=%s identifyVerbose=%s " "spriteVerbose=%s sceneVerbose=%s", g_debugTargetInputCrc, g_debugTargetFrameId, - g_debugStageHashes ? "on" : "off", - g_debugTraceAllInputs ? "on" : "off", + g_debugStageHashes ? "on" : "off", g_debugTraceAllInputs ? "on" : "off", g_debugBypassSceneGate ? "on" : "off", g_debugVerboseIdentify ? "on" : "off", g_debugVerboseSprites ? 
"on" : "off", @@ -444,17 +440,16 @@ static void DebugLogFrameMetadataIfRequested(uint32_t frameId) { const uint32_t triggerId = g_serumData.triggerIDs[frameId][0]; const uint16_t backgroundId = g_serumData.backgroundIDs[frameId][0]; const uint8_t isExtra = g_serumData.isextraframe[frameId][0]; - const uint8_t hasDynamic = - (frameId < g_serumData.frameHasDynamic.size()) - ? g_serumData.frameHasDynamic[frameId] - : 0; + const uint8_t hasDynamic = (frameId < g_serumData.frameHasDynamic.size()) + ? g_serumData.frameHasDynamic[frameId] + : 0; const uint8_t hasDynamicExtra = (frameId < g_serumData.frameHasDynamicExtra.size()) ? g_serumData.frameHasDynamicExtra[frameId] : 0; - const uint8_t isScene = - (frameId < g_serumData.frameIsScene.size()) ? g_serumData.frameIsScene[frameId] - : 0; + const uint8_t isScene = (frameId < g_serumData.frameIsScene.size()) + ? g_serumData.frameIsScene[frameId] + : 0; Log("Serum debug frame meta: frameId=%u mask=%u shape=%u hash=%u active=%u " "triggerId=%u backgroundId=%u isExtra=%u hasDynamic=%u " @@ -478,10 +473,9 @@ static void DebugLogFrameMetadataIfRequested(uint32_t frameId) { for (uint32_t i = 0; i < spriteCount; ++i) { const uint8_t spriteId = spriteSlots[i]; - const uint8_t usesShape = - (spriteId < g_serumData.spriteUsesShape.size()) - ? g_serumData.spriteUsesShape[spriteId] - : g_serumData.sprshapemode[spriteId][0]; + const uint8_t usesShape = (spriteId < g_serumData.spriteUsesShape.size()) + ? 
g_serumData.spriteUsesShape[spriteId] + : g_serumData.sprshapemode[spriteId][0]; Log("Serum debug frame sprite-slot: frameId=%u slot=%u spriteId=%u " "bbox=[%u,%u..%u,%u] usesShape=%u", frameId, i, spriteId, spriteBB[i * 4], spriteBB[i * 4 + 1], @@ -500,9 +494,9 @@ static uint64_t DebugHashBytesFNV1a64(const void* data, size_t size) { } static uint64_t DebugHashFrameRegionFNV1a64(const uint16_t* frame, - uint32_t stride, uint16_t x, - uint16_t y, uint16_t width, - uint16_t height) { + uint32_t stride, uint16_t x, + uint16_t y, uint16_t width, + uint16_t height) { if (!frame || width == 0 || height == 0) { return 1469598103934665603ull; } @@ -539,7 +533,8 @@ static uint64_t DebugHashCurrentOutputFrame(uint32_t frameId, bool isExtra) { } const uint64_t hash = DebugHashBytesFNV1a64( output, static_cast(width) * height * sizeof(uint16_t)); - if (g_debugStageHashes && DebugTraceMatches(g_debugCurrentInputCrc, frameId)) { + if (g_debugStageHashes && + DebugTraceMatches(g_debugCurrentInputCrc, frameId)) { Log("Serum debug stage hash: frameId=%u inputCrc=%u stage=%s hash=%llu " "size=%ux%u", frameId, g_debugCurrentInputCrc, isExtra ? "base-extra" : "base", @@ -583,7 +578,7 @@ static void DebugLogColorizeFrameV2Assets( const uint64_t colorsHash = frameColors ? DebugHashBytesFNV1a64(frameColors, - (size_t)pixelCount * sizeof(uint16_t)) + (size_t)pixelCount * sizeof(uint16_t)) : 0; const uint64_t backgroundMaskHash = frameBackgroundMask @@ -592,7 +587,7 @@ static void DebugLogColorizeFrameV2Assets( const uint64_t backgroundHash = frameBackground ? DebugHashBytesFNV1a64(frameBackground, - (size_t)pixelCount * sizeof(uint16_t)) + (size_t)pixelCount * sizeof(uint16_t)) : 0; const uint64_t dynaHash = (frameHasDynamic && frameDyna) @@ -604,18 +599,16 @@ static void DebugLogColorizeFrameV2Assets( : 0; const uint64_t dynaColorsHash = (frameHasDynamic && frameDynaColors) - ? 
DebugHashBytesFNV1a64( - frameDynaColors, - (size_t)MAX_DYNA_SETS_PER_FRAME_V2 * g_serumData.nocolors * - sizeof(uint16_t)) + ? DebugHashBytesFNV1a64(frameDynaColors, + (size_t)MAX_DYNA_SETS_PER_FRAME_V2 * + g_serumData.nocolors * sizeof(uint16_t)) : 0; const uint64_t rotationHash = - colorRotations - ? DebugHashBytesFNV1a64( - colorRotations, - (size_t)MAX_COLOR_ROTATION_V2 * MAX_LENGTH_COLOR_ROTATION * - sizeof(uint16_t)) - : 0; + colorRotations ? DebugHashBytesFNV1a64(colorRotations, + (size_t)MAX_COLOR_ROTATION_V2 * + MAX_LENGTH_COLOR_ROTATION * + sizeof(uint16_t)) + : 0; Log("Serum debug stage assets: frameId=%u inputCrc=%u stage=%s " "backgroundId=%u colorsHash=%llu backgroundMaskHash=%llu " @@ -683,10 +676,8 @@ static void DebugLogSpriteRejected(uint32_t frameId, uint8_t spriteId, uint8_t spriteSlot, const char* reason, uint32_t detectionIndex, short frax, short fray, short offsx, short offsy, - uint32_t detailA = 0, - uint32_t detailB = 0, - uint32_t detailC = 0, - uint32_t detailD = 0) { + uint32_t detailA = 0, uint32_t detailB = 0, + uint32_t detailC = 0, uint32_t detailD = 0) { if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } @@ -1063,9 +1054,9 @@ static Serum_Frame_Struc* Serum_LoadConcentratePrepared(const uint8_t flags) { } { - const char *debugSpriteId = std::getenv("SERUM_DEBUG_SPRITE_ID"); + const char* debugSpriteId = std::getenv("SERUM_DEBUG_SPRITE_ID"); if (debugSpriteId && debugSpriteId[0] != '\0') { - char *endPtr = nullptr; + char* endPtr = nullptr; unsigned long parsed = std::strtoul(debugSpriteId, &endPtr, 0); if (endPtr != debugSpriteId && *endPtr == '\0') { g_serumData.DebugLogSpriteDynamicSidecarState( @@ -1761,8 +1752,6 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, Serum_free(); g_profileDynamicHotPaths = IsEnvFlagEnabled("SERUM_PROFILE_DYNAMIC_HOTPATHS"); g_profileSparseVectors = IsEnvFlagEnabled("SERUM_PROFILE_SPARSE_VECTORS"); - g_disableDynamicPackedReads = - 
IsEnvFlagEnabled("SERUM_DISABLE_DYNAMIC_PACKED_READS"); g_profileColorizeFrameV2Ns = 0; g_profileColorizeSpriteV2Ns = 0; g_profileColorizeCalls = 0; @@ -1886,7 +1875,9 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, InitFrameLookupRuntimeStateFromStoredData(); NoteStartupRssSample("after-frame-lookup-restore"); } - if (g_serumData.colorRotationLookupByFrameAndColor.empty()) { + if (g_serumData.colorRotationLookupByFrameAndColor.empty() && + (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6 || + sceneDataUpdatedFromCsv)) { g_serumData.BuildColorRotationLookup(); NoteStartupRssSample("after-color-rotation-build"); } @@ -1895,12 +1886,6 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, g_serumData.BuildSpriteRuntimeSidecars(); NoteStartupRssSample("after-sprite-sidecar-build"); } - if (g_disableDynamicPackedReads) { - g_serumData.PrepareRuntimeDynamicHotCache(); - NoteStartupRssSample("after-dynamic-hot-cache"); - Log("Dynamic packed reads disabled for runtime via " - "SERUM_DISABLE_DYNAMIC_PACKED_READS"); - } NoteStartupRssSample("before-runtime"); LogStartupRssSummary(); } @@ -2122,8 +2107,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, tj)) { Log("Serum debug identify seed: inputCrc=%u startFrame=%u " "sceneRequested=%s mask=%u shape=%u hash=%u", - inputCrc, tj, sceneFrameRequested ? "true" : "false", mask, - Shape, Hashc); + inputCrc, tj, sceneFrameRequested ? 
"true" : "false", mask, Shape, + Hashc); } if (sceneFrameRequested) { auto sigIt = g_serumData.sceneFramesBySignature.find( @@ -2134,11 +2119,12 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { continue; } for (uint32_t ti : sigIt->second) { - if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify scene candidate: inputCrc=%u frameId=%u " "mask=%u shape=%u hash=%u storedHash=%u lastfound=%u", - inputCrc, ti, mask, Shape, Hashc, - g_serumData.hashcodes[ti][0], lastfound_stream); + inputCrc, ti, mask, Shape, Hashc, g_serumData.hashcodes[ti][0], + lastfound_stream); } if (first_match || ti != lastfound_stream || mask < 255) { if (DebugIdentifyVerboseEnabled() && @@ -2174,12 +2160,13 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { lastfound = ti; return ti; } - if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, ti)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, ti)) { Log("Serum debug identify decision: inputCrc=%u frameId=%u " "reason=same-frame firstMatch=%s lastfoundStream=%u mask=%u " "fullCrc=%u", - inputCrc, ti, first_match ? "true" : "false", - lastfound_stream, mask, full_crc); + inputCrc, ti, first_match ? "true" : "false", lastfound_stream, + mask, full_crc); } lastfound = ti; return IDENTIFY_SAME_FRAME; @@ -2266,8 +2253,8 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { } while (tj != lastfound_stream); if (DebugIdentifyVerboseEnabled() && DebugTraceMatchesInputCrc(inputCrc)) { - Log("Serum debug identify miss: inputCrc=%u sceneRequested=%s", - inputCrc, sceneFrameRequested ? "true" : "false"); + Log("Serum debug identify miss: inputCrc=%u sceneRequested=%s", inputCrc, + sceneFrameRequested ? 
"true" : "false"); } return IDENTIFY_NO_FRAME; // we found no corresponding frame } @@ -2515,7 +2502,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, std::unordered_set frameShapeDwords; bool frameShapeDwordsBuilt = false; - const uint16_t* frameSpriteBoundingBoxes = g_serumData.framespriteBB[quelleframe]; + const uint16_t* frameSpriteBoundingBoxes = + g_serumData.framespriteBB[quelleframe]; uint32_t candidateStart = 0; uint32_t candidateEnd = 0; const bool hasCandidateSidecars = @@ -2527,7 +2515,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, candidateStart = g_serumData.spriteCandidateOffsets[quelleframe]; candidateEnd = g_serumData.spriteCandidateOffsets[quelleframe + 1]; if (candidateEnd > g_serumData.spriteCandidateIds.size()) { - candidateEnd = static_cast(g_serumData.spriteCandidateIds.size()); + candidateEnd = + static_cast(g_serumData.spriteCandidateIds.size()); } } @@ -2537,9 +2526,9 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, hasCandidateSidecars && quelleframe < g_serumData.frameHasShapeSprite.size() && g_serumData.frameHasShapeSprite[quelleframe] > 0; - const uint32_t candidateCount = - hasCandidateSidecars ? (candidateEnd - candidateStart) - : MAX_SPRITES_PER_FRAME; + const uint32_t candidateCount = hasCandidateSidecars + ? (candidateEnd - candidateStart) + : MAX_SPRITES_PER_FRAME; DebugLogSpriteCheckStart(quelleframe, candidateCount, hasCandidateSidecars, frameHasShapeCandidates); for (uint32_t candidateIndex = 0; candidateIndex < candidateCount; @@ -2568,10 +2557,9 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, const uint8_t* spriteOriginal = g_serumData.spriteoriginal[qspr]; const uint8_t* spriteOpaque = g_serumData.spriteoriginal_opaque[qspr]; uint8_t* Frame = recframe; - const bool isshapecheck = - qspr < g_serumData.spriteUsesShape.size() - ? 
(g_serumData.spriteUsesShape[qspr] > 0) - : (g_serumData.sprshapemode[qspr][0] > 0); + const bool isshapecheck = qspr < g_serumData.spriteUsesShape.size() + ? (g_serumData.spriteUsesShape[qspr] > 0) + : (g_serumData.sprshapemode[qspr][0] > 0); if (isshapecheck && frameHasShapeCandidates) { if (!hasShapeFrameBuffer) { for (int i = 0; i < g_serumData.fwidth * g_serumData.fheight; i++) { @@ -2590,7 +2578,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, (uint32_t)(frameshape[rowBase + 1] << 16) | (uint32_t)(frameshape[rowBase + 2] << 24); for (uint32_t x = 0; x <= g_serumData.fwidth - 4; ++x) { - dword = (dword >> 8) | (uint32_t)(frameshape[rowBase + x + 3] << 24); + dword = + (dword >> 8) | (uint32_t)(frameshape[rowBase + x + 3] << 24); frameShapeDwords.insert(dword); } } @@ -2661,13 +2650,12 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, // continue: if ((frax - minxBB < sprx - detx) || (fray - minyBB < spry - dety)) { - DebugLogSpriteRejected( - quelleframe, qspr, spriteSlot, "bbox-start", - tm - detectStart, frax, fray, 0, 0, - static_cast(frax - minxBB), - static_cast(sprx - detx), - static_cast(fray - minyBB), - static_cast(spry - dety)); + DebugLogSpriteRejected(quelleframe, qspr, spriteSlot, + "bbox-start", tm - detectStart, frax, fray, + 0, 0, static_cast(frax - minxBB), + static_cast(sprx - detx), + static_cast(fray - minyBB), + static_cast(spry - dety)); continue; } // position of the detection area in the frame @@ -2677,14 +2665,14 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, // bottom), continue: if ((offsx + detw > (int)maxxBB + 1) || (offsy + deth > (int)maxyBB + 1)) { - DebugLogSpriteRejected( - quelleframe, qspr, spriteSlot, "bbox-end", - tm - detectStart, frax, fray, static_cast(offsx), - static_cast(offsy), - static_cast(offsx + detw), - static_cast((int)maxxBB + 1), - static_cast(offsy + deth), - static_cast((int)maxyBB + 1)); + DebugLogSpriteRejected(quelleframe, qspr, spriteSlot, "bbox-end", 
+ tm - detectStart, frax, fray, + static_cast(offsx), + static_cast(offsy), + static_cast(offsx + detw), + static_cast((int)maxxBB + 1), + static_cast(offsy + deth), + static_cast((int)maxyBB + 1)); continue; } DebugLogSpriteDetectionWord(quelleframe, qspr, tm - detectStart, @@ -2696,9 +2684,8 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, bool notthere = false; for (uint16_t tk = 0; tk < deth && !notthere; tk++) { const uint32_t spriteRow = static_cast(dety + tk); - const uint32_t rowIndex = static_cast(qspr) * - MAX_SPRITE_HEIGHT + - spriteRow; + const uint32_t rowIndex = + static_cast(qspr) * MAX_SPRITE_HEIGHT + spriteRow; if (rowIndex >= g_serumData.spriteOpaqueRowSegmentStart.size()) { DebugLogSpriteRejected( quelleframe, qspr, spriteSlot, "row-sidecar-oob", @@ -2752,15 +2739,15 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, continue; } const uint8_t expectedValue = - isshapecheck ? static_cast(spriteOriginal[spriteOffset] > 0) + isshapecheck ? static_cast( + spriteOriginal[spriteOffset] > 0) : spriteOriginal[spriteOffset]; if (expectedValue != Frame[frameOffset]) { DebugLogSpriteRejected( quelleframe, qspr, spriteSlot, "opaque-run-mismatch", - tm - detectStart, frax, fray, - static_cast(offsx), static_cast(offsy), - spriteOffset, frameOffset, expectedValue, - Frame[frameOffset]); + tm - detectStart, frax, fray, static_cast(offsx), + static_cast(offsy), spriteOffset, frameOffset, + expectedValue, Frame[frameOffset]); notthere = true; break; } @@ -2802,10 +2789,9 @@ bool Check_Spritesv2(uint8_t* recframe, uint32_t quelleframe, (pwid[*nspr] == pwid[tk]) && (phei[*nspr] == phei[tk])) identicalfound = true; } - DebugLogSpriteAccepted(quelleframe, qspr, spriteSlot, - pfrx[*nspr], pfry[*nspr], pspx[*nspr], - pspy[*nspr], pwid[*nspr], phei[*nspr], - identicalfound); + DebugLogSpriteAccepted(quelleframe, qspr, spriteSlot, pfrx[*nspr], + pfry[*nspr], pspx[*nspr], pspy[*nspr], + pwid[*nspr], phei[*nspr], identicalfound); if 
(identicalfound) { DebugLogSpriteRejected( quelleframe, qspr, spriteSlot, "duplicate", @@ -3054,9 +3040,9 @@ void Colorize_Framev2(uint8_t* frame, uint32_t IDfound, const uint16_t* frameDynaColorsExtra = frameHasDynamicExtra ? g_serumData.dyna4cols_v2_extra[IDfound] : nullptr; - const uint8_t* frameShadowDirExtra = frameHasDynamicExtra - ? g_serumData.dynashadowsdir_extra[IDfound] - : nullptr; + const uint8_t* frameShadowDirExtra = + frameHasDynamicExtra ? g_serumData.dynashadowsdir_extra[IDfound] + : nullptr; const uint16_t* frameShadowColorExtra = frameHasDynamicExtra ? g_serumData.dynashadowscol_extra[IDfound] : nullptr; @@ -3192,8 +3178,8 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, return; } const uint8_t* spriteOpaque = g_serumData.spriteoriginal_opaque[nosprite]; - const uint8_t* spriteDyna = hasDyna ? g_serumData.dynaspritemasks[nosprite] - : nullptr; + const uint8_t* spriteDyna = + hasDyna ? g_serumData.dynaspritemasks[nosprite] : nullptr; const uint8_t* spriteDynaActive = hasDynaActive ? g_serumData.dynaspritemasks_active[nosprite] : nullptr; if (hasDyna != hasDynaActive || (hasDyna && spriteDyna == nullptr) || @@ -3212,8 +3198,8 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, Log("Serum debug sprite render source: frameId=%u inputCrc=%u spriteId=%u " "frame=(%u,%u) sprite=(%u,%u) size=%ux%u hasOpaque=%s hasColor=%s " "hasDyna=%s hasDynaActive=%s hasExtraColor=%s", - IDfound, g_debugCurrentInputCrc, nosprite, frx, fry, spx, spy, wid, - hei, hasOpaque ? "true" : "false", hasColor ? "true" : "false", + IDfound, g_debugCurrentInputCrc, nosprite, frx, fry, spx, spy, wid, hei, + hasOpaque ? "true" : "false", hasColor ? "true" : "false", hasDyna ? "true" : "false", hasDynaActive ? "true" : "false", hasColorExtra ? 
"true" : "false"); } @@ -3665,8 +3651,9 @@ static void ForceNormalFrameRefreshAfterSceneEnd(void) { lastframe_full_crc_normal = 0xffffffff; } -static uint32_t Serum_ColorizeWithMetadatav2Internal( - uint8_t* frame, bool sceneFrameRequested, uint32_t knownFrameId) { +static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, + bool sceneFrameRequested, + uint32_t knownFrameId) { // return IDENTIFY_NO_FRAME if no new frame detected // return 0 if new frame with no rotation detected // return > 0 if new frame with rotations detected, the value is the delay @@ -3697,7 +3684,8 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( crc32_fast(frame, g_serumData.fwidth * g_serumData.fheight); } if (DebugTraceAllInputsEnabled()) { - Log("Serum debug input: api=v2 inputCrc=%u sceneRequested=%s knownFrameId=%u", + Log("Serum debug input: api=v2 inputCrc=%u sceneRequested=%s " + "knownFrameId=%u", g_debugCurrentInputCrc, sceneFrameRequested ? "true" : "false", knownFrameId); } @@ -3785,10 +3773,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( sceneFrameRequested ? 
"true" : "false", g_serumData.triggerIDs[lastfound][0]); } else if (DebugTraceAllInputsEnabled() && !sceneFrameRequested) { - Log("Serum debug trigger candidate: inputCrc=%u frameId=%u triggerId=%u " + Log("Serum debug trigger candidate: inputCrc=%u frameId=%u " + "triggerId=%u " "lastTriggerId=%u", - g_debugCurrentInputCrc, frameID, g_serumData.triggerIDs[lastfound][0], - lastTriggerID); + g_debugCurrentInputCrc, frameID, + g_serumData.triggerIDs[lastfound][0], lastTriggerID); } if (!sceneFrameRequested) { memcpy(lastFrame, frame, g_serumData.fwidth * g_serumData.fheight); @@ -3820,11 +3809,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( // stop any scene if (sceneFrameCount > 0 || sceneEndHoldUntilMs > 0) { - DebugLogSceneEvent("stop-normal-frame", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount); + DebugLogSceneEvent( + "stop-normal-frame", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, sceneStartImmediately, + sceneRepeatCount); } sceneFrameCount = 0; sceneIsLastBackgroundFrame = false; @@ -3841,7 +3830,8 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( g_serumData.triggerIDs[lastfound][0]; lasttriggerTimestamp = now; if (DebugTraceAllInputsEnabled()) { - Log("Serum debug trigger commit: inputCrc=%u frameId=%u triggerId=%u", + Log("Serum debug trigger commit: inputCrc=%u frameId=%u " + "triggerId=%u", g_debugCurrentInputCrc, lastfound, lastTriggerID); } @@ -3858,10 +3848,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( if (g_serumData.sceneGenerator->isActive() && lastTriggerID < 0xffffffff) { - const bool hasSceneInfo = g_serumData.sceneGenerator->getSceneInfo( - lastTriggerID, sceneFrameCount, sceneDurationPerFrame, - sceneInterruptable, sceneStartImmediately, sceneRepeatCount, - sceneOptionFlags); + const bool hasSceneInfo = 
+ g_serumData.sceneGenerator->getSceneInfo( + lastTriggerID, sceneFrameCount, sceneDurationPerFrame, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, sceneOptionFlags); if (DebugTraceAllInputsEnabled()) { Log("Serum debug trigger scene-info: triggerId=%u found=%s " "frameCount=%u duration=%u interruptable=%s " @@ -3869,12 +3860,13 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( lastTriggerID, hasSceneInfo ? "true" : "false", sceneFrameCount, sceneDurationPerFrame, sceneInterruptable ? "true" : "false", - sceneStartImmediately ? "true" : "false", - sceneRepeatCount, sceneOptionFlags); + sceneStartImmediately ? "true" : "false", sceneRepeatCount, + sceneOptionFlags); } if (hasSceneInfo) { - DebugLogSceneEvent("trigger", static_cast(lastTriggerID), - 0, sceneFrameCount, sceneDurationPerFrame, + DebugLogSceneEvent("trigger", + static_cast(lastTriggerID), 0, + sceneFrameCount, sceneDurationPerFrame, sceneOptionFlags, sceneInterruptable, sceneStartImmediately, sceneRepeatCount); const bool sceneIsBackground = @@ -3985,19 +3977,22 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( isBackgroundSceneRequested ? lastFrameId : lastfound); ti++; } - if (DebugTraceMatches(g_debugCurrentInputCrc, - isBackgroundSceneRequested ? lastFrameId - : lastfound)) { + if (DebugTraceMatches( + g_debugCurrentInputCrc, + isBackgroundSceneRequested ? lastFrameId : lastfound)) { uint64_t spriteHash = DebugHashBytesFNV1a64( - ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && mySerum.frame32) + ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && + mySerum.frame32) ? static_cast(mySerum.frame32) : static_cast(mySerum.frame64), - ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && mySerum.frame32) + ((mySerum.flags & FLAG_RETURNED_32P_FRAME_OK) && + mySerum.frame32) ? 
static_cast(mySerum.width32) * 32 * sizeof(uint16_t) : static_cast(mySerum.width64) * 64 * sizeof(uint16_t)); - Log("Serum debug stage hash: frameId=%u inputCrc=%u stage=post-sprites " + Log("Serum debug stage hash: frameId=%u inputCrc=%u " + "stage=post-sprites " "hash=%llu sprites=%u", isBackgroundSceneRequested ? lastFrameId : lastfound, g_debugCurrentInputCrc, @@ -4026,7 +4021,8 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( const double peakRssMiB = (double)g_profilePeakRssBytes / (1024.0 * 1024.0); Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms " - "Colorize_Spritev2=%.3fms rss=%.1fMiB peak=%.1fMiB over %u frames", + "Colorize_Spritev2=%.3fms rss=%.1fMiB peak=%.1fMiB over %u " + "frames", totalMs, frameMs, spriteMs, rssMiB, peakRssMiB, (uint32_t)g_profileColorizeCalls); if (g_profileSparseVectors) { @@ -4203,8 +4199,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal( } SERUM_API uint32_t -Serum_ColorizeWithMetadatav2(uint8_t* frame, - bool sceneFrameRequested = false) { +Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { return Serum_ColorizeWithMetadatav2Internal(frame, sceneFrameRequested, IDENTIFY_NO_FRAME); } @@ -4261,22 +4256,21 @@ uint32_t Serum_RenderScene(void) { const uint32_t now = GetMonotonicTimeMs(); if (sceneEndHoldUntilMs > 0) { if (now < sceneEndHoldUntilMs) { - DebugLogSceneEvent("end-hold", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount); + DebugLogSceneEvent( + "end-hold", static_cast(lastTriggerID), sceneCurrentFrame, + sceneFrameCount, sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, sceneRepeatCount); mySerum.rotationtimer = sceneEndHoldUntilMs - now; return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_SCENE; } // End hold elapsed: finish scene now. 
sceneEndHoldUntilMs = 0; - DebugLogSceneEvent("end-hold-finished", - static_cast(lastTriggerID), sceneCurrentFrame, - sceneFrameCount, sceneDurationPerFrame, - sceneOptionFlags, sceneInterruptable, - sceneStartImmediately, sceneRepeatCount); + DebugLogSceneEvent( + "end-hold-finished", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, sceneStartImmediately, + sceneRepeatCount); sceneFrameCount = 0; mySerum.rotationtimer = 0; ForceNormalFrameRefreshAfterSceneEnd(); @@ -4314,15 +4308,12 @@ uint32_t Serum_RenderScene(void) { return FLAG_RETURNED_V2_SCENE; } - const bool debugDisableSceneTriplets = - IsEnvFlagEnabled("SERUM_DEBUG_DISABLE_SCENE_TRIPLETS"); bool renderedFromDirectTriplet = false; uint8_t currentGroup = 1; bool hasGroup = g_serumData.sceneGenerator->updateAndGetCurrentGroup( static_cast(lastTriggerID), sceneCurrentFrame, -1, currentGroup); - if (!debugDisableSceneTriplets && hasGroup && - !g_serumData.sceneFrameIdByTriplet.empty()) { + if (hasGroup && !g_serumData.sceneFrameIdByTriplet.empty()) { auto it = g_serumData.sceneFrameIdByTriplet.find( MakeSceneTripletKey(static_cast(lastTriggerID), currentGroup, sceneCurrentFrame)); @@ -4336,9 +4327,8 @@ uint32_t Serum_RenderScene(void) { } if (DebugSceneVerboseEnabled()) { Log("Serum debug scene path: sceneId=%u frameIndex=%u group=%u " - "disableTriplets=%s usedTriplet=%s tripletCount=%u", + "usedTriplet=%s tripletCount=%u", static_cast(lastTriggerID), sceneCurrentFrame, currentGroup, - debugDisableSceneTriplets ? "true" : "false", renderedFromDirectTriplet ? 
"true" : "false", static_cast(g_serumData.sceneFrameIdByTriplet.size())); } @@ -4358,12 +4348,11 @@ uint32_t Serum_RenderScene(void) { return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; } if (result != 0xffff) { - DebugLogSceneEvent("generate-error", - static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); + DebugLogSceneEvent( + "generate-error", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); sceneFrameCount = 0; // error generating scene frame, stop the scene mySerum.rotationtimer = 0; ForceNormalFrameRefreshAfterSceneEnd(); @@ -4373,12 +4362,11 @@ uint32_t Serum_RenderScene(void) { mySerum.rotationtimer = sceneDurationPerFrame; Serum_ColorizeWithMetadatav2(sceneFrame, true); } else { - DebugLogSceneEvent("triplet-render", - static_cast(lastTriggerID), sceneCurrentFrame, - sceneFrameCount, sceneDurationPerFrame, - sceneOptionFlags, sceneInterruptable, - sceneStartImmediately, sceneRepeatCount, currentGroup, - 0xffff); + DebugLogSceneEvent("triplet-render", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, 0xffff); } sceneCurrentFrame++; @@ -4394,11 +4382,11 @@ uint32_t Serum_RenderScene(void) { } if (sceneCurrentFrame >= sceneFrameCount) { - DebugLogSceneEvent("scene-finished", - static_cast(lastTriggerID), sceneCurrentFrame, - sceneFrameCount, sceneDurationPerFrame, - sceneOptionFlags, sceneInterruptable, - sceneStartImmediately, sceneRepeatCount); + DebugLogSceneEvent("scene-finished", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, 
+ sceneRepeatCount); if (sceneEndHoldDurationMs > 0) { sceneEndHoldUntilMs = now + sceneEndHoldDurationMs; mySerum.rotationtimer = sceneEndHoldDurationMs; @@ -4576,8 +4564,7 @@ SERUM_API bool Serum_GetRuntimeMetadata(Serum_Runtime_Metadata* metadata) { return false; } - if (metadata->size != 0 && - metadata->size < sizeof(Serum_Runtime_Metadata)) { + if (metadata->size != 0 && metadata->size < sizeof(Serum_Runtime_Metadata)) { return false; } From dd0d09a0b036d00a2792e1389a4c314a0212dfc9 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 17:01:10 +0100 Subject: [PATCH 29/42] storage --- AGENTS.md | 5 ++++ src/SerumData.cpp | 71 +++++++++++++++++++++++++++++++++++++++++++++-- src/SerumData.h | 3 +- 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 978d5c3..b8203ab 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -64,6 +64,9 @@ Vector policy currently used in `SerumData`: all-zero payload only when the paired active sidecar still marks active pixels; this is required because dynamic layer `0` is a valid value and is not equivalent to "no payload". + - `BuildPackingSidecarsAndNormalize()` also snapshots each generated sidecar + payload into `m_packingSidecarsStorage` as a transient two-dimensional byte + store (`std::vector<std::vector<uint8_t>>`). - Precomputed frame-level dynamic fast flags are persisted: - `frameHasDynamic` - `frameHasDynamicExtra` @@ -83,6 +86,8 @@ Vector policy currently used in `SerumData`: - flattened detection metadata (`spriteDetectOffsets`, `spriteDetectMeta`) - per-sprite opaque row-segment runs (`spriteOpaqueRowSegmentStart`, `spriteOpaqueRowSegmentCount`, `spriteOpaqueSegments`) + - `BuildSpriteRuntimeSidecars()` also snapshots the generated runtime sidecar + vectors into `m_packingSidecarsStorage` as raw byte copies. - Runtime uses sidecar flags instead of `255` sentinels for transparency / dynamic-zone activity. 
- Runtime does not include sentinel-based fallback in sprite/dynamic helpers; missing/incorrect sidecars are treated as a conversion/load bug and are not diff --git a/src/SerumData.cpp b/src/SerumData.cpp index ecd84d4..b86efed 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -95,8 +95,9 @@ SerumData::SerumData() dynaspritemasks_extra.setProfileLabel("dynaspritemasks_extra"); dynaspritemasks_extra_active.setProfileLabel("dynaspritemasks_extra_active"); sceneGenerator = new SceneGenerator(); - if (is_real_machine()) - m_packingSidecarsStorage.assign(256u * 1024u * 1024u, 0xA5); + if (is_real_machine()) { + m_packingSidecarsStorage.emplace_back(256u * 1024u * 1024u, 0xA5); + } } SerumData::~SerumData() {} @@ -230,6 +231,13 @@ void SerumData::DebugLogSpriteDynamicSidecarState(const char *stage, activeOther); } +void SerumData::DebugLogPackingSidecarsStorageSizes() { + for (size_t i = 0; i < m_packingSidecarsStorage.size(); ++i) { + Log("Serum debug packing sidecar storage: index=%zu size=%zu", i, + m_packingSidecarsStorage[i].size()); + } +} + void SerumData::BuildPackingSidecarsAndNormalize() { if (m_packingSidecarsNormalized) { return; @@ -243,6 +251,16 @@ void SerumData::BuildPackingSidecarsAndNormalize() { std::vector normalized; std::vector flags; + if (!m_packingSidecarsStorage.empty()) { + m_packingSidecarsStorage.reserve(static_cast(nsprites) * 5 + + static_cast(nframes) * 2); + } + auto storeSidecarCopy = [this](const uint8_t *data, size_t size) { + if (!data || size == 0 || m_packingSidecarsStorage.empty()) { + return; + } + m_packingSidecarsStorage.emplace_back(data, data + size); + }; normalized.resize(spritePixels); flags.resize(spritePixels); @@ -262,6 +280,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { normalized[i] = opaque ? 
value : 0; } spriteoriginal_opaque.set(spriteId, flags.data(), spritePixels); + storeSidecarCopy(flags.data(), spritePixels); spriteoriginal.set(spriteId, normalized.data(), spritePixels); } @@ -285,6 +304,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } spritemask_extra_opaque.set(spriteId, flags.data(), spritePixels, &isextrasprite); + storeSidecarCopy(flags.data(), spritePixels); spritemask_extra.set(spriteId, normalized.data(), spritePixels, &isextrasprite); } @@ -307,6 +327,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { normalized[i] = opaque ? value : 0; } spritedescriptionso_opaque.set(spriteId, flags.data(), spritePixelsV1); + storeSidecarCopy(flags.data(), spritePixelsV1); spritedescriptionso.set(spriteId, normalized.data(), spritePixelsV1); } @@ -331,6 +352,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { anyActive = anyActive || active; } dynamasks_active.set(frameId, flags.data(), framePixels); + storeSidecarCopy(flags.data(), framePixels); dynamasks.set(frameId, normalized.data(), framePixels, static_cast *>(nullptr), anyActive); frameHasDynamic[frameId] = anyActive ? 1 : 0; @@ -362,6 +384,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } dynamasks_extra_active.set(frameId, flags.data(), extraFramePixels, &isextraframe); + storeSidecarCopy(flags.data(), extraFramePixels); dynamasks_extra.set(frameId, normalized.data(), extraFramePixels, &isextraframe, anyActive); frameHasDynamicExtra[frameId] = anyActive ? 
1 : 0; @@ -391,6 +414,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { anyActive = anyActive || active; } dynaspritemasks_active.set(spriteId, flags.data(), spritePixels); + storeSidecarCopy(flags.data(), spritePixels); dynaspritemasks.set(spriteId, normalized.data(), spritePixels, static_cast *>(nullptr), anyActive); @@ -419,6 +443,7 @@ void SerumData::BuildPackingSidecarsAndNormalize() { } dynaspritemasks_extra_active.set(spriteId, flags.data(), spritePixels, &isextrasprite); + storeSidecarCopy(flags.data(), spritePixels); dynaspritemasks_extra.set(spriteId, normalized.data(), spritePixels, &isextrasprite, anyActive); } @@ -456,6 +481,14 @@ bool SerumData::HasSpriteRuntimeSidecars() const { } void SerumData::BuildSpriteRuntimeSidecars() { + auto storeRuntimeSidecarCopy = [this](const void *data, size_t size) { + if (!data || size == 0 || m_packingSidecarsStorage.empty()) { + return; + } + const auto *bytes = static_cast(data); + m_packingSidecarsStorage.emplace_back(bytes, bytes + size); + }; + spriteCandidateOffsets.assign(static_cast(nframes) + 1, 0); spriteCandidateIds.clear(); spriteCandidateSlots.clear(); @@ -607,6 +640,40 @@ void SerumData::BuildSpriteRuntimeSidecars() { } spriteCandidateOffsets[nframes] = static_cast(spriteCandidateIds.size()); + + storeRuntimeSidecarCopy( + spriteCandidateOffsets.data(), + spriteCandidateOffsets.size() * sizeof(spriteCandidateOffsets[0])); + storeRuntimeSidecarCopy( + spriteCandidateIds.data(), + spriteCandidateIds.size() * sizeof(spriteCandidateIds[0])); + storeRuntimeSidecarCopy( + spriteCandidateSlots.data(), + spriteCandidateSlots.size() * sizeof(spriteCandidateSlots[0])); + storeRuntimeSidecarCopy( + frameHasShapeSprite.data(), + frameHasShapeSprite.size() * sizeof(frameHasShapeSprite[0])); + storeRuntimeSidecarCopy(spriteWidth.data(), + spriteWidth.size() * sizeof(spriteWidth[0])); + storeRuntimeSidecarCopy(spriteHeight.data(), + spriteHeight.size() * sizeof(spriteHeight[0])); + 
storeRuntimeSidecarCopy(spriteUsesShape.data(), + spriteUsesShape.size() * sizeof(spriteUsesShape[0])); + storeRuntimeSidecarCopy( + spriteDetectOffsets.data(), + spriteDetectOffsets.size() * sizeof(spriteDetectOffsets[0])); + storeRuntimeSidecarCopy( + spriteDetectMeta.data(), + spriteDetectMeta.size() * sizeof(spriteDetectMeta[0])); + storeRuntimeSidecarCopy(spriteOpaqueRowSegmentStart.data(), + spriteOpaqueRowSegmentStart.size() * + sizeof(spriteOpaqueRowSegmentStart[0])); + storeRuntimeSidecarCopy(spriteOpaqueRowSegmentCount.data(), + spriteOpaqueRowSegmentCount.size() * + sizeof(spriteOpaqueRowSegmentCount[0])); + storeRuntimeSidecarCopy( + spriteOpaqueSegments.data(), + spriteOpaqueSegments.size() * sizeof(spriteOpaqueSegments[0])); } void SerumData::LogSparseVectorProfileSnapshot() { diff --git a/src/SerumData.h b/src/SerumData.h index 9dc4a1c..f7d63a6 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -78,6 +78,7 @@ class SerumData { void BuildPackingSidecarsAndNormalize(); void BuildSpriteRuntimeSidecars(); void DebugLogSpriteDynamicSidecarState(const char *stage, uint32_t spriteId); + void DebugLogPackingSidecarsStorageSizes(); bool HasSpriteRuntimeSidecars() const; void BuildColorRotationLookup(); bool TryGetColorRotation(uint32_t frameId, uint16_t color, bool isextra, @@ -185,7 +186,7 @@ class SerumData { uint8_t m_loadFlags = 0; bool m_packingSidecarsNormalized = false; - std::vector m_packingSidecarsStorage; + std::vector> m_packingSidecarsStorage; friend class cereal::access; From 98f918dbd3ad0ea14467ea9785ed0afda7c43781 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 17:07:48 +0100 Subject: [PATCH 30/42] fixed v5 sprites --- AGENTS.md | 3 +++ src/serum-decode.cpp | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index b8203ab..36cb3b3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -118,6 +118,9 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. compatibility handling. 
- Direct `v6` cROMc runtime load is expected to consume already-normalized runtime-ready data instead of mutating or repairing it on device. + - Direct `v5` cROMc load must rerun this step after deserialization because + the persisted v5 format does not contain the normalized opacity/dynamic + sidecars used by current runtime code. 9. Build or restore sprite runtime sidecars via `BuildSpriteRuntimeSidecars()`. - For direct `v6` cROMc loads, runtime sidecars are expected to be restored from file as final runtime data. diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index f2934bf..e09448a 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1867,6 +1867,10 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, if (result && g_serumData.sceneGenerator->isActive()) g_serumData.sceneGenerator->setDepth(result->nocolors == 16 ? 4 : 2); if (result) { + if (loadedFromConcentrate && g_serumData.concentrateFileVersion < 6) { + g_serumData.BuildPackingSidecarsAndNormalize(); + NoteStartupRssSample("after-packing-sidecar-normalize"); + } if (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6 || sceneDataUpdatedFromCsv) { BuildFrameLookupVectors(); From 74374d9bf6eb7928e2bb9defc4f003249d54af54 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 20:07:30 +0100 Subject: [PATCH 31/42] fixed platform independent LUT --- AGENTS.md | 31 +++++++++++++- src/SerumData.cpp | 2 +- src/SerumData.h | 97 ++++++++++++++++++++++++++++++++++++++++++-- src/serum-decode.cpp | 13 +++--- 4 files changed, 131 insertions(+), 12 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 36cb3b3..3ceb9a7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -5,6 +5,13 @@ This document explains how `libserum` works end-to-end, with emphasis on runtime **Maintenance rule:** Any feature change, behavior change, data format change, or API/signature change in this repository **must** be reflected in this file in the same PR/commit. 
+**Platform-independence rule:** `libserum` is intended to behave the same on +all supported platforms. Runtime behavior, persisted `cROMc` semantics, and +derived lookup data must not depend on whether the archive was generated on +Windows, macOS, or Linux. If equal source data is loaded/generated, the +resulting `v6` `cROMc` content and runtime behavior are expected to be +platform-independent. + ## High-level architecture Core files: - `src/serum-decode.cpp`: Main runtime engine (load, identify, colorize, rotate, scene orchestration). @@ -76,6 +83,9 @@ Vector policy currently used in `SerumData`: `colorRotationLookupByFrameAndColor[(frameId,isExtra,color)] -> (rotation,position)` restored from v6 cROMc when present. - v5 / authoring-time rebuild flows may rebuild the lookup before re-save. + - v6 persistence stores this derived lookup in canonical sorted-entry form + rather than direct `unordered_map` archive order, so `cROMc` output stays + platform-independent and deterministic for identical source data. - `ColorInRotation` uses lookup-only runtime path (no linear scan fallback). - Sprite runtime sidecars are precomputed and used by `Check_Spritesv2`: - frame candidate list with sprite slot indices (`spriteCandidateOffsets`, @@ -111,8 +121,12 @@ Entry point: `Serum_Load(altcolorpath, romname, flags)`. 5. If CSV exists and format is v2, parse scenes via `SceneGenerator::parseCSV`. 6. Set scene depth from color count when scenes are active. 7. Build or restore frame lookup acceleration: - - If loaded from cROMc v6 and no CSV update in this run: use stored lookup via `InitFrameLookupRuntimeStateFromStoredData()`. + - If loaded from cROMc v6 and no CSV update in this run: use stored lookup + via `InitFrameLookupRuntimeStateFromStoredData()`. - Otherwise: rebuild via `BuildFrameLookupVectors()`. + - Stored `v6` lookup data is expected to be valid across supported + platforms; do not introduce platform-specific load branching for direct + `v6` runtime loads. 8. 
Build/normalize packing sidecars via `BuildPackingSidecarsAndNormalize()`. - This normalization/repair path is for source-data build flows and `v5` compatibility handling. @@ -258,6 +272,13 @@ Stored in v6: - `sceneFrameIdByTriplet` - Color-rotation lookup acceleration: - `colorRotationLookupByFrameAndColor` +- Derived lookup tables are serialized in canonical sorted-entry form instead of + direct `unordered_map` archive order, so equal data yields equal `cROMc` + bytes across platforms. +- `v6` `cROMc` archives are intended to be portable across supported + platforms. A `cROMc` generated on one platform must load with the same + semantics on another platform without archive-format forks or + platform-specific compatibility branches. - Sprite runtime sidecars: - `spriteCandidateOffsets`, `spriteCandidateIds`, `spriteCandidateSlots` - `frameHasShapeSprite` @@ -275,7 +296,13 @@ Backward compatibility: - v5 files are loadable. - v5 sparse vectors are deserialized with legacy sparse-vector layout and converted to packed representation after load. - For v5 loads, scene lookup vectors and other derived runtime sidecars may be rebuilt at startup. -- For direct `v6` loads, stored lookup vectors and runtime sidecars are expected to be consumed as persisted runtime-ready data. +- For direct `v6` loads, stored runtime sidecars are expected to be consumed as + persisted runtime-ready data. +- For direct `v6` loads, stored scene/color-rotation lookup tables are reused + directly; their persisted representation is platform-independent. +- Cross-platform differences in `v6` behavior are treated as bugs in canonical + persistence or runtime reconstruction, not as an acceptable reason to add + platform-tagged `cROMc` variants. - A `pup.csv` update in the same authoring-time load cycle may invalidate persisted scene lookup data and requires rebuild before re-save. - Direct scene-triplet preprocessing is only executed for v6. 
- v6 scene-data deserialization validates block magic and count before diff --git a/src/SerumData.cpp b/src/SerumData.cpp index b86efed..569e7fe 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -232,7 +232,7 @@ void SerumData::DebugLogSpriteDynamicSidecarState(const char *stage, } void SerumData::DebugLogPackingSidecarsStorageSizes() { - for (size_t i = 0; i < m_packingSidecarsStorage.size(); ++i) { + for (size_t i = 1; i < m_packingSidecarsStorage.size(); ++i) { Log("Serum debug packing sidecar storage: index=%zu size=%zu", i, m_packingSidecarsStorage[i].size()); } diff --git a/src/SerumData.h b/src/SerumData.h index f7d63a6..b4be976 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,36 @@ class SerumData { } }; + struct SceneSignatureLookupEntry { + uint64_t key = 0; + std::vector frameIds; + + template + void serialize(Archive &ar) { + ar(key, frameIds); + } + }; + + struct SceneTripletLookupEntry { + uint64_t key = 0; + uint32_t frameId = 0; + + template + void serialize(Archive &ar) { + ar(key, frameId); + } + }; + + struct ColorRotationLookupEntry { + uint64_t key = 0; + uint16_t value = 0; + + template + void serialize(Archive &ar) { + ar(key, value); + } + }; + SerumData(); ~SerumData(); @@ -211,11 +242,48 @@ class SerumData { if constexpr (Archive::is_saving::value) { if (concentrateFileVersion >= 6) { - ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, + std::vector sceneSignatureEntries; + sceneSignatureEntries.reserve(sceneFramesBySignature.size()); + for (const auto &entry : sceneFramesBySignature) { + SceneSignatureLookupEntry serialized; + serialized.key = entry.first; + serialized.frameIds = entry.second; + std::sort(serialized.frameIds.begin(), serialized.frameIds.end()); + sceneSignatureEntries.push_back(std::move(serialized)); + } + std::sort(sceneSignatureEntries.begin(), sceneSignatureEntries.end(), + [](const 
SceneSignatureLookupEntry &a, + const SceneSignatureLookupEntry &b) { + return a.key < b.key; + }); + + std::vector sceneTripletEntries; + sceneTripletEntries.reserve(sceneFrameIdByTriplet.size()); + for (const auto &entry : sceneFrameIdByTriplet) { + sceneTripletEntries.push_back({entry.first, entry.second}); + } + std::sort(sceneTripletEntries.begin(), sceneTripletEntries.end(), + [](const SceneTripletLookupEntry &a, + const SceneTripletLookupEntry &b) { + return a.key < b.key; + }); + + std::vector colorRotationEntries; + colorRotationEntries.reserve(colorRotationLookupByFrameAndColor.size()); + for (const auto &entry : colorRotationLookupByFrameAndColor) { + colorRotationEntries.push_back({entry.first, entry.second}); + } + std::sort(colorRotationEntries.begin(), colorRotationEntries.end(), + [](const ColorRotationLookupEntry &a, + const ColorRotationLookupEntry &b) { + return a.key < b.key; + }); + + ar(frameIsScene, sceneSignatureEntries, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor, + sceneTripletEntries, colorRotationEntries, spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, frameHasShapeSprite, spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, spriteDetectMeta, spriteOpaqueRowSegmentStart, @@ -223,15 +291,36 @@ class SerumData { } } else { if (concentrateFileVersion >= 6) { - ar(frameIsScene, sceneFramesBySignature, spriteoriginal_opaque, + std::vector sceneSignatureEntries; + std::vector sceneTripletEntries; + std::vector colorRotationEntries; + ar(frameIsScene, sceneSignatureEntries, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - 
sceneFrameIdByTriplet, colorRotationLookupByFrameAndColor, + sceneTripletEntries, colorRotationEntries, spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, frameHasShapeSprite, spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, spriteDetectMeta, spriteOpaqueRowSegmentStart, spriteOpaqueRowSegmentCount, spriteOpaqueSegments); + + sceneFramesBySignature.clear(); + sceneFramesBySignature.reserve(sceneSignatureEntries.size()); + for (const auto &entry : sceneSignatureEntries) { + sceneFramesBySignature[entry.key] = entry.frameIds; + } + + sceneFrameIdByTriplet.clear(); + sceneFrameIdByTriplet.reserve(sceneTripletEntries.size()); + for (const auto &entry : sceneTripletEntries) { + sceneFrameIdByTriplet[entry.key] = entry.frameId; + } + + colorRotationLookupByFrameAndColor.clear(); + colorRotationLookupByFrameAndColor.reserve(colorRotationEntries.size()); + for (const auto &entry : colorRotationEntries) { + colorRotationLookupByFrameAndColor[entry.key] = entry.value; + } } else { frameIsScene.clear(); sceneFramesBySignature.clear(); diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index e09448a..197bbf7 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -481,6 +481,8 @@ static void DebugLogFrameMetadataIfRequested(uint32_t frameId) { frameId, i, spriteId, spriteBB[i * 4], spriteBB[i * 4 + 1], spriteBB[i * 4 + 2], spriteBB[i * 4 + 3], usesShape); } + + g_serumData.DebugLogPackingSidecarsStorageSizes(); } static uint64_t DebugHashBytesFNV1a64(const void* data, size_t size) { @@ -1867,21 +1869,22 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, if (result && g_serumData.sceneGenerator->isActive()) g_serumData.sceneGenerator->setDepth(result->nocolors == 16 ? 
4 : 2); if (result) { + const bool rebuildDerivedLookups = !loadedFromConcentrate || + g_serumData.concentrateFileVersion < 6 || + sceneDataUpdatedFromCsv; if (loadedFromConcentrate && g_serumData.concentrateFileVersion < 6) { g_serumData.BuildPackingSidecarsAndNormalize(); NoteStartupRssSample("after-packing-sidecar-normalize"); } - if (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6 || - sceneDataUpdatedFromCsv) { + if (rebuildDerivedLookups) { BuildFrameLookupVectors(); NoteStartupRssSample("after-frame-lookup-build"); } else { InitFrameLookupRuntimeStateFromStoredData(); NoteStartupRssSample("after-frame-lookup-restore"); } - if (g_serumData.colorRotationLookupByFrameAndColor.empty() && - (!loadedFromConcentrate || g_serumData.concentrateFileVersion < 6 || - sceneDataUpdatedFromCsv)) { + if (rebuildDerivedLookups || + g_serumData.colorRotationLookupByFrameAndColor.empty()) { g_serumData.BuildColorRotationLookup(); NoteStartupRssSample("after-color-rotation-build"); } From 8ae1d060620227790ad04d8731b1f17fdcfe49fb Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 21:23:17 +0100 Subject: [PATCH 32/42] scene test --- AGENTS.md | 6 ++-- src/serum-decode.cpp | 75 ++++++++++++++++---------------------------- 2 files changed, 30 insertions(+), 51 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 3ceb9a7..8494658 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -174,9 +174,9 @@ Behavior: - Scene requests use signature lookup in `sceneFramesBySignature` for the current `(mask,shape,hash)`. - Scene rendering can bypass generic scene identification when a direct triplet entry exists in `sceneFrameIdByTriplet`. -- During scene playback, direct-triplet mode uses lightweight group progression - (`SceneGenerator::updateAndGetCurrentGroup`) and does not call - `generateFrame(...)` per tick unless fallback is needed. 
+- Direct scene-triplet preprocessing remains persisted in `sceneFrameIdByTriplet`, + but runtime playback currently uses the legacy `generateFrame(...)` path for + each scene tick. - Legacy same-frame behavior (`IDENTIFY_SAME_FRAME`) is preserved with full-frame CRC check. Return values: diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 197bbf7..dfe02fd 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -4315,66 +4315,45 @@ uint32_t Serum_RenderScene(void) { return FLAG_RETURNED_V2_SCENE; } - bool renderedFromDirectTriplet = false; uint8_t currentGroup = 1; bool hasGroup = g_serumData.sceneGenerator->updateAndGetCurrentGroup( static_cast(lastTriggerID), sceneCurrentFrame, -1, currentGroup); - if (hasGroup && !g_serumData.sceneFrameIdByTriplet.empty()) { - auto it = g_serumData.sceneFrameIdByTriplet.find( - MakeSceneTripletKey(static_cast(lastTriggerID), - currentGroup, sceneCurrentFrame)); - if (it != g_serumData.sceneFrameIdByTriplet.end() && - it->second < g_serumData.nframes) { - memset(sceneFrame, 0, sizeof(sceneFrame)); - mySerum.rotationtimer = sceneDurationPerFrame; - Serum_ColorizeWithMetadatav2Internal(sceneFrame, true, it->second); - renderedFromDirectTriplet = true; - } - } if (DebugSceneVerboseEnabled()) { Log("Serum debug scene path: sceneId=%u frameIndex=%u group=%u " "usedTriplet=%s tripletCount=%u", static_cast(lastTriggerID), sceneCurrentFrame, currentGroup, - renderedFromDirectTriplet ? "true" : "false", + "false", static_cast(g_serumData.sceneFrameIdByTriplet.size())); } - if (!renderedFromDirectTriplet) { - uint16_t result = g_serumData.sceneGenerator->generateFrame( - lastTriggerID, sceneCurrentFrame, sceneFrame, - hasGroup ? 
currentGroup : -1); - DebugLogSceneEvent("generate", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); - if (result > 0 && result < 0xffff) { - // frame not ready yet, return the time to wait - mySerum.rotationtimer = result; - return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; - } - if (result != 0xffff) { - DebugLogSceneEvent( - "generate-error", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, - sceneOptionFlags, sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); - sceneFrameCount = 0; // error generating scene frame, stop the scene - mySerum.rotationtimer = 0; - ForceNormalFrameRefreshAfterSceneEnd(); - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | - FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; - } - mySerum.rotationtimer = sceneDurationPerFrame; - Serum_ColorizeWithMetadatav2(sceneFrame, true); - } else { - DebugLogSceneEvent("triplet-render", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, 0xffff); + uint16_t result = g_serumData.sceneGenerator->generateFrame( + lastTriggerID, sceneCurrentFrame, sceneFrame, + hasGroup ? 
currentGroup : -1); + DebugLogSceneEvent("generate", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); + if (result > 0 && result < 0xffff) { + // frame not ready yet, return the time to wait + mySerum.rotationtimer = result; + return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; + } + if (result != 0xffff) { + DebugLogSceneEvent( + "generate-error", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); + sceneFrameCount = 0; // error generating scene frame, stop the scene + mySerum.rotationtimer = 0; + ForceNormalFrameRefreshAfterSceneEnd(); + return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | + FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; } + mySerum.rotationtimer = sceneDurationPerFrame; + Serum_ColorizeWithMetadatav2(sceneFrame, true); sceneCurrentFrame++; if (sceneCurrentFrame >= sceneFrameCount && sceneRepeatCount > 0) { From 51427476945e4eaf3fe3e68574fe168699566d90 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 21:50:55 +0100 Subject: [PATCH 33/42] next test --- AGENTS.md | 7 ++-- src/serum-decode.cpp | 94 +++++++++++++++++++++++++++++++------------- 2 files changed, 71 insertions(+), 30 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8494658..458e99d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -174,9 +174,10 @@ Behavior: - Scene requests use signature lookup in `sceneFramesBySignature` for the current `(mask,shape,hash)`. - Scene rendering can bypass generic scene identification when a direct triplet entry exists in `sceneFrameIdByTriplet`. 
-- Direct scene-triplet preprocessing remains persisted in `sceneFrameIdByTriplet`, - but runtime playback currently uses the legacy `generateFrame(...)` path for - each scene tick. +- During scene playback, direct-triplet mode still generates the scene marker + frame via `generateFrame(..., disableTimer=true)` so the runtime sees the same + scene marker buffer as the legacy path, but it bypasses `Identify_Frame()` + by supplying the precomputed `sceneFrameIdByTriplet` frame ID directly. - Legacy same-frame behavior (`IDENTIFY_SAME_FRAME`) is preserved with full-frame CRC check. Return values: diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index dfe02fd..d525fce 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -4315,45 +4315,85 @@ uint32_t Serum_RenderScene(void) { return FLAG_RETURNED_V2_SCENE; } + bool renderedFromDirectTriplet = false; uint8_t currentGroup = 1; bool hasGroup = g_serumData.sceneGenerator->updateAndGetCurrentGroup( static_cast(lastTriggerID), sceneCurrentFrame, -1, currentGroup); + if (hasGroup && !g_serumData.sceneFrameIdByTriplet.empty()) { + auto it = g_serumData.sceneFrameIdByTriplet.find( + MakeSceneTripletKey(static_cast(lastTriggerID), + currentGroup, sceneCurrentFrame)); + if (it != g_serumData.sceneFrameIdByTriplet.end() && + it->second < g_serumData.nframes) { + uint16_t result = g_serumData.sceneGenerator->generateFrame( + lastTriggerID, sceneCurrentFrame, sceneFrame, currentGroup, true); + DebugLogSceneEvent("generate", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); + if (result == 0xffff) { + mySerum.rotationtimer = sceneDurationPerFrame; + Serum_ColorizeWithMetadatav2Internal(sceneFrame, true, it->second); + renderedFromDirectTriplet = true; + } else { + DebugLogSceneEvent( + "generate-error", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, 
sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); + sceneFrameCount = 0; // error generating scene frame, stop the scene + mySerum.rotationtimer = 0; + ForceNormalFrameRefreshAfterSceneEnd(); + return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | + FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; + } + } + } if (DebugSceneVerboseEnabled()) { Log("Serum debug scene path: sceneId=%u frameIndex=%u group=%u " "usedTriplet=%s tripletCount=%u", static_cast(lastTriggerID), sceneCurrentFrame, currentGroup, - "false", + renderedFromDirectTriplet ? "true" : "false", static_cast(g_serumData.sceneFrameIdByTriplet.size())); } - uint16_t result = g_serumData.sceneGenerator->generateFrame( - lastTriggerID, sceneCurrentFrame, sceneFrame, - hasGroup ? currentGroup : -1); - DebugLogSceneEvent("generate", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); - if (result > 0 && result < 0xffff) { - // frame not ready yet, return the time to wait - mySerum.rotationtimer = result; - return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; - } - if (result != 0xffff) { - DebugLogSceneEvent( - "generate-error", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, - sceneOptionFlags, sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); - sceneFrameCount = 0; // error generating scene frame, stop the scene - mySerum.rotationtimer = 0; - ForceNormalFrameRefreshAfterSceneEnd(); - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | - FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; + if (!renderedFromDirectTriplet) { + uint16_t result = g_serumData.sceneGenerator->generateFrame( + lastTriggerID, sceneCurrentFrame, sceneFrame, + hasGroup ? 
currentGroup : -1); + DebugLogSceneEvent("generate", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); + if (result > 0 && result < 0xffff) { + // frame not ready yet, return the time to wait + mySerum.rotationtimer = result; + return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; + } + if (result != 0xffff) { + DebugLogSceneEvent( + "generate-error", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, + sceneOptionFlags, sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, result); + sceneFrameCount = 0; // error generating scene frame, stop the scene + mySerum.rotationtimer = 0; + ForceNormalFrameRefreshAfterSceneEnd(); + return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | + FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; + } + mySerum.rotationtimer = sceneDurationPerFrame; + Serum_ColorizeWithMetadatav2(sceneFrame, true); + } else { + DebugLogSceneEvent("triplet-render", static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, 0xffff); } - mySerum.rotationtimer = sceneDurationPerFrame; - Serum_ColorizeWithMetadatav2(sceneFrame, true); sceneCurrentFrame++; if (sceneCurrentFrame >= sceneFrameCount && sceneRepeatCount > 0) { From 1f1db6cf04beeeb2f75230e8de92ec1c2db873a1 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 22:05:34 +0100 Subject: [PATCH 34/42] next try --- AGENTS.md | 9 +++++---- src/serum-decode.cpp | 16 +++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 458e99d..214c6f1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -174,10 +174,11 @@ Behavior: - Scene requests use signature lookup in `sceneFramesBySignature` for the 
current `(mask,shape,hash)`. - Scene rendering can bypass generic scene identification when a direct triplet entry exists in `sceneFrameIdByTriplet`. -- During scene playback, direct-triplet mode still generates the scene marker - frame via `generateFrame(..., disableTimer=true)` so the runtime sees the same - scene marker buffer as the legacy path, but it bypasses `Identify_Frame()` - by supplying the precomputed `sceneFrameIdByTriplet` frame ID directly. +- During scene playback, direct-triplet mode still uses + `SceneGenerator::generateFrame(...)` for normal timing/group progression and + to produce the current scene marker buffer, but it bypasses + `Identify_Frame()` by supplying the precomputed `sceneFrameIdByTriplet` + frame ID directly to the internal colorizer. - Legacy same-frame behavior (`IDENTIFY_SAME_FRAME`) is preserved with full-frame CRC check. Return values: diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index d525fce..ea32c42 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -4327,17 +4327,17 @@ uint32_t Serum_RenderScene(void) { if (it != g_serumData.sceneFrameIdByTriplet.end() && it->second < g_serumData.nframes) { uint16_t result = g_serumData.sceneGenerator->generateFrame( - lastTriggerID, sceneCurrentFrame, sceneFrame, currentGroup, true); + lastTriggerID, sceneCurrentFrame, sceneFrame, currentGroup); DebugLogSceneEvent("generate", static_cast(lastTriggerID), sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, sceneOptionFlags, sceneInterruptable, sceneStartImmediately, sceneRepeatCount, currentGroup, result); - if (result == 0xffff) { - mySerum.rotationtimer = sceneDurationPerFrame; - Serum_ColorizeWithMetadatav2Internal(sceneFrame, true, it->second); - renderedFromDirectTriplet = true; - } else { + if (result > 0 && result < 0xffff) { + mySerum.rotationtimer = result; + return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; + } + if (result != 0xffff) { DebugLogSceneEvent( "generate-error", 
static_cast(lastTriggerID), sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, @@ -4349,6 +4349,9 @@ uint32_t Serum_RenderScene(void) { return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; } + mySerum.rotationtimer = sceneDurationPerFrame; + Serum_ColorizeWithMetadatav2Internal(sceneFrame, true, it->second); + renderedFromDirectTriplet = true; } } if (DebugSceneVerboseEnabled()) { @@ -4358,7 +4361,6 @@ uint32_t Serum_RenderScene(void) { renderedFromDirectTriplet ? "true" : "false", static_cast(g_serumData.sceneFrameIdByTriplet.size())); } - if (!renderedFromDirectTriplet) { uint16_t result = g_serumData.sceneGenerator->generateFrame( lastTriggerID, sceneCurrentFrame, sceneFrame, From 16cbcee43bed75bd11b58447e884132dc50ba39f Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Thu, 19 Mar 2026 22:23:52 +0100 Subject: [PATCH 35/42] fixed scene rotation timing --- AGENTS.md | 14 +++++++++----- src/serum-decode.cpp | 40 +++++++++++++++++++--------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 214c6f1..0ed9386 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -174,11 +174,12 @@ Behavior: - Scene requests use signature lookup in `sceneFramesBySignature` for the current `(mask,shape,hash)`. - Scene rendering can bypass generic scene identification when a direct triplet entry exists in `sceneFrameIdByTriplet`. -- During scene playback, direct-triplet mode still uses - `SceneGenerator::generateFrame(...)` for normal timing/group progression and - to produce the current scene marker buffer, but it bypasses - `Identify_Frame()` by supplying the precomputed `sceneFrameIdByTriplet` - frame ID directly to the internal colorizer. 
+- During scene playback, direct-triplet mode uses libserum-owned timing + (`sceneDurationPerFrame` plus a runtime next-frame timestamp) together with + `SceneGenerator::updateAndGetCurrentGroup(...)`, and bypasses both + `SceneGenerator::generateFrame(...)` and `Identify_Frame()` by supplying the + precomputed `sceneFrameIdByTriplet` frame ID directly to the internal + colorizer. - Legacy same-frame behavior (`IDENTIFY_SAME_FRAME`) is preserved with full-frame CRC check. Return values: @@ -208,6 +209,9 @@ How it works: - generate each `(sceneId,group,frameIndex)` scene marker frame - identify it once - persist mapping in `sceneFrameIdByTriplet`. + - Runtime playback then combines this triplet lookup with trigger-provided + `sceneDurationPerFrame`; it does not regenerate marker frames on each + scene tick. 6. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. Log line: diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index ea32c42..0fd4edd 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -136,6 +136,7 @@ uint8_t sceneRepeatCount = 0; uint8_t sceneOptionFlags = 0; uint32_t sceneEndHoldUntilMs = 0; uint32_t sceneEndHoldDurationMs = 0; +uint32_t sceneNextFrameAtMs = 0; uint8_t sceneFrame[256 * 64] = {0}; uint8_t lastFrame[256 * 64] = {0}; uint32_t lastFrameId = 0; // last frame ID identified @@ -871,6 +872,7 @@ void Serum_free(void) { first_match_scene = true; sceneEndHoldUntilMs = 0; sceneEndHoldDurationMs = 0; + sceneNextFrameAtMs = 0; monochromeMode = false; monochromePaletteMode = false; monochromePaletteV2Length = 0; @@ -3826,6 +3828,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, sceneIsLastBackgroundFrame = false; sceneEndHoldUntilMs = 0; sceneEndHoldDurationMs = 0; + sceneNextFrameAtMs = 0; mySerum.rotationtimer = 0; // lastfound is set by Identify_Frame, check if we have a new PUP @@ -4273,6 +4276,7 @@ uint32_t Serum_RenderScene(void) { // End hold elapsed: finish scene now. 
sceneEndHoldUntilMs = 0; + sceneNextFrameAtMs = 0; DebugLogSceneEvent( "end-hold-finished", static_cast(lastTriggerID), sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, @@ -4326,30 +4330,20 @@ uint32_t Serum_RenderScene(void) { currentGroup, sceneCurrentFrame)); if (it != g_serumData.sceneFrameIdByTriplet.end() && it->second < g_serumData.nframes) { - uint16_t result = g_serumData.sceneGenerator->generateFrame( - lastTriggerID, sceneCurrentFrame, sceneFrame, currentGroup); - DebugLogSceneEvent("generate", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, - sceneDurationPerFrame, sceneOptionFlags, - sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); - if (result > 0 && result < 0xffff) { - mySerum.rotationtimer = result; + if (sceneNextFrameAtMs > now) { + const uint16_t waitMs = + static_cast(sceneNextFrameAtMs - now); + DebugLogSceneEvent("triplet-wait", + static_cast(lastTriggerID), + sceneCurrentFrame, sceneFrameCount, + sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, + sceneRepeatCount, currentGroup, waitMs); + mySerum.rotationtimer = waitMs; return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; } - if (result != 0xffff) { - DebugLogSceneEvent( - "generate-error", static_cast(lastTriggerID), - sceneCurrentFrame, sceneFrameCount, sceneDurationPerFrame, - sceneOptionFlags, sceneInterruptable, sceneStartImmediately, - sceneRepeatCount, currentGroup, result); - sceneFrameCount = 0; // error generating scene frame, stop the scene - mySerum.rotationtimer = 0; - ForceNormalFrameRefreshAfterSceneEnd(); - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | - FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; - } mySerum.rotationtimer = sceneDurationPerFrame; + sceneNextFrameAtMs = now + sceneDurationPerFrame; Serum_ColorizeWithMetadatav2Internal(sceneFrame, true, it->second); renderedFromDirectTriplet = true; } @@ -4383,11 +4377,13 @@ uint32_t 
Serum_RenderScene(void) { sceneRepeatCount, currentGroup, result); sceneFrameCount = 0; // error generating scene frame, stop the scene mySerum.rotationtimer = 0; + sceneNextFrameAtMs = 0; ForceNormalFrameRefreshAfterSceneEnd(); return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; } mySerum.rotationtimer = sceneDurationPerFrame; + sceneNextFrameAtMs = now + sceneDurationPerFrame; Serum_ColorizeWithMetadatav2(sceneFrame, true); } else { DebugLogSceneEvent("triplet-render", static_cast(lastTriggerID), @@ -4423,6 +4419,7 @@ uint32_t Serum_RenderScene(void) { sceneFrameCount = 0; // scene ended mySerum.rotationtimer = 0; + sceneNextFrameAtMs = 0; ForceNormalFrameRefreshAfterSceneEnd(); switch (sceneOptionFlags) { @@ -4686,6 +4683,7 @@ SERUM_API uint32_t Serum_Scene_Trigger(uint16_t sceneId) { ConfigureSceneEndHold(sceneId, sceneInterruptable, sceneOptionFlags); sceneIsLastBackgroundFrame = false; sceneCurrentFrame = 0; + sceneNextFrameAtMs = 0; if ((sceneOptionFlags & FLAG_SCENE_RESUME_IF_RETRIGGERED) == FLAG_SCENE_RESUME_IF_RETRIGGERED) { From 96e2d342bea64c41580c412b735c346bdc6f7e68 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 12:12:16 +0100 Subject: [PATCH 36/42] critical path --- AGENTS.md | 29 +++++++++++++- src/SerumData.cpp | 37 ++++++++++++++++++ src/SerumData.h | 40 ++++++++++++++++++- src/serum-decode.cpp | 93 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 195 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 0ed9386..85f01c5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -87,6 +87,10 @@ Vector policy currently used in `SerumData`: rather than direct `unordered_map` archive order, so `cROMc` output stays platform-independent and deterministic for identical source data. - `ColorInRotation` uses lookup-only runtime path (no linear scan fallback). 
+- Critical monochrome trigger frames use a precomputed lookup: + `criticalTriggerFramesBySignature[(mask,shape,hash)] -> frameId(s)` + restored from v6 cROMc when present and rebuilt during frame-lookup + preprocessing otherwise. - Sprite runtime sidecars are precomputed and used by `Check_Spritesv2`: - frame candidate list with sprite slot indices (`spriteCandidateOffsets`, `spriteCandidateIds`, `spriteCandidateSlots`) @@ -212,7 +216,13 @@ How it works: - Runtime playback then combines this triplet lookup with trigger-provided `sceneDurationPerFrame`; it does not regenerate marker frames on each scene tick. -6. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. +6. During the same preprocessing pass, build critical monochrome-trigger + signatures for non-scene frames: + - include only frames with trigger IDs: + - `MONOCHROME_TRIGGER_ID` + - `MONOCHROME_PALETTE_TRIGGER_ID` + - persist mapping in `criticalTriggerFramesBySignature`. +7. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. Log line: - `Loaded X frames and Y rotation scene frames` @@ -252,6 +262,21 @@ Background placeholder policy: - When true, frame-level background images are treated as placeholders and existing output pixel is kept in masked background areas. - This is used when a background scene is active so the scene background can continue while foreground content changes. +Critical-trigger fast rejection: +- While a non-interruptable scene (or its end-hold) is active, normal incoming + frames are not always sent through full `Identify_Frame(...)`. +- `Serum_ColorizeWithMetadatav2Internal(...)` first checks a tiny precomputed + subset containing only non-scene frames with trigger IDs: + - `MONOCHROME_TRIGGER_ID` + - `MONOCHROME_PALETTE_TRIGGER_ID` +- If no such critical trigger frame matches, the incoming frame is rejected + immediately without full identification. 
+- This preserves important monochrome/service-menu transitions while avoiding + most irrelevant input-frame work during non-interruptable scenes. +- If such a critical monochrome trigger frame does match, it is allowed to + preempt the non-interruptable scene immediately; libserum stops the current + scene/end-hold and processes the monochrome-trigger frame normally. + ## Scene playback and options Scene data comes from CSV (`SceneGenerator`). @@ -276,6 +301,8 @@ Stored in v6: - `frameIsScene` - `sceneFramesBySignature` - `sceneFrameIdByTriplet` +- Critical monochrome-trigger lookup: + - `criticalTriggerFramesBySignature` - Color-rotation lookup acceleration: - `colorRotationLookupByFrameAndColor` - Derived lookup tables are serialized in canonical sorted-entry form instead of diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 569e7fe..df1edbd 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -8,6 +8,16 @@ bool is_real_machine(); +namespace { +constexpr uint32_t kMonochromeTriggerId = 65432u; +constexpr uint32_t kMonochromePaletteTriggerId = 65431u; + +uint64_t MakeCriticalTriggerSignature(uint8_t mask, uint8_t shape, + uint32_t hash) { + return (uint64_t(mask) << 40) | (uint64_t(shape) << 32) | hash; +} +} // namespace + static uint32_t GetDebugSpriteIdFromEnv() { const char *value = std::getenv("SERUM_DEBUG_SPRITE_ID"); if (!value || value[0] == '\0') { @@ -177,6 +187,33 @@ void SerumData::Clear() { sceneFramesBySignature.clear(); sceneFrameIdByTriplet.clear(); colorRotationLookupByFrameAndColor.clear(); + criticalTriggerFramesBySignature.clear(); +} + +void SerumData::BuildCriticalTriggerLookup() { + criticalTriggerFramesBySignature.clear(); + if (nframes == 0) { + return; + } + + for (uint32_t frameId = 0; frameId < nframes; ++frameId) { + if (frameId < frameIsScene.size() && frameIsScene[frameId] != 0) { + continue; + } + + const uint32_t triggerId = triggerIDs[frameId][0]; + if (triggerId != kMonochromeTriggerId && + triggerId != 
kMonochromePaletteTriggerId) { + continue; + } + + const uint8_t mask = compmaskID[frameId][0]; + const uint8_t shape = shapecompmode[frameId][0]; + const uint32_t hash = hashcodes[frameId][0]; + criticalTriggerFramesBySignature[MakeCriticalTriggerSignature(mask, shape, + hash)] + .push_back(frameId); + } } void SerumData::DebugLogSpriteDynamicSidecarState(const char *stage, diff --git a/src/SerumData.h b/src/SerumData.h index b4be976..e2f3d42 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -90,6 +90,16 @@ class SerumData { } }; + struct CriticalTriggerLookupEntry { + uint64_t key = 0; + std::vector frameIds; + + template + void serialize(Archive &ar) { + ar(key, frameIds); + } + }; + SerumData(); ~SerumData(); @@ -108,6 +118,7 @@ class SerumData { bool LoadFromBuffer(const uint8_t *data, size_t size, const uint8_t flags); void BuildPackingSidecarsAndNormalize(); void BuildSpriteRuntimeSidecars(); + void BuildCriticalTriggerLookup(); void DebugLogSpriteDynamicSidecarState(const char *stage, uint32_t spriteId); void DebugLogPackingSidecarsStorageSizes(); bool HasSpriteRuntimeSidecars() const; @@ -206,6 +217,8 @@ class SerumData { std::unordered_map> sceneFramesBySignature; std::unordered_map sceneFrameIdByTriplet; std::unordered_map colorRotationLookupByFrameAndColor; + std::unordered_map> + criticalTriggerFramesBySignature; SceneGenerator *sceneGenerator; @@ -279,11 +292,26 @@ class SerumData { return a.key < b.key; }); + std::vector criticalTriggerEntries; + criticalTriggerEntries.reserve(criticalTriggerFramesBySignature.size()); + for (const auto &entry : criticalTriggerFramesBySignature) { + CriticalTriggerLookupEntry serialized; + serialized.key = entry.first; + serialized.frameIds = entry.second; + std::sort(serialized.frameIds.begin(), serialized.frameIds.end()); + criticalTriggerEntries.push_back(std::move(serialized)); + } + std::sort(criticalTriggerEntries.begin(), criticalTriggerEntries.end(), + [](const CriticalTriggerLookupEntry &a, + const 
CriticalTriggerLookupEntry &b) { + return a.key < b.key; + }); + ar(frameIsScene, sceneSignatureEntries, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneTripletEntries, colorRotationEntries, + sceneTripletEntries, colorRotationEntries, criticalTriggerEntries, spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, frameHasShapeSprite, spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, spriteDetectMeta, spriteOpaqueRowSegmentStart, @@ -294,11 +322,12 @@ class SerumData { std::vector sceneSignatureEntries; std::vector sceneTripletEntries; std::vector colorRotationEntries; + std::vector criticalTriggerEntries; ar(frameIsScene, sceneSignatureEntries, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, - sceneTripletEntries, colorRotationEntries, + sceneTripletEntries, colorRotationEntries, criticalTriggerEntries, spriteCandidateOffsets, spriteCandidateIds, spriteCandidateSlots, frameHasShapeSprite, spriteWidth, spriteHeight, spriteUsesShape, spriteDetectOffsets, spriteDetectMeta, spriteOpaqueRowSegmentStart, @@ -321,11 +350,18 @@ class SerumData { for (const auto &entry : colorRotationEntries) { colorRotationLookupByFrameAndColor[entry.key] = entry.value; } + + criticalTriggerFramesBySignature.clear(); + criticalTriggerFramesBySignature.reserve(criticalTriggerEntries.size()); + for (const auto &entry : criticalTriggerEntries) { + criticalTriggerFramesBySignature[entry.key] = entry.frameIds; + } } else { frameIsScene.clear(); sceneFramesBySignature.clear(); sceneFrameIdByTriplet.clear(); colorRotationLookupByFrameAndColor.clear(); + criticalTriggerFramesBySignature.clear(); spriteoriginal_opaque.clear(); 
spritemask_extra_opaque.clear(); spritedescriptionso_opaque.clear(); diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 0fd4edd..63f9c99 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -124,6 +124,7 @@ static bool g_debugBypassSceneGate = false; static bool g_debugVerboseIdentify = false; static bool g_debugVerboseSprites = false; static bool g_debugVerboseScenes = false; +static std::vector> g_criticalTriggerMaskShapes; static SerumData g_serumData; uint16_t sceneFrameCount = 0; @@ -177,6 +178,60 @@ const uint16_t greyscale_16[16] = { 0xE71C, // 14/15 0xFFFF // White (31, 63, 31) }; + +extern bool cromloaded; +uint32_t calc_crc32(uint8_t* source, uint8_t mask, uint32_t n, uint8_t Shape); +static uint64_t MakeFrameSignature(uint8_t mask, uint8_t shape, uint32_t hash); + +static void InitCriticalTriggerLookupRuntimeState(void) { + g_criticalTriggerMaskShapes.clear(); + if (g_serumData.criticalTriggerFramesBySignature.empty()) { + return; + } + + std::unordered_set uniqueMaskShapeKeys; + uniqueMaskShapeKeys.reserve(g_serumData.criticalTriggerFramesBySignature.size()); + for (const auto& entry : g_serumData.criticalTriggerFramesBySignature) { + const uint8_t mask = static_cast((entry.first >> 40) & 0xffu); + const uint8_t shape = static_cast((entry.first >> 32) & 0xffu); + const uint16_t key = (uint16_t(mask) << 8) | shape; + if (uniqueMaskShapeKeys.insert(key).second) { + g_criticalTriggerMaskShapes.emplace_back(mask, shape); + } + } +} + +static uint32_t IdentifyCriticalTriggerFrame(uint8_t* frame) { + if (!cromloaded || g_criticalTriggerMaskShapes.empty() || + g_serumData.criticalTriggerFramesBySignature.empty()) { + return IDENTIFY_NO_FRAME; + } + + const uint32_t pixels = g_serumData.is256x64 + ? 
(256 * 64) + : (g_serumData.fwidth * g_serumData.fheight); + for (const auto& maskShape : g_criticalTriggerMaskShapes) { + const uint32_t hash = + calc_crc32(frame, maskShape.first, pixels, maskShape.second); + auto it = g_serumData.criticalTriggerFramesBySignature.find( + MakeFrameSignature(maskShape.first, maskShape.second, hash)); + if (it != g_serumData.criticalTriggerFramesBySignature.end() && + !it->second.empty()) { + return it->second.front(); + } + } + + return IDENTIFY_NO_FRAME; +} + +static bool IsCriticalMonochromeTriggerFrame(uint32_t frameId) { + if (frameId >= g_serumData.nframes) { + return false; + } + const uint32_t triggerId = g_serumData.triggerIDs[frameId][0]; + return triggerId == MONOCHROME_TRIGGER_ID || + triggerId == MONOCHROME_PALETTE_TRIGGER_ID; +} uint16_t monochromePaletteV2[16] = {0}; uint8_t monochromePaletteV2Length = 0; @@ -877,6 +932,7 @@ void Serum_free(void) { monochromePaletteMode = false; monochromePaletteV2Length = 0; g_sceneResumeState.clear(); + g_criticalTriggerMaskShapes.clear(); g_serumData.sceneGenerator->Reset(); } @@ -1895,6 +1951,7 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, g_serumData.BuildSpriteRuntimeSidecars(); NoteStartupRssSample("after-sprite-sidecar-build"); } + InitCriticalTriggerLookupRuntimeState(); NoteStartupRssSample("before-runtime"); LogStartupRssSummary(); } @@ -1974,8 +2031,11 @@ static void BuildFrameLookupVectors(void) { .push_back(frameId); numSceneFrames++; } + } + g_serumData.BuildCriticalTriggerLookup(); + if (g_serumData.concentrateFileVersion >= 6) { // Build direct lookup table: (sceneId, group, frameIndex) -> frameId. // Keep this as a preprocessing step only; runtime scene rendering can @@ -3673,6 +3733,17 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, // Identify frame unless caller already resolved a concrete frame ID. 
uint32_t frameID = IDENTIFY_NO_FRAME; + const bool fastRejectNonInterruptableScene = + !sceneFrameRequested && knownFrameId >= g_serumData.nframes && + !monochromeMode && g_serumData.sceneGenerator->isActive() && + (sceneCurrentFrame < sceneFrameCount || sceneEndHoldUntilMs > 0) && + !sceneInterruptable; + if (fastRejectNonInterruptableScene) { + frameID = IdentifyCriticalTriggerFrame(frame); + if (frameID == IDENTIFY_NO_FRAME) { + return IDENTIFY_NO_FRAME; + } + } if (knownFrameId < g_serumData.nframes) { frameID = knownFrameId; lastfound = knownFrameId; @@ -3685,6 +3756,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, first_match_normal = false; lastframe_full_crc_normal = 0; } + } else if (frameID != IDENTIFY_NO_FRAME) { + lastfound = frameID; + lastfound_normal = frameID; + first_match_normal = false; + lastframe_full_crc_normal = 0; } else { frameID = Identify_Frame(frame, sceneFrameRequested); } @@ -3733,13 +3809,28 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, sceneFrameCount, sceneEndHoldUntilMs, g_debugBypassSceneGate ? 
"true" : "false"); } - if (!g_debugBypassSceneGate) { + if (!g_debugBypassSceneGate && + !IsCriticalMonochromeTriggerFrame(lastfound)) { if (keepTriggersInternal || mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) mySerum.triggerID = 0xffffffff; // Scene is active and not interruptable return IDENTIFY_NO_FRAME; } + if (IsCriticalMonochromeTriggerFrame(lastfound)) { + DebugLogSceneEvent( + "stop-critical-monochrome-trigger", + static_cast(lastTriggerID), sceneCurrentFrame, + sceneFrameCount, sceneDurationPerFrame, sceneOptionFlags, + sceneInterruptable, sceneStartImmediately, sceneRepeatCount); + sceneFrameCount = 0; + sceneIsLastBackgroundFrame = false; + sceneEndHoldUntilMs = 0; + sceneEndHoldDurationMs = 0; + sceneNextFrameAtMs = 0; + mySerum.rotationtimer = 0; + ForceNormalFrameRefreshAfterSceneEnd(); + } } // frame identified From 88b9c0930f3cb1ed1b608689ba63dc2d73419693 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 15:18:36 +0100 Subject: [PATCH 37/42] more profiling --- AGENTS.md | 42 +++- src/SerumData.cpp | 3 + src/SerumData.h | 45 ++++- src/serum-decode.cpp | 470 ++++++++++++++++++++++++++++++++++--------- 4 files changed, 455 insertions(+), 105 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 85f01c5..2e02385 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -176,6 +176,15 @@ Behavior: - scene search skips normal frames using `g_serumData.frameIsScene`. - Scene requests use signature lookup in `sceneFramesBySignature` for the current `(mask,shape,hash)`. +- Normal-frame identification uses a persisted signature lookup in + `normalFramesBySignature` to narrow candidates before applying the existing + wrap-around / same-frame selection rules. +- Normal-frame identification does not include a fallback full-frame scan once + `normalFramesBySignature` is available; missing/incorrect lookup data is a + build/load contract bug rather than a runtime fallback case. 
+- Runtime normal identification iterates unique `(mask,shape)` buckets in + frame-order relative to `lastfound_normal`, so frame-order semantics are + preserved while each bucket hash is computed only once per input frame. - Scene rendering can bypass generic scene identification when a direct triplet entry exists in `sceneFrameIdByTriplet`. - During scene playback, direct-triplet mode uses libserum-owned timing @@ -209,20 +218,33 @@ How it works: 4. For each loaded frame ID, if `(mask,shape,hashcodes[id])` signature is in scene signature set: - mark `frameIsScene[id] = 1` - add to `sceneFramesBySignature[signature]`. -5. For v6 (`concentrateFileVersion >= 6`), precompute direct scene frame IDs: +5. Build `normalFramesBySignature` for all non-scene frames: + - `(mask,shape,hash) -> matching normal frame IDs` + - precompute flat normal identification buckets: + - `normalIdentifyBuckets` + - `frameToNormalBucket` + - runtime normal identification uses this as a candidate source while still + preserving wrap-around ordering relative to `lastfound_normal`. + - runtime does not fall back to a linear normal-frame scan if this lookup is + missing or inconsistent. + - runtime walks precomputed `(mask,shape)` buckets in frame order and resolves + matching frame IDs from the lookup, avoiding repeated per-frame hash + computation inside the same bucket and avoiding per-frame bucket-dedup + container overhead. +6. For v6 (`concentrateFileVersion >= 6`), precompute direct scene frame IDs: - generate each `(sceneId,group,frameIndex)` scene marker frame - identify it once - persist mapping in `sceneFrameIdByTriplet`. - Runtime playback then combines this triplet lookup with trigger-provided `sceneDurationPerFrame`; it does not regenerate marker frames on each scene tick. -6. During the same preprocessing pass, build critical monochrome-trigger +7. 
During the same preprocessing pass, build critical monochrome-trigger signatures for non-scene frames: - include only frames with trigger IDs: - `MONOCHROME_TRIGGER_ID` - `MONOCHROME_PALETTE_TRIGGER_ID` - persist mapping in `criticalTriggerFramesBySignature`. -7. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. +8. Initialize `lastfound_scene` / `lastfound_normal` from first available IDs. Log line: - `Loaded X frames and Y rotation scene frames` @@ -300,6 +322,9 @@ Stored in v6: - Scene lookup acceleration: - `frameIsScene` - `sceneFramesBySignature` + - `normalFramesBySignature` + - `normalIdentifyBuckets` + - `frameToNormalBucket` - `sceneFrameIdByTriplet` - Critical monochrome-trigger lookup: - `criticalTriggerFramesBySignature` @@ -368,9 +393,14 @@ v6 snapshot policy: expensive hash tracing controls. - Optional runtime profiling: - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS` is enabled (`1/true/on/yes`), - periodic average timings for `Colorize_Framev2` and `Colorize_Spritev2` - hot paths are logged, along with total average frame render time and - current process RSS memory usage and process-local peak RSS seen so far. + periodic average timings are logged for the full end-to-end rendered-frame + round trip (`frame`), `Colorize_Framev2`, and `Colorize_Spritev2`, along + with average identification time (`Identify_Frame`) split into + normal/scene calls plus the critical-trigger mini-matcher, and current + process RSS memory usage and process-local peak RSS seen so far. + - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS_WINDOWED=1`, the same counters are + reset after each emitted 240-frame block so each `Perf dynamic avg` line + reflects only the most recent window rather than a cumulative average. 
- The same profiler also logs a one-time startup summary before normal frame processing begins: `Perf startup peak: start=...MiB current=...MiB peak=...MiB stage=...` diff --git a/src/SerumData.cpp b/src/SerumData.cpp index df1edbd..211a3d5 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -185,6 +185,9 @@ void SerumData::Clear() { spriteOpaqueRowSegmentCount.clear(); spriteOpaqueSegments.clear(); sceneFramesBySignature.clear(); + normalFramesBySignature.clear(); + normalIdentifyBuckets.clear(); + frameToNormalBucket.clear(); sceneFrameIdByTriplet.clear(); colorRotationLookupByFrameAndColor.clear(); criticalTriggerFramesBySignature.clear(); diff --git a/src/SerumData.h b/src/SerumData.h index e2f3d42..af8d66d 100644 --- a/src/SerumData.h +++ b/src/SerumData.h @@ -100,6 +100,17 @@ class SerumData { } }; + struct NormalBucketEntry { + uint8_t mask = 0; + uint8_t shape = 0; + uint16_t reserved = 0; + + template + void serialize(Archive &ar) { + ar(mask, shape, reserved); + } + }; + SerumData(); ~SerumData(); @@ -215,6 +226,9 @@ class SerumData { std::vector spriteOpaqueRowSegmentCount; std::vector spriteOpaqueSegments; std::unordered_map> sceneFramesBySignature; + std::unordered_map> normalFramesBySignature; + std::vector normalIdentifyBuckets; + std::vector frameToNormalBucket; std::unordered_map sceneFrameIdByTriplet; std::unordered_map colorRotationLookupByFrameAndColor; std::unordered_map> @@ -270,6 +284,21 @@ class SerumData { return a.key < b.key; }); + std::vector normalSignatureEntries; + normalSignatureEntries.reserve(normalFramesBySignature.size()); + for (const auto &entry : normalFramesBySignature) { + SceneSignatureLookupEntry serialized; + serialized.key = entry.first; + serialized.frameIds = entry.second; + std::sort(serialized.frameIds.begin(), serialized.frameIds.end()); + normalSignatureEntries.push_back(std::move(serialized)); + } + std::sort(normalSignatureEntries.begin(), normalSignatureEntries.end(), + [](const SceneSignatureLookupEntry 
&a, + const SceneSignatureLookupEntry &b) { + return a.key < b.key; + }); + std::vector sceneTripletEntries; sceneTripletEntries.reserve(sceneFrameIdByTriplet.size()); for (const auto &entry : sceneFrameIdByTriplet) { @@ -307,7 +336,8 @@ class SerumData { return a.key < b.key; }); - ar(frameIsScene, sceneSignatureEntries, spriteoriginal_opaque, + ar(frameIsScene, sceneSignatureEntries, normalSignatureEntries, + normalIdentifyBuckets, frameToNormalBucket, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, @@ -320,10 +350,12 @@ class SerumData { } else { if (concentrateFileVersion >= 6) { std::vector sceneSignatureEntries; + std::vector normalSignatureEntries; std::vector sceneTripletEntries; std::vector colorRotationEntries; std::vector criticalTriggerEntries; - ar(frameIsScene, sceneSignatureEntries, spriteoriginal_opaque, + ar(frameIsScene, sceneSignatureEntries, normalSignatureEntries, + normalIdentifyBuckets, frameToNormalBucket, spriteoriginal_opaque, spritemask_extra_opaque, spritedescriptionso_opaque, dynamasks_active, dynamasks_extra_active, dynaspritemasks_active, dynaspritemasks_extra_active, frameHasDynamic, frameHasDynamicExtra, @@ -339,6 +371,12 @@ class SerumData { sceneFramesBySignature[entry.key] = entry.frameIds; } + normalFramesBySignature.clear(); + normalFramesBySignature.reserve(normalSignatureEntries.size()); + for (const auto &entry : normalSignatureEntries) { + normalFramesBySignature[entry.key] = entry.frameIds; + } + sceneFrameIdByTriplet.clear(); sceneFrameIdByTriplet.reserve(sceneTripletEntries.size()); for (const auto &entry : sceneTripletEntries) { @@ -359,6 +397,9 @@ class SerumData { } else { frameIsScene.clear(); sceneFramesBySignature.clear(); + normalFramesBySignature.clear(); + normalIdentifyBuckets.clear(); + frameToNormalBucket.clear(); sceneFrameIdByTriplet.clear(); 
colorRotationLookupByFrameAndColor.clear(); criticalTriggerFramesBySignature.clear(); diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 63f9c99..75988de 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -105,14 +105,26 @@ static uint32_t GetEnvUintClamped(const char* name, uint32_t maxValue) { } static bool g_profileDynamicHotPaths = false; +static bool g_profileDynamicHotPathsWindowed = false; static bool g_profileSparseVectors = false; +static uint64_t g_profileRoundTripNs = 0; static uint64_t g_profileColorizeFrameV2Ns = 0; static uint64_t g_profileColorizeSpriteV2Ns = 0; static uint64_t g_profileColorizeCalls = 0; +static uint64_t g_profileIdentifyTotalNs = 0; +static uint64_t g_profileIdentifyNormalNs = 0; +static uint64_t g_profileIdentifySceneNs = 0; +static uint64_t g_profileIdentifyCriticalNs = 0; +static uint64_t g_profileIdentifyNormalCalls = 0; +static uint64_t g_profileIdentifySceneCalls = 0; +static uint64_t g_profileIdentifyCriticalCalls = 0; static uint64_t g_profilePeakRssBytes = 0; static uint64_t g_profileStartupStartRssBytes = 0; static uint64_t g_profileStartupPeakRssBytes = 0; static const char* g_profileStartupPeakStage = "startup-begin"; +static uint32_t g_profileFrameOperationDepth = 0; +static bool g_profileFrameOperationFinished = false; +static std::chrono::steady_clock::time_point g_profileFrameOperationStart; static bool g_debugFrameTracingInitialized = false; static uint32_t g_debugTargetInputCrc = 0; static uint32_t g_debugTargetFrameId = 0xffffffffu; @@ -180,8 +192,45 @@ const uint16_t greyscale_16[16] = { }; extern bool cromloaded; +extern uint32_t lastfound; uint32_t calc_crc32(uint8_t* source, uint8_t mask, uint32_t n, uint8_t Shape); +uint32_t crc32_fast(uint8_t* s, uint32_t n); static uint64_t MakeFrameSignature(uint8_t mask, uint8_t shape, uint32_t hash); +static bool DebugTraceMatches(uint32_t inputCrc, uint32_t frameId); +static bool DebugIdentifyVerboseEnabled(); + +static void 
BeginProfileFrameOperation(void) { + if (!g_profileDynamicHotPaths) { + return; + } + if (g_profileFrameOperationDepth++ == 0) { + g_profileFrameOperationStart = std::chrono::steady_clock::now(); + g_profileFrameOperationFinished = false; + } +} + +static void FinishProfileRenderedFrameOperationMaybe(void) { + if (!g_profileDynamicHotPaths || g_profileFrameOperationDepth == 0 || + g_profileFrameOperationFinished) { + return; + } + g_profileRoundTripNs += + (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now() - g_profileFrameOperationStart) + .count(); + ++g_profileColorizeCalls; + g_profileFrameOperationFinished = true; +} + +static void EndProfileFrameOperation(void) { + if (!g_profileDynamicHotPaths || g_profileFrameOperationDepth == 0) { + return; + } + --g_profileFrameOperationDepth; + if (g_profileFrameOperationDepth == 0) { + g_profileFrameOperationFinished = false; + } +} static void InitCriticalTriggerLookupRuntimeState(void) { g_criticalTriggerMaskShapes.clear(); @@ -202,8 +251,18 @@ static void InitCriticalTriggerLookupRuntimeState(void) { } static uint32_t IdentifyCriticalTriggerFrame(uint8_t* frame) { + const auto profileStart = + g_profileDynamicHotPaths ? 
std::chrono::steady_clock::now() + : std::chrono::steady_clock::time_point{}; if (!cromloaded || g_criticalTriggerMaskShapes.empty() || g_serumData.criticalTriggerFramesBySignature.empty()) { + if (g_profileDynamicHotPaths) { + g_profileIdentifyCriticalNs += + (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now() - profileStart) + .count(); + ++g_profileIdentifyCriticalCalls; + } return IDENTIFY_NO_FRAME; } @@ -217,13 +276,119 @@ static uint32_t IdentifyCriticalTriggerFrame(uint8_t* frame) { MakeFrameSignature(maskShape.first, maskShape.second, hash)); if (it != g_serumData.criticalTriggerFramesBySignature.end() && !it->second.empty()) { + if (g_profileDynamicHotPaths) { + g_profileIdentifyCriticalNs += + (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now() - profileStart) + .count(); + ++g_profileIdentifyCriticalCalls; + } return it->second.front(); } } + if (g_profileDynamicHotPaths) { + g_profileIdentifyCriticalNs += + (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now() - profileStart) + .count(); + ++g_profileIdentifyCriticalCalls; + } return IDENTIFY_NO_FRAME; } +static uint32_t SelectFrameIdInWrapOrder(const std::vector<uint32_t>& frameIds, + uint32_t startFrameId) { + if (frameIds.empty() || g_serumData.nframes == 0) { + return IDENTIFY_NO_FRAME; + } + + uint32_t bestFrameId = frameIds.front(); + uint32_t bestDistance = + (bestFrameId >= startFrameId) + ? (bestFrameId - startFrameId) + : (g_serumData.nframes - startFrameId + bestFrameId); + for (size_t i = 1; i < frameIds.size(); ++i) { + const uint32_t frameId = frameIds[i]; + const uint32_t distance = + (frameId >= startFrameId) ? 
(frameId - startFrameId) + : (g_serumData.nframes - startFrameId + frameId); + if (distance < bestDistance) { + bestDistance = distance; + bestFrameId = frameId; + } + } + return bestFrameId; +} + +static uint32_t ResolveIdentifiedFrameMatch(uint8_t* frame, uint32_t inputCrc, + uint32_t candidateFrameId, + uint8_t mask, + bool& first_match, + uint32_t& lastfound_stream, + uint32_t& lastframe_full_crc) { + if (candidateFrameId >= g_serumData.nframes) { + return IDENTIFY_NO_FRAME; + } + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, candidateFrameId)) { + Log("Serum debug identify candidate: inputCrc=%u frameId=%u " + "mask=%u storedHash=%u lastfound=%u", + inputCrc, candidateFrameId, mask, + g_serumData.hashcodes[candidateFrameId][0], lastfound_stream); + } + if (first_match || candidateFrameId != lastfound_stream || mask < 255) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, candidateFrameId)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=%s firstMatch=%s lastfoundStream=%u mask=%u " + "fullCrcBefore=%u", + inputCrc, candidateFrameId, + first_match ? "first-match" + : (candidateFrameId != lastfound_stream ? "new-frame-id" + : "mask-lt-255"), + first_match ? "true" : "false", lastfound_stream, mask, + lastframe_full_crc); + } + lastfound_stream = candidateFrameId; + lastfound = candidateFrameId; + lastframe_full_crc = crc32_fast(frame, g_serumData.is256x64 + ? (256 * 64) + : (g_serumData.fwidth * + g_serumData.fheight)); + first_match = false; + return candidateFrameId; + } + + const uint32_t full_crc = + crc32_fast(frame, g_serumData.is256x64 + ? 
(256 * 64) + : (g_serumData.fwidth * g_serumData.fheight)); + if (full_crc != lastframe_full_crc) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, candidateFrameId)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=full-crc-diff firstMatch=%s lastfoundStream=%u " + "mask=%u fullCrcBefore=%u fullCrcNow=%u", + inputCrc, candidateFrameId, first_match ? "true" : "false", + lastfound_stream, mask, lastframe_full_crc, full_crc); + } + lastframe_full_crc = full_crc; + lastfound = candidateFrameId; + return candidateFrameId; + } + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(inputCrc, candidateFrameId)) { + Log("Serum debug identify decision: inputCrc=%u frameId=%u " + "reason=same-frame firstMatch=%s lastfoundStream=%u mask=%u " + "fullCrc=%u", + inputCrc, candidateFrameId, first_match ? "true" : "false", + lastfound_stream, mask, full_crc); + } + lastfound = candidateFrameId; + return IDENTIFY_SAME_FRAME; +} + static bool IsCriticalMonochromeTriggerFrame(uint32_t frameId) { if (frameId >= g_serumData.nframes) { return false; @@ -402,6 +567,23 @@ static void LogStartupRssSummary() { startMiB, currentMiB, peakMiB, g_profileStartupPeakStage); } +static void ResetDynamicHotPathProfile() { + g_profileRoundTripNs = 0; + g_profileColorizeFrameV2Ns = 0; + g_profileColorizeSpriteV2Ns = 0; + g_profileColorizeCalls = 0; + g_profileIdentifyTotalNs = 0; + g_profileIdentifyNormalNs = 0; + g_profileIdentifySceneNs = 0; + g_profileIdentifyCriticalNs = 0; + g_profileIdentifyNormalCalls = 0; + g_profileIdentifySceneCalls = 0; + g_profileIdentifyCriticalCalls = 0; + g_profilePeakRssBytes = GetProcessResidentMemoryBytes(); + g_profileFrameOperationDepth = 0; + g_profileFrameOperationFinished = false; +} + static void InitDebugFrameTracingFromEnv(void) { if (g_debugFrameTracingInitialized) { return; @@ -1811,11 +1993,23 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, uint8_t flags) { Serum_free(); 
g_profileDynamicHotPaths = IsEnvFlagEnabled("SERUM_PROFILE_DYNAMIC_HOTPATHS"); + g_profileDynamicHotPathsWindowed = + IsEnvFlagEnabled("SERUM_PROFILE_DYNAMIC_HOTPATHS_WINDOWED"); g_profileSparseVectors = IsEnvFlagEnabled("SERUM_PROFILE_SPARSE_VECTORS"); + g_profileRoundTripNs = 0; g_profileColorizeFrameV2Ns = 0; g_profileColorizeSpriteV2Ns = 0; g_profileColorizeCalls = 0; + g_profileIdentifyTotalNs = 0; + g_profileIdentifyNormalNs = 0; + g_profileIdentifySceneNs = 0; + g_profileIdentifyCriticalNs = 0; + g_profileIdentifyNormalCalls = 0; + g_profileIdentifySceneCalls = 0; + g_profileIdentifyCriticalCalls = 0; g_profilePeakRssBytes = 0; + g_profileFrameOperationDepth = 0; + g_profileFrameOperationFinished = false; ResetStartupRssProfile(); mySerum.SerumVersion = g_serumData.SerumVersion = 0; @@ -1954,6 +2148,7 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, InitCriticalTriggerLookupRuntimeState(); NoteStartupRssSample("before-runtime"); LogStartupRssSummary(); + ResetDynamicHotPathProfile(); } if (is_real_machine()) { monochromeMode = true; @@ -1968,10 +2163,14 @@ static void BuildFrameLookupVectors(void) { uint32_t numSceneFrames = 0; g_serumData.frameIsScene.clear(); g_serumData.sceneFramesBySignature.clear(); + g_serumData.normalFramesBySignature.clear(); + g_serumData.normalIdentifyBuckets.clear(); + g_serumData.frameToNormalBucket.clear(); g_serumData.sceneFrameIdByTriplet.clear(); if (g_serumData.nframes == 0) return; g_serumData.frameIsScene.resize(g_serumData.nframes, 0); + g_serumData.frameToNormalBucket.assign(g_serumData.nframes, 0xffffffffu); const uint32_t pixels = g_serumData.is256x64 ? 
(256 * 64) : (g_serumData.fwidth * g_serumData.fheight); @@ -2084,6 +2283,31 @@ static void BuildFrameLookupVectors(void) { } } + for (uint32_t frameId = 0; frameId < g_serumData.nframes; ++frameId) { + if (g_serumData.frameIsScene[frameId] != 0) { + continue; + } + const uint8_t mask = g_serumData.compmaskID[frameId][0]; + const uint8_t shape = g_serumData.shapecompmode[frameId][0]; + const uint32_t hash = g_serumData.hashcodes[frameId][0]; + uint32_t bucketIndex = 0xffffffffu; + for (uint32_t i = 0; i < g_serumData.normalIdentifyBuckets.size(); ++i) { + const auto& bucket = g_serumData.normalIdentifyBuckets[i]; + if (bucket.mask == mask && bucket.shape == shape) { + bucketIndex = i; + break; + } + } + if (bucketIndex == 0xffffffffu) { + bucketIndex = + static_cast<uint32_t>(g_serumData.normalIdentifyBuckets.size()); + g_serumData.normalIdentifyBuckets.push_back({mask, shape, 0}); + } + g_serumData.frameToNormalBucket[frameId] = bucketIndex; + g_serumData.normalFramesBySignature[MakeFrameSignature(mask, shape, hash)] + .push_back(frameId); + } + Log("Loaded %d frames and %d rotation scene frames", g_serumData.nframes - numSceneFrames, numSceneFrames); @@ -2145,7 +2369,27 @@ static void InitFrameLookupRuntimeStateFromStoredData(void) { } uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { - if (!cromloaded) return IDENTIFY_NO_FRAME; + const auto profileStart = + g_profileDynamicHotPaths ? 
std::chrono::steady_clock::now() + : std::chrono::steady_clock::time_point{}; + auto finishProfile = [&](uint32_t result) -> uint32_t { + if (g_profileDynamicHotPaths) { + const uint64_t elapsedNs = + (uint64_t)std::chrono::duration_cast<std::chrono::nanoseconds>( + std::chrono::steady_clock::now() - profileStart) + .count(); + g_profileIdentifyTotalNs += elapsedNs; + if (sceneFrameRequested) { + g_profileIdentifySceneNs += elapsedNs; + ++g_profileIdentifySceneCalls; + } else { + g_profileIdentifyNormalNs += elapsedNs; + ++g_profileIdentifyNormalCalls; + } + } + return result; + }; + if (!cromloaded) return finishProfile(IDENTIFY_NO_FRAME); DebugLogFrameMetadataIfRequested(g_debugTargetFrameId); uint32_t tj = sceneFrameRequested ? lastfound_scene @@ -2154,7 +2398,6 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { ? (256 * 64) : (g_serumData.fwidth * g_serumData.fheight); const uint32_t inputCrc = crc32_fast(frame, pixels); - memset(framechecked, false, g_serumData.nframes); uint32_t& lastfound_stream = sceneFrameRequested ? lastfound_scene : lastfound_normal; bool& first_match = @@ -2162,6 +2405,67 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { uint32_t& lastframe_full_crc = sceneFrameRequested ? 
lastframe_full_crc_scene : lastframe_full_crc_normal; + if (!sceneFrameRequested) { + const uint32_t bucketCount = + static_cast(g_serumData.normalIdentifyBuckets.size()); + if (bucketCount == 0 || g_serumData.frameToNormalBucket.size() != + g_serumData.nframes) { + if (DebugIdentifyVerboseEnabled() && DebugTraceMatchesInputCrc(inputCrc)) { + Log("Serum debug identify miss: inputCrc=%u sceneRequested=false", + inputCrc); + } + return finishProfile(IDENTIFY_NO_FRAME); + } + std::vector bucketVisited(bucketCount, 0); + do { + if (g_serumData.frameIsScene[tj] != 0) { + if (++tj >= g_serumData.nframes) tj = 0; + continue; + } + + const uint32_t bucketIndex = g_serumData.frameToNormalBucket[tj]; + if (bucketIndex >= bucketCount || bucketVisited[bucketIndex]) { + if (++tj >= g_serumData.nframes) tj = 0; + continue; + } + bucketVisited[bucketIndex] = 1; + + const auto& bucket = g_serumData.normalIdentifyBuckets[bucketIndex]; + const uint8_t mask = bucket.mask; + const uint8_t Shape = bucket.shape; + + const uint32_t Hashc = calc_crc32(frame, mask, pixels, Shape); + if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, tj)) { + Log("Serum debug identify seed: inputCrc=%u startFrame=%u " + "sceneRequested=false mask=%u shape=%u hash=%u", + inputCrc, tj, mask, Shape, Hashc); + } + + auto normalSigIt = g_serumData.normalFramesBySignature.find( + MakeFrameSignature(mask, Shape, Hashc)); + if (normalSigIt != g_serumData.normalFramesBySignature.end() && + !normalSigIt->second.empty()) { + const uint32_t candidateFrameId = + SelectFrameIdInWrapOrder(normalSigIt->second, tj); + const uint32_t resolved = ResolveIdentifiedFrameMatch( + frame, inputCrc, candidateFrameId, mask, first_match, + lastfound_stream, lastframe_full_crc); + if (resolved != IDENTIFY_NO_FRAME) { + return finishProfile(resolved); + } + } + + if (++tj >= g_serumData.nframes) tj = 0; + } while (tj != lastfound_stream); + + if (DebugIdentifyVerboseEnabled() && DebugTraceMatchesInputCrc(inputCrc)) { + 
Log("Serum debug identify miss: inputCrc=%u sceneRequested=false", + inputCrc); + } + return finishProfile(IDENTIFY_NO_FRAME); + } + + memset(framechecked, false, g_serumData.nframes); do { if (g_serumData.frameIsScene[tj] != (sceneFrameRequested ? 1 : 0)) { if (++tj >= g_serumData.nframes) tj = 0; @@ -2212,7 +2516,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { lastfound = ti; lastframe_full_crc = crc32_fast(frame, pixels); first_match = false; - return ti; + return finishProfile(ti); } uint32_t full_crc = crc32_fast(frame, pixels); @@ -2227,7 +2531,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { } lastframe_full_crc = full_crc; lastfound = ti; - return ti; + return finishProfile(ti); } if (DebugIdentifyVerboseEnabled() && DebugTraceMatches(inputCrc, ti)) { @@ -2238,85 +2542,13 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { mask, full_crc); } lastfound = ti; - return IDENTIFY_SAME_FRAME; + return finishProfile(IDENTIFY_SAME_FRAME); } framechecked[tj] = true; if (++tj >= g_serumData.nframes) tj = 0; continue; } - // now we can compare with all the crom frames that share these same mask - // and shapemode - uint32_t ti = tj; - do { - if (g_serumData.frameIsScene[ti] != (sceneFrameRequested ? 
1 : 0)) { - if (++ti >= g_serumData.nframes) ti = 0; - continue; - } - if (!framechecked[ti]) { - if ((g_serumData.compmaskID[ti][0] == mask) && - (g_serumData.shapecompmode[ti][0] == Shape)) { - if (DebugIdentifyVerboseEnabled() && - DebugTraceMatches(inputCrc, ti)) { - Log("Serum debug identify candidate: inputCrc=%u frameId=%u " - "mask=%u shape=%u hash=%u storedHash=%u lastfound=%u", - inputCrc, ti, mask, Shape, Hashc, - g_serumData.hashcodes[ti][0], lastfound_stream); - } - if (Hashc == g_serumData.hashcodes[ti][0]) { - if (first_match || ti != lastfound_stream || mask < 255) { - if (DebugIdentifyVerboseEnabled() && - DebugTraceMatches(inputCrc, ti)) { - Log("Serum debug identify decision: inputCrc=%u frameId=%u " - "reason=%s firstMatch=%s lastfoundStream=%u mask=%u " - "fullCrcBefore=%u", - inputCrc, ti, - first_match ? "first-match" - : (ti != lastfound_stream ? "new-frame-id" - : "mask-lt-255"), - first_match ? "true" : "false", lastfound_stream, mask, - lastframe_full_crc); - } - // Reset_ColorRotations(); - lastfound_stream = ti; - lastfound = ti; - lastframe_full_crc = crc32_fast(frame, pixels); - first_match = false; - return ti; // we found the frame, we return it - } - - uint32_t full_crc = crc32_fast(frame, pixels); - if (full_crc != lastframe_full_crc) { - if (DebugIdentifyVerboseEnabled() && - DebugTraceMatches(inputCrc, ti)) { - Log("Serum debug identify decision: inputCrc=%u frameId=%u " - "reason=full-crc-diff firstMatch=%s lastfoundStream=%u " - "mask=%u fullCrcBefore=%u fullCrcNow=%u", - inputCrc, ti, first_match ? 
"true" : "false", - lastfound_stream, mask, lastframe_full_crc, full_crc); - } - lastframe_full_crc = full_crc; - lastfound = ti; - return ti; // we found the same frame with shape as before, but - // the full frame is different - } - if (DebugIdentifyVerboseEnabled() && - DebugTraceMatches(inputCrc, ti)) { - Log("Serum debug identify decision: inputCrc=%u frameId=%u " - "reason=same-frame firstMatch=%s lastfoundStream=%u " - "mask=%u fullCrc=%u", - inputCrc, ti, first_match ? "true" : "false", - lastfound_stream, mask, full_crc); - } - lastfound = ti; - return IDENTIFY_SAME_FRAME; // we found the frame, but it is the - // same full frame as before (no - // mask) - } - framechecked[ti] = true; - } - } - if (++ti >= g_serumData.nframes) ti = 0; - } while (ti != tj); + framechecked[tj] = true; } if (++tj >= g_serumData.nframes) tj = 0; } while (tj != lastfound_stream); @@ -2325,7 +2557,7 @@ uint32_t Identify_Frame(uint8_t* frame, bool sceneFrameRequested) { Log("Serum debug identify miss: inputCrc=%u sceneRequested=%s", inputCrc, sceneFrameRequested ? 
"true" : "false"); } - return IDENTIFY_NO_FRAME; // we found no corresponding frame + return finishProfile(IDENTIFY_NO_FRAME); // we found no corresponding frame } static uint32_t BuildRuntimeFeatureFlags(uint32_t frameId) { @@ -4106,14 +4338,34 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, .count(); } } + FinishProfileRenderedFrameOperationMaybe(); if (profileNow) { - ++g_profileColorizeCalls; if ((g_profileColorizeCalls % 240u) == 0u) { + const double roundTripMs = + (double)g_profileRoundTripNs / + (double)g_profileColorizeCalls / 1000000.0; const double frameMs = (double)g_profileColorizeFrameV2Ns / (double)g_profileColorizeCalls / 1000000.0; const double spriteMs = (double)g_profileColorizeSpriteV2Ns / (double)g_profileColorizeCalls / 1000000.0; - const double totalMs = frameMs + spriteMs; + const double identifyMs = + (double)g_profileIdentifyTotalNs / + (double)g_profileColorizeCalls / 1000000.0; + const double identifyNormalMs = + g_profileIdentifyNormalCalls == 0 + ? 0.0 + : (double)g_profileIdentifyNormalNs / + (double)g_profileIdentifyNormalCalls / 1000000.0; + const double identifySceneMs = + g_profileIdentifySceneCalls == 0 + ? 0.0 + : (double)g_profileIdentifySceneNs / + (double)g_profileIdentifySceneCalls / 1000000.0; + const double identifyCriticalMs = + g_profileIdentifyCriticalCalls == 0 + ? 
0.0 + : (double)g_profileIdentifyCriticalNs / + (double)g_profileIdentifyCriticalCalls / 1000000.0; const uint64_t rssBytes = GetProcessResidentMemoryBytes(); if (rssBytes > g_profilePeakRssBytes) { g_profilePeakRssBytes = rssBytes; @@ -4122,13 +4374,19 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, const double peakRssMiB = (double)g_profilePeakRssBytes / (1024.0 * 1024.0); Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms " - "Colorize_Spritev2=%.3fms rss=%.1fMiB peak=%.1fMiB over %u " + "Colorize_Spritev2=%.3fms Identify=%.3fms " + "IdentifyNormal=%.3fms IdentifyScene=%.3fms " + "IdentifyCritical=%.3fms rss=%.1fMiB peak=%.1fMiB over %u " "frames", - totalMs, frameMs, spriteMs, rssMiB, peakRssMiB, + roundTripMs, frameMs, spriteMs, identifyMs, identifyNormalMs, + identifySceneMs, identifyCriticalMs, rssMiB, peakRssMiB, (uint32_t)g_profileColorizeCalls); if (g_profileSparseVectors) { g_serumData.LogSparseVectorProfileSnapshot(); } + if (g_profileDynamicHotPathsWindowed) { + ResetDynamicHotPathProfile(); + } } } @@ -4292,6 +4550,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, colorrotnexttime64[ti] = 0; } mySerum.rotationtimer = 0; + FinishProfileRenderedFrameOperationMaybe(); return 0; // "colorized" frame with no rotations } @@ -4301,8 +4560,11 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, SERUM_API uint32_t Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { - return Serum_ColorizeWithMetadatav2Internal(frame, sceneFrameRequested, - IDENTIFY_NO_FRAME); + BeginProfileFrameOperation(); + const uint32_t result = Serum_ColorizeWithMetadatav2Internal( + frame, sceneFrameRequested, IDENTIFY_NO_FRAME); + EndProfileFrameOperation(); + return result; } SERUM_API uint32_t Serum_Colorize(uint8_t* frame) { @@ -4352,6 +4614,11 @@ uint32_t Serum_ApplyRotationsv1(void) { } uint32_t Serum_RenderScene(void) { + BeginProfileFrameOperation(); + auto finishSceneProfile 
= [&](uint32_t result) -> uint32_t { + EndProfileFrameOperation(); + return result; + }; if (g_serumData.sceneGenerator->isActive() && (sceneCurrentFrame < sceneFrameCount || sceneEndHoldUntilMs > 0)) { const uint32_t now = GetMonotonicTimeMs(); @@ -4362,7 +4629,8 @@ uint32_t Serum_RenderScene(void) { sceneFrameCount, sceneDurationPerFrame, sceneOptionFlags, sceneInterruptable, sceneStartImmediately, sceneRepeatCount); mySerum.rotationtimer = sceneEndHoldUntilMs - now; - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_SCENE; + return finishSceneProfile((mySerum.rotationtimer & 0xffff) | + FLAG_RETURNED_V2_SCENE); } // End hold elapsed: finish scene now. @@ -4381,6 +4649,7 @@ uint32_t Serum_RenderScene(void) { case FLAG_SCENE_BLACK_WHEN_FINISHED: if (mySerum.frame32) memset(mySerum.frame32, 0, 32 * mySerum.width32); if (mySerum.frame64) memset(mySerum.frame64, 0, 64 * mySerum.width64); + FinishProfileRenderedFrameOperationMaybe(); break; case FLAG_SCENE_SHOW_PREVIOUS_FRAME_WHEN_FINISHED: @@ -4392,6 +4661,7 @@ uint32_t Serum_RenderScene(void) { memset(mySerum.frame32, 0, 32 * mySerum.width32); if (mySerum.frame64) memset(mySerum.frame64, 0, 64 * mySerum.width64); + FinishProfileRenderedFrameOperationMaybe(); } break; @@ -4407,7 +4677,7 @@ uint32_t Serum_RenderScene(void) { break; } - return FLAG_RETURNED_V2_SCENE; + return finishSceneProfile(FLAG_RETURNED_V2_SCENE); } bool renderedFromDirectTriplet = false; @@ -4431,7 +4701,8 @@ uint32_t Serum_RenderScene(void) { sceneInterruptable, sceneStartImmediately, sceneRepeatCount, currentGroup, waitMs); mySerum.rotationtimer = waitMs; - return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; + return finishSceneProfile(mySerum.rotationtimer | + FLAG_RETURNED_V2_SCENE); } mySerum.rotationtimer = sceneDurationPerFrame; sceneNextFrameAtMs = now + sceneDurationPerFrame; @@ -4458,7 +4729,8 @@ uint32_t Serum_RenderScene(void) { if (result > 0 && result < 0xffff) { // frame not ready yet, return the time to wait 
mySerum.rotationtimer = result; - return mySerum.rotationtimer | FLAG_RETURNED_V2_SCENE; + return finishSceneProfile(mySerum.rotationtimer | + FLAG_RETURNED_V2_SCENE); } if (result != 0xffff) { DebugLogSceneEvent( @@ -4470,8 +4742,10 @@ uint32_t Serum_RenderScene(void) { mySerum.rotationtimer = 0; sceneNextFrameAtMs = 0; ForceNormalFrameRefreshAfterSceneEnd(); - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | - FLAG_RETURNED_V2_ROTATED64 | FLAG_RETURNED_V2_SCENE; + return finishSceneProfile((mySerum.rotationtimer & 0xffff) | + FLAG_RETURNED_V2_ROTATED32 | + FLAG_RETURNED_V2_ROTATED64 | + FLAG_RETURNED_V2_SCENE); } mySerum.rotationtimer = sceneDurationPerFrame; sceneNextFrameAtMs = now + sceneDurationPerFrame; @@ -4505,7 +4779,8 @@ uint32_t Serum_RenderScene(void) { if (sceneEndHoldDurationMs > 0) { sceneEndHoldUntilMs = now + sceneEndHoldDurationMs; mySerum.rotationtimer = sceneEndHoldDurationMs; - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_SCENE; + return finishSceneProfile((mySerum.rotationtimer & 0xffff) | + FLAG_RETURNED_V2_SCENE); } sceneFrameCount = 0; // scene ended @@ -4544,13 +4819,14 @@ uint32_t Serum_RenderScene(void) { } } - return (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | - FLAG_RETURNED_V2_ROTATED64 | - FLAG_RETURNED_V2_SCENE; // scene frame, so we consider both frames - // changed + return finishSceneProfile( + (mySerum.rotationtimer & 0xffff) | FLAG_RETURNED_V2_ROTATED32 | + FLAG_RETURNED_V2_ROTATED64 | + FLAG_RETURNED_V2_SCENE); // scene frame, so we consider both frames + // changed } - return 0; + return finishSceneProfile(0); } uint32_t Serum_ApplyRotationsv2(void) { From 12ee881a401783128f7463ea7824f5215aa045fc Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 16:17:27 +0100 Subject: [PATCH 38/42] more profiling --- AGENTS.md | 5 +++-- src/serum-decode.cpp | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git 
a/AGENTS.md b/AGENTS.md index 2e02385..9dd571d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -396,8 +396,9 @@ v6 snapshot policy: periodic average timings are logged for the full end-to-end rendered-frame round trip (`frame`), `Colorize_Framev2`, and `Colorize_Spritev2`, along with average identification time (`Identify_Frame`) split into - normal/scene calls plus the critical-trigger mini-matcher, and current - process RSS memory usage and process-local peak RSS seen so far. + normal/scene calls plus the critical-trigger mini-matcher, input/result + counters (`inputs`, `rendered`, `same`, `noFrame`), and current process + RSS memory usage and process-local peak RSS seen so far. - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS_WINDOWED=1`, the same counters are reset after each emitted 240-frame block so each `Perf dynamic avg` line reflects only the most recent window rather than a cumulative average. diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 75988de..08d8895 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -118,6 +118,9 @@ static uint64_t g_profileIdentifyCriticalNs = 0; static uint64_t g_profileIdentifyNormalCalls = 0; static uint64_t g_profileIdentifySceneCalls = 0; static uint64_t g_profileIdentifyCriticalCalls = 0; +static uint64_t g_profileIncomingFrameCalls = 0; +static uint64_t g_profileNoFrameReturns = 0; +static uint64_t g_profileSameFrameReturns = 0; static uint64_t g_profilePeakRssBytes = 0; static uint64_t g_profileStartupStartRssBytes = 0; static uint64_t g_profileStartupPeakRssBytes = 0; @@ -579,6 +582,9 @@ static void ResetDynamicHotPathProfile() { g_profileIdentifyNormalCalls = 0; g_profileIdentifySceneCalls = 0; g_profileIdentifyCriticalCalls = 0; + g_profileIncomingFrameCalls = 0; + g_profileNoFrameReturns = 0; + g_profileSameFrameReturns = 0; g_profilePeakRssBytes = GetProcessResidentMemoryBytes(); g_profileFrameOperationDepth = 0; g_profileFrameOperationFinished = false; @@ -3962,6 +3968,10 @@ static uint32_t 
Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, mySerum.triggerID = 0xffffffff; mySerum.frameID = IDENTIFY_NO_FRAME; g_debugCurrentInputCrc = 0; + if (g_profileDynamicHotPaths && !sceneFrameRequested && + knownFrameId >= g_serumData.nframes) { + ++g_profileIncomingFrameCalls; + } // Identify frame unless caller already resolved a concrete frame ID. uint32_t frameID = IDENTIFY_NO_FRAME; @@ -3973,6 +3983,9 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, if (fastRejectNonInterruptableScene) { frameID = IdentifyCriticalTriggerFrame(frame); if (frameID == IDENTIFY_NO_FRAME) { + if (g_profileDynamicHotPaths && !sceneFrameRequested) { + ++g_profileNoFrameReturns; + } return IDENTIFY_NO_FRAME; } } @@ -4047,6 +4060,9 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) mySerum.triggerID = 0xffffffff; // Scene is active and not interruptable + if (g_profileDynamicHotPaths && !sceneFrameRequested) { + ++g_profileNoFrameReturns; + } return IDENTIFY_NO_FRAME; } if (IsCriticalMonochromeTriggerFrame(lastfound)) { @@ -4088,6 +4104,9 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, if (keepTriggersInternal || mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) mySerum.triggerID = 0xffffffff; + if (g_profileDynamicHotPaths && !sceneFrameRequested) { + ++g_profileSameFrameReturns; + } return IDENTIFY_SAME_FRAME; } @@ -4123,6 +4142,9 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, // New frame has the same Trigger ID, continuing an already running // seamless looped scene. // Wait for the next rotation to have a smooth transition. 
+ if (g_profileDynamicHotPaths) { + ++g_profileSameFrameReturns; + } return IDENTIFY_SAME_FRAME; } else if (sceneIsLastBackgroundFrame && (sceneOptionFlags & FLAG_SCENE_AS_BACKGROUND) == @@ -4376,10 +4398,16 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms " "Colorize_Spritev2=%.3fms Identify=%.3fms " "IdentifyNormal=%.3fms IdentifyScene=%.3fms " - "IdentifyCritical=%.3fms rss=%.1fMiB peak=%.1fMiB over %u " + "IdentifyCritical=%.3fms inputs=%llu rendered=%llu " + "same=%llu noFrame=%llu rss=%.1fMiB peak=%.1fMiB over %u " "frames", roundTripMs, frameMs, spriteMs, identifyMs, identifyNormalMs, - identifySceneMs, identifyCriticalMs, rssMiB, peakRssMiB, + identifySceneMs, identifyCriticalMs, + static_cast<unsigned long long>(g_profileIncomingFrameCalls), + static_cast<unsigned long long>(g_profileColorizeCalls), + static_cast<unsigned long long>(g_profileSameFrameReturns), + static_cast<unsigned long long>(g_profileNoFrameReturns), + rssMiB, peakRssMiB, (uint32_t)g_profileColorizeCalls); if (g_profileSparseVectors) { g_serumData.LogSparseVectorProfileSnapshot(); @@ -4555,6 +4583,9 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, return 0; // "colorized" frame with no rotations } + if (g_profileDynamicHotPaths && !sceneFrameRequested) { + ++g_profileNoFrameReturns; + } return IDENTIFY_NO_FRAME; // no new frame, client has to update rotations! 
} From 22d1587ec144ed4c863da8d102cd961277863ae4 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 17:07:09 +0100 Subject: [PATCH 39/42] better profiling --- AGENTS.md | 6 +- src/serum-decode.cpp | 138 +++++++++++++++++++++++++------------------ 2 files changed, 86 insertions(+), 58 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 9dd571d..26ef6b0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -399,8 +399,12 @@ v6 snapshot policy: normal/scene calls plus the critical-trigger mini-matcher, input/result counters (`inputs`, `rendered`, `same`, `noFrame`), and current process RSS memory usage and process-local peak RSS seen so far. + - `Perf dynamic avg` is emitted on fixed 240-input host windows, not on + rendered-output count, so runs stay comparable even when different + branches suppress or render different numbers of frames from the same dump. + The window size is conveyed by `inputs=...`; no extra trailer is appended. - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS_WINDOWED=1`, the same counters are - reset after each emitted 240-frame block so each `Perf dynamic avg` line + reset after each emitted 240-input block so each `Perf dynamic avg` line reflects only the most recent window rather than a cumulative average. 
- The same profiler also logs a one-time startup summary before normal frame processing begins: diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 08d8895..418173e 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -121,6 +121,7 @@ static uint64_t g_profileIdentifyCriticalCalls = 0; static uint64_t g_profileIncomingFrameCalls = 0; static uint64_t g_profileNoFrameReturns = 0; static uint64_t g_profileSameFrameReturns = 0; +static uint64_t g_profileLastLoggedInputCount = 0; static uint64_t g_profilePeakRssBytes = 0; static uint64_t g_profileStartupStartRssBytes = 0; static uint64_t g_profileStartupPeakRssBytes = 0; @@ -585,11 +586,82 @@ static void ResetDynamicHotPathProfile() { g_profileIncomingFrameCalls = 0; g_profileNoFrameReturns = 0; g_profileSameFrameReturns = 0; + g_profileLastLoggedInputCount = 0; g_profilePeakRssBytes = GetProcessResidentMemoryBytes(); g_profileFrameOperationDepth = 0; g_profileFrameOperationFinished = false; } +static void MaybeLogDynamicHotPathProfileWindow(bool sceneFrameRequested) { + if (!g_profileDynamicHotPaths || sceneFrameRequested || + g_profileIncomingFrameCalls == 0 || + (g_profileIncomingFrameCalls % 240u) != 0u || + g_profileIncomingFrameCalls == g_profileLastLoggedInputCount) { + return; + } + + const double roundTripMs = + g_profileColorizeCalls == 0 + ? 0.0 + : (double)g_profileRoundTripNs / (double)g_profileColorizeCalls / + 1000000.0; + const double frameMs = + g_profileColorizeCalls == 0 + ? 0.0 + : (double)g_profileColorizeFrameV2Ns / (double)g_profileColorizeCalls / + 1000000.0; + const double spriteMs = + g_profileColorizeCalls == 0 + ? 0.0 + : (double)g_profileColorizeSpriteV2Ns / + (double)g_profileColorizeCalls / 1000000.0; + const double identifyMs = + g_profileColorizeCalls == 0 + ? 0.0 + : (double)g_profileIdentifyTotalNs / (double)g_profileColorizeCalls / + 1000000.0; + const double identifyNormalMs = + g_profileIdentifyNormalCalls == 0 + ? 
0.0 + : (double)g_profileIdentifyNormalNs / + (double)g_profileIdentifyNormalCalls / 1000000.0; + const double identifySceneMs = + g_profileIdentifySceneCalls == 0 + ? 0.0 + : (double)g_profileIdentifySceneNs / + (double)g_profileIdentifySceneCalls / 1000000.0; + const double identifyCriticalMs = + g_profileIdentifyCriticalCalls == 0 + ? 0.0 + : (double)g_profileIdentifyCriticalNs / + (double)g_profileIdentifyCriticalCalls / 1000000.0; + const uint64_t rssBytes = GetProcessResidentMemoryBytes(); + if (rssBytes > g_profilePeakRssBytes) { + g_profilePeakRssBytes = rssBytes; + } + const double rssMiB = (double)rssBytes / (1024.0 * 1024.0); + const double peakRssMiB = (double)g_profilePeakRssBytes / (1024.0 * 1024.0); + Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms " + "Colorize_Spritev2=%.3fms Identify=%.3fms " + "IdentifyNormal=%.3fms IdentifyScene=%.3fms " + "IdentifyCritical=%.3fms inputs=%llu rendered=%llu " + "same=%llu noFrame=%llu rss=%.1fMiB peak=%.1fMiB", + roundTripMs, frameMs, spriteMs, identifyMs, identifyNormalMs, + identifySceneMs, identifyCriticalMs, + static_cast(g_profileIncomingFrameCalls), + static_cast(g_profileColorizeCalls), + static_cast(g_profileSameFrameReturns), + static_cast(g_profileNoFrameReturns), rssMiB, + peakRssMiB); + if (g_profileSparseVectors) { + g_serumData.LogSparseVectorProfileSnapshot(); + } + g_profileLastLoggedInputCount = g_profileIncomingFrameCalls; + if (g_profileDynamicHotPathsWindowed) { + ResetDynamicHotPathProfile(); + } +} + static void InitDebugFrameTracingFromEnv(void) { if (g_debugFrameTracingInitialized) { return; @@ -3986,6 +4058,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, if (g_profileDynamicHotPaths && !sceneFrameRequested) { ++g_profileNoFrameReturns; } + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return IDENTIFY_NO_FRAME; } } @@ -4063,6 +4136,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, if (g_profileDynamicHotPaths && 
!sceneFrameRequested) { ++g_profileNoFrameReturns; } + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return IDENTIFY_NO_FRAME; } if (IsCriticalMonochromeTriggerFrame(lastfound)) { @@ -4107,6 +4181,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, if (g_profileDynamicHotPaths && !sceneFrameRequested) { ++g_profileSameFrameReturns; } + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return IDENTIFY_SAME_FRAME; } @@ -4145,6 +4220,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, if (g_profileDynamicHotPaths) { ++g_profileSameFrameReturns; } + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return IDENTIFY_SAME_FRAME; } else if (sceneIsLastBackgroundFrame && (sceneOptionFlags & FLAG_SCENE_AS_BACKGROUND) == @@ -4263,8 +4339,10 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, sceneOptionFlags, sceneInterruptable, sceneStartImmediately, sceneRepeatCount); uint32_t sceneRotationResult = Serum_RenderScene(); - if (sceneRotationResult & FLAG_RETURNED_V2_SCENE) + if (sceneRotationResult & FLAG_RETURNED_V2_SCENE) { + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return sceneRotationResult; + } } mySerum.rotationtimer = sceneDurationPerFrame; rotationIsScene = true; @@ -4361,62 +4439,6 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, } } FinishProfileRenderedFrameOperationMaybe(); - if (profileNow) { - if ((g_profileColorizeCalls % 240u) == 0u) { - const double roundTripMs = - (double)g_profileRoundTripNs / - (double)g_profileColorizeCalls / 1000000.0; - const double frameMs = (double)g_profileColorizeFrameV2Ns / - (double)g_profileColorizeCalls / 1000000.0; - const double spriteMs = (double)g_profileColorizeSpriteV2Ns / - (double)g_profileColorizeCalls / 1000000.0; - const double identifyMs = - (double)g_profileIdentifyTotalNs / - (double)g_profileColorizeCalls / 1000000.0; - const double identifyNormalMs = - g_profileIdentifyNormalCalls == 
0 - ? 0.0 - : (double)g_profileIdentifyNormalNs / - (double)g_profileIdentifyNormalCalls / 1000000.0; - const double identifySceneMs = - g_profileIdentifySceneCalls == 0 - ? 0.0 - : (double)g_profileIdentifySceneNs / - (double)g_profileIdentifySceneCalls / 1000000.0; - const double identifyCriticalMs = - g_profileIdentifyCriticalCalls == 0 - ? 0.0 - : (double)g_profileIdentifyCriticalNs / - (double)g_profileIdentifyCriticalCalls / 1000000.0; - const uint64_t rssBytes = GetProcessResidentMemoryBytes(); - if (rssBytes > g_profilePeakRssBytes) { - g_profilePeakRssBytes = rssBytes; - } - const double rssMiB = (double)rssBytes / (1024.0 * 1024.0); - const double peakRssMiB = - (double)g_profilePeakRssBytes / (1024.0 * 1024.0); - Log("Perf dynamic avg: frame=%.3fms Colorize_Framev2=%.3fms " - "Colorize_Spritev2=%.3fms Identify=%.3fms " - "IdentifyNormal=%.3fms IdentifyScene=%.3fms " - "IdentifyCritical=%.3fms inputs=%llu rendered=%llu " - "same=%llu noFrame=%llu rss=%.1fMiB peak=%.1fMiB over %u " - "frames", - roundTripMs, frameMs, spriteMs, identifyMs, identifyNormalMs, - identifySceneMs, identifyCriticalMs, - static_cast(g_profileIncomingFrameCalls), - static_cast(g_profileColorizeCalls), - static_cast(g_profileSameFrameReturns), - static_cast(g_profileNoFrameReturns), - rssMiB, peakRssMiB, - (uint32_t)g_profileColorizeCalls); - if (g_profileSparseVectors) { - g_serumData.LogSparseVectorProfileSnapshot(); - } - if (g_profileDynamicHotPathsWindowed) { - ResetDynamicHotPathProfile(); - } - } - } bool allowParallelRotations = (sceneFrameCount == 0) || @@ -4528,6 +4550,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, mySerum.triggerID >= PUP_TRIGGER_MAX_THRESHOLD) mySerum.triggerID = 0xffffffff; + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return (uint32_t)mySerum.rotationtimer | (rotationIsScene ? 
FLAG_RETURNED_V2_SCENE : 0); } @@ -4595,6 +4618,7 @@ Serum_ColorizeWithMetadatav2(uint8_t* frame, bool sceneFrameRequested = false) { const uint32_t result = Serum_ColorizeWithMetadatav2Internal( frame, sceneFrameRequested, IDENTIFY_NO_FRAME); EndProfileFrameOperation(); + MaybeLogDynamicHotPathProfileWindow(sceneFrameRequested); return result; } From 68092386e9fbec23fc67a5a1e991e16070f64c94 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 17:25:24 +0100 Subject: [PATCH 40/42] log guards --- AGENTS.md | 4 ++++ src/serum-decode.cpp | 27 ++++++++++++++++++++------- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 26ef6b0..9b6f84f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -380,6 +380,8 @@ v6 snapshot policy: ## Logging - Central callback configured by `Serum_SetLogCallback`. - `serum-decode.cpp` and `SceneGenerator.cpp` both use callback-based `Log(...)`. +- Successful load logging includes Serum runtime version and, for `cROMc` + loads, the concentrate version. - Missing-file logs from `find_case_insensitive_file(...)` use normalized path joining. - Optional runtime debug tracing is env-gated and split by verbosity: - `SERUM_DEBUG_TRACE_INPUTS=1` enables high-level lifecycle logs (input, @@ -391,6 +393,8 @@ v6 snapshot policy: - `SERUM_DEBUG_INPUT_CRC`, `SERUM_DEBUG_FRAME_ID`, and `SERUM_DEBUG_STAGE_HASHES=1` remain available as output filters and expensive hash tracing controls. + - Debug-only identify/sprite/stage-hash lines must stay silent by default + and may only appear when the corresponding env-gated debug mode is enabled. 
- Optional runtime profiling: - If env `SERUM_PROFILE_DYNAMIC_HOTPATHS` is enabled (`1/true/on/yes`), periodic average timings are logged for the full end-to-end rendered-frame diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index 418173e..e11a4cb 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -1012,7 +1012,7 @@ static void DebugLogSpriteAccepted(uint32_t frameId, uint8_t spriteId, uint16_t frameY, uint16_t spriteX, uint16_t spriteY, uint16_t width, uint16_t height, bool duplicate) { - if (!DebugTraceSpritesForCurrentInput()) { + if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } Log("Serum debug sprite accepted: frameId=%u inputCrc=%u spriteId=%u " @@ -1022,7 +1022,7 @@ static void DebugLogSpriteAccepted(uint32_t frameId, uint8_t spriteId, } static void DebugLogSpriteCheckResult(uint32_t frameId, uint8_t nspr) { - if (!DebugTraceSpritesForCurrentInput()) { + if (!DebugSpriteVerboseEnabled() || !DebugTraceSpritesForCurrentInput()) { return; } Log("Serum debug sprites result: frameId=%u inputCrc=%u matches=%u", frameId, @@ -1492,6 +1492,16 @@ static Serum_Frame_Struc* Serum_LoadConcentratePrepared(const uint8_t flags) { return &mySerum; } +static void LogLoadedColorizationSource(const std::string& path, + bool loadedFromConcentrate) { + if (loadedFromConcentrate) { + Log("Loaded %s (Serum v%d, concentrate v%d)", path.c_str(), + g_serumData.SerumVersion, g_serumData.concentrateFileVersion); + } else { + Log("Loaded %s (Serum v%d)", path.c_str(), g_serumData.SerumVersion); + } +} + Serum_Frame_Struc* Serum_LoadConcentrate(const char* filename, const uint8_t flags) { if (!crc32_ready) CRC32encode(); @@ -2146,7 +2156,7 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, loadedFromConcentrate = (result != NULL); if (result) { NoteStartupRssSample("after-cromc-load"); - Log("Loaded %s", pFoundFile->c_str()); + LogLoadedColorizationSource(*pFoundFile, true); if (csvFoundFile && 
g_serumData.SerumVersion == SERUM_V2 && g_serumData.sceneGenerator->parseCSV(csvFoundFile->c_str())) { sceneDataUpdatedFromCsv = true; @@ -2181,7 +2191,7 @@ SERUM_API Serum_Frame_Struc* Serum_Load(const char* const altcolorpath, result = Serum_LoadFilev1(pFoundFile->c_str(), flags); if (result) { NoteStartupRssSample("after-crom-load"); - Log("Loaded %s", pFoundFile->c_str()); + LogLoadedColorizationSource(*pFoundFile, false); if (csvFoundFile && g_serumData.SerumVersion == SERUM_V2) { sceneDataUpdatedFromCsv = g_serumData.sceneGenerator->parseCSV(csvFoundFile->c_str()); @@ -3541,7 +3551,8 @@ void Colorize_Spritev2(uint8_t* oframe, uint8_t nosprite, uint16_t frx, uint16_t *pfr, *prot; uint16_t* prt; uint32_t* cshft; - const bool traceSprite = DebugTraceMatches(g_debugCurrentInputCrc, IDfound); + const bool traceSprite = + DebugSpriteVerboseEnabled() && DebugTraceMatches(g_debugCurrentInputCrc, IDfound); const bool hasOpaque = g_serumData.spriteoriginal_opaque.hasData(nosprite); const bool hasDynaActive = g_serumData.dynaspritemasks_active.hasData(nosprite); @@ -4192,7 +4203,8 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, g_debugCurrentInputCrc, frameID, sceneFrameRequested ? "true" : "false"); } - if (DebugTraceMatches(g_debugCurrentInputCrc, frameID)) { + if (DebugIdentifyVerboseEnabled() && + DebugTraceMatches(g_debugCurrentInputCrc, frameID)) { Log("Serum debug identify result: inputCrc=%u frameId=%u " "sceneRequested=%s triggerId=%u", g_debugCurrentInputCrc, frameID, @@ -4410,7 +4422,8 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, isBackgroundSceneRequested ? lastFrameId : lastfound); ti++; } - if (DebugTraceMatches( + if (g_debugStageHashes && + DebugTraceMatches( g_debugCurrentInputCrc, isBackgroundSceneRequested ? 
lastFrameId : lastfound)) { uint64_t spriteHash = DebugHashBytesFNV1a64( From b7fedeb4cea6c050be98cc86ad2d278911a1cb03 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 18:13:09 +0100 Subject: [PATCH 41/42] full dynamic fix --- AGENTS.md | 3 +++ src/serum-decode.cpp | 29 +++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 9b6f84f..5d106f0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -79,6 +79,9 @@ Vector policy currently used in `SerumData`: - `frameHasDynamicExtra` - `Colorize_Framev1/v2` uses these flags to bypass dynamic-mask branches entirely for frames without active dynamic pixels. +- Runtime frame rendering must not rely on `activeframes` alone: frames with + background-only or dynamic-only content are still renderable and must pass + the render gate even when static base pixels are absent. - Color rotations use a precomputed lookup index: `colorRotationLookupByFrameAndColor[(frameId,isExtra,color)] -> (rotation,position)` restored from v6 cROMc when present. 
diff --git a/src/serum-decode.cpp b/src/serum-decode.cpp index e11a4cb..5cdd504 100644 --- a/src/serum-decode.cpp +++ b/src/serum-decode.cpp @@ -801,6 +801,27 @@ static void DebugLogFrameMetadataIfRequested(uint32_t frameId) { g_serumData.DebugLogPackingSidecarsStorageSizes(); } +static bool FrameHasRenderableContent(uint32_t frameId) { + if (frameId >= g_serumData.nframes) { + return false; + } + if (g_serumData.activeframes[frameId][0] != 0) { + return true; + } + if (g_serumData.backgroundIDs[frameId][0] < g_serumData.nbackgrounds) { + return true; + } + if (frameId < g_serumData.frameHasDynamic.size() && + g_serumData.frameHasDynamic[frameId] > 0) { + return true; + } + if (frameId < g_serumData.frameHasDynamicExtra.size() && + g_serumData.frameHasDynamicExtra[frameId] > 0) { + return true; + } + return false; +} + static uint64_t DebugHashBytesFNV1a64(const void* data, size_t size) { const uint8_t* bytes = static_cast(data); uint64_t hash = 1469598103934665603ULL; @@ -3882,7 +3903,7 @@ uint32_t Serum_ColorizeWithMetadatav1(uint8_t* frame) { bool isspr = Check_Spritesv1(frame, (uint32_t)lastfound, nosprite, &nspr, frx, fry, spx, spy, wid, hei); if (((frameID < MAX_NUMBER_FRAMES) || isspr) && - g_serumData.activeframes[lastfound][0] != 0) { + FrameHasRenderableContent(lastfound)) { Colorize_Framev1(frame, lastfound); Copy_Frame_Palette(lastfound); { @@ -4385,7 +4406,7 @@ static uint32_t Serum_ColorizeWithMetadatav2Internal(uint8_t* frame, isBackgroundSceneRequested ? 
lastFrameId : lastfound, nosprite, &nspr, frx, fry, spx, spy, wid, hei); if (((frameID < MAX_NUMBER_FRAMES) || isspr) && - g_serumData.activeframes[lastfound][0] != 0) { + FrameHasRenderableContent(lastfound)) { const bool profileNow = g_profileDynamicHotPaths; std::chrono::steady_clock::time_point profStart; if (profileNow) { @@ -4722,7 +4743,7 @@ uint32_t Serum_RenderScene(void) { case FLAG_SCENE_SHOW_PREVIOUS_FRAME_WHEN_FINISHED: if (lastfound < MAX_NUMBER_FRAMES && - g_serumData.activeframes[lastfound][0] != 0) { + FrameHasRenderableContent(lastfound)) { Serum_ColorizeWithMetadatav2(lastFrame); } else { if (mySerum.frame32) @@ -4864,7 +4885,7 @@ uint32_t Serum_RenderScene(void) { case FLAG_SCENE_SHOW_PREVIOUS_FRAME_WHEN_FINISHED: if (lastfound < MAX_NUMBER_FRAMES && - g_serumData.activeframes[lastfound][0] != 0) { + FrameHasRenderableContent(lastfound)) { Serum_ColorizeWithMetadatav2(lastFrame); } else { if (mySerum.frame32) From e6b6da6780d105c41e9ebbdce7ee6e6a62829ec5 Mon Sep 17 00:00:00 2001 From: Markus Kalkbrenner Date: Fri, 20 Mar 2026 18:41:42 +0100 Subject: [PATCH 42/42] changed constructor noData for legacy source dynamic value vectors from 0 to 255: dynamasks dynamasks_extra dynaspritemasks dynaspritemasks_extra --- AGENTS.md | 3 +++ src/SerumData.cpp | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 5d106f0..7e6cd49 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -71,6 +71,9 @@ Vector policy currently used in `SerumData`: all-zero payload only when the paired active sidecar still marks active pixels; this is required because dynamic layer `0` is a valid value and is not equivalent to "no payload". + - The pre-normalization source vectors for `dynamasks*` and + `dynaspritemasks*` must retain legacy `255` no-data sentinel semantics so + an all-layer-`0` active payload is not dropped before sidecar generation. 
- `BuildPackingSidecarsAndNormalize()` also snapshots each generated sidecar payload into `m_packingSidecarsStorage` as a transient two-dimensional byte store (`std::vector>`). diff --git a/src/SerumData.cpp b/src/SerumData.cpp index 211a3d5..64bda8b 100644 --- a/src/SerumData.cpp +++ b/src/SerumData.cpp @@ -46,9 +46,9 @@ SerumData::SerumData() cframes(0, false, true), cframes_v2(0, false, true), cframes_v2_extra(0, false, true), - dynamasks(0, false, true, true, 0, 1), + dynamasks(255, false, true, true, 0, 1), dynamasks_active(0, false, true, true, 0, 1), - dynamasks_extra(0, false, true, true, 0, 1), + dynamasks_extra(255, false, true, true, 0, 1), dynamasks_extra_active(0, false, true, true, 0, 1), dyna4cols(0), dyna4cols_v2(0, false, true), @@ -87,9 +87,9 @@ SerumData::SerumData() dynashadowscol_extra(0), dynasprite4cols(0), dynasprite4cols_extra(0), - dynaspritemasks(0, false, true, true, 0, 1), + dynaspritemasks(255, false, true, true, 0, 1), dynaspritemasks_active(0, false, true, true, 0, 1), - dynaspritemasks_extra(0, false, true, true, 0, 1), + dynaspritemasks_extra(255, false, true, true, 0, 1), dynaspritemasks_extra_active(0, false, true, true, 0, 1), sprshapemode(0) { cframes_v2.setProfileLabel("cframes_v2");